HPC CODES

This document contains code snippets demonstrating parallel algorithms with OpenMP and CUDA for tree traversal, sorting, reductions, and matrix operations. It includes implementations of parallel breadth-first search (BFS), depth-first search (DFS), bubble sort, merge sort, parallel reductions (min, max, sum, average), vector addition, and matrix multiplication, along with timing comparisons between the sequential and parallel versions.
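
Several of the programs below time a sequential and a parallel version of the same algorithm and print the elapsed milliseconds. As a minimal sketch of that measurement pattern (the workload here, summing a large vector, is an illustrative assumption and not one of the original programs), the same comparison can be written with omp_get_wtime():

// Minimal timing-comparison sketch; compile with OpenMP enabled (e.g. g++ -fopenmp)
#include <iostream>
#include <vector>
#include <omp.h>

int main() {
    // Illustrative workload: sum a large vector sequentially and in parallel
    std::vector<int> data(10000000, 1);

    double t0 = omp_get_wtime();
    long long seqSum = 0;
    for (size_t i = 0; i < data.size(); ++i) seqSum += data[i];
    double seqTime = omp_get_wtime() - t0;

    t0 = omp_get_wtime();
    long long parSum = 0;
    #pragma omp parallel for reduction(+: parSum)
    for (long long i = 0; i < (long long)data.size(); ++i) parSum += data[i];
    double parTime = omp_get_wtime() - t0;

    std::cout << "Sequential: " << seqSum << " in " << seqTime * 1000 << " ms\n";
    std::cout << "Parallel:   " << parSum << " in " << parTime * 1000 << " ms\n";
    return 0;
}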


#include <iostream>
#include <vector>
#include <queue>
#include <omp.h>

using namespace std;

struct Node {
    int data;
    vector<Node*> children;
};

// Level-order (BFS) traversal that processes each level in parallel
void parallel_BFS(Node* root) {
    if (!root) return;

    queue<Node*> q;
    q.push(root);

    while (!q.empty()) {
        int level_size = q.size();
        vector<Node*> next_level;

        #pragma omp parallel for shared(q, next_level)
        for (int i = 0; i < level_size; ++i) {
            Node* current;

            // Synchronize access to the shared queue
            #pragma omp critical
            {
                current = q.front();
                q.pop();
            }

            // Print the node value (thread-safe)
            #pragma omp critical
            cout << current->data << " ";

            // Push children to a shared next-level buffer
            #pragma omp critical
            {
                for (Node* child : current->children)
                    next_level.push_back(child);
            }
        }

        // Enqueue the collected children for the next iteration
        for (Node* node : next_level)
            q.push(node);
    }
}

int main() {
    // Create a sample N-ary tree
    Node* root = new Node{1, {
        new Node{2, {}},
        new Node{3, {
            new Node{4, {}}
        }}
    }};

    cout << "Parallel BFS Output: ";
    parallel_BFS(root);
    cout << endl;

    return 0;
}

OUTPUT:
#include <iostream>
#include <vector>
#include <omp.h>

using namespace std;

struct Node {
    int data;
    vector<Node*> neighbors;
};

// Thread-safe DFS using OpenMP
void parallel_DFS(Node* node, vector<bool>& visited) {
    bool already_visited;

    // Check and mark the node in one critical section to avoid races on visited
    #pragma omp critical
    {
        already_visited = visited[node->data];
        if (!already_visited) {
            visited[node->data] = true;
            cout << node->data << " ";
        }
    }
    if (already_visited) return;

    // Traverse the neighbors in parallel
    #pragma omp parallel for
    for (int i = 0; i < (int)node->neighbors.size(); ++i) {
        Node* neighbor = node->neighbors[i];

        // Cheap pre-check; the recursive call re-validates atomically
        if (!visited[neighbor->data]) {
            parallel_DFS(neighbor, visited);
        }
    }
}

int main() {
    // Create a sample undirected graph (0-1-3, 0-2-4)
    vector<Node> graph(5);
    for (int i = 0; i < 5; ++i) {
        graph[i].data = i;
    }

    graph[0].neighbors = {&graph[1], &graph[2]};
    graph[1].neighbors = {&graph[0], &graph[3]};
    graph[2].neighbors = {&graph[0], &graph[4]};
    graph[3].neighbors = {&graph[1]};
    graph[4].neighbors = {&graph[2]};

    vector<bool> visited(graph.size(), false);

    cout << "Parallel DFS Output: ";
    #pragma omp parallel
    #pragma omp single
    parallel_DFS(&graph[0], visited);
    cout << endl;

    return 0;
}
INPUT:

OUTPUT:
Code: Parallel and Sequential Sorting Algorithms with Time Comparison
#include <iostream>
#include <vector>
#include <chrono>
#include <omp.h>
#include <cstdlib>

using namespace std;

// Swap two elements
void swap(int& a, int& b) {
    int temp = a;
    a = b;
    b = temp;
}

// Sequential Bubble Sort
void bubbleSortSequential(vector<int>& arr) {
    int n = arr.size();
    for (int i = 0; i < n - 1; ++i) {
        bool swapped = false;
        for (int j = 0; j < n - i - 1; ++j) {
            if (arr[j] > arr[j + 1]) {
                swap(arr[j], arr[j + 1]);
                swapped = true;
            }
        }
        if (!swapped) break;
    }
}

// Parallel Bubble Sort (odd-even transposition, so no two threads touch the same pair)
void bubbleSortParallel(vector<int>& arr) {
    int n = arr.size();
    for (int i = 0; i < n; ++i) {
        int start = i % 2; // alternate between even and odd phases
        #pragma omp parallel for
        for (int j = start; j < n - 1; j += 2) {
            if (arr[j] > arr[j + 1]) {
                swap(arr[j], arr[j + 1]);
            }
        }
    }
}

// Merge two sorted halves [left, mid] and [mid + 1, right]
void merge(vector<int>& arr, int left, int mid, int right) {
    int n1 = mid - left + 1;
    int n2 = right - mid;
    vector<int> L(n1), R(n2);
    for (int i = 0; i < n1; i++) L[i] = arr[left + i];
    for (int j = 0; j < n2; j++) R[j] = arr[mid + 1 + j];

    int i = 0, j = 0, k = left;
    while (i < n1 && j < n2) {
        arr[k++] = (L[i] <= R[j]) ? L[i++] : R[j++];
    }
    while (i < n1) arr[k++] = L[i++];
    while (j < n2) arr[k++] = R[j++];
}

// Sequential Merge Sort
void mergeSortSequential(vector<int>& arr, int left, int right) {
    if (left < right) {
        int mid = (left + right) / 2;
        mergeSortSequential(arr, left, mid);
        mergeSortSequential(arr, mid + 1, right);
        merge(arr, left, mid, right);
    }
}

// Parallel Merge Sort using OpenMP tasks
void mergeSortParallel(vector<int>& arr, int left, int right) {
    if (right - left < 1000) { // Small ranges are sorted sequentially
        mergeSortSequential(arr, left, right);
    } else {
        int mid = (left + right) / 2;
        #pragma omp task shared(arr)
        mergeSortParallel(arr, left, mid);
        #pragma omp task shared(arr)
        mergeSortParallel(arr, mid + 1, right);
        #pragma omp taskwait
        merge(arr, left, mid, right);
    }
}

int main() {
    int N;
    cout << "Enter the size of the array: ";
    cin >> N;

    vector<int> original(N);
    cout << "Enter elements: ";
    for (int i = 0; i < N; ++i) {
        cin >> original[i];
    }

    // Sequential Bubble Sort
    vector<int> arr = original;
    auto start = chrono::high_resolution_clock::now();
    bubbleSortSequential(arr);
    auto end = chrono::high_resolution_clock::now();
    double seqBubbleTime = chrono::duration<double, milli>(end - start).count();
    cout << "Sequential Bubble Sort Time: " << seqBubbleTime << " ms\n";

    // Parallel Bubble Sort
    arr = original;
    start = chrono::high_resolution_clock::now();
    bubbleSortParallel(arr);
    end = chrono::high_resolution_clock::now();
    double parBubbleTime = chrono::duration<double, milli>(end - start).count();
    cout << "Parallel Bubble Sort Time: " << parBubbleTime << " ms\n";

    // Sequential Merge Sort
    arr = original;
    start = chrono::high_resolution_clock::now();
    mergeSortSequential(arr, 0, N - 1);
    end = chrono::high_resolution_clock::now();
    double seqMergeTime = chrono::duration<double, milli>(end - start).count();
    cout << "Sequential Merge Sort Time: " << seqMergeTime << " ms\n";

    // Parallel Merge Sort (tasks must be created inside a parallel region)
    arr = original;
    start = chrono::high_resolution_clock::now();
    #pragma omp parallel
    #pragma omp single
    mergeSortParallel(arr, 0, N - 1);
    end = chrono::high_resolution_clock::now();
    double parMergeTime = chrono::duration<double, milli>(end - start).count();
    cout << "Parallel Merge Sort Time: " << parMergeTime << " ms\n";

    return 0;
}

Input:

Output:
#include <iostream>
#include <omp.h>
#include <climits>
#include <chrono>

using namespace std;
using namespace std::chrono;

// Find the minimum value in an array using parallel reduction
void min_reduction(int arr[], int n) {
    int min_value = INT_MAX;
    #pragma omp parallel for reduction(min: min_value)
    for (int i = 0; i < n; i++) {
        if (arr[i] < min_value) {
            min_value = arr[i];
        }
    }
    cout << "Minimum value: " << min_value << endl;
}

// Find the maximum value in an array using parallel reduction
void max_reduction(int arr[], int n) {
    int max_value = INT_MIN;
    #pragma omp parallel for reduction(max: max_value)
    for (int i = 0; i < n; i++) {
        if (arr[i] > max_value) {
            max_value = arr[i];
        }
    }
    cout << "Maximum value: " << max_value << endl;
}

// Calculate the sum of the elements using parallel reduction
void sum_reduction(int arr[], int n) {
    int sum = 0;
    #pragma omp parallel for reduction(+: sum)
    for (int i = 0; i < n; i++) {
        sum += arr[i];
    }
    cout << "Sum: " << sum << endl;
}

// Calculate the average of the elements using parallel reduction
void average_reduction(int arr[], int n) {
    int sum = 0;
    #pragma omp parallel for reduction(+: sum)
    for (int i = 0; i < n; i++) {
        sum += arr[i];
    }
    double average = (double)sum / n;
    cout << "Average: " << average << endl;
}

int main() {
    int n;
    cout << "\nEnter the total number of elements: ";
    cin >> n;

    int* arr = new int[n];
    cout << "\nEnter the elements: ";
    for (int i = 0; i < n; i++) {
        cin >> arr[i];
    }

    auto start = high_resolution_clock::now();
    min_reduction(arr, n);
    max_reduction(arr, n);
    sum_reduction(arr, n);
    average_reduction(arr, n);
    auto stop = high_resolution_clock::now();

    auto duration = duration_cast<milliseconds>(stop - start);
    cout << "Time taken for all reductions: " << duration.count() << " ms" << endl;

    delete[] arr;
    return 0;
}

Input:

Output:
#include <iostream>
#include <cuda_runtime.h>

using namespace std;

// CUDA kernel to add two vectors element-wise
__global__ void addVectors(int* A, int* B, int* C, int n) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n) {
        C[i] = A[i] + B[i];
    }
}

int main() {
    int n = 1000000;
    int *A, *B, *h_C; // Host pointers
    int size = n * sizeof(int);

    // Allocate pinned memory on the host
    cudaMallocHost(&A, size);
    cudaMallocHost(&B, size);
    cudaMallocHost(&h_C, size);

    // Initialize vectors A and B
    for (int i = 0; i < n; i++) {
        A[i] = i;
        B[i] = i * 2;
    }

    // Allocate memory on the device
    int *dev_A, *dev_B, *dev_C;
    cudaMalloc(&dev_A, size);
    cudaMalloc(&dev_B, size);
    cudaMalloc(&dev_C, size);

    // Copy input data from host to device
    cudaMemcpy(dev_A, A, size, cudaMemcpyHostToDevice);
    cudaMemcpy(dev_B, B, size, cudaMemcpyHostToDevice);

    // Kernel launch configuration
    int blockSize = 256;
    int numBlocks = (n + blockSize - 1) / blockSize;

    // Launch the CUDA kernel
    addVectors<<<numBlocks, blockSize>>>(dev_A, dev_B, dev_C, n);

    // Wait for the GPU to finish
    cudaDeviceSynchronize();

    // Copy the result from device to host
    cudaMemcpy(h_C, dev_C, size, cudaMemcpyDeviceToHost);

    // Print the first 10 results
    cout << "First 10 results of vector addition:\n";
    for (int i = 0; i < 10; i++) {
        cout << h_C[i] << " ";
    }
    cout << endl;

    // Free device memory
    cudaFree(dev_A);
    cudaFree(dev_B);
    cudaFree(dev_C);

    // Free pinned host memory
    cudaFreeHost(A);
    cudaFreeHost(B);
    cudaFreeHost(h_C);

    return 0;
}

Output:
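
For brevity, the CUDA programs here do not check the return status of runtime calls or kernel launches. As a minimal sketch (the CUDA_CHECK macro name is an illustrative choice, not part of the original codes), each call can be wrapped so that failures are reported instead of silently ignored:

#include <cstdio>
#include <cstdlib>
#include <cuda_runtime.h>

// Hypothetical helper: abort with a readable message if a CUDA runtime call fails
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        cudaError_t err = (call);                                          \
        if (err != cudaSuccess) {                                          \
            fprintf(stderr, "CUDA error %s at %s:%d\n",                    \
                    cudaGetErrorString(err), __FILE__, __LINE__);          \
            exit(EXIT_FAILURE);                                            \
        }                                                                  \
    } while (0)

// Example usage (illustrative only):
//   CUDA_CHECK(cudaMalloc(&dev_A, size));
//   addVectors<<<numBlocks, blockSize>>>(dev_A, dev_B, dev_C, n);
//   CUDA_CHECK(cudaGetLastError());       // catches launch-configuration errors
//   CUDA_CHECK(cudaDeviceSynchronize());  // catches errors during kernel execution
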
#include <iostream>
#include <cstdlib>
#include <cuda_runtime.h>

using namespace std;

// CUDA kernel to multiply two square matrices
__global__ void multiply(int* A, int* B, int* C, int size) {
    // Each thread uses its block and thread indices to compute one element of C
    int row = blockIdx.y * blockDim.y + threadIdx.y;
    int col = blockIdx.x * blockDim.x + threadIdx.x;
    if (row < size && col < size) {
        int sum = 0;
        for (int i = 0; i < size; i++) {
            sum += A[row * size + i] * B[i * size + col];
        }
        C[row * size + col] = sum;
    }
}

// Fill a matrix with random single-digit values
void initialize(int* matrix, int size) {
    for (int i = 0; i < size * size; i++) {
        matrix[i] = rand() % 10;
    }
}

// Print a size x size matrix stored in row-major order
void print(int* matrix, int size) {
    for (int row = 0; row < size; row++) {
        for (int col = 0; col < size; col++) {
            cout << matrix[row * size + col] << " ";
        }
        cout << '\n';
    }
    cout << '\n';
}

int main() {
    int *A, *B, *C;
    int N = 4; // Matrix dimension (N x N)

    int matrixSize = N * N;
    size_t matrixBytes = matrixSize * sizeof(int);

    A = new int[matrixSize];
    B = new int[matrixSize];
    C = new int[matrixSize];

    initialize(A, N);
    initialize(B, N);

    cout << "Matrix A: \n";
    print(A, N);
    cout << "Matrix B: \n";
    print(B, N);

    int *X, *Y, *Z;

    // Allocate space on the GPU
    cudaMalloc(&X, matrixBytes);
    cudaMalloc(&Y, matrixBytes);
    cudaMalloc(&Z, matrixBytes);

    // Copy matrices A and B to the GPU
    cudaMemcpy(X, A, matrixBytes, cudaMemcpyHostToDevice);
    cudaMemcpy(Y, B, matrixBytes, cudaMemcpyHostToDevice);

    // Set the number of threads per block and blocks per grid
    int THREADS = 2;                           // Each block has THREADS x THREADS threads
    int BLOCKS = (N + THREADS - 1) / THREADS;  // Blocks needed per grid dimension

    // Launch the kernel
    dim3 threads(THREADS, THREADS);
    dim3 blocks(BLOCKS, BLOCKS);
    multiply<<<blocks, threads>>>(X, Y, Z, N);

    // Copy the result back to the host (this copy also synchronizes with the kernel)
    cudaMemcpy(C, Z, matrixBytes, cudaMemcpyDeviceToHost);

    cout << "Multiplication of matrix A and B: \n";
    print(C, N);

    delete[] A;
    delete[] B;
    delete[] C;

    cudaFree(X);
    cudaFree(Y);
    cudaFree(Z);

    return 0;
}
Output:
