HPC Codes-2

Assignment No. 1:
Design and implement Parallel Breadth-First Search and Depth-First Search based on
existing algorithms using OpenMP. Use a tree or an undirected graph for BFS and DFS.

Code:
#include <iostream>
#include <vector>
#include <queue>
#include <omp.h>

using namespace std;

// Graph class representing the adjacency list
class Graph {
    int V;                    // Number of vertices
    vector<vector<int>> adj;  // Adjacency list

public:
    Graph(int V) : V(V), adj(V) {}

    // Add an edge to the graph
    void addEdge(int v, int w) {
        adj[v].push_back(w);
    }

    // Parallel Depth-First Search
    void parallelDFS(int startVertex) {
        vector<bool> visited(V, false);
        visited[startVertex] = true;
        parallelDFSUtil(startVertex, visited);
    }

    // Parallel DFS utility function
    void parallelDFSUtil(int v, vector<bool>& visited) {
        cout << v << " ";

        // Explore the neighbours of v in parallel. The visited check and
        // update happen inside a critical section so that two threads
        // never descend into the same vertex.
        #pragma omp parallel for
        for (int i = 0; i < (int)adj[v].size(); ++i) {
            int n = adj[v][i];
            bool explore = false;
            #pragma omp critical
            {
                if (!visited[n]) {
                    visited[n] = true;
                    explore = true;
                }
            }
            if (explore)
                parallelDFSUtil(n, visited);
        }
    }

    // Parallel Breadth-First Search
    void parallelBFS(int startVertex) {
        vector<bool> visited(V, false);
        queue<int> q;

        visited[startVertex] = true;
        q.push(startVertex);

        while (!q.empty()) {
            int v = q.front();
            q.pop();
            cout << v << " ";

            // Expand the neighbours of v in parallel; the shared queue and
            // visited vector are updated inside a critical section.
            #pragma omp parallel for
            for (int i = 0; i < (int)adj[v].size(); ++i) {
                int n = adj[v][i];
                #pragma omp critical
                {
                    if (!visited[n]) {
                        visited[n] = true;
                        q.push(n);
                    }
                }
            }
        }
    }
};

int main() {
    // Create a graph
    Graph g(7);
    g.addEdge(0, 1);
    g.addEdge(0, 2);
    g.addEdge(1, 3);
    g.addEdge(1, 4);
    g.addEdge(2, 5);
    g.addEdge(2, 6);

    /*
        Tree used for the traversals:

                0
              /   \
             1     2
            / \   / \
           3   4 5   6
    */

    cout << "Depth-First Search (DFS): ";
    g.parallelDFS(0);
    cout << endl;

    cout << "Breadth-First Search (BFS): ";
    g.parallelBFS(0);
    cout << endl;

    return 0;
}
Output:
Depth-First Search (DFS): 0 1 3 4 2 5 6
Breadth-First Search (BFS): 0 1 2 3 4 5 6
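
Note: the BFS above still expands one vertex's neighbour list at a time, so most of the traversal remains serial. A common alternative is a level-synchronous BFS, where the whole frontier of the current level is expanded in parallel. The sketch below is not part of the assignment; parallelLevelBFS is a hypothetical method name, and it assumes it is added inside the same Graph class so it can use V and adj.

    void parallelLevelBFS(int startVertex) {
        vector<bool> visited(V, false);
        vector<int> frontier = {startVertex};   // vertices of the current level
        visited[startVertex] = true;

        while (!frontier.empty()) {
            vector<int> next;                   // vertices of the next level
            #pragma omp parallel
            {
                vector<int> local;              // per-thread buffer, merged at the end
                #pragma omp for nowait
                for (int i = 0; i < (int)frontier.size(); ++i) {
                    int v = frontier[i];
                    cout << v << " ";
                    for (int n : adj[v]) {
                        bool explore = false;
                        #pragma omp critical
                        {
                            if (!visited[n]) { visited[n] = true; explore = true; }
                        }
                        if (explore) local.push_back(n);
                    }
                }
                #pragma omp critical
                next.insert(next.end(), local.begin(), local.end());
            }
            frontier = next;
        }
    }

Within a level the print order depends on thread scheduling, so the output can vary between runs even though every vertex is visited exactly once.
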
Assignment No. 2:
Write a program to implement Parallel Bubble Sort and Merge sort using OpenMP. Use
existing algorithms and measure the performance of sequential and parallel algorithms.

Code - Parallel Bubble Sort:


#include <iostream>
#include <omp.h>

using namespace std;

// Sequential bubble sort
void bubble(int array[], int n){
    for (int i = 0; i < n - 1; i++){
        for (int j = 0; j < n - i - 1; j++){
            if (array[j] > array[j + 1]) swap(array[j], array[j + 1]);
        }
    }
}

// Parallel odd-even transposition sort: alternating phases compare
// odd-indexed and even-indexed pairs; comparisons within a phase are
// independent, so each phase can be a parallel loop.
void pBubble(int array[], int n){
    for (int i = 0; i < n; ++i){
        // Sort odd indexed pairs
        #pragma omp parallel for
        for (int j = 1; j < n; j += 2){
            if (array[j] < array[j - 1]){
                swap(array[j], array[j - 1]);
            }
        }

        // Sort even indexed pairs (the implicit barrier at the end of the
        // loop above keeps the two phases separate)
        #pragma omp parallel for
        for (int j = 2; j < n; j += 2){
            if (array[j] < array[j - 1]){
                swap(array[j], array[j - 1]);
            }
        }
    }
}

void printArray(int arr[], int n){
    for (int i = 0; i < n; i++) cout << arr[i] << " ";
    cout << "\n";
}

int main(){
    // Set up variables
    int n = 10;
    int arr[n];
    double start_time, end_time;

    // Create an array with numbers from n down to 1
    for (int i = 0, j = n; i < n; i++, j--) arr[i] = j;

    // Sequential time
    start_time = omp_get_wtime();
    bubble(arr, n);
    end_time = omp_get_wtime();
    cout << "Sequential Bubble Sort took : " << end_time - start_time << " seconds.\n";
    printArray(arr, n);

    // Reset the array
    for (int i = 0, j = n; i < n; i++, j--) arr[i] = j;

    // Parallel time
    start_time = omp_get_wtime();
    pBubble(arr, n);
    end_time = omp_get_wtime();
    cout << "Parallel Bubble Sort took : " << end_time - start_time << " seconds.\n";
    printArray(arr, n);
}

Output:
Sequential Bubble Sort took : 0.00957767 seconds.
Parallel Bubble Sort took : 0.00988083 seconds.
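
With only 10 elements the thread start-up cost outweighs any gain, which is why the parallel timing above is no better than the sequential one. A minimal sketch of a fairer measurement, assuming the same pBubble function; the larger n and the heap-allocated array named data are additions not in the original listing:

    int n = 20000;                  // large enough to amortise thread overhead
    int* data = new int[n];
    for (int i = 0, j = n; i < n; i++, j--) data[i] = j;   // reverse-sorted input

    double t0 = omp_get_wtime();
    pBubble(data, n);
    double t1 = omp_get_wtime();
    cout << "Parallel Bubble Sort on " << n << " elements took " << t1 - t0 << " seconds.\n";

    delete[] data;
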

Code - Parallel Merge Sort:


#include <iostream>
#include <omp.h>

using namespace std;

void merge(int arr[], int low, int mid, int high) {
    // Create arrays for the left and right partitions
    int n1 = mid - low + 1;
    int n2 = high - mid;
    int left[n1];
    int right[n2];

    // Copy all left elements
    for (int i = 0; i < n1; i++) left[i] = arr[low + i];

    // Copy all right elements
    for (int j = 0; j < n2; j++) right[j] = arr[mid + 1 + j];

    // Compare and place elements
    int i = 0, j = 0, k = low;

    while (i < n1 && j < n2) {
        if (left[i] <= right[j]) {
            arr[k] = left[i];
            i++;
        } else {
            arr[k] = right[j];
            j++;
        }
        k++;
    }

    // Copy any remaining elements
    while (i < n1) {
        arr[k] = left[i];
        i++;
        k++;
    }

    while (j < n2) {
        arr[k] = right[j];
        j++;
        k++;
    }
}

void parallelMergeSort(int arr[], int low, int high) {
    if (low < high) {
        int mid = (low + high) / 2;

        // Sort the two halves in parallel sections; recursive calls open
        // nested regions, which run serially unless nested parallelism
        // is enabled.
        #pragma omp parallel sections
        {
            #pragma omp section
            {
                parallelMergeSort(arr, low, mid);
            }
            #pragma omp section
            {
                parallelMergeSort(arr, mid + 1, high);
            }
        }
        merge(arr, low, mid, high);
    }
}

void mergeSort(int arr[], int low, int high) {
    if (low < high) {
        int mid = (low + high) / 2;
        mergeSort(arr, low, mid);
        mergeSort(arr, mid + 1, high);
        merge(arr, low, mid, high);
    }
}

int main() {
    int n = 1000;
    int arr[n];
    double start_time, end_time;

    // Create an array with numbers from n down to 1
    for (int i = 0, j = n; i < n; i++, j--) arr[i] = j;

    // Measure sequential time
    start_time = omp_get_wtime();
    mergeSort(arr, 0, n - 1);
    end_time = omp_get_wtime();
    cout << "Time taken by sequential algorithm: " << end_time - start_time << " seconds\n";

    // Reset the array
    for (int i = 0, j = n; i < n; i++, j--) arr[i] = j;

    // Measure parallel time
    start_time = omp_get_wtime();
    parallelMergeSort(arr, 0, n - 1);
    end_time = omp_get_wtime();
    cout << "Time taken by parallel algorithm: " << end_time - start_time << " seconds";

    return 0;
}
Output:
Time taken by sequential algorithm: 0.000135859 seconds
Time taken by parallel algorithm: 0.000123855 seconds
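
Because each recursive call to parallelMergeSort opens its own parallel region, only the top level of the recursion actually runs in parallel by default. A frequently used alternative is OpenMP tasks with a size cutoff; the sketch below is not part of the assignment, and taskMergeSort and the 1000-element cutoff are assumed names and values. It reuses the mergeSort and merge functions from the listing above.

    void taskMergeSort(int arr[], int low, int high) {
        if (low < high) {
            int mid = (low + high) / 2;
            if (high - low < 1000) {
                // Small ranges: fall back to the sequential sort
                mergeSort(arr, low, high);
            } else {
                #pragma omp task          // sort the left half as a task
                taskMergeSort(arr, low, mid);
                #pragma omp task          // sort the right half as a task
                taskMergeSort(arr, mid + 1, high);
                #pragma omp taskwait      // wait for both halves before merging
                merge(arr, low, mid, high);
            }
        }
    }

    // Launched from a single thread inside a parallel region, e.g.:
    // #pragma omp parallel
    // #pragma omp single
    // taskMergeSort(arr, 0, n - 1);
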
Assignment No. 3:
Implement Min, Max, Sum and Average operations using Parallel Reduction.

Code:
/*
MSVC's OpenMP implementation (version 2.0) does not support the min and max
reduction clauses used below, so this program may not build with the Visual
C++ compiler on Windows. Please use a Linux environment (or the Windows
Subsystem for Linux).
*/

#include <iostream>
#include <omp.h>

using namespace std;

int minval(int arr[], int n){
    int minval = arr[0];
    // Each thread keeps a private minimum; OpenMP combines them at the end.
    #pragma omp parallel for reduction(min : minval)
    for (int i = 0; i < n; i++){
        if (arr[i] < minval) minval = arr[i];
    }
    return minval;
}

int maxval(int arr[], int n){
    int maxval = arr[0];
    #pragma omp parallel for reduction(max : maxval)
    for (int i = 0; i < n; i++){
        if (arr[i] > maxval) maxval = arr[i];
    }
    return maxval;
}

int sum(int arr[], int n){
    int sum = 0;
    #pragma omp parallel for reduction(+ : sum)
    for (int i = 0; i < n; i++){
        sum += arr[i];
    }
    return sum;
}

double average(int arr[], int n){
    return (double)sum(arr, n) / n;
}

int main(){
    int n = 5;
    int arr[] = {1, 2, 3, 4, 5};
    cout << "The minimum value is: " << minval(arr, n) << '\n';
    cout << "The maximum value is: " << maxval(arr, n) << '\n';
    cout << "The summation is: " << sum(arr, n) << '\n';
    cout << "The average is: " << average(arr, n) << '\n';
    return 0;
}

Output:
The minimum value is: 1
The maximum value is: 5
The summation is: 15
The average is: 3
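
The three functions above each scan the array separately. OpenMP also allows several reduction clauses on one loop, so min, max, and sum can be computed in a single pass; the sketch below is an illustration only, and the function name combined and its output parameters are assumptions:

    void combined(int arr[], int n, int& mn, int& mx, long long& total) {
        int localMin = arr[0], localMax = arr[0];
        long long localSum = 0;
        #pragma omp parallel for reduction(min : localMin) reduction(max : localMax) reduction(+ : localSum)
        for (int i = 0; i < n; i++) {
            if (arr[i] < localMin) localMin = arr[i];
            if (arr[i] > localMax) localMax = arr[i];
            localSum += arr[i];
        }
        mn = localMin; mx = localMax; total = localSum;
    }
    // The average can then be computed as (double)total / n.
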
Assignment No. 4:
Write a CUDA program for:
1. Addition of two large vectors
2. Matrix Multiplication using CUDA C

Code - Addition of Two Large Vectors:


%%cu
#include <iostream>
#include <cstdlib>
using namespace std;

// Kernel: each thread adds one pair of elements
__global__ void add(int* A, int* B, int* C, int size) {
    int tid = blockIdx.x * blockDim.x + threadIdx.x;

    if (tid < size) {
        C[tid] = A[tid] + B[tid];
    }
}

void initialize(int* vector, int size) {
    for (int i = 0; i < size; i++) {
        vector[i] = rand() % 10;
    }
}

void print(int* vector, int size) {
    for (int i = 0; i < size; i++) {
        cout << vector[i] << " ";
    }
    cout << endl;
}

int main() {
    int N = 4;
    int* A, * B, * C;

    int vectorSize = N;
    size_t vectorBytes = vectorSize * sizeof(int);

    // Host allocations
    A = new int[vectorSize];
    B = new int[vectorSize];
    C = new int[vectorSize];

    initialize(A, vectorSize);
    initialize(B, vectorSize);
    cout << "Vector A: ";
    print(A, N);
    cout << "Vector B: ";
    print(B, N);

    // Device allocations
    int* X, * Y, * Z;
    cudaMalloc(&X, vectorBytes);
    cudaMalloc(&Y, vectorBytes);
    cudaMalloc(&Z, vectorBytes);

    // Copy the input vectors to the device
    cudaMemcpy(X, A, vectorBytes, cudaMemcpyHostToDevice);
    cudaMemcpy(Y, B, vectorBytes, cudaMemcpyHostToDevice);

    int threadsPerBlock = 256;
    int blocksPerGrid = (N + threadsPerBlock - 1) / threadsPerBlock;

    add<<<blocksPerGrid, threadsPerBlock>>>(X, Y, Z, N);

    // Copy the result back to the host
    cudaMemcpy(C, Z, vectorBytes, cudaMemcpyDeviceToHost);

    cout << "Addition: ";
    print(C, N);

    delete[] A;
    delete[] B;
    delete[] C;

    cudaFree(X);
    cudaFree(Y);
    cudaFree(Z);

    return 0;
}

Output:
Vector A: 3 6 7 5
Vector B: 3 5 6 2
Addition: 6 11 13 7
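
The kernel launch and the cudaMemcpy calls above are never checked for errors, so a failed launch would silently print stale host memory. A common pattern is a checking macro around each runtime call; the sketch below is an assumption (the CHECK name is not part of the listing), while cudaGetLastError, cudaDeviceSynchronize, and cudaGetErrorString are the standard CUDA runtime API:

    #define CHECK(call)                                                   \
        do {                                                              \
            cudaError_t err = (call);                                     \
            if (err != cudaSuccess) {                                     \
                std::cerr << "CUDA error: " << cudaGetErrorString(err)    \
                          << " at line " << __LINE__ << std::endl;        \
                exit(1);                                                  \
            }                                                             \
        } while (0)

    // Usage after the launch:
    // add<<<blocksPerGrid, threadsPerBlock>>>(X, Y, Z, N);
    // CHECK(cudaGetLastError());        // catches launch configuration errors
    // CHECK(cudaDeviceSynchronize());   // catches errors raised while the kernel runs
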

Code - Matrix Multiplication using CUDA C:


%%cu
#include <iostream>
#include <cstdlib>
using namespace std;

// CUDA kernel to multiply square matrices:
// each thread computes one element of the result.
__global__ void multiply(int* A, int* B, int* C, int size) {
    // Use thread and block indices to locate the element
    int row = blockIdx.y * blockDim.y + threadIdx.y;
    int col = blockIdx.x * blockDim.x + threadIdx.x;

    if (row < size && col < size) {
        int sum = 0;
        for (int i = 0; i < size; i++) {
            sum += A[row * size + i] * B[i * size + col];
        }
        C[row * size + col] = sum;
    }
}

void initialize(int* matrix, int size) {
    for (int i = 0; i < size * size; i++) {
        matrix[i] = rand() % 10;
    }
}

void print(int* matrix, int size) {
    for (int row = 0; row < size; row++) {
        for (int col = 0; col < size; col++) {
            cout << matrix[row * size + col] << " ";
        }
        cout << '\n';
    }
    cout << '\n';
}

int main() {
    int* A, * B, * C;

    int N = 2;

    int matrixSize = N * N;
    size_t matrixBytes = matrixSize * sizeof(int);

    // Host allocations
    A = new int[matrixSize];
    B = new int[matrixSize];
    C = new int[matrixSize];

    initialize(A, N);
    initialize(B, N);
    cout << "Matrix A: \n";
    print(A, N);

    cout << "Matrix B: \n";
    print(B, N);

    // Device allocations
    int* X, * Y, * Z;
    cudaMalloc(&X, matrixBytes);
    cudaMalloc(&Y, matrixBytes);
    cudaMalloc(&Z, matrixBytes);

    // Copy values from A to X and from B to Y
    cudaMemcpy(X, A, matrixBytes, cudaMemcpyHostToDevice);
    cudaMemcpy(Y, B, matrixBytes, cudaMemcpyHostToDevice);

    // Threads per CTA dimension
    int THREADS = 2;

    // Blocks per grid dimension (assumes THREADS divides N evenly)
    int BLOCKS = N / THREADS;

    // Use dim3 structs for block and grid dimensions
    dim3 threads(THREADS, THREADS);
    dim3 blocks(BLOCKS, BLOCKS);

    // Launch kernel
    multiply<<<blocks, threads>>>(X, Y, Z, N);

    // Copy the result back to the host
    cudaMemcpy(C, Z, matrixBytes, cudaMemcpyDeviceToHost);

    cout << "Multiplication of matrix A and B: \n";
    print(C, N);

    delete[] A;
    delete[] B;
    delete[] C;

    cudaFree(X);
    cudaFree(Y);
    cudaFree(Z);
    return 0;
}

Output:
Matrix A:
3 6
7 5

Matrix B:
3 5
6 2

Multiplication of matrix A and B:
45 27
51 45
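
As the comment in the listing notes, BLOCKS = N / THREADS assumes THREADS divides N evenly. For sizes that do not divide evenly, the usual adjustment is to round the grid size up; the sketch below keeps the listing's variable names, and the larger THREADS value is only illustrative:

    // Round up so every row and column gets at least one thread.
    int THREADS = 16;
    int BLOCKS = (N + THREADS - 1) / THREADS;

    dim3 threads(THREADS, THREADS);
    dim3 blocks(BLOCKS, BLOCKS);

    // Extra threads fall outside the matrix and are skipped by the
    // if (row < size && col < size) test inside the kernel.
    multiply<<<blocks, threads>>>(X, Y, Z, N);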
