The document contains implementations of various parallel algorithms using OpenMP and CUDA, including Parallel Breadth First Search, Depth First Search, Bubble Sort, Merge Sort, and reduction operations for Min, Max, Sum, and Average. Additionally, it includes CUDA programs for matrix multiplication and vector addition. Each section provides code snippets and performance measurements for sequential and parallel executions.

Design and implement Parallel Breadth First Search and Depth First Search based on existing algorithms using OpenMP. Use a Tree or an undirected graph for BFS and DFS.

BFS

#include<iostream>
#include<omp.h>
#include<vector>
#include<stack>
#include<queue>
#include<chrono>
using namespace std;

class Graph{
public:
int vertices = 6;
int edges = 5;

vector<vector<int>> graph = {{1},{0,2,3},{1,4,5},{1,4},{2,3},{2}};
vector<bool> visited;

void addEdge(int a, int b){
graph[a].push_back(b);
graph[b].push_back(a);
}

void printGraph(){
for(int i = 0; i < vertices; i++){
cout << i << " -> ";
for(int j = 0; j < graph[i].size();j++){
cout << graph[i][j] << " ";
}
cout << endl;
}
}

void initialize_visited(){
visited.assign(vertices,false);
}

void bfs(int i){
queue<int> q;
q.push(i);
visited[i] = true;

while(!q.empty()){
int current = q.front();
q.pop();
cout << current << " ";

for (int j = 0; j < graph[current].size(); ++j) {
int neighbor = graph[current][j];
if (!visited[neighbor]) {
q.push(neighbor);
visited[neighbor] = true;
}
}
}
}

void parallel_bfs(int i){
queue<int> q;
q.push(i);
visited[i] = true;

// The queue is shared across threads, so every access to it is wrapped
// in a critical section; only the neighbor expansion runs in parallel.
while(!q.empty()){
int current;
#pragma omp critical
{ current = q.front(); q.pop(); }

cout << current << " ";

#pragma omp parallel for
for (int j = 0; j < graph[current].size(); j++) {
int neighbor = graph[current][j];
#pragma omp critical
if (!visited[neighbor]) {
q.push(neighbor);
visited[neighbor] = true;
}
}
}
}
};

int main(int argc, char const *argv[])
{
Graph g;
cout << "Adjacency List:\n";
g.printGraph();
g.initialize_visited();

cout << "Breadth First Search: \n";
auto start = chrono::high_resolution_clock::now();
g.bfs(0);
cout << endl;
auto end = chrono::high_resolution_clock::now();
cout << "Time taken: " << chrono::duration_cast<chrono::microseconds>(end - start).count() << " microseconds" << endl;

cout << "Parallel Breadth First Search: \n";
g.initialize_visited();
start = chrono::high_resolution_clock::now();
g.parallel_bfs(0);
cout << endl;
end = chrono::high_resolution_clock::now();
cout << "Time taken: " << chrono::duration_cast<chrono::microseconds>(end - start).count() << " microseconds" << endl;

return 0;
}
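A note on parallel_bfs above: because the queue is shared, nearly every operation goes through a critical section, which serializes most of the work. A common restructuring (a sketch under the assumption that the same graph and visited members are available, not the assignment's required method) is level-synchronous BFS: expand the whole frontier with one parallel for, letting each thread collect newly discovered vertices in a private buffer before merging.

void level_bfs(int start){
vector<int> frontier{start};
visited[start] = true;
while(!frontier.empty()){
for (int v : frontier) cout << v << " "; // print the current level
vector<int> next;
#pragma omp parallel
{
vector<int> local; // per-thread buffer: no locking while collecting
#pragma omp for nowait
for (int f = 0; f < (int)frontier.size(); f++) {
for (int neighbor : graph[frontier[f]]) {
#pragma omp critical
if (!visited[neighbor]) {
visited[neighbor] = true;
local.push_back(neighbor);
}
}
}
#pragma omp critical
next.insert(next.end(), local.begin(), local.end());
}
frontier = next; // the next level becomes the new frontier
}
}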

=========================================

DFS

#include<iostream>
#include<omp.h>
#include<vector>
#include<stack>
#include<queue>
#include<chrono>
using namespace std;

class Graph{
public:
int vertices = 6;
int edges = 5;
vector<vector<int>> graph = {{1},{0,2,3},{1,4,5},{1,4},{2,3},{2}};
vector<bool> visited;

void addEdge(int a, int b){
graph[a].push_back(b);
graph[b].push_back(a);
}

void printGraph(){
for(int i = 0; i < vertices; i++){
cout << i << " -> ";
for(int j = 0; j < graph[i].size();j++){
cout << graph[i][j] << " ";
}
cout << endl;
}
}

void initialize_visited(){
visited.assign(vertices,false);
}

void dfs(int i){
stack<int> s;
s.push(i);
visited[i] = true;
while(!s.empty()){
int current = s.top();
cout << current << " ";
s.pop();
for(int j = 0; j < graph[current].size();j++){
int neighbor = graph[current][j];
if(!visited[neighbor]){
s.push(neighbor);
visited[neighbor] = true;
}
}
}
}

void parallel_dfs(int i){
stack<int> s;
s.push(i);
visited[i] = true;
// The stack is shared across threads, so top/pop and push are protected
// by critical sections; only the neighbor expansion runs in parallel.
while(!s.empty()){
int current;
#pragma omp critical
{
current = s.top();
s.pop();
cout << current << " ";
}

#pragma omp parallel for
for(int j = 0; j < graph[current].size();j++){
int neighbor = graph[current][j];
#pragma omp critical
if(!visited[neighbor]){
s.push(neighbor);
visited[neighbor] = true;
}
}
}
}

};
int main()
{
Graph g;
cout << "Adjacency List:\n";
g.printGraph();
g.initialize_visited(); // set all vertices to unvisited (false)

cout << "Depth First Search: \n";
auto start = chrono::high_resolution_clock::now();
g.dfs(0);
cout << endl;
auto end = chrono::high_resolution_clock::now();
cout << "Time taken: " << chrono::duration_cast<chrono::microseconds>(end - start).count() << " microseconds" << endl;

cout << "Parallel Depth First Search: \n";
g.initialize_visited();
start = chrono::high_resolution_clock::now();
g.parallel_dfs(0);
cout << endl;
end = chrono::high_resolution_clock::now();
cout << "Time taken: " << chrono::duration_cast<chrono::microseconds>(end - start).count() << " microseconds" << endl;

return 0;
}

============================================

Write a program to implement Parallel Bubble Sort and Merge sort using OpenMP. Use existing
algorithms and measure the performance of sequential and parallel algorithms.

Bubble-Sort

#include <iostream>
#include <vector>
#include <algorithm>
#include <chrono>
#include <omp.h>

using namespace std;
using namespace std::chrono;
// ------------------------- Sequential Bubble Sort -------------------------
void sequential_bubble_sort(vector<int>& arr) {
for (int i = 0; i < arr.size() - 1; i++) {
for (int j = 0; j < arr.size() - i - 1; j++) {
if (arr[j] > arr[j + 1]) {
swap(arr[j], arr[j + 1]);
}
}
}
}

// ------------------------- Parallel Bubble Sort -------------------------
void parallel_bubble_sort(vector<int>& arr) {
for (int k = 0; k < arr.size(); k++) {
// Odd-even transposition sort: alternating phases ensure that no two
// threads ever compare the same pair, so the parallel swaps are race-free.
// When k is even, compare pairs starting at odd indices: (1,2), (3,4), ...
// When k is odd, compare pairs starting at even indices: (0,1), (2,3), ...
if (k % 2 == 0) {
#pragma omp parallel for
for (int i = 1; i < arr.size() - 1; i += 2) {
if (arr[i] > arr[i + 1]) {
swap(arr[i], arr[i + 1]);
}
}
} else {
#pragma omp parallel for
for (int i = 0; i < arr.size() - 1; i += 2) {
if (arr[i] > arr[i + 1]) {
swap(arr[i], arr[i + 1]);
}
}
}
}
}

// ------------------------- Main Function -------------------------
int main() {
vector<int> array;

for (int i = 0; i < 20; i++) {
array.push_back(rand() % 300);
}
cout << "\nOriginal Array:\n";
for (int val : array) {
cout << val << " ";
}

auto start = high_resolution_clock::now();
// sequential_bubble_sort(array);
parallel_bubble_sort(array);

auto end = high_resolution_clock::now();

auto duration = duration_cast<microseconds>(end - start);

cout << "\nTotal Time Required: " << duration.count() << " µs\n";

cout << "\n\nSorted Array:\n";
for (int val : array) {
cout << val << " ";
}

return 0;
}

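Note that main above times only the parallel version (the sequential call is commented out). A minimal sketch of measuring both, as the assignment asks, on identical copies of the input; array_copy is a hypothetical vector holding the original unsorted data, and std::is_sorted (from <algorithm>, already included) acts as a sanity check:

vector<int> a_seq = array_copy, a_par = array_copy; // array_copy is hypothetical: the original unsorted data

auto t0 = high_resolution_clock::now();
sequential_bubble_sort(a_seq);
auto t1 = high_resolution_clock::now();
parallel_bubble_sort(a_par);
auto t2 = high_resolution_clock::now();

cout << "Sequential: " << duration_cast<microseconds>(t1 - t0).count() << " microseconds\n";
cout << "Parallel: " << duration_cast<microseconds>(t2 - t1).count() << " microseconds\n";
cout << boolalpha << "Both sorted: "
<< (is_sorted(a_seq.begin(), a_seq.end()) && is_sorted(a_par.begin(), a_par.end())) << "\n";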
==================================================
Merge-Sort

#include <iostream>
#include <vector>
#include <algorithm>
#include <chrono>
#include <omp.h>

using namespace std;
using namespace std::chrono;

// ------------------------- Sequential Merge Sort -------------------------
void merge(vector<int>& array, int low, int mid, int high) {
vector<int> temp(high - low + 1);
int i = low, j = mid + 1, k = 0;
while (i <= mid && j <= high) {
if (array[i] <= array[j]) {
temp[k++] = array[i++];
} else {
temp[k++] = array[j++];
}
}

while (i <= mid) temp[k++] = array[i++];
while (j <= high) temp[k++] = array[j++];

for (i = low, k = 0; i <= high; i++, k++) {
array[i] = temp[k];
}
}

void sequential_merge_sort(vector<int>& array, int low, int high) {
if (low < high) {
int mid = (low + high) / 2;
sequential_merge_sort(array, low, mid);
sequential_merge_sort(array, mid + 1, high);
merge(array, low, mid, high);
}
}

// ------------------------- Parallel Merge Sort -------------------------
void parallel_merge_sort(vector<int>& array, int low, int high) {
if (low < high) {
int mid = (low + high) / 2;

#pragma omp parallel sections
{
#pragma omp section
parallel_merge_sort(array, low, mid);

#pragma omp section
parallel_merge_sort(array, mid + 1, high);
}

merge(array, low, mid, high);
}
}

// ------------------------- Main Function -------------------------
int main() {
vector<int> array;

// Initialize the array with random values
for (int i = 0; i < 20; i++) {
array.push_back(rand() % 300);
}

// Display original array
cout << "\nOriginal Array:\n";
for (int val : array) {
cout << val << " ";
}
cout << endl;

// ------------------------- Sequential Merge Sort -------------------------
vector<int> seq_array = array; // Copy of original array
auto start = high_resolution_clock::now();
sequential_merge_sort(seq_array, 0, seq_array.size() - 1);
auto end = high_resolution_clock::now();
auto duration = duration_cast<microseconds>(end - start);
cout << "\nSequential Merge Sort Time: " << duration.count() << " microseconds\n";

cout << "\nSorted Array (Sequential):\n";
for (int val : seq_array) {
cout << val << " ";
}
cout << endl;

// ------------------------- Parallel Merge Sort -------------------------
vector<int> par_array = array; // Copy of original array
start = high_resolution_clock::now();
parallel_merge_sort(par_array, 0, par_array.size() - 1);
end = high_resolution_clock::now();
duration = duration_cast<microseconds>(end - start);
cout << "\nParallel Merge Sort Time: " << duration.count() << " microseconds\n";

cout << "\nSorted Array (Parallel):\n";
for (int val : par_array) {
cout << val << " ";
}
cout << endl;

return 0;
}
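One caveat with parallel sections above: each recursive call opens a new parallel region, which can oversubscribe threads on deep recursion. A minimal alternative sketch (my assumption, not part of the assignment) using OpenMP 3.0 tasks with a depth cutoff, reusing merge and sequential_merge_sort from above:

void task_merge_sort(vector<int>& array, int low, int high, int depth) {
if (low < high) {
int mid = low + (high - low) / 2;
if (depth < 3) { // parallelize only the top few levels of the recursion
#pragma omp task shared(array)
task_merge_sort(array, low, mid, depth + 1);
#pragma omp task shared(array)
task_merge_sort(array, mid + 1, high, depth + 1);
#pragma omp taskwait // both halves must finish before merging
} else { // small subproblems: plain sequential recursion
sequential_merge_sort(array, low, mid);
sequential_merge_sort(array, mid + 1, high);
}
merge(array, low, mid, high);
}
}

// Called once from inside a parallel region:
// #pragma omp parallel
// #pragma omp single
// task_merge_sort(par_array, 0, par_array.size() - 1, 0);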
======================================================

3. Implement Min, Max, Sum and Average operations using Parallel Reduction.

#include <iostream>
#include <vector>
#include <omp.h>
#include <climits>

using namespace std;

void min_reduction(vector<int>& arr) {
int min_value = INT_MAX;
#pragma omp parallel for reduction(min: min_value)
for (int i = 0; i < arr.size(); i++) {
if (arr[i] < min_value) {
min_value = arr[i];
}
}
cout << "Minimum value: " << min_value << endl;
}

void max_reduction(vector<int>& arr) {
int max_value = INT_MIN;
#pragma omp parallel for reduction(max: max_value)
for (int i = 0; i < arr.size(); i++) {
if (arr[i] > max_value) {
max_value = arr[i];
}
}
cout << "Maximum value: " << max_value << endl;
}

void sum_reduction(vector<int>& arr) {
int sum = 0;
#pragma omp parallel for reduction(+: sum)
for (int i = 0; i < arr.size(); i++) {
sum += arr[i];
}
cout << "Sum: " << sum << endl;
}
void average_reduction(vector<int>& arr) {
int sum = 0;
#pragma omp parallel for reduction(+: sum)
for (int i = 0; i < arr.size(); i++) {
sum += arr[i];
}
cout << "Average: " << (double)sum / arr.size() << endl;
}

int main() {
vector<int> arr = {5, 2, 9, 1, 7, 6, 8, 3, 4};

min_reduction(arr);
max_reduction(arr);
sum_reduction(arr);
average_reduction(arr);

return 0;
}
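For intuition, here is roughly what reduction(+: sum) expands to, written out by hand (a sketch, not a drop-in replacement): each thread accumulates into a private copy, and the copies are combined once per thread at the end rather than once per element.

int manual_sum(vector<int>& arr) {
int sum = 0;
#pragma omp parallel
{
int local_sum = 0; // private per-thread partial sum
#pragma omp for
for (int i = 0; i < arr.size(); i++) {
local_sum += arr[i];
}
#pragma omp critical
sum += local_sum; // one combine per thread
}
return sum;
}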

=============================
Write a CUDA Program for: Matrix Multiplication

#include <cuda_runtime.h>
#include <iostream>

__global__ void matmul(int* A, int* B, int* C, int N) {
int Row = blockIdx.y*blockDim.y+threadIdx.y;
int Col = blockIdx.x*blockDim.x+threadIdx.x;
if (Row < N && Col < N) {
// Note: with the initialization in main and N = 512, the int accumulator
// overflows; switch to long long if exact products are needed.
int Pvalue = 0;
for (int k = 0; k < N; k++) {
Pvalue += A[Row*N+k] * B[k*N+Col];
}
C[Row*N+Col] = Pvalue;
}
}
int main() {
int N = 512;
int size = N * N * sizeof(int);
int* A, * B, * C;
int* dev_A, * dev_B, * dev_C;
cudaMallocHost(&A, size);
cudaMallocHost(&B, size);
cudaMallocHost(&C, size);
cudaMalloc(&dev_A, size);
cudaMalloc(&dev_B, size);
cudaMalloc(&dev_C, size);

// Initialize matrices A and B
for (int i = 0; i < N; i++) {
for (int j = 0; j < N; j++) {
A[i*N+j] = i*N+j;
B[i*N+j] = j*N+i;
}
}

cudaMemcpy(dev_A, A, size, cudaMemcpyHostToDevice);
cudaMemcpy(dev_B, B, size, cudaMemcpyHostToDevice);

dim3 dimBlock(16, 16);
dim3 dimGrid(N/dimBlock.x, N/dimBlock.y); // exact here because N = 512 is a multiple of 16

matmul<<<dimGrid, dimBlock>>>(dev_A, dev_B, dev_C, N);

cudaMemcpy(C, dev_C, size, cudaMemcpyDeviceToHost);

// Print the top-left 10x10 corner of the result
for (int i = 0; i < 10; i++) {
for (int j = 0; j < 10; j++) {
std::cout << C[i*N+j] << " ";
}
std::cout << std::endl;
}

// Free memory
cudaFree(dev_A);
cudaFree(dev_B);
cudaFree(dev_C);
cudaFreeHost(A);
cudaFreeHost(B);
cudaFreeHost(C);
return 0;
}
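The grid above divides evenly only because N = 512 is a multiple of 16. For general N, the usual pattern is ceiling division, with the kernel's existing bounds check handling the partial blocks; a sketch, together with a basic error check using the standard runtime calls cudaGetLastError and cudaDeviceSynchronize:

dim3 dimBlock(16, 16);
dim3 dimGrid((N + dimBlock.x - 1) / dimBlock.x, // round up so every element is covered
(N + dimBlock.y - 1) / dimBlock.y);
matmul<<<dimGrid, dimBlock>>>(dev_A, dev_B, dev_C, N);
cudaError_t err = cudaGetLastError(); // catches launch-configuration errors
if (err == cudaSuccess) err = cudaDeviceSynchronize(); // catches errors during execution
if (err != cudaSuccess) {
std::cout << "CUDA error: " << cudaGetErrorString(err) << std::endl;
}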

============================

Write a CUDA Program for: Addition of two large vectors

#include <iostream>
#include <cuda_runtime.h>

using namespace std;

__global__ void addVectors(int* A, int* B, int* C, int n)
{
int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i < n)
{
C[i] = A[i] + B[i];
}
}

int main()
{
int n = 1000000;
int* A, * B, * C;
int size = n * sizeof(int);

// Allocate pinned (page-locked) memory on the host
cudaMallocHost(&A, size);
cudaMallocHost(&B, size);
cudaMallocHost(&C, size);

// Initialize the vectors
for (int i = 0; i < n; i++)
{
A[i] = i;
B[i] = i * 2;
}
// Allocate memory on the device
int* dev_A, * dev_B, * dev_C;
cudaMalloc(&dev_A, size);
cudaMalloc(&dev_B, size);
cudaMalloc(&dev_C, size);
// Copy data from host to device
cudaMemcpy(dev_A, A, size, cudaMemcpyHostToDevice);
cudaMemcpy(dev_B, B, size, cudaMemcpyHostToDevice);

// Launch the kernel: one thread per element, rounding the number of blocks up
int blockSize = 256;
int numBlocks = (n + blockSize - 1) / blockSize;
addVectors<<<numBlocks, blockSize>>>(dev_A, dev_B, dev_C, n);

// Copy data from device to host
cudaMemcpy(C, dev_C, size, cudaMemcpyDeviceToHost);

// Print the first 10 results
for (int i = 0; i < 10; i++)
{
cout << C[i] << " ";
}
cout << endl;

// Free memory
cudaFree(dev_A);
cudaFree(dev_B);
cudaFree(dev_C);
cudaFreeHost(A);
cudaFreeHost(B);
cudaFreeHost(C);

return 0;
}
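The OpenMP programs above time their work with chrono; on the device side, CUDA events are the usual tool because they measure only the kernel's execution. A minimal sketch around the launch above:

cudaEvent_t start, stop;
cudaEventCreate(&start);
cudaEventCreate(&stop);
cudaEventRecord(start);
addVectors<<<numBlocks, blockSize>>>(dev_A, dev_B, dev_C, n);
cudaEventRecord(stop);
cudaEventSynchronize(stop); // block until the kernel has finished
float ms = 0.0f;
cudaEventElapsedTime(&ms, start, stop); // elapsed time between the two events
cout << "Kernel time: " << ms << " ms" << endl;
cudaEventDestroy(start);
cudaEventDestroy(stop);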

==============================================================================

Deep Learning

Assignment 1: Linear regression by using Deep Neural network: Implement Boston housing price
prediction problem by Linear regression using Deep Neural network. Use Boston House price
prediction dataset
# Import necessary libraries
import numpy as np # For numerical operations
import pandas as pd # For handling datasets
from sklearn.model_selection import train_test_split # Splitting data into train & test sets
from sklearn.linear_model import LinearRegression # Linear Regression Model
from sklearn.preprocessing import StandardScaler # Standardization of data
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score # Evaluation metrics

# Importing Keras (for Neural Network)
import keras
from keras.models import Sequential # To define a sequential model
from keras.layers import Dense # Fully connected layers

# Importing Google Colab file handling utility
from google.colab import files

# Uploading and Loading Dataset
uploaded = files.upload() # Opens file upload dialogue in Google Colab
boston = pd.read_csv("boston_house_prices.csv") # Reads CSV file into a DataFrame

# Selecting Features and Target
# Selecting 3 input features:
# 1. LSTAT (Percentage of lower status population)
# 2. RM (Average number of rooms per dwelling)
# 3. PTRATIO (Pupil-teacher ratio by town)
X = boston[['LSTAT', 'RM', 'PTRATIO']]

# Target variable: House Price
y = boston['PRICE']

# Splitting the Dataset into Training and Testing Sets
# 80% of data used for training, 20% for testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=4)

# Standardizing the Dataset (Feature Scaling)
# Standardization improves model performance by normalizing feature values
scaler = StandardScaler() # Initializing StandardScaler
X_train_scaled = scaler.fit_transform(X_train) # Fit and transform training data
X_test_scaled = scaler.transform(X_test) # Transform test data using the same scaler
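# For reference, StandardScaler standardizes each feature with the training
# set's statistics; the same mean and standard deviation are reused on the
# test set, which is why transform (not fit_transform) is called on X_test.
# Per feature: z = (x - mu_train) / sigma_train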

# Linear Regression Model
lr_model = LinearRegression() # Initializing Linear Regression Model
lr_model.fit(X_train_scaled, y_train) # Training the model using scaled training data
# Predicting house prices on test data
y_pred_lr = lr_model.predict(X_test_scaled)

# Evaluating Linear Regression Model
mse_lr = mean_squared_error(y_test, y_pred_lr) # Mean Squared Error
mae_lr = mean_absolute_error(y_test, y_pred_lr) # Mean Absolute Error
r2_lr = r2_score(y_test, y_pred_lr) # R² Score (Model accuracy measure)

# Displaying evaluation metrics
print("Linear Regression Model Evaluation:")
print(f"Mean Squared Error: {mse_lr}")
print(f"Mean Absolute Error: {mae_lr}")
print(f"R2 Score: {r2_lr}")

# Neural Network (ANN) Model
# Creating a Deep Learning Model using Keras Sequential API
model = Sequential([
Dense(128, activation='relu', input_dim=3), # Input layer (3 features) & first hidden layer (128 neurons)
Dense(64, activation='relu'), # Second hidden layer with 64 neurons
Dense(32, activation='relu'), # Third hidden layer with 32 neurons
Dense(16, activation='relu'), # Fourth hidden layer with 16 neurons
Dense(1) # Output layer (Predicting a single value - House Price)
])

# Compiling the model
model.compile(optimizer='adam', loss='mse', metrics=['mae'])
# Optimizer: Adam (Adaptive Learning Rate Optimization)
# Loss function: Mean Squared Error (MSE) - Suitable for regression problems
# Metric: Mean Absolute Error (MAE) - Helps measure performance

# Training the Neural Network
history = model.fit(X_train_scaled, y_train, epochs=100, validation_split=0.05, verbose=1)
# Training for 100 epochs
# Using 5% of training data as validation set to monitor overfitting
# `verbose=1` displays detailed training progress
# Sample output:
# Epoch 1/100
# 12/12 - 4s 26ms/step - loss: 547.8306 - mae: 21.6359 - val_loss: 445.7750 - val_mae: 20.1572
# Epoch 2/100
# 12/12 - 0s 8ms/step - loss: 550.6208 - mae: 21.6498 - val_loss: 403.5681 - val_mae: 19.1308
# Epoch 3/100
# 12/12 - 0s 8ms/step - loss: 433.7596 -
# Evaluating the Neural Network Model
y_pred_nn = model.predict(X_test_scaled) # Predicting house prices on test data
mse_nn, mae_nn = model.evaluate(X_test_scaled, y_test) # Evaluating model performance
# Displaying Neural Network Evaluation Metrics
print("\nNeural Network Model Evaluation:")
print(f"Mean Squared Error: {mse_nn}")
print(f"Mean Absolute Error: {mae_nn}")

# House Price Prediction for New Data
new_data = np.array([[0.1, 10.0, 5.0]])
# New input values: LSTAT=0.1, RM=10.0, PTRATIO=5.0

new_data_scaled = scaler.transform(new_data)
# Applying the same standardization as training data

# Predicting price using trained neural network model
prediction = model.predict(new_data_scaled)

# Displaying the predicted house price
print("\nPredicted House Price:", prediction[0][0])

# Output:
# 1/1 - 0s 36ms/step
# Predicted House Price: 79.24278
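For reference, the metrics reported above, with y_i the true prices, \hat{y}_i the predictions, \bar{y} the mean true price, and n test samples:

\mathrm{MSE} = \frac{1}{n}\sum_{i=1}^{n}(y_i - \hat{y}_i)^2
\mathrm{MAE} = \frac{1}{n}\sum_{i=1}^{n}\lvert y_i - \hat{y}_i\rvert
R^2 = 1 - \frac{\sum_{i=1}^{n}(y_i - \hat{y}_i)^2}{\sum_{i=1}^{n}(y_i - \bar{y})^2}

An R^2 of 1 means perfect prediction; 0 means no better than always predicting the mean price.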

=====================================

2. Multiclass classification using Deep Neural Networks: Example: Use the OCR letter recognition dataset.

import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.datasets import mnist
import matplotlib.pyplot as plt
from sklearn import metrics

# Load the OCR dataset
# The MNIST dataset is a built-in dataset provided by Keras.
# It consists of 70,000 28x28 grayscale images, each of which displays a single handwritten digit from 0 to 9.
# The training set consists of 60,000 images, while the test set has 10,000 images.

(x_train, y_train), (x_test, y_test) = mnist.load_data()

# x_train and x_test are our arrays of images while y_train and y_test are our arrays of labels for each image.
# The first tuple contains the training set features (x_train) and the training set labels (y_train).
# The second tuple contains the testing set features (x_test) and the testing set labels (y_test).
# For example, if the image shows a handwritten 7, then the label will be the integer 7.

plt.imshow(x_train[0], cmap='gray') # imshow() simply displays an image.
plt.show() # cmap maps a colormap onto the values found in the array passed as the first argument.

# The image appears black and white and each axis of the plot ranges from 0 to 28.
# This is because of the format that all the images in the dataset have:
# 1. All the images are grayscale, meaning they only contain black, white and grey.
# 2. The images are 28 pixels by 28 pixels in size (28x28).

print(x_train[0])

# The image data is just a [28][28] array of digits (28 rows of 28 values). You can almost make out a 5 from the pattern of the digits in the array.
# A grayscale pixel is stored as a digit between 0 and 255, where 0 is black, 255 is white, and values in between are different shades of gray.
# Therefore, each value in the [28][28] array tells the computer which color to put in that position when we display the actual image.

# reformat our X_train array and our X_test array because they do not have the correct shape.
# Reshape the data to fit the model
print("X_train shape", x_train.shape)
print("y_train shape", y_train.shape)
print("X_test shape", x_test.shape)
print("y_test shape", y_test.shape)
# Here you can see that for the training sets we have 60,000 elements and the testing sets have 10,000 elements.
# y_train and y_test only have 1-dimensional shapes because they are just the labels of each element.
# x_train and x_test have 3-dimensional shapes because they have a width and height (28x28 pixels) for each element.
# (60000, 28, 28): training data; the 1st value is how many images we have, the 2nd and 3rd are the pixel dimensions (28x28). Pixel values vary between 0 and 255.
# (60000,): training labels, integers from 0-9 with dtype uint8.
# (10000, 28, 28): testing data of grayscale images, dtype uint8, pixel values between 0 and 255.
# (10000,): testing labels, integers from 0-9 with dtype uint8.

# X: Training data of shape (n_samples, n_features)
# y: Training label values of shape (n_samples, n_labels)
# A 2D array of 28 pixels by 28 pixels becomes 784 pixels (28 squared).
# Remember that x_train has 60,000 elements, each with 784 total pixels, so it will become shape (60000, 784).
# Whereas x_test has 10,000 elements, each with 784 total pixels, so it will become shape (10000, 784).

x_train = x_train.reshape(60000, 784)
x_test = x_test.reshape(10000, 784)
x_train = x_train.astype('float32') # Use 32-bit precision when training a neural network; since the dataset fits easily in RAM, we convert to float immediately.
x_test = x_test.astype('float32')
x_train /= 255 # Each pixel has an intensity from 0 to 255
x_test /= 255

# Regarding the division by 255: this is the maximum value of a byte (the input features' type before the conversion to float32),
# so this ensures that the input features are scaled between 0.0 and 1.0.
# Reference: https://mgta.gmu.edu/courses/ml-with-python/handwrittenDigitRecognition.php

# Convert class vectors to binary class matrices (one-hot encoding)
num_classes = 10
y_train = np.eye(num_classes)[y_train] # np.eye returns a 2-D array with ones on the diagonal and zeros elsewhere; indexing it by label one-hot encodes each label.
y_test = np.eye(num_classes)[y_test] # Each row has a 1 in the position of the category that is present and 0 everywhere else.

# Define the model architecture
model = Sequential()
model.add(Dense(512, activation='relu', input_shape=(784,))) # The input_shape argument (a tuple) is passed to the first layer.
model.add(Dropout(0.2)) # Dropout ratio 20%
model.add(Dense(512, activation='relu')) # Returns a sequence of vectors of dimension 512
model.add(Dropout(0.2))
model.add(Dense(num_classes, activation='softmax'))

# Compile the model
model.compile(loss='categorical_crossentropy', # for a multi-class classification problem
optimizer=RMSprop(),
metrics=['accuracy'])

# Train the model
batch_size = 128 # Number of samples per gradient update
epochs = 20
history = model.fit(x_train, y_train,
batch_size=batch_size,
epochs=epochs,
verbose=1, # verbose=1 shows an animated progress bar, e.g. [==========]
validation_data=(x_test, y_test))
# validation_data means you provide the training set and validation set yourself;
# validation_split means you provide only a training set and Keras splits it into a training set and a validation set.
# Evaluate the model
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
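The categorical cross-entropy loss used above compares the one-hot label vector y with the softmax output \hat{y}; for a single sample with 10 classes:

L(y, \hat{y}) = -\sum_{k=1}^{10} y_k \log \hat{y}_k

Because y is one-hot, only the true class's term survives, so the loss is just the negative log of the probability the network assigns to the correct digit.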

========================================================================

3. Convolutional neural network (CNN): Use MNIST Fashion Dataset and create a classifier to classify fashion clothing into categories.

import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow import keras
import numpy as np

(x_train, y_train), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()

# There are 10 image classes in this dataset and each class has a mapping corresponding to the following labels:
# 0 T-shirt/top
# 1 Trouser
# 2 Pullover
# 3 Dress
# 4 Coat
# 5 Sandal
# 6 Shirt
# 7 Sneaker
# 8 Bag
# 9 Ankle boot
# Reference: https://ml-course.github.io/master/09%20-%20Convolutional%20Neural%20Networks.pdf

plt.imshow(x_train[1])

plt.imshow(x_train[0])

# Next, we will preprocess the data by scaling the pixel values to be between 0 and 1, and then reshaping the images to be 28x28 pixels.

x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

x_train = x_train.reshape(-1, 28, 28, 1)
x_test = x_test.reshape(-1, 28, 28, 1)

# 28, 28 comes from width and height; 1 comes from the number of channels.
# -1 means that the length in that dimension is inferred,
# based on the constraint that the number of elements in an ndarray or Tensor must remain the same when reshaped.
# Each image has 28*28 = 784 elements and there are n images, so TensorFlow can infer that -1 is n.
# This converts the training images array to a 4-dimensional array with sizes 60000, 28, 28, 1 for the 0th to 3rd dimensions.

x_train.shape
x_test.shape

y_train.shape

y_test.shape

# We will use a convolutional neural network (CNN) to classify the fashion items.
# The CNN will consist of multiple convolutional layers followed by max pooling,
# dropout, and dense layers. Here is the code for the model:

model = keras.Sequential([
keras.layers.Conv2D(32, (3,3), activation='relu', input_shape=(28,28,1)),
# 32 filters, randomly initialized; 3x3 is the filter size; 28,28,1 is the input image size.
# No zero-padding: every output is 2 pixels smaller in each dimension (28 -> 26).
# Parameter count: (3*3*1 filter weights + 1 bias) * 32 filters = 320.

keras.layers.MaxPooling2D((2,2)),
# Output is a 13x13 feature map with 32 channels (filters/depth).

keras.layers.Dropout(0.25),
# Reduce overfitting by dropping 25% of the neurons during training.

keras.layers.Conv2D(64, (3,3), activation='relu'),
# Deeper layers use 64 filters; 3x3 is the filter size.
# 13 - 3 + 1 = 11, so the output is 11x11 with 64 channels.
# Parameter count: (3*3*32 filter weights + 1 bias) * 64 filters = 18496.

keras.layers.MaxPooling2D((2,2)),
# Output is a 5x5 feature map with 64 channels.
keras.layers.Dropout(0.25),

keras.layers.Conv2D(128, (3,3), activation='relu'),
# Deeper layers use 128 filters; 3x3 is the filter size.
# 5 - 3 + 1 = 3, so the 28x28x1 input has been transformed to a 3x3x128 feature map.
# Parameter count: (3*3*64 filter weights + 1 bias) * 128 filters = 73856.

# To classify the images, we still need a Dense and a Softmax layer.
# We need to flatten the 3x3x128 feature map to a vector of size 1152.
# Reference: https://medium.com/@iamvarman/how-to-calculate-the-number-of-parameters-in-the-cnn-5bd55364d7ca

keras.layers.Flatten(),
keras.layers.Dense(128, activation='relu'),
# 128 nodes in the Dense layer.
# Parameter count: 1152*128 weights + 128 biases = 147584.

keras.layers.Dropout(0.25),
keras.layers.Dense(10, activation='softmax')
# 10 nodes in the final Dense layer.
# Parameter count: 128*10 weights + 10 biases = 1290.
])

model.summary()
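# Each parameter count above (and in the model.summary() output) follows the
# same rule for a Conv2D layer with a kh x kw kernel, C_in input channels and
# C_out filters:
# params = (kh * kw * C_in + 1) * C_out
# e.g. the second convolution: (3*3*32 + 1) * 64 = 18496.
# A Dense layer follows the analogous rule (inputs + 1) * units,
# e.g. (1152 + 1) * 128 = 147584.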

# Compile and Train the Model
# After defining the model, we will compile it and train it on the training data.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

history = model.fit(x_train, y_train, epochs=10, validation_data=(x_test, y_test))
# 1875 is the number of batches per epoch; by default a batch contains 32 samples, and 60000 / 32 = 1875.

# Finally, we will evaluate the performance of the model on the test data.

test_loss, test_acc = model.evaluate(x_test, y_test)

print('Test accuracy:', test_acc)

==========================================================================
