The document contains implementations of various parallel algorithms using OpenMP and CUDA, including Parallel Breadth First Search, Depth First Search, Bubble Sort, Merge Sort, and reduction operations for Min, Max, Sum, and Average. Additionally, it includes CUDA programs for matrix multiplication and vector addition. Each section provides code snippets and performance measurements for sequential and parallel executions.

Design and implement Parallel Breadth First Search and Depth First Search based on existing algorithms using OpenMP. Use a Tree or an undirected graph for BFS and DFS.

BFS

#include<iostream>
#include<omp.h>
#include<vector>
#include<stack>
#include<queue>
#include<chrono>
using namespace std;

class Graph{
public:
int vertices = 6;
int edges = 5;

vector<vector<int>> graph = {{1},{0,2,3},{1,4,5},{1,4},{2,3},{2}};
vector<bool> visited;

void addEdge(int a, int b){
graph[a].push_back(b);
graph[b].push_back(a);
}

void printGraph(){
for(int i = 0; i < vertices; i++){
cout << i << " -> ";
for(int j = 0; j < graph[i].size();j++){
cout << graph[i][j] << " ";
}
cout << endl;
}
}

void initialize_visited(){
visited.assign(vertices,false);
}

void bfs(int i){
queue<int> q;
q.push(i);
visited[i] = true;

while(!q.empty()){
int current = q.front();
q.pop();
cout << current << " ";

for (int j = 0; j < graph[current].size(); ++j) {
int neighbor = graph[current][j];
if (!visited[neighbor]) {
q.push(neighbor);
visited[neighbor] = true;
}
}
}
}

void parallel_bfs(int i){
queue<int> q;
q.push(i);
visited[i] = true;

// The queue is shared across threads, so every access to it is wrapped
// in a critical section; only the neighbor expansion runs in parallel.
while(!q.empty()){
int current;
#pragma omp critical
{ current = q.front(); q.pop(); }

cout << current << " ";

#pragma omp parallel for
for (int j = 0; j < graph[current].size(); j++) {
int neighbor = graph[current][j];
#pragma omp critical
if (!visited[neighbor]) {
q.push(neighbor);
visited[neighbor] = true;
}
}
}
}
};

int main(int argc, char const *argv[])
{
Graph g;
cout << "Adjacency List:\n";
g.printGraph();
g.initialize_visited();

cout << "Breadth First Search: \n";
auto start = chrono::high_resolution_clock::now();
g.bfs(0);
cout << endl;
auto end = chrono::high_resolution_clock::now();
cout << "Time taken: " << chrono::duration_cast<chrono::microseconds>(end - start).count() << " microseconds" << endl;

cout << "Parallel Breadth First Search: \n";
g.initialize_visited();
start = chrono::high_resolution_clock::now();
g.parallel_bfs(0);
cout << endl;
end = chrono::high_resolution_clock::now();
cout << "Time taken: " << chrono::duration_cast<chrono::microseconds>(end - start).count() << " microseconds" << endl;

return 0;
}
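A note on parallel_bfs above: because the queue is shared, nearly every operation goes through a critical section, which serializes most of the work. A common restructuring (a sketch under the assumption that the same graph and visited members are available, not the assignment's required method) is level-synchronous BFS: expand the whole frontier with one parallel for, letting each thread collect newly discovered vertices in a private buffer before merging.

void level_bfs(int start){
vector<int> frontier{start};
visited[start] = true;
while(!frontier.empty()){
for (int v : frontier) cout << v << " "; // print the current level
vector<int> next;
#pragma omp parallel
{
vector<int> local; // per-thread buffer: no locking while collecting
#pragma omp for nowait
for (int f = 0; f < (int)frontier.size(); f++) {
for (int neighbor : graph[frontier[f]]) {
#pragma omp critical
if (!visited[neighbor]) {
visited[neighbor] = true;
local.push_back(neighbor);
}
}
}
#pragma omp critical
next.insert(next.end(), local.begin(), local.end());
}
frontier = next; // the next level becomes the new frontier
}
}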

=========================================

DFS

#include<iostream>
#include<omp.h>
#include<vector>
#include<stack>
#include<queue>
#include<chrono>
using namespace std;

class Graph{
public:
int vertices = 6;
int edges = 5;
vector<vector<int>> graph = {{1},{0,2,3},{1,4,5},{1,4},{2,3},{2}};
vector<bool> visited;

void addEdge(int a, int b){
graph[a].push_back(b);
graph[b].push_back(a);
}

void printGraph(){
for(int i = 0; i < vertices; i++){
cout << i << " -> ";
for(int j = 0; j < graph[i].size();j++){
cout << graph[i][j] << " ";
}
cout << endl;
}
}

void initialize_visited(){
visited.assign(vertices,false);
}

void dfs(int i){
stack<int> s;
s.push(i);
visited[i] = true;
while(!s.empty()){
int current = s.top();
cout << current << " ";
s.pop();
for(int j = 0; j < graph[current].size();j++){
int neighbor = graph[current][j];
if(!visited[neighbor]){
s.push(neighbor);
visited[neighbor] = true;
}
}
}
}

void parallel_dfs(int i){
stack<int> s;
s.push(i);
visited[i] = true;
// The stack is shared across threads, so top/pop and push are protected
// by critical sections; only the neighbor expansion runs in parallel.
while(!s.empty()){
int current;
#pragma omp critical
{
current = s.top();
s.pop();
cout << current << " ";
}

#pragma omp parallel for
for(int j = 0; j < graph[current].size();j++){
int neighbor = graph[current][j];
#pragma omp critical
if(!visited[neighbor]){
s.push(neighbor);
visited[neighbor] = true;
}
}
}
}

};
int main()
{
Graph g;
cout << "Adjacency List:\n";
g.printGraph();
g.initialize_visited(); // set all vertices to unvisited (false)

cout << "Depth First Search: \n";
auto start = chrono::high_resolution_clock::now();
g.dfs(0);
cout << endl;
auto end = chrono::high_resolution_clock::now();
cout << "Time taken: " << chrono::duration_cast<chrono::microseconds>(end - start).count() << " microseconds" << endl;

cout << "Parallel Depth First Search: \n";
g.initialize_visited();
start = chrono::high_resolution_clock::now();
g.parallel_dfs(0);
cout << endl;
end = chrono::high_resolution_clock::now();
cout << "Time taken: " << chrono::duration_cast<chrono::microseconds>(end - start).count() << " microseconds" << endl;

return 0;
}

============================================

Write a program to implement Parallel Bubble Sort and Merge sort using OpenMP. Use existing
algorithms and measure the performance of sequential and parallel algorithms.

Bubble-Sort

#include <iostream>
#include <vector>
#include <algorithm>
#include <chrono>
#include <omp.h>

using namespace std;
using namespace std::chrono;
// ------------------------- Sequential Bubble Sort -------------------------
void sequential_bubble_sort(vector<int>& arr) {
for (int i = 0; i < arr.size() - 1; i++) {
for (int j = 0; j < arr.size() - i - 1; j++) {
if (arr[j] > arr[j + 1]) {
swap(arr[j], arr[j + 1]);
}
}
}
}

// ------------------------- Parallel Bubble Sort -------------------------
void parallel_bubble_sort(vector<int>& arr) {
for (int k = 0; k < arr.size(); k++) {
// Odd-even transposition sort: alternating phases ensure that no two
// threads ever compare the same pair, so the parallel swaps are race-free.
// When k is even, compare pairs starting at odd indices: (1,2), (3,4), ...
// When k is odd, compare pairs starting at even indices: (0,1), (2,3), ...
if (k % 2 == 0) {
#pragma omp parallel for
for (int i = 1; i < arr.size() - 1; i += 2) {
if (arr[i] > arr[i + 1]) {
swap(arr[i], arr[i + 1]);
}
}
} else {
#pragma omp parallel for
for (int i = 0; i < arr.size() - 1; i += 2) {
if (arr[i] > arr[i + 1]) {
swap(arr[i], arr[i + 1]);
}
}
}
}
}

// ------------------------- Main Function -------------------------
int main() {
vector<int> array;

for (int i = 0; i < 20; i++) {
array.push_back(rand() % 300);
}
cout << "\nOriginal Array:\n";
for (int val : array) {
cout << val << " ";
}

auto start = high_resolution_clock::now();
// sequential_bubble_sort(array);
parallel_bubble_sort(array);

auto end = high_resolution_clock::now();

auto duration = duration_cast<microseconds>(end - start);

cout << "\nTotal Time Required: " << duration.count() << " µs\n";

cout << "\n\nSorted Array:\n";
for (int val : array) {
cout << val << " ";
}

return 0;
}

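Note that main above times only the parallel version (the sequential call is commented out). A minimal sketch of measuring both, as the assignment asks, on identical copies of the input; array_copy is a hypothetical vector holding the original unsorted data, and std::is_sorted (from <algorithm>, already included) acts as a sanity check:

vector<int> a_seq = array_copy, a_par = array_copy; // array_copy is hypothetical: the original unsorted data

auto t0 = high_resolution_clock::now();
sequential_bubble_sort(a_seq);
auto t1 = high_resolution_clock::now();
parallel_bubble_sort(a_par);
auto t2 = high_resolution_clock::now();

cout << "Sequential: " << duration_cast<microseconds>(t1 - t0).count() << " microseconds\n";
cout << "Parallel: " << duration_cast<microseconds>(t2 - t1).count() << " microseconds\n";
cout << boolalpha << "Both sorted: "
<< (is_sorted(a_seq.begin(), a_seq.end()) && is_sorted(a_par.begin(), a_par.end())) << "\n";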
==================================================
Merge-Sort

#include <iostream>
#include <vector>
#include <algorithm>
#include <chrono>
#include <omp.h>

using namespace std;
using namespace std::chrono;

// ------------------------- Sequential Merge Sort -------------------------
void merge(vector<int>& array, int low, int mid, int high) {
vector<int> temp(high - low + 1);
int i = low, j = mid + 1, k = 0;
while (i <= mid && j <= high) {
if (array[i] <= array[j]) {
temp[k++] = array[i++];
} else {
temp[k++] = array[j++];
}
}

while (i <= mid) temp[k++] = array[i++];
while (j <= high) temp[k++] = array[j++];

for (i = low, k = 0; i <= high; i++, k++) {
array[i] = temp[k];
}
}

void sequential_merge_sort(vector<int>& array, int low, int high) {
if (low < high) {
int mid = (low + high) / 2;
sequential_merge_sort(array, low, mid);
sequential_merge_sort(array, mid + 1, high);
merge(array, low, mid, high);
}
}

// ------------------------- Parallel Merge Sort -------------------------
void parallel_merge_sort(vector<int>& array, int low, int high) {
if (low < high) {
int mid = (low + high) / 2;

#pragma omp parallel sections
{
#pragma omp section
parallel_merge_sort(array, low, mid);

#pragma omp section
parallel_merge_sort(array, mid + 1, high);
}

merge(array, low, mid, high);
}
}

// ------------------------- Main Function -------------------------
int main() {
vector<int> array;

// Initialize the array with random values
for (int i = 0; i < 20; i++) {
array.push_back(rand() % 300);
}

// Display original array
cout << "\nOriginal Array:\n";
for (int val : array) {
cout << val << " ";
}
cout << endl;

// ------------------------- Sequential Merge Sort -------------------------
vector<int> seq_array = array; // Copy of original array
auto start = high_resolution_clock::now();
sequential_merge_sort(seq_array, 0, seq_array.size() - 1);
auto end = high_resolution_clock::now();
auto duration = duration_cast<microseconds>(end - start);
cout << "\nSequential Merge Sort Time: " << duration.count() << " microseconds\n";

cout << "\nSorted Array (Sequential):\n";
for (int val : seq_array) {
cout << val << " ";
}
cout << endl;

// ------------------------- Parallel Merge Sort -------------------------
vector<int> par_array = array; // Copy of original array
start = high_resolution_clock::now();
parallel_merge_sort(par_array, 0, par_array.size() - 1);
end = high_resolution_clock::now();
duration = duration_cast<microseconds>(end - start);
cout << "\nParallel Merge Sort Time: " << duration.count() << " microseconds\n";

cout << "\nSorted Array (Parallel):\n";
for (int val : par_array) {
cout << val << " ";
}
cout << endl;

return 0;
}
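One caveat with parallel sections above: each recursive call opens a new parallel region, which can oversubscribe threads on deep recursion. A minimal alternative sketch (my assumption, not part of the assignment) using OpenMP 3.0 tasks with a depth cutoff, reusing merge and sequential_merge_sort from above:

void task_merge_sort(vector<int>& array, int low, int high, int depth) {
if (low < high) {
int mid = low + (high - low) / 2;
if (depth < 3) { // parallelize only the top few levels of the recursion
#pragma omp task shared(array)
task_merge_sort(array, low, mid, depth + 1);
#pragma omp task shared(array)
task_merge_sort(array, mid + 1, high, depth + 1);
#pragma omp taskwait // both halves must finish before merging
} else { // small subproblems: plain sequential recursion
sequential_merge_sort(array, low, mid);
sequential_merge_sort(array, mid + 1, high);
}
merge(array, low, mid, high);
}
}

// Called once from inside a parallel region:
// #pragma omp parallel
// #pragma omp single
// task_merge_sort(par_array, 0, par_array.size() - 1, 0);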
======================================================

3. Implement Min, Max, Sum and Average operations using Parallel Reduction.

#include <iostream>
#include <vector>
#include <omp.h>
#include <climits>

using namespace std;

void min_reduction(vector<int>& arr) {
int min_value = INT_MAX;
#pragma omp parallel for reduction(min: min_value)
for (int i = 0; i < arr.size(); i++) {
if (arr[i] < min_value) {
min_value = arr[i];
}
}
cout << "Minimum value: " << min_value << endl;
}

void max_reduction(vector<int>& arr) {
int max_value = INT_MIN;
#pragma omp parallel for reduction(max: max_value)
for (int i = 0; i < arr.size(); i++) {
if (arr[i] > max_value) {
max_value = arr[i];
}
}
cout << "Maximum value: " << max_value << endl;
}

void sum_reduction(vector<int>& arr) {
int sum = 0;
#pragma omp parallel for reduction(+: sum)
for (int i = 0; i < arr.size(); i++) {
sum += arr[i];
}
cout << "Sum: " << sum << endl;
}
void average_reduction(vector<int>& arr) {
int sum = 0;
#pragma omp parallel for reduction(+: sum)
for (int i = 0; i < arr.size(); i++) {
sum += arr[i];
}
cout << "Average: " << (double)sum / arr.size() << endl;
}

int main() {
vector<int> arr = {5, 2, 9, 1, 7, 6, 8, 3, 4};

min_reduction(arr);
max_reduction(arr);
sum_reduction(arr);
average_reduction(arr);

return 0;
}
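For intuition, here is roughly what reduction(+: sum) expands to, written out by hand (a sketch, not a drop-in replacement): each thread accumulates into a private copy, and the copies are combined once per thread at the end rather than once per element.

int manual_sum(vector<int>& arr) {
int sum = 0;
#pragma omp parallel
{
int local_sum = 0; // private per-thread partial sum
#pragma omp for
for (int i = 0; i < arr.size(); i++) {
local_sum += arr[i];
}
#pragma omp critical
sum += local_sum; // one combine per thread
}
return sum;
}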

=============================
Write a CUDA Program for: Matrix Multiplication

#include <cuda_runtime.h>
#include <iostream>

__global__ void matmul(int* A, int* B, int* C, int N) {
int Row = blockIdx.y*blockDim.y+threadIdx.y;
int Col = blockIdx.x*blockDim.x+threadIdx.x;
if (Row < N && Col < N) {
// Note: with the initialization in main and N = 512, the int accumulator
// overflows; switch to long long if exact products are needed.
int Pvalue = 0;
for (int k = 0; k < N; k++) {
Pvalue += A[Row*N+k] * B[k*N+Col];
}
C[Row*N+Col] = Pvalue;
}
}
int main() {
int N = 512;
int size = N * N * sizeof(int);
int* A, * B, * C;
int* dev_A, * dev_B, * dev_C;
cudaMallocHost(&A, size);
cudaMallocHost(&B, size);
cudaMallocHost(&C, size);
cudaMalloc(&dev_A, size);
cudaMalloc(&dev_B, size);
cudaMalloc(&dev_C, size);

// Initialize matrices A and B
for (int i = 0; i < N; i++) {
for (int j = 0; j < N; j++) {
A[i*N+j] = i*N+j;
B[i*N+j] = j*N+i;
}
}

cudaMemcpy(dev_A, A, size, cudaMemcpyHostToDevice);
cudaMemcpy(dev_B, B, size, cudaMemcpyHostToDevice);

dim3 dimBlock(16, 16);
dim3 dimGrid(N/dimBlock.x, N/dimBlock.y); // exact here because N = 512 is a multiple of 16

matmul<<<dimGrid, dimBlock>>>(dev_A, dev_B, dev_C, N);

cudaMemcpy(C, dev_C, size, cudaMemcpyDeviceToHost);

// Print the top-left 10x10 corner of the result
for (int i = 0; i < 10; i++) {
for (int j = 0; j < 10; j++) {
std::cout << C[i*N+j] << " ";
}
std::cout << std::endl;
}

// Free memory
cudaFree(dev_A);
cudaFree(dev_B);
cudaFree(dev_C);
cudaFreeHost(A);
cudaFreeHost(B);
cudaFreeHost(C);
return 0;
}
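The grid above divides evenly only because N = 512 is a multiple of 16. For general N, the usual pattern is ceiling division, with the kernel's existing bounds check handling the partial blocks; a sketch, together with a basic error check using the standard runtime calls cudaGetLastError and cudaDeviceSynchronize:

dim3 dimBlock(16, 16);
dim3 dimGrid((N + dimBlock.x - 1) / dimBlock.x, // round up so every element is covered
(N + dimBlock.y - 1) / dimBlock.y);
matmul<<<dimGrid, dimBlock>>>(dev_A, dev_B, dev_C, N);
cudaError_t err = cudaGetLastError(); // catches launch-configuration errors
if (err == cudaSuccess) err = cudaDeviceSynchronize(); // catches errors during execution
if (err != cudaSuccess) {
std::cout << "CUDA error: " << cudaGetErrorString(err) << std::endl;
}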

============================

Write a CUDA Program for: Addition of two large vectors

#include <iostream>
#include <cuda_runtime.h>

using namespace std;

__global__ void addVectors(int* A, int* B, int* C, int n)
{
int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i < n)
{
C[i] = A[i] + B[i];
}
}

int main()
{
int n = 1000000;
int* A, * B, * C;
int size = n * sizeof(int);

// Allocate pinned (page-locked) memory on the host
cudaMallocHost(&A, size);
cudaMallocHost(&B, size);
cudaMallocHost(&C, size);

// Initialize the vectors
for (int i = 0; i < n; i++)
{
A[i] = i;
B[i] = i * 2;
}
// Allocate memory on the device
int* dev_A, * dev_B, * dev_C;
cudaMalloc(&dev_A, size);
cudaMalloc(&dev_B, size);
cudaMalloc(&dev_C, size);
// Copy data from host to device
cudaMemcpy(dev_A, A, size, cudaMemcpyHostToDevice);
cudaMemcpy(dev_B, B, size, cudaMemcpyHostToDevice);

// Launch the kernel: one thread per element, rounding the number of blocks up
int blockSize = 256;
int numBlocks = (n + blockSize - 1) / blockSize;
addVectors<<<numBlocks, blockSize>>>(dev_A, dev_B, dev_C, n);

// Copy data from device to host
cudaMemcpy(C, dev_C, size, cudaMemcpyDeviceToHost);

// Print the first 10 results
for (int i = 0; i < 10; i++)
{
cout << C[i] << " ";
}
cout << endl;

// Free memory
cudaFree(dev_A);
cudaFree(dev_B);
cudaFree(dev_C);
cudaFreeHost(A);
cudaFreeHost(B);
cudaFreeHost(C);

return 0;
}
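The OpenMP programs above time their work with chrono; on the device side, CUDA events are the usual tool because they measure only the kernel's execution. A minimal sketch around the launch above:

cudaEvent_t start, stop;
cudaEventCreate(&start);
cudaEventCreate(&stop);
cudaEventRecord(start);
addVectors<<<numBlocks, blockSize>>>(dev_A, dev_B, dev_C, n);
cudaEventRecord(stop);
cudaEventSynchronize(stop); // block until the kernel has finished
float ms = 0.0f;
cudaEventElapsedTime(&ms, start, stop); // elapsed time between the two events
cout << "Kernel time: " << ms << " ms" << endl;
cudaEventDestroy(start);
cudaEventDestroy(stop);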

==============================================================================

Deep Learning

Assignment 1: Linear regression by using Deep Neural network: Implement Boston housing price
prediction problem by Linear regression using Deep Neural network. Use Boston House price
prediction dataset
# Import necessary libraries
import numpy as np # For numerical operations
import pandas as pd # For handling datasets
from sklearn.model_selection import train_test_split # Splitting data into train & test sets
from sklearn.linear_model import LinearRegression # Linear Regression Model
from sklearn.preprocessing import StandardScaler # Standardization of data
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score # Evaluation metrics

# Importing Keras (for Neural Network)
import keras
from keras.models import Sequential # To define a sequential model
from keras.layers import Dense # Fully connected layers

# Importing Google Colab file handling utility
from google.colab import files

# Uploading and Loading Dataset
uploaded = files.upload() # Opens file upload dialogue in Google Colab
boston = pd.read_csv("boston_house_prices.csv") # Reads CSV file into a DataFrame

# Selecting Features and Target
# Selecting 3 input features:
# 1. LSTAT (Percentage of lower status population)
# 2. RM (Average number of rooms per dwelling)
# 3. PTRATIO (Pupil-teacher ratio by town)
X = boston[['LSTAT', 'RM', 'PTRATIO']]

# Target variable: House Price
y = boston['PRICE']

# Splitting the Dataset into Training and Testing Sets
# 80% of data used for training, 20% for testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=4)

# Standardizing the Dataset (Feature Scaling)
# Standardization improves model performance by normalizing feature values
scaler = StandardScaler() # Initializing StandardScaler
X_train_scaled = scaler.fit_transform(X_train) # Fit and transform training data
X_test_scaled = scaler.transform(X_test) # Transform test data using the same scaler
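# For reference, StandardScaler standardizes each feature with the training
# set's statistics; the same mean and standard deviation are reused on the
# test set, which is why transform (not fit_transform) is called on X_test.
# Per feature: z = (x - mu_train) / sigma_train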

# Linear Regression Model
lr_model = LinearRegression() # Initializing Linear Regression Model
lr_model.fit(X_train_scaled, y_train) # Training the model using scaled training data
# Predicting house prices on test data
y_pred_lr = lr_model.predict(X_test_scaled)

# Evaluating Linear Regression Model
mse_lr = mean_squared_error(y_test, y_pred_lr) # Mean Squared Error
mae_lr = mean_absolute_error(y_test, y_pred_lr) # Mean Absolute Error
r2_lr = r2_score(y_test, y_pred_lr) # R² Score (Model accuracy measure)

# Displaying evaluation metrics
print("Linear Regression Model Evaluation:")
print(f"Mean Squared Error: {mse_lr}")
print(f"Mean Absolute Error: {mae_lr}")
print(f"R2 Score: {r2_lr}")

# Neural Network (ANN) Model
# Creating a Deep Learning Model using Keras Sequential API
model = Sequential([
Dense(128, activation='relu', input_dim=3), # Input layer (3 features) & first hidden layer (128 neurons)
Dense(64, activation='relu'), # Second hidden layer with 64 neurons
Dense(32, activation='relu'), # Third hidden layer with 32 neurons
Dense(16, activation='relu'), # Fourth hidden layer with 16 neurons
Dense(1) # Output layer (Predicting a single value - House Price)
])

# Compiling the model
model.compile(optimizer='adam', loss='mse', metrics=['mae'])
# Optimizer: Adam (Adaptive Learning Rate Optimization)
# Loss function: Mean Squared Error (MSE) - Suitable for regression problems
# Metric: Mean Absolute Error (MAE) - Helps measure performance

# Training the Neural Network
history = model.fit(X_train_scaled, y_train, epochs=100, validation_split=0.05, verbose=1)
# Training for 100 epochs
# Using 5% of training data as validation set to monitor overfitting
# `verbose=1` displays detailed training progress
# Sample output:
# Epoch 1/100
# 12/12 - 4s 26ms/step - loss: 547.8306 - mae: 21.6359 - val_loss: 445.7750 - val_mae: 20.1572
# Epoch 2/100
# 12/12 - 0s 8ms/step - loss: 550.6208 - mae: 21.6498 - val_loss: 403.5681 - val_mae: 19.1308
# Epoch 3/100
# 12/12 - 0s 8ms/step - loss: 433.7596 -
# Evaluating the Neural Network Model
y_pred_nn = model.predict(X_test_scaled) # Predicting house prices on test data
mse_nn, mae_nn = model.evaluate(X_test_scaled, y_test) # Evaluating model performance
# Displaying Neural Network Evaluation Metrics
print("\nNeural Network Model Evaluation:")
print(f"Mean Squared Error: {mse_nn}")
print(f"Mean Absolute Error: {mae_nn}")

# House Price Prediction for New Data
new_data = np.array([[0.1, 10.0, 5.0]])
# New input values: LSTAT=0.1, RM=10.0, PTRATIO=5.0

new_data_scaled = scaler.transform(new_data)
# Applying the same standardization as training data

# Predicting price using trained neural network model
prediction = model.predict(new_data_scaled)

# Displaying the predicted house price
print("\nPredicted House Price:", prediction[0][0])

# Output:
# 1/1 - 0s 36ms/step
# Predicted House Price: 79.24278
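For reference, the metrics reported above, with y_i the true prices, \hat{y}_i the predictions, \bar{y} the mean true price, and n test samples:

\mathrm{MSE} = \frac{1}{n}\sum_{i=1}^{n}(y_i - \hat{y}_i)^2
\mathrm{MAE} = \frac{1}{n}\sum_{i=1}^{n}\lvert y_i - \hat{y}_i\rvert
R^2 = 1 - \frac{\sum_{i=1}^{n}(y_i - \hat{y}_i)^2}{\sum_{i=1}^{n}(y_i - \bar{y})^2}

An R^2 of 1 means perfect prediction; 0 means no better than always predicting the mean price.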

=====================================

2. Multiclass classification using Deep Neural Networks: Example: Use the OCR letter recognition dataset.

import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.datasets import mnist
import matplotlib.pyplot as plt
from sklearn import metrics

# Load the OCR dataset
# The MNIST dataset is a built-in dataset provided by Keras.
# It consists of 70,000 28x28 grayscale images, each of which displays a single handwritten digit from 0 to 9.
# The training set consists of 60,000 images, while the test set has 10,000 images.

(x_train, y_train), (x_test, y_test) = mnist.load_data()

# x_train and x_test are our arrays of images while y_train and y_test are our arrays of labels for each image.
# The first tuple contains the training set features (x_train) and the training set labels (y_train).
# The second tuple contains the testing set features (x_test) and the testing set labels (y_test).
# For example, if the image shows a handwritten 7, then the label will be the integer 7.

plt.imshow(x_train[0], cmap='gray') # imshow() simply displays an image.
plt.show() # cmap maps a colormap onto the values found in the array passed as the first argument.

# The image appears black and white and each axis of the plot ranges from 0 to 28.
# This is because of the format that all the images in the dataset have:
# 1. All the images are grayscale, meaning they only contain black, white and grey.
# 2. The images are 28 pixels by 28 pixels in size (28x28).

print(x_train[0])

# The image data is just a [28][28] array of digits (28 rows of 28 values). You can almost make out a 5 from the pattern of the digits in the array.
# A grayscale pixel is stored as a digit between 0 and 255, where 0 is black, 255 is white, and values in between are different shades of gray.
# Therefore, each value in the [28][28] array tells the computer which color to put in that position when we display the actual image.

# reformat our X_train array and our X_test array because they do not have the correct shape.
# Reshape the data to fit the model
print("X_train shape", x_train.shape)
print("y_train shape", y_train.shape)
print("X_test shape", x_test.shape)
print("y_test shape", y_test.shape)
# Here you can see that for the training sets we have 60,000 elements and the testing sets have 10,000 elements.
# y_train and y_test only have 1-dimensional shapes because they are just the labels of each element.
# x_train and x_test have 3-dimensional shapes because they have a width and height (28x28 pixels) for each element.
# (60000, 28, 28): training data; the 1st value is how many images we have, the 2nd and 3rd are the pixel dimensions (28x28). Pixel values vary between 0 and 255.
# (60000,): training labels, integers from 0-9 with dtype uint8.
# (10000, 28, 28): testing data of grayscale images, dtype uint8, pixel values between 0 and 255.
# (10000,): testing labels, integers from 0-9 with dtype uint8.

# X: Training data of shape (n_samples, n_features)
# y: Training label values of shape (n_samples, n_labels)
# A 2D array of 28 pixels by 28 pixels becomes 784 pixels (28 squared).
# Remember that x_train has 60,000 elements, each with 784 total pixels, so it will become shape (60000, 784).
# Whereas x_test has 10,000 elements, each with 784 total pixels, so it will become shape (10000, 784).

x_train = x_train.reshape(60000, 784)
x_test = x_test.reshape(10000, 784)
x_train = x_train.astype('float32') # Use 32-bit precision when training a neural network; since the dataset fits easily in RAM, we convert to float immediately.
x_test = x_test.astype('float32')
x_train /= 255 # Each pixel has an intensity from 0 to 255
x_test /= 255

# Regarding the division by 255: this is the maximum value of a byte (the input features' type before the conversion to float32),
# so this ensures that the input features are scaled between 0.0 and 1.0.
# Reference: https://mgta.gmu.edu/courses/ml-with-python/handwrittenDigitRecognition.php

# Convert class vectors to binary class matrices (one-hot encoding)
num_classes = 10
y_train = np.eye(num_classes)[y_train] # np.eye returns a 2-D array with ones on the diagonal and zeros elsewhere; indexing it by label one-hot encodes each label.
y_test = np.eye(num_classes)[y_test] # Each row has a 1 in the position of the category that is present and 0 everywhere else.

# Define the model architecture
model = Sequential()
model.add(Dense(512, activation='relu', input_shape=(784,))) # The input_shape argument (a tuple) is passed to the first layer.
model.add(Dropout(0.2)) # Dropout ratio 20%
model.add(Dense(512, activation='relu')) # Returns a sequence of vectors of dimension 512
model.add(Dropout(0.2))
model.add(Dense(num_classes, activation='softmax'))

# Compile the model
model.compile(loss='categorical_crossentropy', # for a multi-class classification problem
optimizer=RMSprop(),
metrics=['accuracy'])

# Train the model
batch_size = 128 # Number of samples per gradient update
epochs = 20
history = model.fit(x_train, y_train,
batch_size=batch_size,
epochs=epochs,
verbose=1, # verbose=1 shows an animated progress bar, e.g. [==========]
validation_data=(x_test, y_test))
# validation_data means you provide the training set and validation set yourself;
# validation_split means you provide only a training set and Keras splits it into a training set and a validation set.
# Evaluate the model
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
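The categorical cross-entropy loss used above compares the one-hot label vector y with the softmax output \hat{y}; for a single sample with 10 classes:

L(y, \hat{y}) = -\sum_{k=1}^{10} y_k \log \hat{y}_k

Because y is one-hot, only the true class's term survives, so the loss is just the negative log of the probability the network assigns to the correct digit.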

========================================================================

3. Convolutional neural network (CNN): Use MNIST Fashion Dataset and create a classifier to classify fashion clothing into categories.

import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow import keras
import numpy as np

(x_train, y_train), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()

# There are 10 image classes in this dataset and each class has a mapping corresponding to the following labels:
# 0 T-shirt/top
# 1 Trouser
# 2 Pullover
# 3 Dress
# 4 Coat
# 5 Sandal
# 6 Shirt
# 7 Sneaker
# 8 Bag
# 9 Ankle boot
# Reference: https://ml-course.github.io/master/09%20-%20Convolutional%20Neural%20Networks.pdf

plt.imshow(x_train[1])

plt.imshow(x_train[0])

# Next, we will preprocess the data by scaling the pixel values to be between 0 and 1, and then reshaping the images to be 28x28 pixels.

x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

x_train = x_train.reshape(-1, 28, 28, 1)
x_test = x_test.reshape(-1, 28, 28, 1)

# 28, 28 comes from width and height; 1 comes from the number of channels.
# -1 means that the length in that dimension is inferred,
# based on the constraint that the number of elements in an ndarray or Tensor must remain the same when reshaped.
# Each image has 28*28 = 784 elements and there are n images, so TensorFlow can infer that -1 is n.
# This converts the training images array to a 4-dimensional array with sizes 60000, 28, 28, 1 for the 0th to 3rd dimensions.

x_train.shape
x_test.shape

y_train.shape

y_test.shape

# We will use a convolutional neural network (CNN) to classify the fashion items.
# The CNN will consist of multiple convolutional layers followed by max pooling,
# dropout, and dense layers. Here is the code for the model:

model = keras.Sequential([
keras.layers.Conv2D(32, (3,3), activation='relu', input_shape=(28,28,1)),
# 32 filters, randomly initialized; 3x3 is the filter size; 28,28,1 is the input image size.
# No zero-padding: every output is 2 pixels smaller in each dimension (28 -> 26).
# Parameter count: (3*3*1 filter weights + 1 bias) * 32 filters = 320.

keras.layers.MaxPooling2D((2,2)),
# Output is a 13x13 feature map with 32 channels (filters/depth).

keras.layers.Dropout(0.25),
# Reduce overfitting by dropping 25% of the neurons during training.

keras.layers.Conv2D(64, (3,3), activation='relu'),
# Deeper layers use 64 filters; 3x3 is the filter size.
# 13 - 3 + 1 = 11, so the output is 11x11 with 64 channels.
# Parameter count: (3*3*32 filter weights + 1 bias) * 64 filters = 18496.

keras.layers.MaxPooling2D((2,2)),
# Output is a 5x5 feature map with 64 channels.
keras.layers.Dropout(0.25),

keras.layers.Conv2D(128, (3,3), activation='relu'),
# Deeper layers use 128 filters; 3x3 is the filter size.
# 5 - 3 + 1 = 3, so the 28x28x1 input has been transformed to a 3x3x128 feature map.
# Parameter count: (3*3*64 filter weights + 1 bias) * 128 filters = 73856.

# To classify the images, we still need a Dense and a Softmax layer.
# We need to flatten the 3x3x128 feature map to a vector of size 1152.
# Reference: https://medium.com/@iamvarman/how-to-calculate-the-number-of-parameters-in-the-cnn-5bd55364d7ca

keras.layers.Flatten(),
keras.layers.Dense(128, activation='relu'),
# 128 nodes in the Dense layer.
# Parameter count: 1152*128 weights + 128 biases = 147584.

keras.layers.Dropout(0.25),
keras.layers.Dense(10, activation='softmax')
# 10 nodes in the final Dense layer.
# Parameter count: 128*10 weights + 10 biases = 1290.
])

model.summary()
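# Each parameter count above (and in the model.summary() output) follows the
# same rule for a Conv2D layer with a kh x kw kernel, C_in input channels and
# C_out filters:
# params = (kh * kw * C_in + 1) * C_out
# e.g. the second convolution: (3*3*32 + 1) * 64 = 18496.
# A Dense layer follows the analogous rule (inputs + 1) * units,
# e.g. (1152 + 1) * 128 = 147584.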

# Compile and Train the Model
# After defining the model, we will compile it and train it on the training data.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

history = model.fit(x_train, y_train, epochs=10, validation_data=(x_test, y_test))
# 1875 is the number of batches per epoch; by default a batch contains 32 samples, and 60000 / 32 = 1875.

# Finally, we will evaluate the performance of the model on the test data.

test_loss, test_acc = model.evaluate(x_test, y_test)

print('Test accuracy:', test_acc)

==========================================================================
