Source code for models.nn

import numpy as np

class NN:
    """
    A fully connected neural network with one hidden layer, trained with
    batch gradient descent for multi-class classification.

    :param input: The input data matrix of shape (n, m), where n is the
        number of features and m is the number of samples (one column per
        sample).
    :type input: numpy.ndarray
    :param labels: The integer class labels of shape (m,).
    :type labels: numpy.ndarray
    :param num_features: The number of features in the input data.
    :type num_features: int
    :param num_classes: The number of classes in the classification task.
    :type num_classes: int
    :param hidden_size: The number of units in the hidden layer.
    :type hidden_size: int
    :param alpha: The learning rate for gradient descent.
    :type alpha: float
    :param epochs: The number of epochs for training.
    :type epochs: int
    """

    def __init__(self, input, labels, num_features, num_classes,
                 hidden_size, alpha, epochs):
        self.input = input
        self.labels = labels
        self.num_features = num_features
        self.num_classes = num_classes
        self.hidden_size = hidden_size
        self.alpha = alpha
        self.epochs = epochs
        self.outputs = []
        self.params = []
        self.gradients = []
        self.one_hot_y = None
        self.pred = None
        self.l = None
        self.pred_label = None
    def init_params(self):
        """
        Initialize the parameters (weights and biases) for the neural network.

        Weights are drawn from a zero-mean Gaussian scaled by
        sqrt(1 / fan_in) (Xavier-style initialization); biases start at zero.

        :return: Tuple containing the weights and biases for the hidden and
            output layers.
        :rtype: tuple
        """
        w1 = np.random.randn(self.hidden_size, self.num_features) * np.sqrt(1 / self.num_features)
        b1 = np.zeros((self.hidden_size, 1))
        w2 = np.random.randn(self.num_classes, self.hidden_size) * np.sqrt(1 / self.hidden_size)
        b2 = np.zeros((self.num_classes, 1))
        self.params = w1, b1, w2, b2
        return self.params
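    # For concreteness, with num_features = 4, hidden_size = 16 and
    # num_classes = 3 (illustrative values, not defaults of this module),
    # the shapes produced above are:
    #
    #     w1: (16, 4)   b1: (16, 1)   w2: (3, 16)   b2: (3, 1)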
    def forward(self):
        """
        Perform a forward pass through the neural network.

        :return: The predicted probabilities for each class.
        :rtype: numpy.ndarray
        """
        w1, b1, w2, b2 = self.params
        z1 = np.dot(w1, self.input) + b1    # pre-activation of the hidden layer
        a1 = self.ReLU(z1)                  # hidden-layer activation
        z2 = np.dot(w2, a1) + b2            # pre-activation of the output layer
        self.pred = self.softmax(z2)        # class probabilities, shape (num_classes, m)
        self.outputs = z1, a1, z2
        return self.pred
    def ReLU(self, z):
        """
        Apply the Rectified Linear Unit (ReLU) activation function
        element-wise to the input.

        :param z: The input to the ReLU function.
        :type z: numpy.ndarray
        :return: The output of the ReLU function.
        :rtype: numpy.ndarray
        """
        return np.maximum(z, 0)
    def softmax(self, z):
        """
        Apply the softmax activation function column-wise to the input.

        The maximum logit is subtracted from each column before
        exponentiating; this leaves the result unchanged but avoids
        overflow for large inputs.

        :param z: The input to the softmax function.
        :type z: numpy.ndarray
        :return: The output of the softmax function.
        :rtype: numpy.ndarray
        """
        exp_z = np.exp(z - np.max(z, axis=0, keepdims=True))
        return exp_z / np.sum(exp_z, axis=0, keepdims=True)
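    # A minimal worked example of the method above for a single two-class
    # column of logits (values rounded):
    #
    #     z = np.array([[1.0], [2.0]])
    #     softmax(z)  ->  [[0.2689], [0.7311]]    # each column sums to 1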
    def ReLU_deriv(self, z):
        """
        Compute the derivative of the ReLU function.

        :param z: The input to the ReLU function.
        :type z: numpy.ndarray
        :return: The derivative of the ReLU function (a boolean array that
            behaves as 0/1 under arithmetic).
        :rtype: numpy.ndarray
        """
        return z > 0
    def one_hot(self):
        """
        Convert the integer target labels into one-hot encoded format.

        :return: The one-hot encoded labels, of shape (num_classes, m).
        :rtype: numpy.ndarray
        """
        self.one_hot_y = np.zeros((self.num_classes, self.labels.size))
        self.one_hot_y[self.labels, np.arange(self.labels.size)] = 1
        return self.one_hot_y
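    # A minimal worked example of the encoding above, assuming three samples
    # and num_classes = 3 (rows index classes, columns index samples):
    #
    #     labels = np.array([0, 2, 1])
    #     one_hot()  ->  [[1., 0., 0.],
    #                     [0., 0., 1.],
    #                     [0., 1., 0.]]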
    def cat_cross_entropy(self):
        """
        Calculate the categorical cross-entropy loss between the predicted
        and actual labels.

        :return: The categorical cross-entropy loss, averaged over samples.
        :rtype: float
        """
        eps = 1e-10  # guards against log(0) when a predicted probability underflows
        self.l = -np.sum(self.one_hot_y * np.log(self.pred + eps)) / self.one_hot_y.shape[1]
        return self.l
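    # In equation form, the loss above is
    #
    #     L = -(1/m) * sum_i sum_k  y[k, i] * log(pred[k, i])
    #
    # where y is the one-hot matrix and m the number of samples; only the
    # true-class term of each column contributes to the sum.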
    def accuracy(self):
        """
        Calculate the accuracy of the model.

        :return: The accuracy of the model as a percentage.
        :rtype: float
        """
        self.pred_label = np.argmax(self.pred, axis=0)
        acc = np.sum(self.pred_label == self.labels) / self.labels.size * 100
        return acc
    def backward(self):
        """
        Perform a backward pass through the neural network to compute
        gradients of the loss with respect to the parameters.

        :return: Tuple containing the gradients of the weights and biases
            for the hidden and output layers.
        :rtype: tuple
        """
        z1, a1, _ = self.outputs
        _, _, w2, _ = self.params
        m = self.labels.size
        dz2 = self.pred - self.one_hot_y               # softmax + cross-entropy gradient
        dw2 = np.dot(dz2, a1.T) / m
        db2 = np.sum(dz2, axis=1, keepdims=True) / m
        dz1 = np.dot(w2.T, dz2) * self.ReLU_deriv(z1)  # backprop through ReLU
        dw1 = np.dot(dz1, self.input.T) / m
        db1 = np.sum(dz1, axis=1, keepdims=True) / m
        self.gradients = dw1, db1, dw2, db2
        return self.gradients
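    # The expressions above follow from the chain rule; in matrix form:
    #
    #     dZ2 = A2 - Y                        (softmax with cross-entropy)
    #     dW2 = (1/m) dZ2 A1^T,   db2 = (1/m) sum_cols dZ2
    #     dZ1 = W2^T dZ2 * ReLU'(Z1)          (* is element-wise)
    #     dW1 = (1/m) dZ1 X^T,    db1 = (1/m) sum_cols dZ1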
    def update(self):
        """
        Update the weights and biases of the neural network using
        gradient descent.

        :return: Tuple containing the updated weights and biases for the
            hidden and output layers.
        :rtype: tuple
        """
        w1, b1, w2, b2 = self.params
        dw1, db1, dw2, db2 = self.gradients
        w1 = w1 - self.alpha * dw1
        b1 = b1 - self.alpha * db1
        w2 = w2 - self.alpha * dw2
        b2 = b2 - self.alpha * db2
        self.params = w1, b1, w2, b2
        return self.params
    def gradient_descent(self):
        """
        Perform gradient descent to train the neural network.

        :return: Tuple containing the final weights and biases for the
            hidden and output layers.
        :rtype: tuple
        """
        self.one_hot_y = self.one_hot()
        for epoch in range(self.epochs):
            self.pred = self.forward()
            acc = self.accuracy()
            self.l = self.cat_cross_entropy()
            self.gradients = self.backward()
            self.params = self.update()
            print(f"Epoch: {epoch}")
            print(f"Accuracy: {acc}%")
            print(f"Loss: {self.l}\n")
        return self.params
    def model(self):
        """
        Run the entire neural network model: initialize the parameters,
        then train with gradient descent.

        :return: Tuple containing the final weights and biases for the
            hidden and output layers.
        :rtype: tuple
        """
        self.params = self.init_params()
        self.params = self.gradient_descent()
        return self.params
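A minimal usage sketch, assuming the class is importable as models.nn.NN; the
synthetic dataset, shapes, and hyperparameters below are illustrative, not
part of the module. Note the feature matrix is laid out as
(num_features, num_samples), one column per sample:

    import numpy as np
    from models.nn import NN

    rng = np.random.default_rng(0)
    X = rng.standard_normal((4, 200))    # 4 features, 200 samples
    y = rng.integers(0, 3, size=200)     # integer labels for 3 classes

    net = NN(input=X, labels=y, num_features=4, num_classes=3,
             hidden_size=16, alpha=0.1, epochs=100)
    w1, b1, w2, b2 = net.model()         # trains and returns the parameters

    probs = net.forward()                # class probabilities, shape (3, 200)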