【CS231n】Two Layer Neural Network: Code Implementation

1. Code Implementation

GitHub: https://github.com/GIGpanda/CS231n

The code consists of two .py files: two_layer_net.py, which drives the experiments (gradient checking on toy data, training on CIFAR-10, and hyperparameter tuning), and neural_net.py, which implements the TwoLayerNet model itself.

1.1 two_layer_net.py

1.1.1 Initialization

# two layer net

# A bit of setup
from __future__ import print_function
import numpy as np
import matplotlib.pyplot as plt
from cs231n.classifiers.neural_net import TwoLayerNet
from cs231n.gradient_check import eval_numerical_gradient
from cs231n.data_utils import load_CIFAR10
from cs231n.vis_utils import visualize_grid

plt.rcParams['figure.figsize'] = (10.0, 8.0)  # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

def rel_error(x, y):
    """ returns relative error """
    return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))

# Create a small net and some toy data to check your implementations.
# Note that we set the random seed for repeatable experiments.
input_size = 4
hidden_size = 10
num_classes = 3
num_inputs = 5

def init_toy_model():
    np.random.seed(0)
    return TwoLayerNet(input_size, hidden_size, num_classes, std=1e-1)

def init_toy_data():
    np.random.seed(1)
    X = 10 * np.random.randn(num_inputs, input_size)
    y = np.array([0, 1, 2, 2, 1])
    return X, y

net = init_toy_model()
X, y = init_toy_data()

1.1.2 Computing and checking the loss

scores = net.loss(X)
print('Your scores:')
print(scores)
print()
print('correct scores:')
correct_scores = np.asarray([
  [-0.81233741, -1.27654624, -0.70335995],
  [-0.17129677, -1.18803311, -0.47310444],
  [-0.51590475, -1.01354314, -0.8504215 ],
  [-0.15419291, -0.48629638, -0.52901952],
  [-0.00618733, -0.12435261, -0.15226949]])
print(correct_scores)
print()

# The difference should be very small. We get < 1e-7
print('Difference between your scores and correct scores:')
print(np.sum(np.abs(scores - correct_scores)))

loss, _ = net.loss(X, y, reg=0.05)
correct_loss = 1.30378789133

# should be very small, we get < 1e-12
print('Difference between your loss and correct loss:')
print(np.sum(np.abs(loss - correct_loss)))

1.1.3 Computing and checking the gradients

# Use numeric gradient checking to check your implementation of the backward pass.
# If your implementation is correct, the difference between the numeric and
# analytic gradients should be less than 1e-8 for each of W1, W2, b1, and b2.

loss, grads = net.loss(X, y, reg=0.05)

# these should all be less than 1e-8 or so
for param_name in grads:
    f = lambda W: net.loss(X, y, reg=0.05)[0]
    param_grad_num = eval_numerical_gradient(f, net.params[param_name], verbose=False)
    print('%s max relative error: %e' % (param_name, rel_error(param_grad_num, grads[param_name])))

# Train the toy network with SGD and make sure the training loss goes down.
net = init_toy_model()
stats = net.train(X, y, X, y,
                  learning_rate=1e-1, reg=5e-6,
                  num_iters=100, verbose=False)

print('Final training loss: ', stats['loss_history'][-1])

# plot the loss history
plt.plot(stats['loss_history'])
plt.xlabel('iteration')
plt.ylabel('training loss')
plt.title('Training Loss history')
plt.show()

1.1.4 Loading the data

def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000):
    """
    Load the CIFAR-10 dataset from disk and perform preprocessing to prepare
    it for the two-layer neural net classifier. These are the same steps as
    we used for the SVM, but condensed to a single function.
    """
    # Load the raw CIFAR-10 data
    cifar10_dir = 'cs231n/datasets/cifar-10-batches-py'

    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # Subsample the data
    mask = list(range(num_training, num_training + num_validation))
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = list(range(num_training))
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = list(range(num_test))
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Normalize the data: subtract the mean image
    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image

    # Reshape data to rows
    X_train = X_train.reshape(num_training, -1)
    X_val = X_val.reshape(num_validation, -1)
    X_test = X_test.reshape(num_test, -1)

    return X_train, y_train, X_val, y_val, X_test, y_test


# Clean up variables to prevent loading the data multiple times (which may cause memory issues)
try:
    del X_train, y_train
    del X_test, y_test
    print('Clear previously loaded data.')
except:
    pass

# Invoke the above function to get our data.
X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
print('Train data shape: ', X_train.shape)
print('Train labels shape: ', y_train.shape)
print('Validation data shape: ', X_val.shape)
print('Validation labels shape: ', y_val.shape)
print('Test data shape: ', X_test.shape)
print('Test labels shape: ', y_test.shape)
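For reference, with the default split (49,000 train / 1,000 validation / 1,000 test) and each 32x32x3 image flattened into 3,072 features, the printed shapes should come out as:

Train data shape:  (49000, 3072)
Train labels shape:  (49000,)
Validation data shape:  (1000, 3072)
Validation labels shape:  (1000,)
Test data shape:  (1000, 3072)
Test labels shape:  (1000,)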

1.1.5 Training the model

input_size = 32 * 32 * 3  # CIFAR-10 images are 32x32x3, flattened to 3072 features
hidden_size = 50
num_classes = 10
net = TwoLayerNet(input_size, hidden_size, num_classes)

# Train the network
stats = net.train(X_train, y_train, X_val, y_val,
                  num_iters=1000, batch_size=200,
                  learning_rate=1e-4, learning_rate_decay=0.95,
                  reg=0.25, verbose=True)

# Predict on the validation set
val_acc = (net.predict(X_val) == y_val).mean()
print('Validation accuracy: ', val_acc)

# Plot the loss function and train / validation accuracies
plt.subplot(2, 1, 1)
plt.plot(stats['loss_history'])
plt.title('Loss history')
plt.xlabel('Iteration')
plt.ylabel('Loss')

plt.subplot(2, 1, 2)
plt.plot(stats['train_acc_history'], label='train')
plt.plot(stats['val_acc_history'], label='val')
plt.title('Classification accuracy history')
plt.xlabel('Epoch')
plt.ylabel('Classification accuracy')
plt.legend()
plt.show()

1.1.6 Visualizing the weights

# Visualize the weights of the network

def show_net_weights(net):
    W1 = net.params['W1']
    # Each column of W1 is a 3072-dimensional vector, so it can be reshaped
    # into a 32x32x3 image and displayed as a template.
    W1 = W1.reshape(32, 32, 3, -1).transpose(3, 0, 1, 2)
    plt.imshow(visualize_grid(W1, padding=3).astype('uint8'))
    plt.gca().axis('off')
    plt.show()

show_net_weights(net)

1.1.7 Tuning the hyperparameters

best_net = None  # store the best model into this

#################################################################################
# TODO: Tune hyperparameters using the validation set. Store your best trained  #
# model in best_net.                                                            #
#                                                                               #
# To help debug your network, it may help to use visualizations similar to the  #
# ones we used above; these visualizations will have significant qualitative    #
# differences from the ones we saw above for the poorly tuned network.          #
#                                                                               #
# Tweaking hyperparameters by hand can be fun, but you might find it useful to  #
# write code to sweep through possible combinations of hyperparameters          #
# automatically like we did on the previous exercises.                          #
#################################################################################
# Your code
input_size = 32 * 32 * 3
num_classes = 10
best_accuracy = 0

# Randomly sample a few hyperparameter configurations and keep the best model.
for epoch in range(3):
    num_iters = np.random.randint(800, 1200)
    batch_size = np.random.randint(160, 240)
    learning_rate = 10 ** np.random.uniform(-5, -3)
    reg = 0.25
    hidden_size = np.random.randint(100, 150)
    net = TwoLayerNet(input_size, hidden_size, num_classes)
    stats = net.train(X_train, y_train, X_val, y_val,
                      num_iters=num_iters, batch_size=batch_size,
                      learning_rate=learning_rate, learning_rate_decay=0.95,
                      reg=reg, verbose=True)
    val_acc = (net.predict(X_val) == y_val).mean()
    if val_acc > best_accuracy:
        best_accuracy = val_acc
        best_net = net

#################################################################################
#                               END OF YOUR CODE                                #
#################################################################################

# visualize the weights of the best network
show_net_weights(best_net)

test_acc = (best_net.predict(X_test) == y_test).mean()
print('Test accuracy: ', test_acc)
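The random search above tries only three configurations and keeps no record of what was tried. A minimal sketch of a slightly more systematic sweep, assuming the same TwoLayerNet API and the X_train / X_val arrays loaded earlier (the grid values here are illustrative, not from the original post):

# Sweep a small grid and log every configuration with its validation accuracy,
# so the search is reproducible and easy to compare afterwards.
results = {}
best_accuracy, best_net = 0, None
for hidden_size in [50, 100, 150]:
    for learning_rate in [1e-4, 5e-4, 1e-3]:
        net = TwoLayerNet(input_size, hidden_size, num_classes)
        net.train(X_train, y_train, X_val, y_val,
                  num_iters=1000, batch_size=200,
                  learning_rate=learning_rate, learning_rate_decay=0.95,
                  reg=0.25, verbose=False)
        val_acc = (net.predict(X_val) == y_val).mean()
        results[(hidden_size, learning_rate)] = val_acc
        if val_acc > best_accuracy:
            best_accuracy, best_net = val_acc, net

for (hs, lr), acc in sorted(results.items()):
    print('hidden_size %3d, lr %.0e -> val accuracy %.3f' % (hs, lr, acc))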

1.2 neural_net.py

1.2.1 Initialization

from __future__ import print_function

import numpy as np
import matplotlib.pyplot as plt


class TwoLayerNet(object):
  """
  A two-layer fully-connected neural network. The net has an input dimension of
  N, a hidden layer dimension of H, and performs classification over C classes.
  We train the network with a softmax loss function and L2 regularization on the
  weight matrices. The network uses a ReLU nonlinearity after the first fully
  connected layer.

  In other words, the network has the following architecture:

  input - fully connected layer - ReLU - fully connected layer - softmax

  The outputs of the second fully-connected layer are the scores for each class.
  """

  def __init__(self, input_size, hidden_size, output_size, std=1e-4):
    """
    Initialize the model. Weights are initialized to small random values and
    biases are initialized to zero. Weights and biases are stored in the
    variable self.params, which is a dictionary with the following keys:

    W1: First layer weights; has shape (D, H)
    b1: First layer biases; has shape (H,)
    W2: Second layer weights; has shape (H, C)
    b2: Second layer biases; has shape (C,)

    Inputs:
    - input_size: The dimension D of the input data.
    - hidden_size: The number of neurons H in the hidden layer.
    - output_size: The number of classes C.
    """
    self.params = {}
    self.params['W1'] = std * np.random.randn(input_size, hidden_size)
    self.params['b1'] = np.zeros(hidden_size)
    self.params['W2'] = std * np.random.randn(hidden_size, output_size)
    self.params['b2'] = np.zeros(output_size)
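As a quick sanity check on the initialization (a sketch, not part of the original file, using the toy sizes from two_layer_net.py), the parameter shapes come out as follows:

net = TwoLayerNet(input_size=4, hidden_size=10, output_size=3, std=1e-1)
for name, param in sorted(net.params.items()):
    print(name, param.shape)
# W1 (4, 10)
# W2 (10, 3)
# b1 (10,)
# b2 (3,)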

1.2.2 Computing the loss and gradients

  def loss(self, X, y=None, reg=0.0):
    """
    Compute the loss and gradients for a two layer fully connected neural
    network.

    Inputs:
    - X: Input data of shape (N, D). Each X[i] is a training sample.
    - y: Vector of training labels. y[i] is the label for X[i], and each y[i] is
      an integer in the range 0 <= y[i] < C. This parameter is optional; if it
      is not passed then we only return scores, and if it is passed then we
      instead return the loss and gradients.
    - reg: Regularization strength.

    Returns:
    If y is None, return a matrix scores of shape (N, C) where scores[i, c] is
    the score for class c on input X[i].

    If y is not None, instead return a tuple of:
    - loss: Loss (data loss and regularization loss) for this batch of training
      samples.
    - grads: Dictionary mapping parameter names to gradients of those parameters
      with respect to the loss function; has the same keys as self.params.
    """
    # Unpack variables from the params dictionary
    W1, b1 = self.params['W1'], self.params['b1']
    W2, b2 = self.params['W2'], self.params['b2']
    N, D = X.shape

    # Compute the forward pass
    scores = None
    #############################################################################
    # TODO: Perform the forward pass, computing the class scores for the input. #
    # Store the result in the scores variable, which should be an array of      #
    # shape (N, C).                                                             #
    #############################################################################
    # input - fully connected layer - ReLU - fully connected layer - softmax
    # (N, D) - (D, H) - max(0, x) - (H, C) - softmax
    y1 = X.dot(W1) + b1   # hidden pre-activations, shape (N, H)
    y1[y1 < 0] = 0        # ReLU
    y2 = y1.dot(W2) + b2  # class scores, shape (N, C)
    scores = y2
    #############################################################################
    #                              END OF YOUR CODE                             #
    #############################################################################

    # If the targets are not given then jump out, we're done
    if y is None:
      return scores

    # Compute the loss
    loss = None
    #############################################################################
    # TODO: Finish the forward pass, and compute the loss. This should include  #
    # both the data loss and L2 regularization for W1 and W2. Store the result  #
    # in the variable loss, which should be a scalar. Use the Softmax           #
    # classifier loss.                                                          #
    #############################################################################
    expy2 = np.exp(y2)
    expmom = np.sum(expy2, axis=1)   # softmax denominator for each sample

    loss = -np.log(expy2[np.arange(N), y] / expmom)
    loss = np.sum(loss)
    loss /= N
    loss += reg * (np.sum(W1 * W1) + np.sum(W2 * W2))
    #############################################################################
    #                              END OF YOUR CODE                             #
    #############################################################################

    # Backward pass: compute gradients
    grads = {}
    #############################################################################
    # TODO: Compute the backward pass, computing the derivatives of the weights #
    # and biases. Store the results in the grads dictionary. For example,       #
    # grads['W1'] should store the gradient on W1, and be a matrix of same size #
    #############################################################################
    # Gradient of the softmax loss with respect to the scores: p - 1{j == y}.
    expmom = np.reshape(np.repeat(expmom, expy2.shape[1]), expy2.shape)
    dW = expy2 / expmom
    dW[np.arange(N), y] -= 1
    dW /= N

    # Backpropagate through the second layer and the ReLU.
    tempW = dW.dot(W2.T)
    # Use y1 <= 0 here: y1 has already been clamped by the ReLU, so the entries
    # that were zeroed out are exactly those with y1 == 0 (y1 < 0 never matches).
    tempW[y1 <= 0] = 0
    grads['W1'] = X.T.dot(tempW) + 2 * reg * W1
    grads['b1'] = np.sum(tempW, axis=0)
    grads['W2'] = y1.T.dot(dW) + 2 * reg * W2
    grads['b2'] = np.sum(dW, axis=0)
    #############################################################################
    #                              END OF YOUR CODE                             #
    #############################################################################

    return loss, grads
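For reference, the data loss computed above is the softmax (cross-entropy) loss over the scores s = y2,

L_i = -\log\left( \frac{e^{s_{y_i}}}{\sum_j e^{s_j}} \right), \qquad \frac{\partial L_i}{\partial s_j} = \frac{e^{s_j}}{\sum_k e^{s_k}} - \mathbf{1}[j = y_i],

which is exactly what dW = expy2 / expmom followed by dW[np.arange(N), y] -= 1 computes, before averaging over the N samples and adding the 2 * reg * W terms from the L2 regularization to the weight gradients.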

1.2.3 Training the model

  def train(self, X, y, X_val, y_val,
            learning_rate=1e-3, learning_rate_decay=0.95,
            reg=5e-6, num_iters=100,
            batch_size=200, verbose=False):
    """
    Train this neural network using stochastic gradient descent.

    Inputs:
    - X: A numpy array of shape (N, D) giving training data.
    - y: A numpy array of shape (N,) giving training labels; y[i] = c means that
      X[i] has label c, where 0 <= c < C.
    - X_val: A numpy array of shape (N_val, D) giving validation data.
    - y_val: A numpy array of shape (N_val,) giving validation labels.
    - learning_rate: Scalar giving learning rate for optimization.
    - learning_rate_decay: Scalar giving factor used to decay the learning rate
      after each epoch.
    - reg: Scalar giving regularization strength.
    - num_iters: Number of steps to take when optimizing.
    - batch_size: Number of training examples to use per step.
    - verbose: boolean; if true print progress during optimization.
    """
    num_train = X.shape[0]
    iterations_per_epoch = max(num_train / batch_size, 1)

    # Use SGD to optimize the parameters in self.model
    loss_history = []
    train_acc_history = []
    val_acc_history = []

    for it in range(num_iters):
      X_batch = None
      y_batch = None

      #########################################################################
      # TODO: Create a random minibatch of training data and labels, storing  #
      # them in X_batch and y_batch respectively.                             #
      #########################################################################
      # Sample batch_size indices with replacement.
      mask = np.random.choice(len(X), batch_size, replace=True)
      X_batch = X[mask]
      y_batch = y[mask]
      #########################################################################
      #                             END OF YOUR CODE                          #
      #########################################################################

      # Compute loss and gradients using the current minibatch
      loss, grads = self.loss(X_batch, y=y_batch, reg=reg)
      loss_history.append(loss)

      #########################################################################
      # TODO: Use the gradients in the grads dictionary to update the         #
      # parameters of the network (stored in the dictionary self.params)      #
      # using stochastic gradient descent. You'll need to use the gradients   #
      # stored in the grads dictionary defined above.                         #
      #########################################################################
      # Vanilla SGD update: step against the gradient.
      self.params['W1'] += -grads['W1'] * learning_rate
      self.params['b1'] += -grads['b1'] * learning_rate
      self.params['W2'] += -grads['W2'] * learning_rate
      self.params['b2'] += -grads['b2'] * learning_rate
      #########################################################################
      #                             END OF YOUR CODE                          #
      #########################################################################

      if verbose and it % 100 == 0:
        print('iteration %d / %d: loss %f' % (it, num_iters, loss))

      # Every epoch, check train and val accuracy and decay learning rate.
      if it % iterations_per_epoch == 0:
        # Check accuracy
        train_acc = (self.predict(X_batch) == y_batch).mean()
        val_acc = (self.predict(X_val) == y_val).mean()
        train_acc_history.append(train_acc)
        val_acc_history.append(val_acc)

        # Decay learning rate
        learning_rate *= learning_rate_decay

    return {
      'loss_history': loss_history,
      'train_acc_history': train_acc_history,
      'val_acc_history': val_acc_history,
    }
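One detail worth spelling out: an "epoch" here is defined through iterations_per_epoch = max(num_train / batch_size, 1). With the CIFAR-10 settings used in section 1.1.5 (num_train = 49000, batch_size = 200) this gives

49000 / 200 = 245

so the train/validation accuracies are recorded and the learning rate is multiplied by learning_rate_decay once every 245 iterations.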

1.2.4 Prediction

  def predict(self, X):
    """
    Use the trained weights of this two-layer network to predict labels for
    data points. For each data point we predict scores for each of the C
    classes, and assign each data point to the class with the highest score.

    Inputs:
    - X: A numpy array of shape (N, D) giving N D-dimensional data points to
      classify.

    Returns:
    - y_pred: A numpy array of shape (N,) giving predicted labels for each of
      the elements of X. For all i, y_pred[i] = c means that X[i] is predicted
      to have class c, where 0 <= c < C.
    """
    y_pred = None

    ###########################################################################
    # TODO: Implement this function; it should be VERY simple!                #
    ###########################################################################
    # Forward pass only; the class with the highest score is the prediction.
    y1 = X.dot(self.params['W1']) + self.params['b1']
    y1[y1 <= 0] = 0
    y2 = y1.dot(self.params['W2']) + self.params['b2']
    y_pred = y2.argmax(axis=1)
    ###########################################################################
    #                              END OF YOUR CODE                           #
    ###########################################################################

    return y_pred
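A small note on the design of predict: softmax is a monotonically increasing function of each score, so

\arg\max_c \, \mathrm{softmax}(s)_c = \arg\max_c \, s_c,

and taking the argmax of the raw scores y2 yields exactly the same labels as taking the argmax of the softmax probabilities; there is no need to exponentiate or normalize at prediction time.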
