
Stanford CS231n Course Notes -- Linear Classifiers (Assignment 1 Code Implementation)


I recently worked through Stanford's Winter 2016 CS231n course and learned a great deal about deep learning and convolutional neural networks. While studying, I mainly relied on the in-depth explanations of the course material written by Zhihu readers, which are excellent and well worth reading; I have also collected these write-ups in a Zhihu column so that other readers can refer to them. After finishing all of the lecture videos and the accompanying notes, I worked through the practical part, including the three assignments. Going forward, I plan to keep publishing articles that combine summarized notes with code implementations.

This post implements the three core parts of Assignment 1: linear_svm, softmax, and linear_classifier. The code files below can be used directly; for the concrete step-by-step workflow, refer to the two Jupyter notebooks svm.ipynb and softmax.ipynb. For NumPy operations on arrays, vectors, and matrices, see the tutorial referenced in the course notes; it covers operations we will need below, such as selecting elements at specific positions in a matrix.
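
For instance, NumPy's integer-array indexing, which the vectorized code below leans on heavily, picks one element per row in a single operation. A minimal illustration (the small array and labels here are made up for the example):

    import numpy as np

    scores = np.array([[3.0, 1.0, 2.0],
                       [0.5, 4.0, 1.5]])      # 2 samples, 3 classes
    y = np.array([0, 1])                      # correct class index for each sample
    # Select scores[0, y[0]] and scores[1, y[1]] in one shot:
    correct = scores[np.arange(scores.shape[0]), y]
    print(correct)                            # [3. 4.]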

1. linear_svm.py
    import numpy as np

    def svm_loss_naive(W, X, y, reg):
        dW = np.zeros(W.shape)    # initialize the gradient as zero
        # compute the loss and the gradient
        num_classes = W.shape[1]
        num_train = X.shape[0]
        loss = 0.0
        # loop over every sample in the training set
        for i in range(num_train):
            # compute the scores: dot product of the sample with W
            scores = X[i].dot(W)
            # record the score of the correct class
            correct_class_score = scores[y[i]]
            # loop over the C classes
            for j in range(num_classes):
                if j == y[i]:
                    continue
                # hinge loss and gradient formula (derived in the previous post)
                margin = scores[j] - correct_class_score + 1   # note delta = 1
                if margin > 0:
                    loss += margin
                    dW[:, y[i]] += -X[i, :]     # gradient w.r.t. the correct class
                    dW[:, j] += X[i, :]         # gradient w.r.t. the wrong class
        # Right now the loss is a sum over all training examples, but we want it
        # to be an average instead, so we divide by num_train.
        loss /= num_train
        dW /= num_train
        # Add regularization to the loss and the gradient.
        loss += 0.5 * reg * np.sum(W * W)
        dW += reg * W
        return loss, dW

    # compute the loss and the gradient with vectorized operations
    def svm_loss_vectorized(W, X, y, reg):
        loss = 0.0
        dW = np.zeros(W.shape)    # initialize the gradient as zero
        # score matrix, N by C
        scores = X.dot(W)
        num_train = X.shape[0]
        num_classes = W.shape[1]
        # correct-class score for every sample: pick element y[i] from row i
        scores_correct = scores[np.arange(num_train), y]              # shape (N,)
        scores_correct = np.reshape(scores_correct, (num_train, 1))   # N by 1
        # difference between every score and the correct-class score
        margins = scores - scores_correct + 1.0     # N by C
        # the margin at the correct class is 0
        margins[np.arange(num_train), y] = 0.0
        # negative margins mean the sample is already classified correctly, so clamp to 0
        margins[margins <= 0] = 0.0
        loss += np.sum(margins) / num_train
        loss += 0.5 * reg * np.sum(W * W)
        # compute the gradient
        margins[margins > 0] = 1.0
        row_sum = np.sum(margins, axis=1)            # shape (N,)
        margins[np.arange(num_train), y] = -row_sum
        dW += np.dot(X.T, margins) / num_train + reg * W   # D by C

        return loss, dW
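
As a quick sanity check (not part of the assignment files), the naive and vectorized versions can be compared on small random data; the shapes, seed, and regularization strength below are made up for illustration:

    import numpy as np

    np.random.seed(0)
    D, C, N = 3073, 10, 50                    # CIFAR-10-like dimensions (with bias)
    W = 0.001 * np.random.randn(D, C)
    X = np.random.randn(N, D)
    y = np.random.randint(C, size=N)

    loss_naive, grad_naive = svm_loss_naive(W, X, y, reg=1e-5)
    loss_vec, grad_vec = svm_loss_vectorized(W, X, y, reg=1e-5)
    print('loss difference: %e' % abs(loss_naive - loss_vec))                 # should be ~0
    print('gradient difference: %e' % np.linalg.norm(grad_naive - grad_vec))  # should be ~0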

2. softmax.py

    import numpy as np

    # Same idea as the SVM: a naive (loop) version and a vectorized version.
    def softmax_loss_naive(W, X, y, reg):
        # Initialize the loss and gradient to zero.
        loss = 0.0
        dW = np.zeros_like(W)         # D by C
        dW_each = np.zeros_like(W)
        num_train, dim = X.shape
        num_class = W.shape[1]
        f = X.dot(W)                  # N by C
        # For numeric stability, subtract the largest score in each row
        # (see the previous post).
        f_max = np.reshape(np.max(f, axis=1), (num_train, 1))   # N by 1
        # normalized probabilities
        prob = np.exp(f - f_max) / np.sum(np.exp(f - f_max), axis=1, keepdims=True)  # N by C
        y_trueClass = np.zeros_like(prob)
        y_trueClass[np.arange(num_train), y] = 1.0
        for i in range(num_train):
            for j in range(num_class):
                loss += -(y_trueClass[i, j] * np.log(prob[i, j]))
                dW_each[:, j] = -(y_trueClass[i, j] - prob[i, j]) * X[i, :]
            dW += dW_each
        loss /= num_train
        loss += 0.5 * reg * np.sum(W * W)
        dW /= num_train
        dW += reg * W

        return loss, dW

    def softmax_loss_vectorized(W, X, y, reg):
        # Initialize the loss and gradient to zero.
        loss = 0.0
        dW = np.zeros_like(W)         # D by C
        num_train, dim = X.shape

        f = X.dot(W)                  # N by C
        # subtract the row-wise maximum for numeric stability
        f_max = np.reshape(np.max(f, axis=1), (num_train, 1))   # N by 1
        prob = np.exp(f - f_max) / np.sum(np.exp(f - f_max), axis=1, keepdims=True)
        y_trueClass = np.zeros_like(prob)
        y_trueClass[np.arange(num_train), y] = 1.0               # N by C
        loss += -np.sum(y_trueClass * np.log(prob)) / num_train + 0.5 * reg * np.sum(W * W)
        dW += -np.dot(X.T, y_trueClass - prob) / num_train + reg * W

        return loss, dW
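
The analytic gradient can also be verified against a centered-difference numerical gradient. The helper below is a minimal sketch written for this post (the assignment ships its own grad_check_sparse utility; the shapes in the usage example are made up):

    import numpy as np

    def numerical_grad_check(f, W, analytic_grad, num_checks=5, h=1e-5):
        # Compare (f(W+h) - f(W-h)) / (2h) with the analytic gradient at a few
        # randomly chosen coordinates of W.
        for _ in range(num_checks):
            ix = tuple(np.random.randint(n) for n in W.shape)
            old_value = W[ix]
            W[ix] = old_value + h
            fxph = f(W)
            W[ix] = old_value - h
            fxmh = f(W)
            W[ix] = old_value                 # restore the original value
            grad_numeric = (fxph - fxmh) / (2 * h)
            grad_analytic = analytic_grad[ix]
            rel_error = abs(grad_numeric - grad_analytic) / \
                        (abs(grad_numeric) + abs(grad_analytic) + 1e-12)
            print('numerical: %f analytic: %f, relative error: %e'
                  % (grad_numeric, grad_analytic, rel_error))

    # Usage with made-up shapes:
    W = 0.001 * np.random.randn(3073, 10)
    X = np.random.randn(20, 3073)
    y = np.random.randint(10, size=20)
    loss, grad = softmax_loss_vectorized(W, X, y, reg=0.0)
    numerical_grad_check(lambda w: softmax_loss_vectorized(w, X, y, reg=0.0)[0], W, grad)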

3. linear_classifier.py

    import numpy as np
    from linear_svm import *
    from softmax import *

    class LinearClassifier(object):

        def __init__(self):
            self.W = None

        def train(self, X, y, learning_rate=1e-3, reg=1e-5, num_iters=100,
                  batch_size=200, verbose=True):
            num_train, dim = X.shape
            # assume y takes values 0...K-1 where K is the number of classes
            num_classes = np.max(y) + 1
            if self.W is None:
                # lazily initialize W
                self.W = 0.001 * np.random.randn(dim, num_classes)   # D by C

            # Run stochastic gradient descent (mini-batch) to optimize W
            loss_history = []
            for it in range(num_iters):
                # sample a mini-batch of indices; sampling with replacement
                # (replace=True) would be slightly faster
                sample_index = np.random.choice(num_train, batch_size, replace=False)
                X_batch = X[sample_index, :]   # batch_size by D
                y_batch = y[sample_index]      # shape (batch_size,)
                # evaluate loss and gradient
                loss, grad = self.loss(X_batch, y_batch, reg)
                loss_history.append(loss)

                # perform parameter update
                self.W += -learning_rate * grad
                if verbose and it % 100 == 0:
                    print('Iteration %d / %d: loss %f' % (it, num_iters, loss))

            return loss_history

        def predict(self, X):
            # scores is N by C; take the argmax over classes for each row
            y_pred = np.argmax(X.dot(self.W), axis=1)   # shape (N,)
            return y_pred

        def loss(self, X_batch, y_batch, reg):
            pass

    class LinearSVM(LinearClassifier):
        """A subclass that uses the multiclass SVM loss function."""

        def loss(self, X_batch, y_batch, reg):
            return svm_loss_vectorized(self.W, X_batch, y_batch, reg)

    class Softmax(LinearClassifier):
        """A subclass that uses the softmax + cross-entropy loss function."""

        def loss(self, X_batch, y_batch, reg):
            return softmax_loss_vectorized(self.W, X_batch, y_batch, reg)
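
A rough usage sketch of the classes above on random toy data (the shapes and hyperparameters are made up for illustration; in the notebooks X_train and y_train come from CIFAR-10):

    import numpy as np

    X_toy = np.random.randn(500, 3073)        # 500 fake samples, CIFAR-10-like dimension
    y_toy = np.random.randint(10, size=500)   # fake labels for 10 classes

    svm = LinearSVM()
    loss_history = svm.train(X_toy, y_toy, learning_rate=1e-7, reg=5e4,
                             num_iters=300, batch_size=200, verbose=False)
    y_pred = svm.predict(X_toy)
    print('toy training accuracy: %f' % np.mean(y_pred == y_toy))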

The code in the notebooks mainly performs hyperparameter tuning: a grid search over learning rate and regularization strength, selecting the model with the best validation accuracy.

    # softmax.ipynb

    from cs231n.classifiers import Softmax
    results = {}
    best_val = -1
    best_softmax = None
    learning_rates = [5e-6, 1e-7, 5e-7]
    regularization_strengths = [1e4, 5e4, 1e5]
    params = [(x, y) for x in learning_rates for y in regularization_strengths]
    for lrate, regular in params:
        softmax = Softmax()
        loss_hist = softmax.train(X_train, y_train, learning_rate=lrate, reg=regular,
                                  num_iters=700, verbose=True)
        y_train_pred = softmax.predict(X_train)
        accuracy_train = np.mean(y_train == y_train_pred)
        y_val_pred = softmax.predict(X_val)
        accuracy_val = np.mean(y_val == y_val_pred)
        results[(lrate, regular)] = (accuracy_train, accuracy_val)
        if best_val < accuracy_val:
            best_val = accuracy_val
            best_softmax = softmax

    # Print out results.
    for lr, reg in sorted(results):
        train_accuracy, val_accuracy = results[(lr, reg)]
        print('lr %e reg %e train accuracy: %f val accuracy: %f' % (
            lr, reg, train_accuracy, val_accuracy))

    print('best validation accuracy achieved during cross-validation: %f' % best_val)


    # svm.ipynb

    from cs231n.classifiers import LinearSVM
    learning_rates = [1e-7, 5e-5]
    regularization_strengths = [5e4, 1e5]

    results = {}
    best_val = -1    # The highest validation accuracy that we have seen so far.
    best_svm = None  # The LinearSVM object that achieved the highest validation accuracy.

    iters = 1000
    for lr in learning_rates:
        for rs in regularization_strengths:
            svm = LinearSVM()
            svm.train(X_train, y_train, learning_rate=lr, reg=rs, num_iters=iters)
            y_train_pred = svm.predict(X_train)
            acc_train = np.mean(y_train == y_train_pred)
            y_val_pred = svm.predict(X_val)
            acc_val = np.mean(y_val == y_val_pred)
            results[(lr, rs)] = (acc_train, acc_val)
            if best_val < acc_val:
                best_val = acc_val
                best_svm = svm

    # Print out results.
    for lr, reg in sorted(results):
        train_accuracy, val_accuracy = results[(lr, reg)]
        print('lr %e reg %e train accuracy: %f val accuracy: %f' % (
            lr, reg, train_accuracy, val_accuracy))

    print('best validation accuracy achieved during cross-validation: %f' % best_val)
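
After the grid search, the best model is typically evaluated once on the test split; assuming X_test and y_test were prepared by the notebook's data-loading cells, that looks like:

    y_test_pred = best_svm.predict(X_test)
    test_accuracy = np.mean(y_test == y_test_pred)
    print('linear SVM on raw pixels: final test set accuracy %f' % test_accuracy)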

Finally, a few screenshots taken while running the code:

(screenshots of the training output)
