Advertisement

山东大学计算机科学与技术学院机器学习实验-实验4-逻辑回归 二分类 Python实现

阅读量:

School of Computer Science and Technology, Shandong University — Machine Learning Lab, Experiment 4: logistic regression for binary classification, implemented in Python

复制代码
    import numpy as np
    import matplotlib.pyplot as plt
    # NOTE(review): this import is immediately shadowed by the local `hessian`
    # defined below and appears unused; safe to delete once confirmed.
    from sympy import hessian

    # Load the training data: 80 samples, 2 exam-score features, binary label.
    x = np.loadtxt('ex4x.dat')  # shape (80, 2)
    y = np.loadtxt('ex4y.dat')  # shape (80,)

    # Split the samples by class label for plotting.
    x_class1 = np.array([p for p, label in zip(x, y) if label])
    x_class0 = np.array([p for p, label in zip(x, y) if not label])

    fig1 = plt.figure(0)
    plt.scatter(x_class0[:, 0], x_class0[:, 1], marker='^')
    plt.scatter(x_class1[:, 0], x_class1[:, 1], marker='s')
    #plt.show()

    # Parameter vector [bias, w1, w2]; float dtype so Newton updates are exact.
    # (The original int array plus a discarded `reshape(3, 1)` was dead code.)
    theata = np.zeros(3)

    # Z-score standardize the features; keep an unscaled design matrix for plots.
    mu = np.mean(x, axis=0)
    sigma = np.std(x, axis=0)
    x_ori = np.c_[np.ones(len(x)), x]
    x = (x - mu) / sigma  # vectorized over both columns at once
    x_b = np.c_[np.ones(len(x)), x]

    loss_set = []   # per-iteration training loss, appended to by nd()
    iter_times = 0  # Newton iteration counter, incremented by nd()
    def sigmoid(y):
        """Element-wise logistic function 1 / (1 + exp(-y)), numerically stable.

        The naive form overflows ``np.exp`` for large negative ``y`` (raising a
        RuntimeWarning and producing exactly 0, which then makes ``loss`` take
        ``log(0)``). Computing via ``exp(-|y|)`` keeps the exponent non-positive
        so it never overflows; both branches are algebraically identical to the
        naive form. Works on scalars and arrays alike.
        """
        z = np.exp(-np.abs(y))
        return np.where(y >= 0, 1.0 / (1.0 + z), z / (1.0 + z))
    def loss(h, y):
        """Mean binary cross-entropy between predictions ``h`` and labels ``y``.

        ``h`` holds sigmoid outputs in (0, 1); ``y`` holds 0/1 labels of the
        same shape. Returns a scalar: the average of
        -[y*log(h) + (1-y)*log(1-h)] over all samples.
        """
        per_sample = y * np.log(h) + (1.0 - y) * np.log(1.0 - h)
        return np.mean(-per_sample)
    def hessian(x, theta):
        """Average Hessian of the logistic loss: X^T diag(h*(1-h)) X / m.

        ``x`` is the (m, n) design matrix and ``theta`` the (n,) parameter
        vector; returns an (n, n) matrix. The row-wise weights h*(1-h) are
        applied to X^T via broadcasting instead of building the diagonal
        matrix explicitly.
        """
        h = sigmoid(np.dot(x, theta))
        weights = h * (1.0 - h)
        weighted = x.T * weights           # scales each column of x.T
        return np.dot(weighted, x) / x.shape[0]
    def nd(x, y, theata, max_iter=1500, tol=1e-9):
        """Fit logistic-regression parameters with Newton's method.

        Parameters: ``x`` the (m, n) design matrix (bias column included),
        ``y`` the (m,) 0/1 labels, ``theata`` the initial (n,) parameter
        vector. ``max_iter`` and ``tol`` generalize the previously hard-coded
        iteration cap and convergence threshold (defaults preserve the old
        behavior). Returns the fitted parameter vector.

        Side effects (kept for the plotting code below): appends each
        iteration's loss to the module-level ``loss_set`` and increments the
        module-level ``iter_times`` counter.
        """
        global iter_times  # declared once, not inside the loop
        for _ in range(max_iter):
            iter_times += 1
            h = sigmoid(np.dot(x, theata))
            loss_set.append(loss(h, y))
            # Stop when the loss has effectively plateaued.
            if len(loss_set) > 1 and abs(loss_set[-1] - loss_set[-2]) <= tol:
                break
            gradient = np.dot(h - y, x) / len(x)
            # Solve H @ step = gradient directly; cheaper and more numerically
            # stable than forming the explicit inverse. (Debug prints removed.)
            theata = theata - np.linalg.solve(hessian(x, theata), gradient)
        return theata
    # Fit the model with Newton's method on the standardized design matrix.
    theata=nd(x_b,y,theata)
    
    # Plot the training-loss curve over Newton iterations.
    fig2=plt.figure(1)
    plt.plot(np.linspace(0,len(loss_set),len(loss_set)),loss_set,marker="o",color="b")
    plt.show()
    # Re-plot the two classes and overlay the learned decision boundary
    # theta0 + theta1*x1 + theta2*x2 = 0.
    fig3=plt.figure(2)
    plt.scatter(x_class0[:,0],x_class0[:,1],marker='^')
    plt.scatter(x_class1[:,0],x_class1[:,1],marker='s')
    x_axis = x_ori[:,1]
    # Solve the boundary for x2 in standardized feature space...
    y_axis = (-np.dot(theata[0],x_b[:,0]) - np.dot(theata[1],x_b[:,1]))/theata[2]
    # ...then map x2 back to the original scale. NOTE(review): x_axis uses the
    # unstandardized first feature while the boundary was computed from the
    # standardized one — confirm the plotted line matches the intended boundary.
    y_axis = y_axis*sigma[1] + mu[1]
    plt.plot(x_axis, y_axis, label='{}x + {} = {}y'.format(theata[1], theata[0], theata[2]))
    plt.show()
    print(theata)
    print(iter_times)
    # Prediction: score a new applicant with exam marks (20, 80),
    # standardized with the training-set mean/std, bias term prepended.
    
    x_pre=np.array([1,(20-mu[0])/sigma[0],(80-mu[1])/sigma[1]])
    x_pre=x_pre.reshape(1,3)
    y_pre=np.dot(x_pre,theata)
    print(y_pre)
    print("不被录取的概率为"+str(1-sigmoid(y_pre)))

全部评论 (0)

还没有任何评论哟~