
Machine Learning: Logistic Regression

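Logistic regression turns a linear regression score into a probability: the linear combination $\theta^{T} x_b$ is passed through the sigmoid function

$$\sigma(t) = \frac{1}{1 + e^{-t}}, \qquad \hat{p} = \sigma\!\left(\theta^{T} x_b\right),$$

which maps every real number into $(0, 1)$ with $\sigma(0) = 0.5$; a sample is classified as 1 when $\hat{p} \ge 0.5$ and as 0 otherwise. The snippet below plots the curve: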
    """The sigmoid function in logistic regression."""
    import numpy as np
    import matplotlib.pyplot as plt


    def sigmoid(t):
        return 1 / (1 + np.exp(-t))


    x = np.linspace(-10, 10, 500)
    y = sigmoid(x)

    plt.plot(x, y)
    plt.show()

Result: the familiar S-shaped curve, approaching 0 for large negative inputs and 1 for large positive ones (plot omitted).
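A practical note not in the original code: for inputs below roughly -710, np.exp(-t) overflows float64 and NumPy emits a RuntimeWarning (the result still rounds to 0.0). A numerically stable variant, sketched here as an optional replacement, never exponentiates a positive number; SciPy ships the same idea as scipy.special.expit:

    import numpy as np

    def sigmoid_stable(t):
        """Sigmoid computed without exponentiating large positive numbers."""
        t = np.asarray(t, dtype=float)
        out = np.empty_like(t)
        pos = t >= 0
        out[pos] = 1.0 / (1.0 + np.exp(-t[pos]))  # exponent <= 0: no overflow
        exp_t = np.exp(t[~pos])                   # here t < 0, so exp_t < 1
        out[~pos] = exp_t / (1.0 + exp_t)         # algebraically equal to 1/(1+e^-t)
        return out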

The gradient of the logistic regression loss function:
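With $m$ training samples, $X_b$ the feature matrix with a prepended column of ones, and $\hat{p}^{(i)} = \sigma\!\left(X_b^{(i)}\theta\right)$, the cross-entropy loss is

$$J(\theta) = -\frac{1}{m}\sum_{i=1}^{m}\left[y^{(i)}\log \hat{p}^{(i)} + \left(1 - y^{(i)}\right)\log\left(1 - \hat{p}^{(i)}\right)\right].$$

Using $\sigma'(t) = \sigma(t)\,(1 - \sigma(t))$, the gradient collapses to the same vectorized form as in linear regression:

$$\nabla J(\theta) = \frac{1}{m}\,X_b^{T}\left(\sigma(X_b\theta) - y\right).$$

This is exactly what the J and dJ functions below compute.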

The logistic regression algorithm:

    import numpy as np
    from metrics import accuracy_score


    class LogisticRegression:

        def __init__(self):
            """Initialize the Logistic Regression model."""
            self.coef_ = None
            self.intercept_ = None
            self._theta = None

        def _sigmoid(self, t):
            return 1. / (1. + np.exp(-t))

        def fit(self, X_train, y_train, eta=0.01, n_iters=1e4):
            """Train the Logistic Regression model on X_train, y_train by gradient descent."""
            assert X_train.shape[0] == y_train.shape[0], \
                "the size of X_train must be equal to the size of y_train"

            def J(theta, X_b, y):
                """Cross-entropy loss."""
                y_hat = self._sigmoid(X_b.dot(theta))
                try:
                    return -np.sum(y * np.log(y_hat) + (1 - y) * np.log(1 - y_hat)) / len(y)
                except Exception:
                    return float('inf')

            def dJ(theta, X_b, y):
                """Gradient of the loss, in vectorized form."""
                return X_b.T.dot(self._sigmoid(X_b.dot(theta)) - y) / len(X_b)

            def gradient_descent(X_b, y, initial_theta, eta, n_iters=1e4, epsilon=1e-8):
                """Search for theta with batch gradient descent."""
                theta = initial_theta
                cur_iter = 0

                while cur_iter < n_iters:
                    gradient = dJ(theta, X_b, y)
                    last_theta = theta
                    theta = theta - eta * gradient
                    # Stop once the loss no longer changes meaningfully
                    if abs(J(theta, X_b, y) - J(last_theta, X_b, y)) < epsilon:
                        break
                    cur_iter += 1

                return theta

            X_b = np.hstack([np.ones((len(X_train), 1)), X_train])
            initial_theta = np.zeros(X_b.shape[1])
            self._theta = gradient_descent(X_b, y_train, initial_theta, eta, n_iters)

            self.intercept_ = self._theta[0]
            self.coef_ = self._theta[1:]

            return self

        def predict_proba(self, X_predict):
            """Return the vector of predicted probabilities for X_predict."""
            assert self.intercept_ is not None and self.coef_ is not None, \
                "must fit before predict!"
            assert X_predict.shape[1] == len(self.coef_), \
                "the feature number of X_predict must be equal to X_train"

            X_b = np.hstack([np.ones((len(X_predict), 1)), X_predict])
            return self._sigmoid(X_b.dot(self._theta))

        def predict(self, X_predict):
            """Return the vector of predicted labels for X_predict (threshold 0.5)."""
            assert self.intercept_ is not None and self.coef_ is not None, \
                "must fit before predict!"
            assert X_predict.shape[1] == len(self.coef_), \
                "the feature number of X_predict must be equal to X_train"

            proba = self.predict_proba(X_predict)
            return np.array(proba >= 0.5, dtype='int')

        def score(self, X_test, y_test):
            """Accuracy of the current model on the test set X_test, y_test."""
            y_predict = self.predict(X_test)
            return accuracy_score(y_test, y_predict)

        def __repr__(self):
            return "LogisticRegression()"
    """Putting logistic regression to work."""
    import numpy as np
    import matplotlib.pyplot as plt
    from sklearn import datasets

    iris = datasets.load_iris()
    X = iris.data
    y = iris.target

    # Keep only classes 0 and 1 (a binary problem) and the first two features
    X = X[y < 2, :2]
    y = y[y < 2]

    plt.scatter(X[y == 0, 0], X[y == 0, 1], color='red')
    plt.scatter(X[y == 1, 0], X[y == 1, 1], color='blue')
    plt.show()

    """Using our logistic regression."""
    from model_selection import train_test_split
    from LogisticRegression import LogisticRegression

    X_train, X_test, y_train, y_test = train_test_split(X, y, seed=666)
    log_reg = LogisticRegression()
    log_reg.fit(X_train, y_train)
    print(log_reg.score(X_test, y_test))
    print(log_reg.predict_proba(X_test))

Result (test-set accuracy, then the predicted probabilities for X_test):

    E:\pythonspace\KNN_function\venv\Scripts\python.exe E:/pythonspace/KNN_function/try.py
    1.0
    [0.92972035 0.98664939 0.14852024 0.17601199 0.0369836  0.0186637
     0.04936918 0.99669244 0.97993941 0.74524655 0.04473194 0.00339285
     0.26131273 0.0369836  0.84192923 0.79892262 0.82890209 0.32358166
     0.06535323 0.20735334]

    Process finished with exit code 0

The decision boundary in logistic regression and adding polynomial features:
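A prediction flips at $\hat{p} = 0.5$, i.e. at $\sigma(\theta^{T} x_b) = 0.5$, which happens exactly when $\theta^{T} x_b = 0$. With two features this is a straight line:

$$\theta_0 + \theta_1 x_1 + \theta_2 x_2 = 0 \quad\Longrightarrow\quad x_2 = \frac{-\theta_0 - \theta_1 x_1}{\theta_2}.$$

A plain logistic regression can therefore only draw linear boundaries; adding polynomial features lets the same linear model carve out curved ones, as the script below demonstrates.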

    """Adding polynomial features to logistic regression."""
    import numpy as np
    import matplotlib.pyplot as plt

    np.random.seed(666)
    X = np.random.normal(0, 1, size=(100, 2))
    # True boundary is the circle x1^2 + x2^2 = 1.5
    y = np.array(X[:, 0] ** 2 + X[:, 1] ** 2 < 1.5, dtype='int')

    """Plain logistic regression."""
    from LogisticRegression import LogisticRegression

    log_reg = LogisticRegression()
    log_reg.fit(X, y)

    """Plotting the decision boundary."""
    def plot_decision_boundary(model, axis):
        # Evaluate the model on a dense grid and color each region by its prediction
        x0, x1 = np.meshgrid(
            np.linspace(axis[0], axis[1], int((axis[1] - axis[0]) * 100)),
            np.linspace(axis[2], axis[3], int((axis[3] - axis[2]) * 100))
        )
        X_new = np.c_[x0.ravel(), x1.ravel()]
        y_predict = model.predict(X_new)
        zz = y_predict.reshape(x0.shape)
        from matplotlib.colors import ListedColormap
        custom_cmap = ListedColormap(['#EF9A9A', '#FFF59D', '#90CAF9'])
        plt.contourf(x0, x1, zz, cmap=custom_cmap)  # contourf does not accept 'linewidth'

    plot_decision_boundary(log_reg, axis=[-4, 4, -4, 4])
    plt.scatter(X[y == 0, 0], X[y == 0, 1])
    plt.scatter(X[y == 1, 0], X[y == 1, 1])
    plt.show()

    """Adding polynomial features, i.e. lifting the data to a higher dimension."""
    from sklearn.preprocessing import PolynomialFeatures
    from sklearn.preprocessing import StandardScaler
    from sklearn.pipeline import Pipeline

    def PolynomialLogisticRegression(degree):
        return Pipeline([
            ('Poly', PolynomialFeatures(degree=degree)),
            ('std_scaler', StandardScaler()),
            ('Logistic', LogisticRegression())
        ])

    poly_log_reg = PolynomialLogisticRegression(degree=2)
    poly_log_reg.fit(X, y)
    plot_decision_boundary(poly_log_reg, axis=[-4, 4, -4, 4])
    plt.scatter(X[y == 0, 0], X[y == 0, 1])
    plt.scatter(X[y == 1, 0], X[y == 1, 1])
    plt.show()

Result: without polynomial features the boundary is a straight line that misclassifies many points; with degree=2 (which adds the features $x_1^2$, $x_1 x_2$, $x_2^2$) the boundary closely follows the true circle $x_1^2 + x_2^2 = 1.5$ (plots omitted).
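Regularization in logistic regression: with high-degree polynomial features the model can easily overfit, so a penalty on the coefficients is added, exactly as in polynomial regression. scikit-learn's LogisticRegression puts the tunable constant $C$ on the data term rather than on the penalty, so a smaller $C$ means stronger regularization:

$$\min_{\theta}\ \lVert\theta\rVert_1 + C \cdot J(\theta) \quad (\texttt{penalty='l1'}), \qquad \min_{\theta}\ \tfrac{1}{2}\lVert\theta\rVert_2^2 + C \cdot J(\theta) \quad (\texttt{penalty='l2'}).$$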

    """Using regularization in logistic regression (scikit-learn)."""
    import numpy as np
    import matplotlib.pyplot as plt
    from sklearn.model_selection import train_test_split
    from sklearn.linear_model import LogisticRegression
    from sklearn.preprocessing import StandardScaler
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import PolynomialFeatures

    np.random.seed(666)
    X = np.random.normal(0, 1, size=(200, 2))
    # True boundary is the parabola x1^2 + x2 = 1.5
    y = np.array(X[:, 0] ** 2 + X[:, 1] < 1.5, dtype='int')
    # Flip some labels to 1 at random to add noise
    for _ in range(20):
        y[np.random.randint(200)] = 1

    plt.scatter(X[y == 0, 0], X[y == 0, 1])
    plt.scatter(X[y == 1, 0], X[y == 1, 1])
    plt.show()

    X_train, X_test, y_train, y_test = train_test_split(X, y)
    log_reg = LogisticRegression()
    log_reg.fit(X_train, y_train)

    def plot_decision_boundary(model, axis):
        x0, x1 = np.meshgrid(
            np.linspace(axis[0], axis[1], int((axis[1] - axis[0]) * 100)),
            np.linspace(axis[2], axis[3], int((axis[3] - axis[2]) * 100))
        )
        X_new = np.c_[x0.ravel(), x1.ravel()]
        y_predict = model.predict(X_new)
        zz = y_predict.reshape(x0.shape)
        from matplotlib.colors import ListedColormap
        custom_cmap = ListedColormap(['#EF9A9A', '#FFF59D', '#90CAF9'])
        plt.contourf(x0, x1, zz, cmap=custom_cmap)

    def PolynomialLogisticRegression(degree, C=1.0, penalty='l2'):
        # Smaller C means stronger regularization; recent scikit-learn versions
        # require an l1-capable solver such as 'liblinear' for penalty='l1'
        return Pipeline([
            ('Poly', PolynomialFeatures(degree=degree)),
            ('std_scaler', StandardScaler()),
            ('Logistic', LogisticRegression(C=C, penalty=penalty, solver='liblinear'))
        ])

    poly_log_reg = PolynomialLogisticRegression(degree=20, C=0.1, penalty='l1')
    poly_log_reg.fit(X_train, y_train)

    plot_decision_boundary(poly_log_reg, axis=[-4, 4, -4, 4])
    plt.scatter(X[y == 0, 0], X[y == 0, 1])
    plt.scatter(X[y == 1, 0], X[y == 1, 1])
    plt.show()

Result: despite degree=20, the l1 penalty keeps the decision boundary close to the true parabola-shaped boundary rather than contorting itself around the noisy labels (plots omitted).

Using OvR and OvO to handle multi-class problems with logistic regression. Logistic regression is inherently a binary classifier; OvR (one-vs-rest) trains one binary classifier per class and predicts the class whose classifier is most confident, while OvO (one-vs-one) trains one classifier per pair of classes, $k(k-1)/2$ in total, and lets them vote.

    """OvR and OvO."""
    # For easier visualization, use only the first two features of the iris dataset
    from sklearn import datasets
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import train_test_split
    import matplotlib.pyplot as plt
    import numpy as np

    iris = datasets.load_iris()
    X = iris['data'][:, :2]
    y = iris['target']
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=666)

    # multi_class='ovr' selects one-vs-rest; the other option is 'multinomial'
    # (softmax), which needs a solver such as 'newton-cg'. There is no 'ovo'
    # option here -- use OneVsOneClassifier below for that. Using only two
    # features keeps the classification accuracy fairly low.
    log_reg = LogisticRegression(multi_class='ovr', solver='newton-cg')
    log_reg.fit(X_train, y_train)
    log_reg.score(X_test, y_test)

    def plot_decision_boundary(model, axis):
        x0, x1 = np.meshgrid(
            np.linspace(axis[0], axis[1], int((axis[1] - axis[0]) * 100)),
            np.linspace(axis[2], axis[3], int((axis[3] - axis[2]) * 100))
        )
        X_new = np.c_[x0.ravel(), x1.ravel()]
        y_predict = model.predict(X_new)
        zz = y_predict.reshape(x0.shape)
        from matplotlib.colors import ListedColormap
        custom_cmap = ListedColormap(['#EF9A9A', '#FFF59D', '#90CAF9'])
        plt.contourf(x0, x1, zz, cmap=custom_cmap)

    plot_decision_boundary(log_reg, axis=[4, 8.5, 1.5, 4.5])
    plt.scatter(X[y == 0, 0], X[y == 0, 1])
    plt.scatter(X[y == 1, 0], X[y == 1, 1])
    plt.scatter(X[y == 2, 0], X[y == 2, 1])
    plt.show()

    """OvR and OvO on the full dataset."""
    from sklearn.multiclass import OneVsOneClassifier
    from sklearn.multiclass import OneVsRestClassifier

    iris = datasets.load_iris()
    X = iris.data
    y = iris.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=666)

    ovr = OneVsRestClassifier(log_reg)  # wraps any binary classifier
    ovr.fit(X_train, y_train)
    print(ovr.score(X_test, y_test))
    ovo = OneVsOneClassifier(log_reg)
    ovo.fit(X_train, y_train)
    print(ovo.score(X_test, y_test))

Result (OvR accuracy, then OvO accuracy):

    E:\pythonspace\KNN_function\venv\Scripts\python.exe E:/pythonspace/KNN_function/try.py
    0.9736842105263158
    1.0

    Process finished with exit code 0
