Advertisement

机器学习实战:逻辑回归示例----从疝气病症预测病马的死亡率

阅读量:

1. 准备数据:处理缺失值

2. 逻辑回归算法:

该算法源自:机器学习实战:逻辑回归 Logistic Regression

复制代码
 def stocGradAscent1(dataMatrix,classLabels,numIter=150):
     """Fit logistic-regression weights by randomized stochastic gradient ascent.

     Every pass updates the weights once per row, visiting the rows in a
     random order without replacement.  Relies on a ``sigmoid`` callable
     and on ``np`` (NumPy) that are not defined inside this block.

     Args:
         dataMatrix: 2-D array whose shape unpacks to (m, n); each row is
             multiplied element-wise with the weight vector, so it is
             expected to be a NumPy array (assumption -- confirm at caller).
         classLabels: sequence indexed by row number; each entry is the
             numeric target for the matching row of ``dataMatrix``.
         numIter: number of full passes over the rows.

     Returns:
         1-D NumPy array of n weights, initialised to ones and updated
         m * numIter times.
     """
     m,n=np.shape(dataMatrix)
     weights=np.ones(n)
     for j in range(numIter):
         # Pool of row numbers that have not been used yet in this pass.
         dataIndex=list(range(m))
         for i in range(m):
             # Step size shrinks as j and i grow; the constant term keeps
             # it from ever reaching zero.
             alpha=4/(1.0+j+i)+0.01
             # randIndex is a position inside the shrinking pool, NOT a row
             # number: translate it through dataIndex before touching the data.
             randIndex=int(np.random.uniform(0,len(dataIndex)))
             sampleIndex=dataIndex[randIndex]
             h=sigmoid(sum(dataMatrix[sampleIndex]*weights))
             error=classLabels[sampleIndex]-h
             weights=weights+alpha*error*dataMatrix[sampleIndex]
             # Remove the used row so it cannot be drawn again this pass.
             del(dataIndex[randIndex])
     return weights

3. 用 Logistic 回归进行分类

复制代码
 def classifyVector(inX,weights):
     """Return the predicted class label for a single feature vector.

     The element-wise product of ``inX`` and ``weights`` is summed and passed
     through ``sigmoid`` (defined outside this block).  The result is 1.0 when
     that value is strictly greater than 0.5 and 0.0 otherwise.
     """
     activation=sum(inX*weights)
     return 1.0 if sigmoid(activation)>0.5 else 0.0
    
  
    
  
    
 def colicTest(trainPath='/Users/xxxx/Downloads/machinelearninginaction-master/Ch05/horseColicTraining.txt',
               testPath='/Users/xxxx/Downloads/machinelearninginaction-master/Ch05/horseColicTest.txt'):
     """Train on one tab-separated file, evaluate on another, report the error rate.

     Each line of both files is split on tabs; the first 21 fields are parsed
     as float features and field 21 is the class label.  Weights are fitted
     with ``stocGradAscent1`` (500 passes) and every test line is classified
     with ``classifyVector``; both helpers live outside this block.

     Args:
         trainPath: path of the training file (defaults to the original
             hard-coded location).
         testPath: path of the test file (defaults to the original
             hard-coded location).

     Returns:
         Fraction of test lines whose predicted label differs from the
         label stored in the file.  The same value is printed.

     Raises:
         ZeroDivisionError: if the test file contains no lines.
     """
     numFeatures=21
     # Open both files up front (a missing test file fails before training
     # starts) and let the context manager close them even on error.
     with open(trainPath) as frTrain, open(testPath) as frTest:
         trainLines=frTrain.readlines()
         testLines=frTest.readlines()

     trainingSet=[]
     trainingLabels=[]
     for line in trainLines:
         currLine=line.strip().split('\t')
         trainingSet.append([float(currLine[i]) for i in range(numFeatures)])
         trainingLabels.append(float(currLine[numFeatures]))
     trainWeights=stocGradAscent1(np.array(trainingSet),trainingLabels,500)

     errorCount=0
     numTestVec=0.0
     for line in testLines:
         numTestVec+=1.0
         currLine=line.strip().split('\t')
         lineArr=[float(currLine[i]) for i in range(numFeatures)]
         # Go through float() first so a label written as "1.000000" is
         # accepted as well as a plain "1".
         expected=int(float(currLine[numFeatures]))
         if int(classifyVector(np.array(lineArr),trainWeights))!=expected:
             errorCount+=1
     errorRate=(float(errorCount)/numTestVec)
     print("the error rate of this test is:%f" % errorRate)
     return errorRate
    
  
    
  
    
  
    
 def multiTest(numTests=10):
     """Run ``colicTest`` repeatedly and print the average error rate.

     Training is randomized, so a single run is noisy; averaging several
     independent runs gives a steadier estimate.

     Args:
         numTests: how many times to call ``colicTest`` (default 10, the
             value that was previously hard-coded).

     Raises:
         ValueError: if ``numTests`` is not positive, since the average
             would otherwise divide by zero.
     """
     if numTests<=0:
         raise ValueError("numTests must be a positive integer")
     errorSum=0.0
     for k in range(numTests):
         errorSum+=colicTest()
     print("after %d iterations the average error rate is: %f" % (numTests,errorSum/float(numTests)))

全部评论 (0)

还没有任何评论哟~