Advertisement

python apriori模块_python实现Apriori算法

阅读量:

# coding: utf-8

# Implementing the Apriori algorithm in Python

# In[1]:

# Import the required libraries

from numpy import *

# In[2]:

def loadDataSet():
    """Return a small hard-coded list of transactions used by the demos.

    Each transaction is a list of integer item ids. A new list object is
    built on every call.
    """
    transactions = [
        [1, 3, 4],
        [2, 3, 5],
        [1, 2, 3, 5],
        [2, 5],
    ]
    return transactions

# In[3]:

def createC1(dataSet):
    """Build the candidate 1-itemsets (C1) from a collection of transactions.

    Args:
        dataSet: iterable of transactions, each an iterable of hashable,
            mutually orderable items.

    Returns:
        A list of single-item frozensets, one per distinct item, ordered by
        item value.
    """
    uniqueItems = {item for transaction in dataSet for item in transaction}
    # Return a real list rather than a lazy ``map`` object: the result is
    # iterated once per transaction when supports are counted, and a
    # one-shot iterator would be empty after the first pass.
    return [frozenset([item]) for item in sorted(uniqueItems)]

# In[4]:

# Compute the support of each candidate in Ck over data set D and return the itemsets whose support is at least minSupport

def scanD(D, Ck, minSupport):
    """Count candidate itemsets in the transactions and keep the frequent ones.

    Args:
        D: sized collection of transactions (sets of items).
        Ck: iterable of candidate itemsets (frozensets). It may be a one-shot
            iterator; it is materialised once before counting.
        minSupport: minimum fraction of transactions that must contain an
            itemset for it to be kept.

    Returns:
        A ``(retList, supportData)`` tuple. ``retList`` holds the itemsets
        whose support is >= ``minSupport``, in reverse order of first
        occurrence. ``supportData`` maps every candidate that occurred in at
        least one transaction to its support.
    """
    # Ck is walked once per transaction, so it must be re-iterable.
    candidates = list(Ck)

    ssCnt = {}
    for tid in D:
        for can in candidates:
            if can.issubset(tid):
                ssCnt[can] = ssCnt.get(can, 0) + 1

    numItems = float(len(D))
    retList = []
    supportData = {}
    for key, count in ssCnt.items():
        support = count / numItems
        if support >= minSupport:
            retList.append(key)
        # Support is recorded for every counted candidate, not only the
        # frequent ones.
        supportData[key] = support

    # Equivalent to inserting each hit at the front, without the O(n) shifts.
    retList.reverse()
    return retList, supportData

# In[15]:

def aprioriGen(Lk, k):
    """Join frequent (k-1)-itemsets into candidate k-itemsets.

    Two itemsets are merged when their first ``k - 2`` items, taken in sorted
    order, are identical.

    Args:
        Lk: list of frozensets, all of size ``k - 1``; items must be mutually
            orderable.
        k: size of the itemsets to generate.

    Returns:
        A list of frozensets, each the union of one joinable pair.
    """
    # Sort BEFORE slicing: a frozenset has no defined iteration order, so
    # slicing the raw list can compare the wrong items and miss candidates.
    # Computing each prefix once also avoids redoing it for every pair.
    prefixes = [sorted(itemset)[:k - 2] for itemset in Lk]

    retList = []
    lenLk = len(Lk)
    for i in range(lenLk):
        for j in range(i + 1, lenLk):
            if prefixes[i] == prefixes[j]:
                retList.append(Lk[i] | Lk[j])
    return retList

# In[14]:

def apriori(dataSet, minSupport=0.5):
    """Find all frequent itemsets in ``dataSet`` with the Apriori algorithm.

    Args:
        dataSet: list of transactions, each an iterable of hashable items.
        minSupport: minimum support (fraction of transactions) an itemset
            needs to be considered frequent.

    Returns:
        A ``(L, supportData)`` tuple. ``L[i]`` is the list of frequent
        itemsets of size ``i + 1``; generation stops at the first size with
        no frequent itemsets, and that empty level is not appended (``L[0]``
        may itself be empty). ``supportData`` is the merged support
        dictionary returned by ``scanD`` for every level that was scanned.
    """
    # Materialise the candidates: they are scanned once per transaction, so
    # a lazy iterator would be used up after the first transaction.
    C1 = list(createC1(dataSet))
    D = list(map(set, dataSet))
    print('D:', D)

    L1, supportData = scanD(D, C1, minSupport)
    L = [L1]
    k = 2
    # L[-1] is always the most recently found level, i.e. L[k - 2].
    while L[-1]:
        Ck = aprioriGen(L[-1], k)
        Lk, supK = scanD(D, Ck, minSupport)
        supportData.update(supK)
        if not Lk:
            break
        L.append(Lk)
        k += 1
    return L, supportData

# In[19]:

def calConf(freqSet, H, supportData, brl, minConf=0.7):
    """Evaluate the rules ``(freqSet - conseq) --> conseq`` for each conseq in H.

    Args:
        freqSet: the frozenset the rules are derived from.
        H: iterable of candidate consequents (frozensets).
        supportData: mapping from itemset to support; must contain
            ``freqSet`` and every ``freqSet - conseq``.
        brl: list that accepted rules are appended to, as
            ``(antecedent, consequent, confidence)`` tuples.
        minConf: minimum confidence a rule needs to be accepted.

    Returns:
        The list of consequents whose rule reached ``minConf``.
    """
    keptConsequents = []
    for consequent in H:
        antecedent = freqSet - consequent
        confidence = supportData[freqSet] / supportData[antecedent]
        if confidence >= minConf:
            print(antecedent, '-->', consequent, 'conf', confidence)
            brl.append((antecedent, consequent, confidence))
            keptConsequents.append(consequent)
    return keptConsequents

# In[21]:

def rulesFromConseq(freqSet, H, supportData, brl, minConf=0.7):
    """Recursively derive rules with progressively larger consequents.

    The consequents in ``H`` (all the same size ``m``) are merged into
    candidates of size ``m + 1`` with ``aprioriGen``, each candidate is
    evaluated with ``calConf`` (accepted rules are appended to ``brl``), and
    the function recurses on the survivors while more than one remains.

    Args:
        freqSet: the frozenset the rules are derived from.
        H: non-empty list of equally sized consequents (frozensets).
        supportData: mapping from itemset to support.
        brl: list that accepted rules are appended to.
        minConf: minimum confidence a rule needs to be accepted.
    """
    consequentSize = len(H[0])
    # A larger consequent must still leave at least one item on the left.
    if len(freqSet) <= consequentSize + 1:
        return

    merged = aprioriGen(H, consequentSize + 1)
    survivors = calConf(freqSet, merged, supportData, brl, minConf)
    print('Hmpl=', survivors)
    print('len(Hmpl)=', len(survivors), 'len(freqSet)=', len(freqSet))
    if len(survivors) > 1:
        rulesFromConseq(freqSet, survivors, supportData, brl, minConf)

# In[9]:

def generateRules(L, supportData, minConf=0.7):
    """Generate association rules from the frequent itemsets.

    Args:
        L: list of levels as returned by ``apriori``; ``L[i]`` holds the
            frequent itemsets of size ``i + 1``.
        supportData: mapping from itemset to support.
        minConf: minimum confidence a rule needs to be reported.

    Returns:
        A list of ``(antecedent, consequent, confidence)`` tuples.
    """
    bigRuleList = []
    # Level 0 holds single items, which cannot be split into a rule.
    for i in range(1, len(L)):
        for freqSet in L[i]:
            H1 = [frozenset([item]) for item in freqSet]
            # Always evaluate the single-item consequents first. Handing H1
            # straight to rulesFromConseq would merge it into 2-item
            # consequents before any confidence check, so rules such as
            # {a, b} --> {c} would never be reported for larger itemsets.
            H1 = calConf(freqSet, H1, supportData, bigRuleList, minConf)
            # Only consequents that passed can be grown further, and at
            # least two are needed to merge into a larger one.
            if i > 1 and len(H1) > 1:
                rulesFromConseq(freqSet, H1, supportData, bigRuleList, minConf)
    return bigRuleList

# In[10]:

def testApriori():
    """Run ``apriori`` on the sample data at two support thresholds and print
    the frequent itemsets and support data for each run."""
    dataSet = loadDataSet()
    print('dataSet:', dataSet)

    L1, supportData1 = apriori(dataSet, minSupport=0.7)
    print('L(0.7):', L1)
    print('supportData(0.7):', supportData1)
    print('------------------------------------------')

    L2, supportData2 = apriori(dataSet, minSupport=0.5)
    print('L(0.5):', L2)
    # Print the actual support data; the variable name had ended up inside
    # the string literal, so only the label was ever shown.
    print('supportData(0.5):', supportData2)
    print('------------------------------------------')

# In[11]:

def testGenerateRules():
    """Mine the sample data at minSupport=0.2 and print the frequent itemsets,
    the support data, and the rules derived from them."""
    dataSet = loadDataSet()
    L1, supportData1 = apriori(dataSet, minSupport=0.2)
    print('L(0.2):', L1)
    # The value printed here is the support dictionary, so label it as such.
    print('supportData(0.2):', supportData1)
    # Confidence is a ratio of supports and never exceeds 1.0, so a threshold
    # of 1.1 could not yield any rule; 0.7 matches generateRules' own default.
    rules = generateRules(L1, supportData1, minConf=0.7)
    print('Rules:', rules)

# In[12]:

def main():
    """Run the two demo routines in order: itemset mining, then rule generation."""
    for demo in (testApriori, testGenerateRules):
        demo()

# In[22]:

# Run the demos only when executed as a script. The dunder underscores are
# required: the bare names ``name`` / "main" raise NameError at import time.
if __name__ == "__main__":
    main()

全部评论 (0)

还没有任何评论哟~