python3利用朴素贝叶斯计算概率
朴素贝叶斯就是求概率,原理可以参考(http://blog.csdn.net/amds123/article/details/70173402)。
以下是实例代码:
class bys():
    """Minimal naive Bayes classifier over categorical features.

    Every training row is a sequence of feature values whose last element
    is the class label. No smoothing is applied, so a feature value that
    never occurs together with a class gives that class a score of zero.
    """

    def classify(self, data, attrIndex):
        """Group rows by the value stored at column ``attrIndex``.

        Args:
            data: Iterable of indexable rows.
            attrIndex: Column whose value is used as the grouping key.

        Returns:
            Dict mapping each distinct column value to the list of rows
            that hold it; keys appear in first-seen order.
        """
        classes = {}
        for item in data:
            # setdefault creates the bucket the first time a key is seen.
            classes.setdefault(item[attrIndex], []).append(item)
        return classes

    def p(self, data, index, key):
        """Return the fraction of rows whose column ``index`` equals ``key``.

        Args:
            data: Sized collection of indexable rows.
            index: Column to inspect.
            key: Value to count.

        Returns:
            The relative frequency as a float in [0, 1].

        Raises:
            ZeroDivisionError: If ``data`` is empty.
        """
        count = sum(1 for item in data if item[index] == key)
        return float(count) / len(data)

    def max(self, p):
        """Return the key of ``p`` that maps to the largest value.

        Ties go to the key that comes first in iteration order.

        Raises:
            IndexError: If ``p`` is empty.
        """
        keys = list(p)
        maxKey = keys[0]
        for key in keys[1:]:
            if p[key] > p[maxKey]:
                maxKey = key
        return maxKey

    def do(self, trainData, testData):
        """Score every class for ``testData`` and report the most likely one.

        Prints the dict of per-class scores (class prior multiplied by the
        per-feature relative frequencies, not normalized), then prints the
        winning label.

        Args:
            trainData: Non-empty sequence of rows; the last element of each
                row is the class label.
            testData: Sequence of feature values, positionally aligned with
                the leading columns of the training rows.

        Returns:
            The label with the highest score.
        """
        # Split the training rows by label, which lives in the last column.
        classes = self.classify(trainData, len(trainData[0]) - 1)

        scores = {}
        for label, rows in classes.items():
            # Start from the class prior, then fold in one likelihood per
            # feature of the instance being classified.
            score = 1.0 * len(rows) / len(trainData)
            for index, value in enumerate(testData):
                score = score * self.p(rows, index, value)
            scores[label] = score

        print(scores)
        # Must be self.max: the builtin max() applied to a dict compares the
        # keys (label strings) and ignores the scores entirely.
        best = self.max(scores)
        print(best)
        return best


if __name__ == '__main__':
    # Training data: four feature columns followed by the class label.
    trainData = [
        ['Sunny', 'Hot', 'High', 'Weak', 'no'],
        ['Sunny', 'Hot', 'High', 'Strong', 'no'],
        ['Overcast', 'Hot', 'High', 'Weak', 'yes'],
        ['Rainy', 'Mild', 'High', 'Weak', 'yes'],
        ['Rainy', 'Cool', 'Normal', 'Weak', 'yes'],
        ['Rainy', 'Cool', 'Normal', 'Strong', 'no'],
        ['Overcast', 'Cool', 'Normal', 'Strong', 'yes'],
        ['Sunny', 'Mild', 'High', 'Weak', 'no'],
        ['Sunny', 'Cool', 'Normal', 'Weak', 'yes'],
        ['Rainy', 'Mild', 'Normal', 'Weak', 'yes'],
        ['Sunny', 'Mild', 'Normal', 'Strong', 'yes'],
        ['Overcast', 'Mild', 'High', 'Strong', 'yes'],
        ['Overcast', 'Hot', 'Normal', 'Weak', 'yes'],
        ['Rainy', 'Mild', 'High', 'Strong', 'no'],
    ]
    # Instance to classify.
    testData = ('Sunny', 'Cool', 'High', 'Strong')
    # Use a distinct name so the class `bys` is not shadowed by its instance.
    classifier = bys()
    classifier.do(trainData, testData)