python3利用朴素贝叶斯计算概率
朴素贝叶斯的核心就是求概率,其原理可以参考 (http://blog.csdn.net/amds123/article/details/70173402)。
以下是实例代码:
class bys():
    """Minimal naive Bayes classifier for categorical features.

    Every training row is a sequence of feature values whose last element
    is the class label. Probabilities are plain relative frequencies; no
    smoothing is applied, so a feature value never seen together with a
    label drives that label's score to 0.
    """

    def classify(self, data, attrIndex):
        """Group the rows of ``data`` by the value in column ``attrIndex``.

        Returns a dict mapping each distinct value to the list of rows that
        carry it, in their original order.
        """
        classes = {}
        for item in data:
            # setdefault replaces the former bare ``except:``, which would
            # also have hidden unrelated errors.
            classes.setdefault(item[attrIndex], []).append(item)
        return classes

    def p(self, data, index, key):
        """Return the fraction of rows in ``data`` whose column ``index`` equals ``key``.

        Raises ZeroDivisionError when ``data`` is empty.
        """
        count = sum(1 for item in data if item[index] == key)
        return 1.0 * count / len(data)

    def max(self, p):
        """Return the key of ``p`` with the largest value.

        On ties the key that appears first in the dict wins. Raises
        IndexError when ``p`` is empty.
        """
        keys = list(p.keys())
        maxKey = keys[0]
        for key in keys:
            if p[key] > p[maxKey]:
                maxKey = key
        return maxKey

    def do(self, trainData, testData):
        """Score ``testData`` against ``trainData`` and report the best label.

        ``trainData`` is a list of rows (feature values followed by the class
        label); ``testData`` holds the feature values of the sample to
        classify, in the same column order. Prints the per-label scores and
        the winning label, and also returns the winning label.
        """
        # Split the training rows by class label (the last column).
        classes = self.classify(trainData, len(trainData[0]) - 1)
        total = len(trainData)

        # First factor: prior probability of each label.
        factors = [
            {label: 1.0 * len(rows) / total for label, rows in classes.items()}
        ]
        # One further factor per feature: P(feature value | label).
        for index, value in enumerate(testData):
            factors.append(
                {label: self.p(rows, index, value)
                 for label, rows in classes.items()}
            )

        # Multiply prior and conditionals into one score per label.
        scores = {}
        for label in classes:
            score = 1.0
            for table in factors:
                score = score * table[label]
            scores[label] = score
        print(scores)

        # Must be self.max: the builtin max() applied to a dict compares the
        # keys themselves and would return the alphabetically largest label.
        best = self.max(scores)
        print(best)
        return best
if __name__ == '__main__':
    # Training data: each row holds four categorical feature values followed
    # by the class label ('yes' / 'no') in the last column.
    trainData = [['Sunny', 'Hot', 'High', 'Weak', 'no'],
                 ['Sunny', 'Hot', 'High', 'Strong', 'no'],
                 ['Overcast', 'Hot', 'High', 'Weak', 'yes'],
                 ['Rainy', 'Mild', 'High', 'Weak', 'yes'],
                 ['Rainy', 'Cool', 'Normal', 'Weak', 'yes'],
                 ['Rainy', 'Cool', 'Normal', 'Strong', 'no'],
                 ['Overcast', 'Cool', 'Normal', 'Strong', 'yes'],
                 ['Sunny', 'Mild', 'High', 'Weak', 'no'],
                 ['Sunny', 'Cool', 'Normal', 'Weak', 'yes'],
                 ['Rainy', 'Mild', 'Normal', 'Weak', 'yes'],
                 ['Sunny', 'Mild', 'Normal', 'Strong', 'yes'],
                 ['Overcast', 'Mild', 'High', 'Strong', 'yes'],
                 ['Overcast', 'Hot', 'Normal', 'Weak', 'yes'],
                 ['Rainy', 'Mild', 'High', 'Strong', 'no']]
    # Test sample: feature values only, in the same column order as above.
    testData = ('Sunny', 'Cool', 'High', 'Strong')
    # Keep the instance under its own name; rebinding `bys` would shadow the
    # class and make it impossible to create further instances.
    classifier = bys()
    classifier.do(trainData, testData)