from sklearn.decomposition import PCA
from sklearn.datasets import load_iris
from sklearn import svm
from numpy.random import shuffle
import pandas as pd
from sklearn import metrics
from sklearn.linear_model import LogisticRegression as LR


class Iris():
    # Select features by reducing dimensionality with PCA
    def PCA_T(self, data):
        y = data.target
        x = data.data
        # Inspect how much variance each principal component explains
        pca = PCA()
        pca.fit(x)
        print(pca.explained_variance_ratio_)
        # [0.92461621 0.05301557 0.01718514 0.00518309]
        # The first two components account for almost all of the variance,
        # so we reduce the data to 2 dimensions
        pca = PCA(2)
        pca.fit(x)
        low_d = pca.transform(x)
        self.LMTwo(low_d, y)
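
    # A minimal sketch, not part of the original script: instead of hard-coding two
    # components, PCA also accepts a float n_components, in which case it keeps just
    # enough components to explain that fraction of the variance. The method name
    # PCA_T_auto and the 0.95 threshold are illustrative choices.
    def PCA_T_auto(self, data, var_ratio=0.95):
        pca = PCA(n_components=var_ratio)     # keep enough components to cover var_ratio of the variance
        low_d = pca.fit_transform(data.data)
        print("components kept:", pca.n_components_)
        return low_d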
    # SVM on the two PCA features
    def LMTwo(self, x, y):
        y = pd.DataFrame(y)
        x = pd.DataFrame(x)
        # DataFrame.as_matrix() was removed from pandas; use .to_numpy() instead
        data = pd.concat([x, y], axis=1).to_numpy()
        shuffle(data)          # shuffle rows in place before splitting
        x_train = data[:int(len(data) * 0.8), :2]
        y_train = data[:int(len(data) * 0.8), 2]
        x_test = data[int(len(data) * 0.8):, :2]
        y_test = data[int(len(data) * 0.8):, 2]
        clf = svm.SVC()        # SVM classifier on the 2-D features
        clf.fit(x_train, y_train)
        predict_result = clf.predict(x_test)
        cm_train = metrics.confusion_matrix(y_test, predict_result)
        result = pd.DataFrame(cm_train, index=range(0, 3), columns=range(0, 3))
        print("SVM results after dimensionality reduction")
        print(result)
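
    # A minimal sketch, not part of the original script: the manual shuffle-and-slice
    # split above can also be done with sklearn's train_test_split, and accuracy_score
    # gives a single-number summary to go with the confusion matrix. The method name
    # and the 20% test size are illustrative.
    def svm_with_split(self, x, y, test_size=0.2):
        from sklearn.model_selection import train_test_split
        x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=test_size)
        clf = svm.SVC()
        clf.fit(x_train, y_train)
        predict_result = clf.predict(x_test)
        print("accuracy:", metrics.accuracy_score(y_test, predict_result))
        return metrics.confusion_matrix(y_test, predict_result)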
    # SVM on all four original features (no dimensionality reduction)
    def LM(self, data):
        y = pd.DataFrame(data.target)
        x = pd.DataFrame(data.data)
        data = pd.concat([x, y], axis=1).to_numpy()
        shuffle(data)
        x_train = data[:int(len(data) * 0.8), :4]
        y_train = data[:int(len(data) * 0.8), 4]
        x_test = data[int(len(data) * 0.8):, :4]
        y_test = data[int(len(data) * 0.8):, 4]
        clf = svm.SVC()
        clf.fit(x_train, y_train)
        predict_result = clf.predict(x_test)
        cm_train = metrics.confusion_matrix(y_test, predict_result)
        result = pd.DataFrame(cm_train, index=range(0, 3), columns=range(0, 3))
        print("SVM results before dimensionality reduction")
        print(result)
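
    # A minimal sketch, not part of the original script: cross-validation averages over
    # several splits instead of a single random one, which smooths out the run-to-run
    # differences caused by shuffling. The method name and cv=5 are illustrative.
    def svm_cross_val(self, data, folds=5):
        from sklearn.model_selection import cross_val_score
        scores = cross_val_score(svm.SVC(), data.data, data.target, cv=folds)
        print("cross-validated SVM accuracy: %.3f +/- %.3f" % (scores.mean(), scores.std()))
        return scores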
    # Logistic regression on all four original features
    def LRR(self, data):
        y = pd.DataFrame(data.target)
        x = pd.DataFrame(data.data)
        data = pd.concat([x, y], axis=1).to_numpy()
        shuffle(data)
        x_train = data[:int(len(data) * 0.8), :4]
        y_train = data[:int(len(data) * 0.8), 4]
        x_test = data[int(len(data) * 0.8):, :4]
        y_test = data[int(len(data) * 0.8):, 4]
        lr = LR()
        lr.fit(x_train, y_train)
        predict_result = lr.predict(x_test)
        cm_train = metrics.confusion_matrix(y_test, predict_result)
        result = pd.DataFrame(cm_train, index=range(0, 3), columns=range(0, 3))
        print("Logistic regression results")
        print(result)
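
    # A minimal sketch, not part of the original script: metrics.classification_report
    # prints per-class precision, recall and F1, which complements the confusion
    # matrices above. The method name and its arguments are illustrative.
    def LRR_report(self, x_train, y_train, x_test, y_test):
        lr = LR()
        lr.fit(x_train, y_train)
        print(metrics.classification_report(y_test, lr.predict(x_test)))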


if __name__ == '__main__':
    data = load_iris()
    iris = Iris()
    iris.PCA_T(data)
    iris.LM(data)
    iris.LRR(data)
# The confusion matrices show that accuracy stays high even after reducing to two
# dimensions. The numbers differ from run to run because the data is reshuffled
# before every model is fitted.
# SVM results after dimensionality reduction
#     0  1   2
# 0  12  0   0
# 1   0  8   0
# 2   0  0  10
# SVM results before dimensionality reduction
#    0   1  2
# 0  7   0  0
# 1  0  13  1
# 2  0   0  9
# Logistic regression results
#    0   1   2
# 0  6   0   0
# 1  0  13   1
# 2  0   0  10
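
# A minimal sketch, not part of the original script: the matrices above differ between
# runs because numpy.random.shuffle reorders the data differently each time. Seeding
# NumPy's global random state before fitting makes the splits, and therefore the
# printed matrices, reproducible. The seed value 0 and the extra run are illustrative.
if __name__ == '__main__':
    import numpy as np
    np.random.seed(0)
    repeatable = Iris()
    repeatable.PCA_T(data)   # with the seed fixed, this prints the same matrix on every run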