# Import packages
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

# Load the wine dataset
wine = load_wine()

# Split into training and test sets
x_train, x_test, y_train, y_test = train_test_split(wine.data, wine.target, test_size=0.3)

# Instantiate a decision tree and a random forest, both with random_state=0
clf = DecisionTreeClassifier(random_state=0)
rfc = RandomForestClassifier(random_state=0)

# Train both models
clf.fit(x_train, y_train)
rfc.fit(x_train, y_train)
RandomForestClassifier(random_state=0)
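The fitted forest can be inspected directly. A minimal sketch, assuming the `rfc` fitted above (with `n_estimators` left at its default of 100 in recent scikit-learn versions):

# Sketch: a fitted RandomForestClassifier exposes its individual trees
# via the estimators_ attribute.
print(len(rfc.estimators_))            # number of trees in the forest
print(rfc.estimators_[0].get_depth())  # depth of the first tree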
# Score both models on the test set
clf_score = clf.score(x_test, y_test)
rfc_score = rfc.score(x_test, y_test)
print("single tree: {0}\nrandom forest: {1}".format(clf_score, rfc_score))

single tree: 0.9074074074074074
random forest: 0.9629629629629629
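Worth noting: for classifiers, `score` is plain accuracy on the test set, so it should match `accuracy_score` computed on the same predictions. A minimal check, assuming the fitted `clf` from above:

from sklearn.metrics import accuracy_score

# Sketch: .score() for a classifier is mean accuracy, so these two values agree.
print(clf.score(x_test, y_test))
print(accuracy_score(y_test, clf.predict(x_test)))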
# Import cross-validation and plotting tools
%matplotlib inline
from sklearn.model_selection import cross_val_score
import matplotlib.pyplot as plt

# Instantiate a decision tree and a random forest
clf = DecisionTreeClassifier()
rfc = RandomForestClassifier(n_estimators=25)  # a random forest of 25 trees

# Run 10-fold cross-validation for each model
clf_cross = cross_val_score(clf, wine.data, wine.target, cv=10)
rfc_cross = cross_val_score(rfc, wine.data, wine.target, cv=10)

# Compare the mean cross-validation scores of the decision tree and the random forest
print("single tree mean score: {}\nrandom forest mean score: {}".format(clf_cross.mean(), rfc_cross.mean()))
single tree mean score: 0.8705882352941178
random forest mean score: 0.9722222222222221
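Besides the mean, the spread of the fold scores is informative; a lower standard deviation suggests more stable performance across folds. A small sketch using the score arrays already computed above:

# Sketch: compare the variability of the 10 fold scores for each model.
print("single tree std: {}\nrandom forest std: {}".format(clf_cross.std(), rfc_cross.std()))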
# Plot the per-fold scores of the decision tree and the random forest
plt.plot(range(1, 11), clf_cross, label="single tree")
plt.plot(range(1, 11), rfc_cross, label="random forest")
plt.xticks(range(1, 11))
plt.legend()
<matplotlib.legend.Legend at 0x7ff6f4815d50>
clf_cross = cross_val_score(clf, wine.data, wine.target, cv=10)
clf_cross
array([0.88888889, 0.88888889, 0.72222222, 0.88888889, 0.83333333, 0.83333333, 1. , 0.94444444, 0.94117647, 0.76470588])
rfc_cross = cross_val_score(rfc, wine.data, wine.target, cv=10)
rfc_cross
array([1. , 1. , 0.94444444, 0.94444444, 0.88888889, 1. , 1. , 1. , 1. , 1. ])
# Lists to collect the mean score of each round
clf_list = []
rfc_list = []

# Run 10 rounds of 10-fold cross-validation for each model
for i in range(10):
    clf = DecisionTreeClassifier()
    rfc = RandomForestClassifier(n_estimators=25)
    clf_cross_mean = cross_val_score(clf, wine.data, wine.target, cv=10).mean()
    rfc_cross_mean = cross_val_score(rfc, wine.data, wine.target, cv=10).mean()
    clf_list.append(clf_cross_mean)
    rfc_list.append(rfc_cross_mean)

# Plot the 10-round mean scores of the decision tree and the random forest
plt.plot(range(1, 11), clf_list, label="single tree")
plt.plot(range(1, 11), rfc_list, label="random forest")
plt.xticks(range(1, 11))
plt.legend()
<matplotlib.legend.Legend at 0x7ff6f490f670>
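The manual 10-round loop above can also be expressed with scikit-learn's `RepeatedStratifiedKFold`, which repeats 10-fold cross-validation with different shuffles. A sketch of that alternative, reusing `wine` from above; the choice of splitter and `random_state=0` are assumptions made here, not part of the original notebook:

from sklearn.model_selection import RepeatedStratifiedKFold

# Sketch: 10-fold CV repeated 10 times yields 100 scores for the forest.
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=10, random_state=0)
rfc = RandomForestClassifier(n_estimators=25)
scores = cross_val_score(rfc, wine.data, wine.target, cv=cv)
print(scores.mean(), scores.std())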
# Learning curve for 1-200 trees
superpa = []
for i in range(200):
    rfc = RandomForestClassifier(n_estimators=i+1, n_jobs=-1)
    rfc_cross = cross_val_score(rfc, wine.data, wine.target, cv=10).mean()
    superpa.append(rfc_cross)
print(max(superpa), superpa.index(max(superpa)))  # best mean score and its index (number of trees = index + 1)
plt.figure(figsize=(20, 8))
plt.plot(range(1, 201), superpa, label="rfc_cross_mean")
plt.legend()
0.9888888888888889 20
<matplotlib.legend.Legend at 0x7ff6f540f100>
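The printed index is zero-based, so the best score above corresponds to index + 1 trees. The same search over `n_estimators` could also be delegated to `GridSearchCV`, which refits the best model automatically. A sketch under the assumption that fixing `random_state=0` is acceptable (the exact best value may then differ from the run above):

from sklearn.model_selection import GridSearchCV

# Sketch: exhaustive search over n_estimators with 10-fold cross-validation.
param_grid = {"n_estimators": list(range(1, 201))}
search = GridSearchCV(RandomForestClassifier(random_state=0, n_jobs=-1), param_grid, cv=10)
search.fit(wine.data, wine.target)
print(search.best_params_, search.best_score_)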