机器学习之决策树算法

使用决策树训练红酒数据集

完整代码：

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn import tree, datasets
from sklearn.model_selection import train_test_split

# Prepare the data: keep only the first two features of the wine dataset.
data = datasets.load_wine()
X, y = data.data[:, :2], data.target
# No random_state is passed, so the split (and the score below) changes
# from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y)

# Train the model on the training split only. Fitting on the full X, y would
# leak the held-out samples into training and inflate the test score.
clf = tree.DecisionTreeClassifier(max_depth=1)
clf.fit(X_train, y_train)

# Report the mean accuracy on the held-out test split.
print(clf.score(X_test, y_test))

输出：

0.7555555555555555

绘制决策树的图形

完整代码：

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn import tree, datasets
from sklearn.model_selection import train_test_split

# Prepare the data: keep only the first two features of the wine dataset.
data = datasets.load_wine()
X, y = data.data[:, :2], data.target
# No random_state is passed, so the split changes from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y)

# Train the model on the training split only. Fitting on the full X, y would
# leak the held-out samples into training.
clf = tree.DecisionTreeClassifier(max_depth=1)
clf.fit(X_train, y_train)

# Plot: light colours for the predicted regions, bold colours for the samples.
cmap_light = ListedColormap(["#FFAAAA", "#AAFFAA", "#AAAAFF"])
cmap_bold = ListedColormap(["#FF0000", "#00FF00", "#0000FF"])

# Build a grid with step 0.02 that extends one unit beyond the training data
# along both feature axes.
x_min, x_max = X_train[:, 0].min() - 1, X_train[:, 0].max() + 1
y_min, y_max = X_train[:, 1].min() - 1, X_train[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, .02), np.arange(y_min, y_max, .02))

# Predict a class for every grid point and reshape back to the grid layout.
z = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

plt.figure()
plt.pcolormesh(xx, yy, z, cmap=cmap_light)

# Overlay all samples, coloured by their true class label.
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold, edgecolor="k", s=20)
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
plt.title("Classifier: (max_depth = 1)")
plt.show()

输出：
（图：max_depth = 1 时决策树的分类区域及样本散点图）

从结果来看，分类器的表现并不是特别好，我们可以加大深度试试。

调整决策树的深度

完整代码：

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn import tree, datasets
from sklearn.model_selection import train_test_split

# Prepare the data: keep only the first two features of the wine dataset.
data = datasets.load_wine()
X, y = data.data[:, :2], data.target
# No random_state is passed, so the split (and the score below) changes
# from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y)

# Train the model on the training split only. Fitting on the full X, y would
# leak the held-out samples into training and inflate the test score.
clf = tree.DecisionTreeClassifier(max_depth=3)
clf.fit(X_train, y_train)

# Report the mean accuracy on the held-out test split.
print(clf.score(X_test, y_test))

# Plot: light colours for the predicted regions, bold colours for the samples.
cmap_light = ListedColormap(["#FFAAAA", "#AAFFAA", "#AAAAFF"])
cmap_bold = ListedColormap(["#FF0000", "#00FF00", "#0000FF"])

# Build a grid with step 0.02 that extends one unit beyond the training data
# along both feature axes.
x_min, x_max = X_train[:, 0].min() - 1, X_train[:, 0].max() + 1
y_min, y_max = X_train[:, 1].min() - 1, X_train[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, .02), np.arange(y_min, y_max, .02))

# Predict a class for every grid point and reshape back to the grid layout.
z = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

plt.figure()
plt.pcolormesh(xx, yy, z, cmap=cmap_light)

# Overlay all samples, coloured by their true class label.
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold, edgecolor="k", s=20)
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
plt.title("Classifier: (max_depth = 3)")
plt.show()

输出：
（图：max_depth = 3 时决策树的分类区域及样本散点图）

从结果来看，分数变成了0.84，已经是一个比较能够接受的分数了。（注意：train_test_split 没有指定 random_state，数据集是随机划分的，因此每次运行得到的分数会略有不同。）

另外，从图像来看，不同类别的点大致都能落入各自的区域中，比深度为1时更准确一些。

继续加大决策树的深度

完整代码：

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn import tree, datasets
from sklearn.model_selection import train_test_split

# Prepare the data: keep only the first two features of the wine dataset.
data = datasets.load_wine()
X, y = data.data[:, :2], data.target
# No random_state is passed, so the split (and the score below) changes
# from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y)

# Train the model on the training split only. Fitting on the full X, y would
# leak the held-out samples into training and inflate the test score.
clf = tree.DecisionTreeClassifier(max_depth=5)
clf.fit(X_train, y_train)

# Report the mean accuracy on the held-out test split.
print(clf.score(X_test, y_test))

# Plot: light colours for the predicted regions, bold colours for the samples.
cmap_light = ListedColormap(["#FFAAAA", "#AAFFAA", "#AAAAFF"])
cmap_bold = ListedColormap(["#FF0000", "#00FF00", "#0000FF"])

# Build a grid with step 0.02 that extends one unit beyond the training data
# along both feature axes.
x_min, x_max = X_train[:, 0].min() - 1, X_train[:, 0].max() + 1
y_min, y_max = X_train[:, 1].min() - 1, X_train[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, .02), np.arange(y_min, y_max, .02))

# Predict a class for every grid point and reshape back to the grid layout.
z = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

plt.figure()
plt.pcolormesh(xx, yy, z, cmap=cmap_light)

# Overlay all samples, coloured by their true class label.
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold, edgecolor="k", s=20)
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
plt.title("Classifier: (max_depth = 5)")
plt.show()