To deepen students' understanding of classifier design concepts, allow them to gain further insight into linear classifiers through their own implementation, and understand the principle of the Fisher criterion for determining the optimal linear decision boundary, as well as the principle of solving for it with Lagrange multipliers.
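As a brief recap of the standard derivation (notation: m_1, m_2 are the class mean vectors, S_1 and S_2 the per-class scatter matrices, S_w = S_1 + S_2 the total within-class scatter matrix, and S_b = (m_1 - m_2)(m_1 - m_2)^T the between-class scatter matrix), the Fisher criterion maximizes

J_F(W) = (W^T S_b W) / (W^T S_w W).

Fixing the scale with the constraint W^T S_w W = c and introducing a Lagrange multiplier λ leads to S_b W = λ S_w W, whose solution (up to an arbitrary positive scale factor) is the optimal projection direction

W^* = S_w^{-1} (m_1 - m_2).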
Hardware: Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz, 16 GB RAM. OS: Windows 10. Language version: Python 3.8.5. Development environment: Visual Studio Code 1.56, Anaconda 1.10. Dependencies: numpy, matplotlib
Two classes of data, ω_1 and ω_2, are given; their prior probabilities are unknown.
The data points of ω_1 are as follows:
ω_1=[[0.2331, 2.3385], [1.5207, 2.1946], [0.6499, 1.6730], [0.7757, 1.6365],
[1.0524, 1.7844], [1.1974, 2.0155], [0.2908, 2.0681], [0.2518, 2.1213],
[0.6682, 2.4797], [0.5622, 1.5118], [0.9023, 1.9692], [0.1333, 1.8340],
[-0.5431, 1.8704], [0.9407, 2.2948], [-0.2126, 1.7714], [0.0507, 2.3939],
[-0.0810, 1.5648], [0.7315, 1.9329], [0.3345, 2.2027], [1.0650, 2.4568],
[-0.0247, 1.7523], [0.1043, 1.6991], [0.3122, 2.4883], [0.6655, 1.7259],
[0.5838, 2.0466], [1.1653, 2.0226], [1.2653, 2.3757], [0.8137, 1.7987],
[-0.3399, 2.0828], [0.5152, 2.0798], [0.7226, 1.9449], [-0.2015, 2.3801],
[0.4070, 2.2373], [-0.1717, 2.1614], [-1.0573, 1.9235], [-0.2099, 2.2604]]
The data points of ω_2 are as follows:
ω_2=[[1.4010, 1.0298], [1.2301, 0.9611], [2.0814, 0.9154], [1.1655, 1.4901],
[1.3740, 0.8200], [1.1829, 0.9399], [1.7632, 1.1405], [1.9739, 1.0678],
[2.4152, 0.8050], [2.5890, 1.2889], [2.8472, 1.4601], [1.9539, 1.4334],
[1.2500, 0.7091], [1.2864, 1.2942], [1.2614, 1.3744], [2.0071, 0.9387],
[2.1831, 1.2266], [1.7909, 1.1833], [1.3322, 0.8798], [1.1466, 0.5592],
[1.7087, 0.5150], [1.5920, 0.9983], [2.9353, 0.9120], [1.4664, 0.7126],
[2.9313, 1.2833], [1.8349, 1.1029], [1.8340, 1.2680], [2.5096, 0.7140],
[2.7198, 1.2446], [2.3148, 1.3392], [2.0353, 1.1808], [2.6030, 0.5503],
[1.2327, 1.4708], [2.1465, 1.1435], [1.5673, 0.7679], [2.9414, 1.1288]]
Using the above data points as samples, determine w_0 and W^* according to the Fisher criterion, plot the decision boundary, and use the resulting model to decide which class each of the points [1, 1.5], [1.2, 1.0], [2.0, 0.9], [1.2, 1.5], [0.23, 2.33] belongs to.
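For the decision rule used in the code below: with N_1 and N_2 samples in the two classes, the threshold is taken as the sample-weighted midpoint of the projected class means,

w_0 = -(N_1 · W^{*T} m_1 + N_2 · W^{*T} m_2) / (N_1 + N_2),

and a test point x is assigned to ω_1 if W^{*T} x + w_0 > 0 and to ω_2 if W^{*T} x + w_0 < 0. This is one common choice of threshold when the priors are unknown; with N_1 = N_2, as here, it reduces to the midpoint of the two projected means.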
import numpy as np
import matplotlib.pyplot as plt

# Input data
data1 = np.array([[0.2331, 2.3385], [1.5207, 2.1946], [0.6499, 1.6730], [0.7757, 1.6365],
                  [1.0524, 1.7844], [1.1974, 2.0155], [0.2908, 2.0681], [0.2518, 2.1213],
                  [0.6682, 2.4797], [0.5622, 1.5118], [0.9023, 1.9692], [0.1333, 1.8340],
                  [-0.5431, 1.8704], [0.9407, 2.2948], [-0.2126, 1.7714], [0.0507, 2.3939],
                  [-0.0810, 1.5648], [0.7315, 1.9329], [0.3345, 2.2027], [1.0650, 2.4568],
                  [-0.0247, 1.7523], [0.1043, 1.6991], [0.3122, 2.4883], [0.6655, 1.7259],
                  [0.5838, 2.0466], [1.1653, 2.0226], [1.2653, 2.3757], [0.8137, 1.7987],
                  [-0.3399, 2.0828], [0.5152, 2.0798], [0.7226, 1.9449], [-0.2015, 2.3801],
                  [0.4070, 2.2373], [-0.1717, 2.1614], [-1.0573, 1.9235], [-0.2099, 2.2604]])
data2 = np.array([[1.4010, 1.0298], [1.2301, 0.9611], [2.0814, 0.9154], [1.1655, 1.4901],
                  [1.3740, 0.8200], [1.1829, 0.9399], [1.7632, 1.1405], [1.9739, 1.0678],
                  [2.4152, 0.8050], [2.5890, 1.2889], [2.8472, 1.4601], [1.9539, 1.4334],
                  [1.2500, 0.7091], [1.2864, 1.2942], [1.2614, 1.3744], [2.0071, 0.9387],
                  [2.1831, 1.2266], [1.7909, 1.1833], [1.3322, 0.8798], [1.1466, 0.5592],
                  [1.7087, 0.5150], [1.5920, 0.9983], [2.9353, 0.9120], [1.4664, 0.7126],
                  [2.9313, 1.2833], [1.8349, 1.1029], [1.8340, 1.2680], [2.5096, 0.7140],
                  [2.7198, 1.2446], [2.3148, 1.3392], [2.0353, 1.1808], [2.6030, 0.5503],
                  [1.2327, 1.4708], [2.1465, 1.1435], [1.5673, 0.7679], [2.9414, 1.1288]])

# Class mean vectors
data1_avr = np.mean(data1, axis=0)
data2_avr = np.mean(data2, axis=0)

# Center the samples: x - mean
new_data1 = np.zeros(data1.shape)
new_data2 = np.zeros(data2.shape)
t = 0
for i in data1:
    new_data1[t] = i - data1_avr
    t += 1
t = 0
for j in data2:
    new_data2[t] = j - data2_avr
    t += 1  # advance the row index for class 2 as well

# Within-class scatter matrices S1 and S2
S1 = np.zeros((data1.shape[1], data1.shape[1]))
S2 = np.zeros((data2.shape[1], data2.shape[1]))
for i in new_data1:
    temp = i.reshape(2, 1)
    S1 += temp @ temp.T
for i in new_data2:
    temp = i.reshape(2, 1)
    S2 += temp @ temp.T

# Total within-class scatter matrix and its inverse
Sw = S1 + S2
Sw_n = np.linalg.inv(Sw)

# Optimal projection direction W*
w = Sw_n @ (data1_avr - data2_avr)

# Threshold w0: sample-weighted midpoint of the projected class means
# (shape[0] is the number of samples in each class)
m1 = w.T @ data1_avr
m2 = w.T @ data2_avr
w0 = -(data1.shape[0]*m1 + data2.shape[0]*m2)/(data1.shape[0] + data2.shape[0])
print('W* =', w)
print('w0 =', w0)

# Classify the test points
test = np.array([[1, 1.5], [1.2, 1.0], [2.0, 0.9], [1.2, 1.5], [0.23, 2.33]])
test_class1 = []
test_class2 = []
for i in test:
    y = w.T @ i + w0
    if y > 0:
        print('Sample ' + str(i) + ' is classified as class 1')
        test_class1.append(i)
    elif y == 0:
        print('Sample ' + str(i) + ' lies on the decision boundary and cannot be classified')
    else:
        print('Sample ' + str(i) + ' is classified as class 2')
        test_class2.append(i)
test_class1 = np.array(test_class1)
test_class2 = np.array(test_class2)

# Prepare the plot data: the decision boundary passes through the midpoint of the
# two class means with slope -w[0]/w[1], i.e. the set of points where w.T @ x + w0 = 0
x1, y1 = data1[:, 0], data1[:, 1]
x2, y2 = data2[:, 0], data2[:, 1]
x3, y3 = test_class1[:, 0], test_class1[:, 1]
x4, y4 = test_class2[:, 0], test_class2[:, 1]
avr = (data1_avr + data2_avr)/2
temp_x = np.arange(-1, 2.5, 0.1)
k = -w[0]/w[1]
b = avr[1] - avr[0]*k
temp_y = k * temp_x + b

# Plot the training samples, the classified test points and the decision boundary
plt.scatter(x3, y3, c='g', marker=',', s=25, label='test class 1')
plt.scatter(x4, y4, c='c', marker=',', s=25, label='test class 2')
plt.scatter(x1, y1, s=8, label='class 1')
plt.scatter(x2, y2, s=8, label='class 2')
plt.plot(temp_x, temp_y, 'r', label='decision boundary')
plt.legend(loc='upper right')
plt.show()
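As an optional sanity check (this assumes scikit-learn is available, which is not among the dependencies listed above), the class assignments of the test points can be compared against scikit-learn's LinearDiscriminantAnalysis: for a two-class problem with equal class sizes, its decision boundary coincides with the Fisher boundary computed above. The sketch below is meant to be appended to the end of the script, reusing data1, data2 and test:

# Optional cross-check with scikit-learn (assumed to be installed; not listed in the dependencies above)
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

X = np.vstack([data1, data2])                        # reuse data1 and data2 from the script above
labels = np.array([1]*len(data1) + [2]*len(data2))   # 1 -> ω_1, 2 -> ω_2

lda = LinearDiscriminantAnalysis()                   # class priors are estimated from the (equal) class frequencies
lda.fit(X, labels)
print(lda.predict(test))                             # should match the class assignments printed above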