# K-Means clustering of the bank-loan data, followed by a t-SNE scatter plot
# of the resulting clusters.
import pandas as pd

# Parameter initialisation.
inputfile = r'C:/Users/Administrator/Desktop/data/bankloan.xls'  # source data
outputfile = 'C:/Users/Administrator/Desktop/data_type.xls'  # labelled rows are written here
k = 3  # number of clusters
iteration = 500  # upper bound on K-Means iterations

# The '年龄' column becomes the row index, so it is not part of the features.
data = pd.read_excel(inputfile, index_col='年龄')
# Z-score standardisation: zero mean and unit standard deviation per column.
data_zs = 1.0 * (data - data.mean()) / data.std()

if __name__ == '__main__':
    from sklearn.cluster import KMeans

    # `n_jobs` was deprecated in scikit-learn 0.23 and removed in 1.0, where
    # passing it raises TypeError. It only controlled parallelism, so leaving
    # it out does not change the clustering on older releases.
    model = KMeans(n_clusters=k, max_iter=iteration)
    model.fit(data_zs)

    # Summary table: one row per cluster with its centre and its member count.
    r1 = pd.Series(model.labels_).value_counts()  # samples per cluster
    r2 = pd.DataFrame(model.cluster_centers_)  # cluster centres
    r = pd.concat([r2, r1], axis=1)
    r.columns = list(data.columns) + [u'类别数目']
    print(r)

    # Per-sample table: the original rows plus the assigned cluster label.
    r = pd.concat([data, pd.Series(model.labels_, index=data.index)], axis=1)
    r.columns = list(data.columns) + [u'聚类类别']
    # NOTE(review): writing the legacy .xls format needs the xlwt engine, which
    # recent pandas releases no longer ship -- confirm the target environment.
    r.to_excel(outputfile)

    from sklearn.manifold import TSNE

    # Reduce the standardised features to two dimensions for plotting.
    tsne = TSNE()
    tsne.fit_transform(data_zs)
    # Rebind the name to the embedding, indexed like the input rows so that the
    # boolean masks built from `r` below line up with it.
    tsne = pd.DataFrame(tsne.embedding_, index=data_zs.index)

    import matplotlib.pyplot as plt

    plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to render Chinese labels
    plt.rcParams['axes.unicode_minus'] = False  # keep the minus sign rendering correctly

    # One marker style per cluster label.
    for label, marker in ((0, 'r.'), (1, 'go'), (2, 'b*')):
        d = tsne[r[u'聚类类别'] == label]
        plt.plot(d[0], d[1], marker)
    plt.show()
'''Neural network test.'''
import pandas as pd
from keras.models import Sequential
# `keras.layers` exposes Dense/Activation on every Keras release, whereas the
# `keras.layers.core` sub-module is not a public path in Keras 3.
from keras.layers import Dense, Activation
import numpy as np

# Parameter initialisation.
inputfile = 'C:/Users/Administrator/Desktop/data/bankloan.xls'
data = pd.read_excel(inputfile)
# The first eight columns are the features, the ninth is the target.
# NOTE: despite the `_test` names, the same rows are used for both fitting
# and evaluation below.
x_test = data.iloc[:, :8].values
y_test = data.iloc[:, 8].values

model = Sequential()  # build the model
model.add(Dense(input_dim=8, units=8))
model.add(Activation('relu'))  # ReLU activation for the hidden layer
model.add(Dense(input_dim=8, units=1))
model.add(Activation('sigmoid'))  # sigmoid function: the output is a single 0-1 value
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(x_test, y_test, epochs=1000, batch_size=10)

predict_x = model.predict(x_test)
# The network has ONE sigmoid output, so `predict_x` has shape (n, 1).
# argmax over axis 1 of such an array is always 0, which labelled every sample
# as class 0. Thresholding the output at 0.5 yields the actual 0/1 class.
classes_x = (predict_x > 0.5).astype(int)
yp = classes_x.reshape(len(y_test))


def cm_plot(y, yp):
    """Draw a confusion matrix as an annotated heat map.

    Args:
        y: True labels.
        yp: Predicted labels, same length as ``y``.

    Returns:
        The ``matplotlib.pyplot`` module with the figure drawn, so the caller
        can chain ``.show()`` or ``.savefig(...)``.
    """
    from sklearn.metrics import confusion_matrix
    import matplotlib.pyplot as plt

    cm = confusion_matrix(y, yp)  # rows: true labels, columns: predictions
    plt.matshow(cm, cmap=plt.cm.Greens)
    plt.colorbar()

    n_rows, n_cols = cm.shape
    for row in range(n_rows):
        for col in range(n_cols):
            # matshow puts the row index on the vertical axis and the column
            # index on the horizontal axis, so the text for cm[row, col]
            # belongs at xy=(col, row); the reverse transposes the numbers
            # relative to the colours and the axis labels.
            plt.annotate(str(cm[row, col]), xy=(col, row),
                         horizontalalignment='center',
                         verticalalignment='center')

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    return plt


cm_plot(y_test, yp).show()

# With no metrics passed to compile(), evaluate() returns the loss value.
score = model.evaluate(x_test, y_test, batch_size=128)
print(score)
# 结果 (Results):