最新版 K-means 修正版,或许可以解决部分学习者遇到的问题。
# Main script 1: K-means on the 2-D example data set.
#
# NOTE(review): this section calls findClosestCentroids, computeCentroids,
# runKmeans, plotData and kMeansInitCentroids; they must already be defined
# when these statements run.
import numpy as np
import scipy.io as scio  # `scio` was used but never imported

DataFile1 = 'ex7data2.mat'  # MATLAB .mat file to read
parameter_Data = scio.loadmat(DataFile1)
X = parameter_Data['X']  # X: 300x2 (per the original author's note)
K = 3  # three clusters
# Hand-picked starting coordinates for the three centroids.
initial_centroids = np.array([[3, 3], [6, 2], [8, 5]])

# One manual K-means step: assign every point, then move the centroids.
idx = findClosestCentroids(X, initial_centroids)
centroids = computeCentroids(X, idx, K)

# Run 20 K-means iterations and plot the result with the centroid paths.
idx, centroids_all = runKmeans(X, initial_centroids, 20)
plotData(X, centroids_all, idx)

# Randomly chosen initial centroids.
random_initial_centroids = kMeansInitCentroids(X, K)
import random

import numpy as np
import matplotlib.pyplot as plt


def findClosestCentroids(X, initial_centroids):
    """Assign every sample to its nearest centroid.

    Squared Euclidean distance over *all* feature columns is used, so the
    function works for 2-D points as well as e.g. RGB pixels.

    :param X: array-like of shape (m, n), one sample per row
    :param initial_centroids: array-like of shape (K, n), one centroid per row
    :return: float ndarray of shape (m, 1) holding 1-based cluster labels;
        ties go to the centroid with the lowest index
    """
    X = np.asarray(X, dtype=float)
    centroids = np.asarray(initial_centroids, dtype=float)
    m = X.shape[0]
    K = centroids.shape[0]

    # One column of squared distances per centroid keeps memory at O(m * K).
    sq_dist = np.empty((m, K))
    for j in range(K):
        diff = X - centroids[j]
        sq_dist[:, j] = np.sum(diff * diff, axis=1)

    # A NaN distance (e.g. from a NaN centroid) must never win: argmin would
    # otherwise pick it, because NaN is treated as the minimum.
    sq_dist[np.isnan(sq_dist)] = np.inf

    nearest = np.argmin(sq_dist, axis=1)
    return (nearest + 1).astype(float).reshape(-1, 1)


def computeCentroids(X, idx, K, previous_centroids=None):
    """Recompute each centroid as the mean of the samples assigned to it.

    :param X: array-like of shape (m, n), one sample per row
    :param idx: array-like with m entries, 1-based cluster label per sample
    :param K: number of clusters
    :param previous_centroids: optional array-like of shape (K, n); a cluster
        that received no samples keeps its row from here. When omitted, the
        centroid of an empty cluster is NaN.
    :return: float ndarray of shape (K, n) with the new centroids
    """
    X = np.asarray(X, dtype=float)
    n = X.shape[1]
    # Labels are 1-based; truncate to int exactly like int(idx[i] - 1).
    labels = (np.asarray(idx, dtype=float).ravel() - 1).astype(int)

    sums = np.zeros((K, n))
    counts = np.zeros(K)
    # np.add.at accumulates correctly when the same label occurs repeatedly.
    np.add.at(sums, labels, X)
    np.add.at(counts, labels, 1)

    centroids = np.full((K, n), np.nan)
    occupied = counts > 0
    # Divide only where the cluster is non-empty to avoid 0/0.
    centroids[occupied] = sums[occupied] / counts[occupied][:, None]
    if previous_centroids is not None:
        previous = np.asarray(previous_centroids, dtype=float)
        centroids[~occupied] = previous[~occupied]
    return centroids


def runKmeans(X, centroids, max_iters):
    """Run the K-means algorithm for a fixed number of iterations.

    :param X: array-like of shape (m, n), one sample per row
    :param centroids: array-like of shape (K, n), the initial centroids
    :param max_iters: number of assignment/update rounds to perform
    :return: tuple ``(idx, centroids_all)``; ``idx`` is the (m, 1) array of
        1-based labels from the last assignment step and ``centroids_all`` is
        a list with the initial centroids followed by the centroids produced
        in every round
    """
    K = len(centroids)
    centroids_all = [centroids]
    centroid_i = centroids
    idx = None
    for _ in range(max_iters):
        idx = findClosestCentroids(X, centroid_i)
        # Passing the current centroids lets empty clusters stay where they
        # are instead of turning into NaN.
        centroid_i = computeCentroids(X, idx, K, centroid_i)
        centroids_all.append(centroid_i)
    if idx is None:
        # No iteration ran (max_iters <= 0): still return a valid assignment.
        idx = findClosestCentroids(X, centroid_i)
    return idx, centroids_all


def plotData(X, centroids, idx=None):
    """Visualise the clustering result and the movement of the centroids.

    :param X: ndarray, all samples; the first two columns are plotted
    :param centroids: sequence of ndarrays, the centroids of every round
        (e.g. ``centroids_all`` as returned by ``runKmeans``)
    :param idx: ndarray or None, cluster label of every sample, used as colour
    :return: None
    """
    colours = None if idx is None else np.ravel(idx)
    plt.scatter(X[..., 0], X[..., 1], c=colours)

    # One entry per round; each entry holds that coordinate for all centroids,
    # so every centroid is drawn as its own path.
    xx = [np.asarray(c)[..., 0] for c in centroids]
    yy = [np.asarray(c)[..., 1] for c in centroids]
    plt.plot(xx, yy, 'rx--')
    plt.show()


def kMeansInitCentroids(X, K):
    """Pick K random samples of X as initial centroids.

    Note from the original author: an earlier version used
    ``numpy.random.shuffle()`` here, which scrambled the order of the original
    data; thanks to the other bloggers who pointed this out.

    :param X: ndarray of shape (m, n), one sample per row
    :param K: number of centroids to draw
    :return: ndarray of shape (K, n) made of rows of X
    """
    m = X.shape[0]
    # Draw distinct rows whenever possible: duplicate centroids would leave
    # one of the duplicated clusters empty.
    chosen = np.random.choice(m, K, replace=K > m)
    return X[chosen]
# Main script 2 (image compression): quantise an image to K colours.
#
# NOTE(review): `io` is not imported anywhere in the visible source;
# presumably it is `skimage.io` (`from skimage import io`) - confirm and add
# the import. This section also relies on `np`, `plt`, kMeansInitCentroids
# and runKmeans being defined before it runs.
A = io.imread('bird_small.png')  # (128, 128, 3) per the original author's note
A = A / 255  # scale the channel values down by 255
# One row per pixel, one column per channel (was hard-coded as 16384 x 3).
X = A.reshape(-1, A.shape[-1])
K = 16
centroids = kMeansInitCentroids(X, K)
plt.imshow(A)
plt.show()

idx, centroids_all = runKmeans(X, centroids, 10)  # idx: one 1-based label per pixel
centroids = centroids_all[-1]  # final palette, one row per cluster

# Replace every pixel by the colour of its cluster. Labels are 1-based floats,
# so convert them to 0-based integer row indices first.
palette_index = idx.astype(int).ravel() - 1
img = centroids[palette_index]
img = img.reshape(A.shape)  # restore the original image layout
plt.imshow(img)
plt.show()