1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
| import scipy.io as sio from PIL import Image from sklearn.cluster import KMeans import numpy as np import random import copy import matplotlib.pyplot as plt import operator
def loaddata(file='ringData.mat'):
    """Load the ``Dataset`` variable from a MATLAB ``.mat`` file.

    Args:
        file: Path of the ``.mat`` file to read. Defaults to
            ``'ringData.mat'``; ``'GaussianData.mat'`` is the other data set
            this script was written against.

    Returns:
        The content of the ``Dataset`` variable as a numpy array.
    """
    data = sio.loadmat(file)
    return np.array(data['Dataset'])


def distance(p1, p2):
    """Return the Euclidean (L2) distance between points ``p1`` and ``p2``."""
    return np.linalg.norm(np.asarray(p1) - np.asarray(p2))


def getWbyKNN(data, k):
    """Build a symmetric k-nearest-neighbour adjacency matrix.

    Each point is linked to its ``k`` nearest *other* points (Euclidean
    distance); the directed 0/1 matrix is then symmetrised as
    ``(W + W.T) / 2``, so entries are 0, 0.5 (one-way neighbours) or
    1 (mutual neighbours).

    Args:
        data: Sequence of points, one point per row.
        k: Number of neighbours per point. Values larger than the number of
            other points are clamped.

    Returns:
        A ``(n, n)`` float array with a zero diagonal.

    Raises:
        ValueError: If ``k`` is negative.
    """
    if k < 0:
        raise ValueError("k must be non-negative, got %r" % (k,))

    points = np.asarray(data, dtype=float)
    points_num = len(points)
    if points_num == 0:
        return np.zeros((0, 0))
    # Treat every sample as a flat feature vector (also covers 1-D input).
    points = points.reshape(points_num, -1)

    neighbours = min(k, points_num - 1)
    W = np.zeros((points_num, points_num))
    for i in range(points_num):
        dists = np.linalg.norm(points - points[i], axis=1)
        # Exclude the point itself explicitly. Relying on "self sorts first"
        # breaks when duplicate points exist (another point at distance 0
        # may sort ahead of it), which would create a self-loop.
        dists[i] = np.inf
        nearest = np.argsort(dists, kind="stable")[:neighbours]
        W[i, nearest] = 1

    return (W + W.T) / 2


def getD(W):
    """Return the diagonal degree matrix of ``W``.

    ``D[i][i]`` is the sum of row ``i`` of ``W``; all off-diagonal entries
    are zero.
    """
    W = np.asarray(W, dtype=float)
    if W.size == 0:
        return np.zeros((0, 0))
    return np.diag(W.sum(axis=1))


def getEigVec(L, cluster_num):
    """Return the ``cluster_num`` smallest eigenpairs of ``L``.

    Args:
        L: Square matrix (the caller in this file passes ``D - W``).
        cluster_num: Number of eigenpairs to keep.

    Returns:
        ``(eigval, eigvec)`` where ``eigval`` holds the selected eigenvalues
        in ascending order and ``eigvec[:, j]`` is the eigenvector belonging
        to ``eigval[j]``. ``np.linalg.eig`` may return a complex dtype.
    """
    eigval, eigvec = np.linalg.eig(L)
    # Select by position rather than through a value -> index mapping:
    # repeated eigenvalues would collapse to a single index and the same
    # eigenvector would be returned more than once.
    ix = np.argsort(eigval, kind="stable")[:cluster_num]
    return eigval[ix], eigvec[:, ix]


def randRGB():
    """Return a random colour as an ``(r, g, b)`` tuple of floats in [0, 1]."""
    return tuple(random.randint(0, 255) / 255.0 for _ in range(3))


def plot(matrix, C, k):
    """Scatter-plot the points of ``matrix`` coloured by cluster label.

    Args:
        matrix: Points, one per row; columns 0 and 1 are used as x and y.
        C: Cluster label of each point (converted to ``int``); labels index
            into a list of ``k`` colours.
        k: Number of random colours to generate.
    """
    colors = [randRGB() for _ in range(k)]
    labels = np.asarray(C).astype(int)
    # Only the points that have a label are drawn.
    points = np.asarray(matrix)[:len(labels)]
    # One draw call per cluster instead of one per point.
    for label in np.unique(labels):
        mask = labels == label
        plt.plot(points[mask, 0], points[mask, 1], 'o', color=colors[label])
    plt.show()
if __name__ == '__main__':
    # Script entry point: cluster the loaded data set into `cluster_num`
    # groups and display the result.
    cluster_num, KNN_k = 2, 10

    data = loaddata()

    # Graph construction: KNN adjacency W, degree matrix D, and L = D - W.
    W = getWbyKNN(data, KNN_k)
    D = getD(W)
    L = D - W

    # Embed every point using the eigenvectors of the smallest eigenvalues.
    eigval, eigvec = getEigVec(L, cluster_num)

    # K-means on the real part of the embedding, then plot the labelling.
    s = KMeans(n_clusters=cluster_num).fit(eigvec.real)
    plot(data, s.labels_, cluster_num)
|