Kernel K-means 実装 in Python
環境: Mac, Jupyter-Notebook, Python2.7
Kernel K-meansを実装したんですが、うまく動作しません。コードにエラーは出ません。どこかの計算が間違っているんだと思いますが、どこかわからないので誰か助けてください。
Algorithmについてはこちらがわかりやすいかと思います
Link_01
Link_02
とりあえずデータを貼ります、下に実際のコードを書きました。
from sklearn.metrics.pairwise import pairwise_kernels
class Kernel_Kmeans(object):
def __init__(self, n_clusters=3, max_iter=50, kernel="linear",
gamma=None, degree=3, coef0=1, kernel_params=None):
self.n_clusters = n_clusters
self.max_iter = max_iter
self.kernel = kernel
self.gamma = gamma
self.degree = degree
self.coef0 = coef0
self.kernel_params = kernel_params
def kernel_matrix(self, X, Y = None):
if callable(self.kernel):
prams = self.kernel_params or {}
else:
params = {"gamma": self.gamma,
"degree": self.degree,
"coef0": self.coef0}
return pairwise_kernels(X, Y, metric = self.kernel,
filter_params = True, **params)
def initialize_cluster(self, X):
self.labels = np.random.choice(xrange(self.n_clusters), X.shape[0])
#self.cluster_size = np.bincount(self.labels)
def fit(self, X):
n_samples = X.shape[0]
self.initialize_cluster(X)
for ite in xrange(self.max_iter):
self.cluster_size = np.bincount(self.labels)
for i in xrange(n_samples):
data = X[self.labels == 0]
xm = np.sum(self.kernel_matrix(data, Y = X[i].reshape(1, X.shape[1])))/self.cluster_size[0]
mm = np.sum(self.kernel_matrix(data))/(self.cluster_size[0]**2)
min_dist = mm - 2*xm
for j in xrange(self.n_clusters
):
if j == (self.n_clusters-1):
continue
tmp_data = X[self.labels == j+1]
tmp_xm = np.sum(self.kernel_matrix(data, Y = X[i].reshape(1, X.shape[1])))/self.cluster_size[j+1]
tmp_mm = np.sum(self.kernel_matrix(data))/(self.cluster_size[j+1]**2)
tmp_dist = tmp_mm - 2*tmp_xm
if tmp_dist < min_dist:
min_dist = tmp_dist
self.labels[i] = j