環境: Macbook air, Jupyter-notebook, Python2.7

論文 "Bayesian PCA" (Christopher M. Bishop, 1998) を参考に、ベイズ的主成分分析を実装して、論文と同じartificial dataを作って実行したところ、論文にある結果と異なります。具体的には作ったデータは10次元ですがintrisic dimensionは3なので3列のみ残ったヒントン図になるはずなんです。どこがおかしいのでしょうか?

  • 参考にしたヒントン図のコード (記事のタイトルが同じですが、内容は少し異なっています)
  • Github

    class BayesianPCA(object):

    def __init__(self, X):
        self.x = X
        self.ndim = np.size(X, 1)
        self.num = np.size(X, 0)
        self.n_component =  self.ndim - 1
    
    # INITIALIZE PARAMETERS #########################################################
    def MLpca(self):
        cov = np.cov(self.x, rowvar = 0)
    
        la, v = np.linalg.eigh(cov)
        index = self.ndim - self.n_component
    
        # mu_ML #################################################################
        self.mean = np.mean(self.x, axis = 0)
        self.mu_ml = mean_vector = self.mean
    
        # var_ML  ###############################################################
        self.var_ml = np.mean(la[0])
    
        # W_ML  #################################################################
        Uq = v[:, 1:10]
        Lambdaq = np.diag(la[::-1][:9])
        Iq = np.eye(self.n_component)
        self.W_ml = Uq.dot(np.sqrt(Lambdaq - self.var_ml * Iq))
    
        # alpha  ################################################################
        self.alpha = np.var(self.W_ml, axis = 0)
        for i in xrange(self.n_component):
            gamma = self.ndim
            self.alpha[i] = gamma/(np.linalg.norm(self.W_ml[:,i])**2)
    
    
    def e_step(self):
        M = self.W_ml.T.dot(self.W_ml) + self.var_ml * np.eye(self.n_component)
        Minv = np.linalg.inv(M)
        self.xx = np.zeros((self.num, self.n_component))
        for i in xrange(self.num):
            self.xx[i] = np.dot(np.dot(Minv, self.W_ml.T), self.x[i] - self.mean)
    
    
        self.xxt = np.zeros((self.num, np.size(Minv,0), np.size(Minv,0)))
        for i in xrange(self.num):
            self.xxt[i] = self.var_ml * M + np.dot(self.xx[i][:,np.newaxis], self.xx[i][:,np.newaxis].T)
    
    
    def m_step(self):
        # UPDATE W_ML  ###########################################################
        left = np.zeros((self.ndim, self.n_component))
        right = np.zeros((self.n_component, self.n_component))
        for i in xrange(self.num):
            #from IPython.core.debugger import Pdb; Pdb().set_trace()
            left =  np.dot((self.x[i] - self.mean)[:,np.newaxis], self.xx[i][:,np.newaxis].T)
            right = right + self.xxt[i] + self.var_ml * np.diag(self.alpha)
        #from IPython.core.debugger import Pdb; Pdb().set_trace()
        self.W_ml = np.dot(left, np.linalg.inv(right))
    
        # UPDATE var_ML  ##########################################################
        kakkonai = 0
        for i in xrange(self.num):
            first = np.linalg.norm(self.x[i] - self.mean)**2
            second = np.dot(np.dot(self.xx[i].T, self.W_ml.T), (self.x[i] - self.mean))
            last = np.sum(np.diag(np.dot(np.dot(self.xxt[i], self.W_ml.T), self.W_ml)))
            kakkonai + first + second + last
        self.var_ml = kakkonai/(self.ndim * self.num)
    
        # UPDATE alpha  ###########################################################
        for i in xrange(self.n_component):
            gamma = self.ndim
            self.alpha[i] = gamma/(np.linalg.norm(self.W_ml[:,i])**2)
    
    
    def fit(self, n_iter = 10):
    
        #  INITIALIZE PARAMETERS ##########################################################
        self.MLpca()
    
        for i in xrange(n_iter):
            #  E-step ##########################################################
            self.e_step()
    
            #  M-step ##########################################################
            self.m_step()
    
    
    # MAXIMUM LIKELIHOOD METHOD FROM PRML  #######################################
    def mpca(self, component):
        cov = np.cov(self.x, rowvar = 0)
    
        values, vectors = np.linalg.eigh(cov)
        index = np.size(self.x, 1) - component
        var = None
    
        # PRML (12.46) best sigma^2
        if index == 0:
            var = 0
        else:
            var = np.mean(values[:index])
    
        # PRML (12.45) best W
        W = vectors[:, index:].dot(np.sqrt(np.diag(values[index:]) - var * np.eye(component)))
        return W