""" 'pca_demo.py' PCA David Pan, UAH """ import numpy as np infile = "pca.csv" dataset = np.loadtxt(infile, delimiter=',') X = dataset[:, 0:2] from sklearn.decomposition import PCA pca = PCA(n_components=2) # sklearn automatically centers the input raw data pca.fit(X) # Eigenvectors (loadings) print(pca.components_) # Eigvenvalues (latent) print(pca.explained_variance_) # Scores Y = pca.transform(X) #axis = 0, along the columb; ddof = 1 for dividing by (N-1); np.var(Y, axis = 0, ddof=1) # Reconstruction by keeping only the 1st principal component # setting the 2nd component in Y to zero Y_trunc = Y Y_trunc[:,1] = 0 X_rec = pca.inverse_transform(Y_trunc) # Centered (instead of the raw) input to compare with the reconstructed data X_center = X - np.mean(X) # Mean square error diff = X_rec - X_center diff_sq = diff[:,0]**2 + diff[:,1]**2 np.sum(diff_sq)/(np.size(diff_sq)-1)