"""
'pca_demo.py'

Principal component analysis (PCA) demo using scikit-learn.

David Pan, UAH
"""

import numpy as np
# Read the comma-separated input file and keep its first two columns
# as the data matrix X that the rest of the script works on.
infile = "pca.csv"
dataset = np.loadtxt(fname=infile, delimiter=',')
X = dataset[:, :2]


from sklearn.decomposition import PCA

# Fit a 2-component PCA model to X.
pca = PCA(n_components=2)
# sklearn automatically centers the input raw data
pca.fit(X)

# Eigenvectors (loadings); each row is one principal axis
print(pca.components_)

# Eigenvalues (latent)
print(pca.explained_variance_)

# Scores: X projected onto the principal axes
Y = pca.transform(X)

# Sanity check: the sample variance of each score column should match the
# eigenvalues printed above.
# axis=0 -> variance down each column; ddof=1 -> divide by (N-1).
# The result is stored and printed; a bare expression would be silently
# discarded when the file is run as a script.
Y_var = np.var(Y, axis=0, ddof=1)
print(Y_var)

# Reconstruction by keeping only the 1st principal component:
# set the 2nd component of the scores to zero.
# Work on a copy -- plain assignment would alias Y, and zeroing the column
# would then destroy the original scores as well.
Y_trunc = Y.copy()
Y_trunc[:, 1] = 0

# inverse_transform maps the scores back to the ORIGINAL data space,
# i.e. the per-feature mean is added back to the result.
X_rec = pca.inverse_transform(Y_trunc)

# Center both the raw input and the reconstruction with the per-column
# mean (axis=0) so the two are compared in the same frame.
# np.mean(X) without an axis would be a single scalar over all elements.
col_mean = np.mean(X, axis=0)
X_center = X - col_mean
X_rec_center = X_rec - col_mean

# Mean square error: squared Euclidean distance per sample, summed and
# divided by (N-1). With the 2nd component dropped this should equal the
# 2nd eigenvalue (pca.explained_variance_[1]) up to rounding.
diff = X_rec_center - X_center
diff_sq = np.sum(diff ** 2, axis=1)
mse = np.sum(diff_sq) / (np.size(diff_sq) - 1)
print(mse)