import numpy as np
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

# Demo: principal component analysis (PCA) of a small 2-dimensional data set.
# The script fits a PCA model, prints some of its statistics, projects the
# data onto the principal axes, and plots the data before and after.

# Create a numpy array simulating 2-dimensional data (each row is a data point).
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])

# Set up the default PCA solver to compute the first two principal components.
pca = PCA(n_components=2)
# fit() returns the fitted estimator itself, so `results` and `pca` refer to
# the same object.
results = pca.fit(X)
# Print information about the PCA object (state of the solver).
print(results)

# Print the covariance matrix of the data as estimated by the fitted model.
covX = pca.get_covariance()
print("Covariance matrix:", covX)

# Print the percentage of (overall) variance explained by each principal
# component. explained_variance_ratio_ holds fractions in [0, 1], so scale by
# 100 to match the "%" in the label.
print("% of variance explained by components:",
      100 * pca.explained_variance_ratio_)

# Transform the original data so that the first principal component
# corresponds to the x axis and the second principal component corresponds to
# the y axis.
Y = pca.transform(X)
print("Transformed data:", Y)

# Plot original and transformed data. Labels and a legend make the two point
# clouds distinguishable; equal axis scaling keeps the rotation undistorted.
plt.scatter(X[:, 0], X[:, 1], label="original data")
plt.scatter(Y[:, 0], Y[:, 1], label="transformed data")
plt.axis("equal")
plt.legend()
plt.show()