"""Small PCA demonstration on six hand-written 2-D points.

Fits a two-component PCA, prints the fitted estimator, the covariance
matrix and the share of variance carried by each component, then plots
the original points next to their projection onto the principal axes.
"""

import numpy as np
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

# Create a numpy array simulating 2-dimensional data (each row is a data point).
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])

# Set up the default PCA solver to compute the first two principal components.
pca = PCA(n_components=2)

# fit() returns the estimator itself, so `results` refers to the same object
# as `pca`.
results = pca.fit(X)

# Print information about the PCA object (state of the solver).
print(results)

# Print the covariance matrix reported by the fitted model. Because both
# components are kept here, this should coincide with the sample covariance
# of the original data.
covX = pca.get_covariance()
print("Covariance matrix:",covX)

# Print the percentage of (overall) variance explained by each principal
# component. explained_variance_ratio_ holds fractions in [0, 1], so scale by
# 100 to match the "%" in the label.
print("% of variance explained by components:",100 * pca.explained_variance_ratio_)

# Transform the original data so that the first principal component
# corresponds to the x axis and the second principal component corresponds
# to the y axis.
Y = pca.transform(X)
print("Transformed data:",Y)

# Plot original and transformed data; label both series so they can be told
# apart in the figure.
plt.scatter(X[:, 0], X[:, 1], label="Original data")
plt.scatter(Y[:, 0], Y[:, 1], label="Transformed data")
plt.legend()
plt.show()