文档库 最新最全的文档下载
当前位置:文档库 › pca 程序

pca 程序

pca  程序
pca  程序

function varargout = pca(X, N, method)

%PCA PRINCIPLE COMPONENTS ANALYSIS

%

% Performing principal components analysis on the N1-by-N2 real-valued

% data matrix X, where N1 and N2 are the number of features (N1 variables) % and observations (N2 samples), respectively.

%

% P = pca(X,N,method) returns the N-by-N1 matrix P of basis vectors, one % basis vector per row, in order of decreasing corresponding eigenvalues, % i.e. P*X retains the first N principle components. If N is not

% specified, all components (N=N1) are kept.

%

% Two methods are available: 'eig' (default) and 'svd' which solve the

% problem by eigenvalue decomposition and singular value decomposition, % respectively. 'svd' is running in 'economy' mode. If there are more

% variables than samples (N1>N2), 'svd' is recommended for this code.

%

% [P, D] = pca(X,N,method) also returns all eigenvalues (normalized) in D, % in descending order.

%

% [P, D, Y] = pca(X,N,method) further returns the N-by-N2 matrix Y = P*X, % each column of whom is the projection of the corresponding observation % from X onto the basis vectors contained in P.

%

% Siqing Wu, <6sw21@queensu.ca>

% Version: 1.1, Date: 2008-07-30

error(nargchk(1,3,nargin)) % check the number of arguments

error(nargoutchk(0,3,nargout))

[nr, nc] = size(X);

if nargin<3

method = 'eig'; % default method

end

if nargin<2

N = nr; % keep all components

elseif N < 1 || N > nr || round(N)~=N

fprintf('Input N=%g is not valid; all components will be retained.\n', N)

N = nr;

end

X = X-repmat(mean(X,2),[1 nc]); % center data

switch method

case'eig'

C = X*X.';

% should be C/(nc-1) for unbiased estimate of the covariance matix,

% but won't affect P.

[E,D] = eig(C); % D lists eigenvalues from small to large % rearrange D and E

D = diag(D);

D = D(end:-1:1)/max(D);

E = E(:,end:-1:1)';

P = E(1:N,:);

case'svd'

% Instead of solving X*X' = E*D*E', solve X = U*Sigma*V'

% X*X' = U*Sigma*V'*V*Sigma*U' = U*Sigma^2*U' -> P = U';

[U,sigma] = svd(X,'econ'); % "economy size" decomposition if N > nc

fprintf('SVD -> N=%d is used.\n', nc)

N = nc;

end

D = diag(sigma).^2;

D = D/max(D);

P = U(:,1:N)';

otherwise

error('Undefined method!')

end

fprintf('Top %d components are retained; cumulative eigenvalue contribution is %1.2f\n', N, sum(D(1:N))/sum(D))

switch nargout

case {0,1}

varargout = {P};

case {2}

varargout = {P, D};

case {3}

Y = P*X;

varargout = {P, D, Y};

end

相关文档