现在的位置: 首页 > 综合 > 正文

转摘:PCA算法

2012年08月15日 ⁄ 综合 ⁄ 共 4216字 ⁄ 字号 评论关闭

function [eigvector, eigvalue, elapse] = PCA(data, ReducedDim)
%PCA    Principal Component Analysis
%
%    Usage:
%       [eigvector, eigvalue] = PCA(data, ReducedDim)
%       [eigvector, eigvalue] = PCA(data)
%       [eigvector, eigvalue, elapse] = PCA(...)
%
%    Input:
%       data       - Data matrix. Each row vector of data is a data point,
%                    each column is a feature. Sparse input is converted
%                    to full. The data is mean-centered internally.
%       ReducedDim - The dimensionality of the reduced subspace. If it is
%                    0, omitted, empty, negative, or larger than the number
%                    of features, all the dimensions will be kept.
%                    Default is 0.
%
%    Output:
%       eigvector - Each column is an embedding function, for a new
%                   data point (row vector) x,  y = x*eigvector
%                   will be the embedding result of x.
%                   Directions whose eigenvalue is numerically zero
%                   (below 1e-12 relative to the largest one) are
%                   removed, so fewer than ReducedDim columns may be
%                   returned. For data with no variance at all the result
%                   is an nFea-by-0 matrix.
%       eigvalue  - The eigenvalues of the PCA eigen-problem, sorted in
%                   descending order. They are eigenvalues of X'*X for the
%                   centered data X (not divided by the sample count).
%       elapse    - CPU time in seconds (measured with cputime) spent on
%                   centering and on the eigen-decomposition.
%
%    Examples:
%       fea = rand(7,10);
%       [eigvector,eigvalue] = PCA(fea,4);
%       Y = fea*eigvector;
%
%
%   version 2.1 --June/2007
%   version 2.0 --May/2007
%   version 1.1 --Feb/2006
%   version 1.0 --April/2004
%
%   Written by Deng Cai (dengcai2 AT cs.uiuc.edu)
%

% An empty ReducedDim is treated like an omitted one so that the scalar
% short-circuit tests below never see an empty operand.
if nargin < 2 || isempty(ReducedDim)
    ReducedDim = 0;
end

[nSmp, nFea] = size(data);
if (ReducedDim > nFea) || (ReducedDim <= 0)
    ReducedDim = nFea;
end

tmp_T = cputime;

if issparse(data)
    data = full(data);
end

% Center every feature (column) on its sample mean.
sampleMean = mean(data, 1);
data = data - repmat(sampleMean, nSmp, 1);

% 1.0713 is the original author's switch-over ratio between the two
% formulations (presumably tuned empirically -- not derivable from here).
if nFea/nSmp > 1.0713
    % More features than samples: solve the smaller nSmp-by-nSmp problem
    % on A*A^T and map its eigenvectors back to those of A^T*A.
    [eigvector, eigvalue] = PCA_symEig(data*data', ReducedDim);

    eigvector = data'*eigvector;        % Eigenvectors of A^T*A
    % Rescale every column to unit length. The dimension is given
    % explicitly so a single-row matrix is not summed along its row.
    eigvector = eigvector*diag(1./(sum(eigvector.^2, 1).^0.5));
else
    [eigvector, eigvalue] = PCA_symEig(data'*data, ReducedDim);
end

if ReducedDim < length(eigvalue)
    eigvalue = eigvalue(1:ReducedDim);
    eigvector = eigvector(:, 1:ReducedDim);
end

elapse = cputime - tmp_T;


function [eigvector, eigvalue] = PCA_symEig(ddata, ReducedDim)
%PCA_SYMEIG  Leading eigenpairs of a symmetric matrix, null ones removed.
%   Returns the eigenvectors (columns) and eigenvalues (column vector) of
%   ddata sorted by descending eigenvalue, without the eigenpairs whose
%   eigenvalue is numerically zero relative to the largest one.

% Remove round-off asymmetry by taking the element-wise maximum of the
% matrix and its transpose.
ddata = max(ddata, ddata');

dimMatrix = size(ddata, 2);
if dimMatrix > 1000 && ReducedDim < dimMatrix/10  % using eigs to speed up!
    option = struct('disp', 0);
    [eigvector, eigvalue] = eigs(ddata, ReducedDim, 'la', option);
else
    [eigvector, eigvalue] = eig(ddata);
end
eigvalue = diag(eigvalue);

% Sort in both cases; this is a no-op when the solver already returned
% the eigenvalues in descending order.
[unused, index] = sort(-eigvalue);
eigvalue = eigvalue(index);
eigvector = eigvector(:, index);

maxEigValue = max(abs(eigvalue));
if isempty(maxEigValue) || maxEigValue == 0
    % No variance at all: every direction is null. Dividing by
    % maxEigValue here would give NaN and keep them all.
    eigIdx = 1:length(eigvalue);
else
    eigIdx = find(abs(eigvalue)/maxEigValue < 1e-12);
end
eigvalue(eigIdx) = [];
eigvector(:, eigIdx) = [];

 

测试:

% Random 7-by-10 matrix: 7 data points (rows) with 10 features (columns).
% Statements are left unterminated so the values are printed.
fea = rand(7,10)
% Ask PCA for a 4-dimensional subspace; eigvector gets one column per
% retained direction and eigvalue the matching eigenvalues.
[eigvector,eigvalue] = PCA(fea,4)
% Embed every row of fea with y = x*eigvector. fea is used as-is here;
% PCA centers only its own internal copy of the data.
Y = fea*eigvector

fea =

    0.0305    0.8594    0.4899    0.6820    0.7224    0.4538    0.8314    0.6280    0.3724    0.7379
    0.7441    0.8055    0.1679    0.0424    0.1499    0.4324    0.8034    0.2920    0.1981    0.2691
    0.5000    0.5767    0.9787    0.0714    0.6596    0.8253    0.0605    0.4317    0.4897    0.4228
    0.4799    0.1829    0.7127    0.5216    0.5186    0.0835    0.3993    0.0155    0.3395    0.5479
    0.9047    0.2399    0.5005    0.0967    0.9730    0.1332    0.5269    0.9841    0.9516    0.9427
    0.6099    0.8865    0.4711    0.8181    0.6490    0.1734    0.4168    0.1672    0.9203    0.4177
    0.6177    0.0287    0.0596    0.8175    0.8003    0.3909    0.6569    0.1062    0.0527    0.9831

eigvector =

   -0.1487    0.1730   -0.3812    0.2153
   -0.1381   -0.5340    0.5429    0.2571
   -0.4056   -0.1441    0.0047   -0.5249
    0.4681    0.1735    0.5405   -0.3343
   -0.1373    0.4380    0.1915   -0.1696
   -0.0795   -0.2602   -0.1359   -0.0552
    0.2845    0.0474    0.1770    0.5382
   -0.4609    0.2519    0.1666    0.4194
   -0.5001    0.1770    0.3892   -0.0415
    0.0814    0.5268    0.0462    0.0352

eigvalue =

    1.5668
    1.4181
    0.9042
    0.8643

Y =

   -0.3170    0.4447    1.3333    0.3162
   -0.3083   -0.0766    0.4278    0.7718
   -1.0658    0.1451    0.4726   -0.2309
   -0.2380    0.5501    0.5203   -0.2640
   -1.1723    1.3025    0.6794    0.4791
   -0.5088    0.3902    1.2730   -0.0102
    0.3133    1.0587    0.5222    0.1090

抱歉!评论已关闭.