STK++ 1.0
STK_LocalVariance.cpp
Go to the documentation of this file.
00001 /*--------------------------------------------------------------------*/
00002 /*     Copyright (C) 2004-2007  Serge Iovleff
00003 
00004     This program is free software; you can redistribute it and/or modify
00005     it under the terms of the GNU Lesser General Public License as
00006     published by the Free Software Foundation; either version 2 of the
00007     License, or (at your option) any later version.
00008 
00009     This program is distributed in the hope that it will be useful,
00010     but WITHOUT ANY WARRANTY; without even the implied warranty of
00011     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012     GNU Lesser General Public License for more details.
00013 
00014     You should have received a copy of the GNU Lesser General Public
00015     License along with this program; if not, write to the
00016     Free Software Foundation, Inc.,
00017     59 Temple Place,
00018     Suite 330,
00019     Boston, MA 02111-1307
00020     USA
00021 
00022     Contact : Serge.Iovleff@stkpp.org
00023 */
00024 
00025 /*
00026  * Project:  stkpp::Reduct
00027  * created on: 17 avr. 2010
00028  * Purpose:  Implementation of the Index class using the local variance.
00029  * Author:   iovleff, serge.iovleff@stkpp.org
00030  **/
00031 
00036 #include "../include/STK_LocalVariance.h"
00037 
00038 #include "../../Algebra/include/STK_EigenvaluesSymmetric.h"
00039 #include "../../Algebra/include/STK_LinAlgebra2D.h"
00040 #include "../../Algebra/include/STK_LinAlgebra3D.h"
00041 
00042 #include "../../STKernel/include/STK_String_Util.h"
00043 
00044 namespace STK
00045 {
00046 /* convert a String to a TypeReduction.
00047  *  @param type the type of reduction we want to define
00048  *  @return the TypeReduction represented by the String @c type. if the string
00049  *  does not match any known name, the @c unknown_ type is returned.
00050  **/
00051 LocalVariance::TypeGraph LocalVariance::StringToTypeGraph( String const& type)
00052 {
00053   if (toUpperString(type) == toUpperString(_T("prim")))  return prim_;
00054   if (toUpperString(type) == toUpperString(_T("minimalDistance"))) return minimalDistance_;
00055   return unknown_;
00056 }
00057 
00058 /* convert a TypeReduction to a String.
00059  *  @param type the type of reduction we want to convert
00060  *  @return the string associated to this type.
00061  **/
00062 String LocalVariance::TypeGraphToString( TypeGraph const& type)
00063 {
00064   if (type == prim_)  return String(_T("prim"));
00065   if (type == minimalDistance_) return String(_T("minimalDistance"));
00066   return String(_T("unknown"));
00067 }
00068 
00069 /*
00070  * Constructor.
00071  * @param data the input data set
00072  */
00073 LocalVariance::LocalVariance( Matrix const& data, const TypeGraph& type, Integer const& nbNeighbor)
00074                             : ILinearReduct(data)
00075                             , type_(type)
00076                             , nbNeighbor_(nbNeighbor)
00077                             , neighbors_(data.rangeVe(), Range(nbNeighbor_))
00078                             , dist_(data.rangeVe(), Range(nbNeighbor_), Arithmetic<Real>::max())
00079                             , p_localCov_(0)
00080                             , p_dataStatistics_(0)
00081 {
00082   // compute minimal proximity graph of the data set
00083   switch (type_)
00084   {
00085     case prim_:
00086       prim();
00087       break;
00088     case minimalDistance_:
00089       minimalDistance();
00090       break;
00091     case unknown_:
00092       throw runtime_error(_T("Error in LocalVariance::LocalVariance(data, type, nbNeighbor)\nWhat: "
00093                              "unknown proximity graph."));
00094       break;
00095   };
00096 }
00097 
00098 /*
00099  * Destructor
00100  */
00101 LocalVariance::~LocalVariance()
00102 { clear();}
00103 
00104 /*
00105  * set the data set to use.
00106  */
00107 void LocalVariance::update()
00108 {
00109   // update dimensions of the containers for the proximity graph
00110   neighbors_.resize(p_y_->rangeVe(), Range(nbNeighbor_));
00111   dist_.resize(p_y_->rangeVe(), Range(nbNeighbor_));
00112   dist_ = Arithmetic<Real>::max();
00113 
00114   // compute minimal proximity graph of the data set
00115   switch (type_)
00116   {
00117     case prim_:
00118       prim();
00119       break;
00120     case minimalDistance_:
00121       minimalDistance();
00122       break;
00123     case unknown_:
00124       throw runtime_error(_T("Error in LocalVariance::update()\nWhat: "
00125                           "unknown proximity graph."));
00126       break;
00127   };
00128 }
00129 
00130 /*
00131  * Compute the axis by maximizing the ratio of the local variance on the
00132  * total variance of the data set.
00133  */
00134 void LocalVariance::maximizeIndex()
00135 {
00136   // clear allocated memory
00137   clear();
00138   // initialize memory
00139   initializeMemory();
00140   // compute covariance matrices
00141   computeCovarianceMatrices();
00142   // compute the axis
00143   computeAxis();
00144 }
00145 
00146 /*
00147  * Compute the axis by maximizing the ratio of the local variance on the
00148  * total variance of the data set.
00149  */
00150 void LocalVariance::maximizeIndex(Vector const& weights)
00151 {
00152   // clear allocated memory
00153   clear();
00154   // initialize memory
00155   initializeMemory();
00156   // compute covariance matrices
00157   computeCovarianceMatrices(weights);
00158   // compute the axis
00159   computeAxis();
00160 }
00161 
00162 /* compute the covariances matrices of the data set
00163  *  @param nbNeighbor number of neighbors to look at
00164  **/
00165 void LocalVariance::computeCovarianceMatrices()
00166 {
00167   // compute the covariance matrix
00168   p_dataStatistics_->run();
00169   covariance_ = p_dataStatistics_->covariance();
00170 
00171   // constants
00172   const Integer first_ind = p_y_->firstRow();
00173   const Integer last_ind  = p_y_->lastRow();
00174   const Integer first_var = p_y_->firstCol();
00175   const Integer last_var  = p_y_->lastCol();
00176   const Real pond = 2* nbNeighbor_ * p_y_->sizeVe();
00177 
00178   // compute local covariance matrix
00179   for (Integer j=first_var; j<=last_var; j++)
00180   {
00181     // compute local covariance
00182     for (Integer k=first_var; k<=last_var; k++)
00183     {
00184       Real sum = 0.0;
00185       for (Integer i=first_ind; i<=last_ind; i++)
00186       {
00187         for (Integer l = 1; l <= nbNeighbor_; ++l)
00188         {
00189           sum += ((*p_y_)(i, j) - (*p_y_)(neighbors_(i, l), j))
00190                * ((*p_y_)(i, k) - (*p_y_)(neighbors_(i, l), k));
00191         }
00192       }
00193       (*p_localCov_)(j, k) = sum/pond;
00194     }
00195   }
00196 }
00197 
00198 /* compute the weighted covariances matrices of the data set
00199  **/
00200 void LocalVariance::computeCovarianceMatrices( Vector const& weights)
00201 {
00202   // compute the weighted covariance matrix using mMutivariate class
00203   p_dataStatistics_->run(weights);
00204   covariance_ = p_dataStatistics_->covariance();
00205 
00206   // get dimensions
00207   const Integer first_ind = p_y_->firstRow();
00208   const Integer last_ind  = p_y_->lastRow();
00209   const Integer first_var = p_y_->firstCol();
00210   const Integer last_var  = p_y_->lastCol();
00211   const Real pond = 2* nbNeighbor_;
00212   // compute weighted local covariance matrix
00213   for (Integer i=first_var; i<=last_var; i++)
00214   {
00215     // compute the local covariance matrix
00216     for (Integer j=first_var; j<=last_var; j++)
00217     {
00218       Real sum = 0.0;
00219       for (Integer k=first_ind; k<=last_ind; k++)
00220       {
00221         for (Integer l = 1; l <= nbNeighbor_; ++l)
00222         {
00223           sum += (weights[k] * weights[neighbors_(k, l)])
00224                * ((*p_y_)(k, i) - (*p_y_)(neighbors_(k, l), i))
00225                * ((*p_y_)(k, j) - (*p_y_)(neighbors_(k, l), j));
00226         }
00227       }
00228       (*p_localCov_)(i, j) = sum / pond;
00229     }
00230   }
00231 }
00232 
00233 /* compute the axis
00234  **/
00235 void LocalVariance::computeAxis()
00236 {
00237   // compute the number of axis
00238   Range range(p_y_->firstCol(), min(p_y_->firstCol()+dim_-1, p_y_->lastCol()));
00239   // constant
00240   const Integer first_axe = range.first();
00241   const Integer last_axe = range.last();
00242 
00243   // compute the eigenvalues decomposition of the local covariance
00244   EigenvaluesSymmetric* decomp = new EigenvaluesSymmetric(p_localCov_);
00245   decomp->run();
00246   // compute the generalized inverse of the local covariance
00247   MatrixSquare* inv_local = decomp->ginv();
00248   // we can safely remove the decomposition
00249   delete decomp;
00250 
00251   // compute the product
00252   MatrixSquare* prod = mult(*inv_local, covariance_);
00253   // we can safely remove the inverse
00254   delete inv_local;
00255   // compute the eigenvalues decomposition of the product
00256   decomp = new EigenvaluesSymmetric(prod);
00257   decomp->run();
00258   // we can sagely remove the product
00259   delete prod;
00260 
00261   // save axis and index values
00262   axis_.resize(p_y_->rangeHo(), range);
00263   index_values_.resize(range);
00264   for (Integer j=first_axe; j<=last_axe; j++)
00265   {
00266     axis_[j] = decomp->rotation()[j];
00267     index_values_[j] = decomp->eigenvalues()[j];
00268   }
00269   // we can safely remove the decomposition
00270   delete decomp;
00271 }
00272 
00273 void LocalVariance::prim()
00274 {
00275   // get dimensions
00276   const Integer first_ind = p_y_->firstRow();
00277   const Integer last_ind = p_y_->lastRow();
00278   /* value vector : store the minimal value. */
00279   Vector value(p_y_->rangeVe(), Arithmetic<Real>::max());
00280   /* position of the points */
00281   Array1D<Integer> ipos(p_y_->rangeVe());
00282   // Initialization the position array
00283   for (Integer i=first_ind; i<=last_ind; i++) ipos[i] = i;
00284 
00285   // start Prim algorithm
00286   //Initialization of the root
00287   value[first_ind] = 0.0;               // the root have value 0.0
00288   neighbors_(first_ind, 1) = first_ind;          // and have itself as predecessor
00289   Integer imin = first_ind;             // the index of the current minimal value
00290   Real    kmin = 0.0;                   // the current minimal value
00291   // begin iterations
00292   for (Integer iter = last_ind; iter>=first_ind; iter--)
00293   {
00294     // put the minimal key at the end of the array key_
00295     value.swap(imin, iter);  // put the minimal value to the end
00296     ipos.swap(imin, iter);   // update the position of current minimal point
00297     // Update the value for the neighbors points and find minimal value
00298     imin = first_ind;
00299     kmin = value[first_ind];
00300     // ref on the current point
00301     Integer icur = ipos[iter];
00302     Point P((*p_y_)(icur), true);
00303     // update distance of the neighbors point
00304     for (Integer i=first_ind; i<iter; i++)
00305     {
00306       // check if we have a better distance for the neighbors
00307       Real d=dist(P, (*p_y_)(ipos[i]));
00308       if (d < value[i])
00309       {
00310         value[i] = d;
00311         neighbors_(ipos[i], 1) = icur;
00312       }
00313       // minimal key
00314       if (kmin>value[i]) { imin=i; kmin = value[i];}
00315     }
00316   }
00317 }
00318 
00319 void LocalVariance::minimalDistance()
00320 {
00321   // get dimensions
00322   const Integer first_ind = p_y_->firstRow();
00323   const Integer last_ind = p_y_->lastRow();
00324   // start minimal distance algorithm
00325   for (Integer j = first_ind; j<last_ind; j++)
00326   {
00327     // ref on the current point
00328     Point P((*p_y_)(j), true);
00329     // update distance of the neighbors point
00330     for (Integer i=j+1; i<=last_ind; i++)
00331     {
00332       // compute distance between point i and point j
00333       Real d=dist(P, (*p_y_)(i));
00334       // check if we get a better distance for the point j
00335       if (dist_(i, nbNeighbor_) > d )
00336       {
00337         // check if we get a better distance for the point i
00338         Integer pos = nbNeighbor_;
00339         while (dist_(i, pos) > d && pos-- > 1) {}
00340         pos++;
00341         // shift values
00342         for (int k= nbNeighbor_ -1; k>= pos; k--)
00343         {
00344           dist_(i, k+1) = dist_(i, k);
00345           neighbors_(i, k+1) = neighbors_(i, k);
00346         }
00347         // set minimal distance in place
00348         dist_(i, pos) = d;
00349         neighbors_(i, pos) = j;
00350       }
00351       // check if we get a better distance for the point j
00352       if (dist_(j, nbNeighbor_) > d )
00353       {
00354         // insertion sorting algorihtm
00355         Integer pos = nbNeighbor_;
00356         while (dist_(j, pos) > d && pos-- > 1) {}
00357         pos++;
00358         // shift valuesconst
00359         for (int k= nbNeighbor_ -1; k>= pos; k--)
00360         {
00361           dist_(j, k+1) = dist_(j, k);
00362           neighbors_(j, k+1) = neighbors_(j, k);
00363         }
00364         // set minimal distance in place
00365         dist_(j, pos) = d;
00366         neighbors_(j, pos) = i;
00367       }
00368     }
00369   }
00370 }
00371 
00372 /* clear allocated memory */
00373 void LocalVariance::clear()
00374 {
00375   if (p_dataStatistics_) delete p_dataStatistics_;
00376   p_dataStatistics_ = 0;
00377   if (p_localCov_) delete p_localCov_;
00378   p_localCov_ = 0;
00379 }
00380 
00381 /* initialize dimension */
00382 void LocalVariance::initializeMemory()
00383 {
00384   p_dataStatistics_ = new Stat::MultivariateMatrix(p_y_);
00385   p_localCov_       = new MatrixSquare(p_y_->rangeHo());
00386 }
00387 
00388 
00389 } // namespace STK