|
STK++ 1.0
|
00001 /*--------------------------------------------------------------------*/ 00002 /* Copyright (C) 2004-2007 Serge Iovleff 00003 00004 This program is free software; you can redistribute it and/or modify 00005 it under the terms of the GNU Lesser General Public License as 00006 published by the Free Software Foundation; either version 2 of the 00007 License, or (at your option) any later version. 00008 00009 This program is distributed in the hope that it will be useful, 00010 but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 GNU Lesser General Public License for more details. 00013 00014 You should have received a copy of the GNU Lesser General Public 00015 License along with this program; if not, write to the 00016 Free Software Foundation, Inc., 00017 59 Temple Place, 00018 Suite 330, 00019 Boston, MA 02111-1307 00020 USA 00021 00022 Contact : Serge.Iovleff@stkpp.org 00023 */ 00024 00025 /* 00026 * Project: stkpp::Reduct 00027 * created on: 17 avr. 2010 00028 * Purpose: Implementation of the Index class using the local variance. 00029 * Author: iovleff, serge.iovleff@stkpp.org 00030 **/ 00031 00036 #include "../include/STK_LocalVariance.h" 00037 00038 #include "../../Algebra/include/STK_EigenvaluesSymmetric.h" 00039 #include "../../Algebra/include/STK_LinAlgebra2D.h" 00040 #include "../../Algebra/include/STK_LinAlgebra3D.h" 00041 00042 #include "../../STKernel/include/STK_String_Util.h" 00043 00044 namespace STK 00045 { 00046 /* convert a String to a TypeReduction. 00047 * @param type the type of reduction we want to define 00048 * @return the TypeReduction represented by the String @c type. if the string 00049 * does not match any known name, the @c unknown_ type is returned. 00050 **/ 00051 LocalVariance::TypeGraph LocalVariance::StringToTypeGraph( String const& type) 00052 { 00053 if (toUpperString(type) == toUpperString(_T("prim"))) return prim_; 00054 if (toUpperString(type) == toUpperString(_T("minimalDistance"))) return minimalDistance_; 00055 return unknown_; 00056 } 00057 00058 /* convert a TypeReduction to a String. 00059 * @param type the type of reduction we want to convert 00060 * @return the string associated to this type. 00061 **/ 00062 String LocalVariance::TypeGraphToString( TypeGraph const& type) 00063 { 00064 if (type == prim_) return String(_T("prim")); 00065 if (type == minimalDistance_) return String(_T("minimalDistance")); 00066 return String(_T("unknown")); 00067 } 00068 00069 /* 00070 * Constructor. 00071 * @param data the input data set 00072 */ 00073 LocalVariance::LocalVariance( Matrix const& data, const TypeGraph& type, Integer const& nbNeighbor) 00074 : ILinearReduct(data) 00075 , type_(type) 00076 , nbNeighbor_(nbNeighbor) 00077 , neighbors_(data.rangeVe(), Range(nbNeighbor_)) 00078 , dist_(data.rangeVe(), Range(nbNeighbor_), Arithmetic<Real>::max()) 00079 , p_localCov_(0) 00080 , p_dataStatistics_(0) 00081 { 00082 // compute minimal proximity graph of the data set 00083 switch (type_) 00084 { 00085 case prim_: 00086 prim(); 00087 break; 00088 case minimalDistance_: 00089 minimalDistance(); 00090 break; 00091 case unknown_: 00092 throw runtime_error(_T("Error in LocalVariance::LocalVariance(data, type, nbNeighbor)\nWhat: " 00093 "unknown proximity graph.")); 00094 break; 00095 }; 00096 } 00097 00098 /* 00099 * Destructor 00100 */ 00101 LocalVariance::~LocalVariance() 00102 { clear();} 00103 00104 /* 00105 * set the data set to use. 00106 */ 00107 void LocalVariance::update() 00108 { 00109 // update dimensions of the containers for the proximity graph 00110 neighbors_.resize(p_y_->rangeVe(), Range(nbNeighbor_)); 00111 dist_.resize(p_y_->rangeVe(), Range(nbNeighbor_)); 00112 dist_ = Arithmetic<Real>::max(); 00113 00114 // compute minimal proximity graph of the data set 00115 switch (type_) 00116 { 00117 case prim_: 00118 prim(); 00119 break; 00120 case minimalDistance_: 00121 minimalDistance(); 00122 break; 00123 case unknown_: 00124 throw runtime_error(_T("Error in LocalVariance::update()\nWhat: " 00125 "unknown proximity graph.")); 00126 break; 00127 }; 00128 } 00129 00130 /* 00131 * Compute the axis by maximizing the ratio of the local variance on the 00132 * total variance of the data set. 00133 */ 00134 void LocalVariance::maximizeIndex() 00135 { 00136 // clear allocated memory 00137 clear(); 00138 // initialize memory 00139 initializeMemory(); 00140 // compute covariance matrices 00141 computeCovarianceMatrices(); 00142 // compute the axis 00143 computeAxis(); 00144 } 00145 00146 /* 00147 * Compute the axis by maximizing the ratio of the local variance on the 00148 * total variance of the data set. 00149 */ 00150 void LocalVariance::maximizeIndex(Vector const& weights) 00151 { 00152 // clear allocated memory 00153 clear(); 00154 // initialize memory 00155 initializeMemory(); 00156 // compute covariance matrices 00157 computeCovarianceMatrices(weights); 00158 // compute the axis 00159 computeAxis(); 00160 } 00161 00162 /* compute the covariances matrices of the data set 00163 * @param nbNeighbor number of neighbors to look at 00164 **/ 00165 void LocalVariance::computeCovarianceMatrices() 00166 { 00167 // compute the covariance matrix 00168 p_dataStatistics_->run(); 00169 covariance_ = p_dataStatistics_->covariance(); 00170 00171 // constants 00172 const Integer first_ind = p_y_->firstRow(); 00173 const Integer last_ind = p_y_->lastRow(); 00174 const Integer first_var = p_y_->firstCol(); 00175 const Integer last_var = p_y_->lastCol(); 00176 const Real pond = 2* nbNeighbor_ * p_y_->sizeVe(); 00177 00178 // compute local covariance matrix 00179 for (Integer j=first_var; j<=last_var; j++) 00180 { 00181 // compute local covariance 00182 for (Integer k=first_var; k<=last_var; k++) 00183 { 00184 Real sum = 0.0; 00185 for (Integer i=first_ind; i<=last_ind; i++) 00186 { 00187 for (Integer l = 1; l <= nbNeighbor_; ++l) 00188 { 00189 sum += ((*p_y_)(i, j) - (*p_y_)(neighbors_(i, l), j)) 00190 * ((*p_y_)(i, k) - (*p_y_)(neighbors_(i, l), k)); 00191 } 00192 } 00193 (*p_localCov_)(j, k) = sum/pond; 00194 } 00195 } 00196 } 00197 00198 /* compute the weighted covariances matrices of the data set 00199 **/ 00200 void LocalVariance::computeCovarianceMatrices( Vector const& weights) 00201 { 00202 // compute the weighted covariance matrix using mMutivariate class 00203 p_dataStatistics_->run(weights); 00204 covariance_ = p_dataStatistics_->covariance(); 00205 00206 // get dimensions 00207 const Integer first_ind = p_y_->firstRow(); 00208 const Integer last_ind = p_y_->lastRow(); 00209 const Integer first_var = p_y_->firstCol(); 00210 const Integer last_var = p_y_->lastCol(); 00211 const Real pond = 2* nbNeighbor_; 00212 // compute weighted local covariance matrix 00213 for (Integer i=first_var; i<=last_var; i++) 00214 { 00215 // compute the local covariance matrix 00216 for (Integer j=first_var; j<=last_var; j++) 00217 { 00218 Real sum = 0.0; 00219 for (Integer k=first_ind; k<=last_ind; k++) 00220 { 00221 for (Integer l = 1; l <= nbNeighbor_; ++l) 00222 { 00223 sum += (weights[k] * weights[neighbors_(k, l)]) 00224 * ((*p_y_)(k, i) - (*p_y_)(neighbors_(k, l), i)) 00225 * ((*p_y_)(k, j) - (*p_y_)(neighbors_(k, l), j)); 00226 } 00227 } 00228 (*p_localCov_)(i, j) = sum / pond; 00229 } 00230 } 00231 } 00232 00233 /* compute the axis 00234 **/ 00235 void LocalVariance::computeAxis() 00236 { 00237 // compute the number of axis 00238 Range range(p_y_->firstCol(), min(p_y_->firstCol()+dim_-1, p_y_->lastCol())); 00239 // constant 00240 const Integer first_axe = range.first(); 00241 const Integer last_axe = range.last(); 00242 00243 // compute the eigenvalues decomposition of the local covariance 00244 EigenvaluesSymmetric* decomp = new EigenvaluesSymmetric(p_localCov_); 00245 decomp->run(); 00246 // compute the generalized inverse of the local covariance 00247 MatrixSquare* inv_local = decomp->ginv(); 00248 // we can safely remove the decomposition 00249 delete decomp; 00250 00251 // compute the product 00252 MatrixSquare* prod = mult(*inv_local, covariance_); 00253 // we can safely remove the inverse 00254 delete inv_local; 00255 // compute the eigenvalues decomposition of the product 00256 decomp = new EigenvaluesSymmetric(prod); 00257 decomp->run(); 00258 // we can sagely remove the product 00259 delete prod; 00260 00261 // save axis and index values 00262 axis_.resize(p_y_->rangeHo(), range); 00263 index_values_.resize(range); 00264 for (Integer j=first_axe; j<=last_axe; j++) 00265 { 00266 axis_[j] = decomp->rotation()[j]; 00267 index_values_[j] = decomp->eigenvalues()[j]; 00268 } 00269 // we can safely remove the decomposition 00270 delete decomp; 00271 } 00272 00273 void LocalVariance::prim() 00274 { 00275 // get dimensions 00276 const Integer first_ind = p_y_->firstRow(); 00277 const Integer last_ind = p_y_->lastRow(); 00278 /* value vector : store the minimal value. */ 00279 Vector value(p_y_->rangeVe(), Arithmetic<Real>::max()); 00280 /* position of the points */ 00281 Array1D<Integer> ipos(p_y_->rangeVe()); 00282 // Initialization the position array 00283 for (Integer i=first_ind; i<=last_ind; i++) ipos[i] = i; 00284 00285 // start Prim algorithm 00286 //Initialization of the root 00287 value[first_ind] = 0.0; // the root have value 0.0 00288 neighbors_(first_ind, 1) = first_ind; // and have itself as predecessor 00289 Integer imin = first_ind; // the index of the current minimal value 00290 Real kmin = 0.0; // the current minimal value 00291 // begin iterations 00292 for (Integer iter = last_ind; iter>=first_ind; iter--) 00293 { 00294 // put the minimal key at the end of the array key_ 00295 value.swap(imin, iter); // put the minimal value to the end 00296 ipos.swap(imin, iter); // update the position of current minimal point 00297 // Update the value for the neighbors points and find minimal value 00298 imin = first_ind; 00299 kmin = value[first_ind]; 00300 // ref on the current point 00301 Integer icur = ipos[iter]; 00302 Point P((*p_y_)(icur), true); 00303 // update distance of the neighbors point 00304 for (Integer i=first_ind; i<iter; i++) 00305 { 00306 // check if we have a better distance for the neighbors 00307 Real d=dist(P, (*p_y_)(ipos[i])); 00308 if (d < value[i]) 00309 { 00310 value[i] = d; 00311 neighbors_(ipos[i], 1) = icur; 00312 } 00313 // minimal key 00314 if (kmin>value[i]) { imin=i; kmin = value[i];} 00315 } 00316 } 00317 } 00318 00319 void LocalVariance::minimalDistance() 00320 { 00321 // get dimensions 00322 const Integer first_ind = p_y_->firstRow(); 00323 const Integer last_ind = p_y_->lastRow(); 00324 // start minimal distance algorithm 00325 for (Integer j = first_ind; j<last_ind; j++) 00326 { 00327 // ref on the current point 00328 Point P((*p_y_)(j), true); 00329 // update distance of the neighbors point 00330 for (Integer i=j+1; i<=last_ind; i++) 00331 { 00332 // compute distance between point i and point j 00333 Real d=dist(P, (*p_y_)(i)); 00334 // check if we get a better distance for the point j 00335 if (dist_(i, nbNeighbor_) > d ) 00336 { 00337 // check if we get a better distance for the point i 00338 Integer pos = nbNeighbor_; 00339 while (dist_(i, pos) > d && pos-- > 1) {} 00340 pos++; 00341 // shift values 00342 for (int k= nbNeighbor_ -1; k>= pos; k--) 00343 { 00344 dist_(i, k+1) = dist_(i, k); 00345 neighbors_(i, k+1) = neighbors_(i, k); 00346 } 00347 // set minimal distance in place 00348 dist_(i, pos) = d; 00349 neighbors_(i, pos) = j; 00350 } 00351 // check if we get a better distance for the point j 00352 if (dist_(j, nbNeighbor_) > d ) 00353 { 00354 // insertion sorting algorihtm 00355 Integer pos = nbNeighbor_; 00356 while (dist_(j, pos) > d && pos-- > 1) {} 00357 pos++; 00358 // shift valuesconst 00359 for (int k= nbNeighbor_ -1; k>= pos; k--) 00360 { 00361 dist_(j, k+1) = dist_(j, k); 00362 neighbors_(j, k+1) = neighbors_(j, k); 00363 } 00364 // set minimal distance in place 00365 dist_(j, pos) = d; 00366 neighbors_(j, pos) = i; 00367 } 00368 } 00369 } 00370 } 00371 00372 /* clear allocated memory */ 00373 void LocalVariance::clear() 00374 { 00375 if (p_dataStatistics_) delete p_dataStatistics_; 00376 p_dataStatistics_ = 0; 00377 if (p_localCov_) delete p_localCov_; 00378 p_localCov_ = 0; 00379 } 00380 00381 /* initialize dimension */ 00382 void LocalVariance::initializeMemory() 00383 { 00384 p_dataStatistics_ = new Stat::MultivariateMatrix(p_y_); 00385 p_localCov_ = new MatrixSquare(p_y_->rangeHo()); 00386 } 00387 00388 00389 } // namespace STK