|
STK++ 1.0
|
00001 /*--------------------------------------------------------------------*/ 00002 /* Copyright (C) 2004-2007 Serge Iovleff 00003 00004 This program is free software; you can redistribute it and/or modify 00005 it under the terms of the GNU Lesser General Public License as 00006 published by the Free Software Foundation; either version 2 of the 00007 License, or (at your option) any later version. 00008 00009 This program is distributed in the hope that it will be useful, 00010 but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 GNU Lesser General Public License for more details. 00013 00014 You should have received a copy of the GNU Lesser General Public 00015 License along with this program; if not, write to the 00016 Free Software Foundation, Inc., 00017 59 Temple Place, 00018 Suite 330, 00019 Boston, MA 02111-1307 00020 USA 00021 00022 Contact : Serge.Iovleff@stkpp.org 00023 */ 00024 00025 /* 00026 * Project: stkpp::STatistiK::StatDesc 00027 * Purpose: Compute elementary 1D statistics for all variables. 00028 * Author: Serge Iovleff, serge.iovleff@stkpp.org 00029 **/ 00030 00036 #ifndef STK_STAT_UNIVARIATEREAL_H 00037 #define STK_STAT_UNIVARIATEREAL_H 00038 00039 #include "../../Sdk/include/STK_ITContainer1D.h" 00040 #include "../../DManager/include/STK_HeapSort.h" 00041 00042 #include "STK_Stat_Univariate.h" 00043 00044 namespace STK 00045 { 00046 namespace Stat 00047 { 00048 00061 template < class TContainer1D > 00062 class Univariate<Real, TContainer1D> 00063 { 00064 public: 00071 Univariate( ITContainer1D<Real, TContainer1D> const& V, bool sorted = false) 00072 : nbSamples_(V.size()) 00073 , nbAvailable_(V.size()) 00074 , nbMiss_(0) 00075 , V_(V.asLeaf()) 00076 , W_() 00077 , weighted_(false) 00078 , sorted_(sorted) 00079 , comporder_(false) 00080 , compstat_(false) 00081 , sumweights_(0.) 00082 , sum2weights_(0.) 00083 , mean_(Arithmetic<Real>::NA()) 00084 , median_(Arithmetic<Real>::NA()) 00085 , var_(Arithmetic<Real>::NA()) 00086 , uvar_(Arithmetic<Real>::NA()) 00087 , std_(Arithmetic<Real>::NA()) 00088 , ustd_(Arithmetic<Real>::NA()) 00089 , mad_(Arithmetic<Real>::NA()) 00090 , kurtosis_(Arithmetic<Real>::NA()) 00091 , skewness_(Arithmetic<Real>::NA()) 00092 , quartiles_(3, Arithmetic<Real>::NA()) 00093 , deciles_(9, Arithmetic<Real>::NA()) 00094 , viceciles_(19, Arithmetic<Real>::NA()) 00095 , percentiles_(99, Arithmetic<Real>::NA()) 00096 { 00097 initializeVariable(); 00098 compOrderStatistics(); 00099 compStatistics(); 00100 } 00101 00110 Univariate( ITContainer1D<Real, TContainer1D> const& V 00111 , ITContainer1D<Real, TContainer1D> const& W 00112 , bool sorted = false 00113 ) 00114 : nbSamples_(V.size()) 00115 , nbAvailable_(V.size()) 00116 , nbMiss_(0) 00117 , V_(V.asLeaf()) 00118 , W_(W.asLeaf()) 00119 , weighted_(true) 00120 , sorted_(sorted) 00121 , comporder_(false) 00122 , compstat_(false) 00123 , sumweights_(0.0) 00124 , sum2weights_(0.0) 00125 , mean_(Arithmetic<Real>::NA()) 00126 , median_(Arithmetic<Real>::NA()) 00127 , var_(Arithmetic<Real>::NA()) 00128 , uvar_(Arithmetic<Real>::NA()) 00129 , std_(Arithmetic<Real>::NA()) 00130 , ustd_(Arithmetic<Real>::NA()) 00131 , mad_(Arithmetic<Real>::NA()) 00132 , kurtosis_(Arithmetic<Real>::NA()) 00133 , skewness_(Arithmetic<Real>::NA()) 00134 , quartiles_(3) 00135 , deciles_(9) 00136 , viceciles_(19) 00137 , percentiles_(99) 00138 { 00139 // check weights size 00140 if ((V_.range() != W_.range())) 00141 { throw runtime_error("Univariate<Real, TContainer1D>::Univariate(V, W, sorted) " 00142 "V and W have not the same range"); 00143 } 00144 initializeVariableAndWeights(); 00145 compOrderStatistics(); 00146 compWeightedStatistics(); 00147 } 00148 00152 Univariate( const Univariate& stat) 00153 : nbSamples_(stat.nbSamples_) 00154 , nbAvailable_(stat.nbAvailable_) 00155 , nbMiss_(stat.nbMiss_) 00156 , V_(stat.V_) 00157 , W_(stat.W_) 00158 , weighted_(stat.weighted_) 00159 , sorted_(stat.sorted_) 00160 , comporder_(stat.comporder_) 00161 , compstat_(stat.compstat_) 00162 , sumweights_(stat.sumweights_) 00163 , sum2weights_(stat.sum2weights_) 00164 , min_(stat.min_) 00165 , max_(stat.max_) 00166 , amax_(stat.amax_) 00167 , mean_(stat.mean_) 00168 , median_(stat.median_) 00169 , var_(stat.var_) 00170 , uvar_(stat.uvar_) 00171 , std_(stat.std_) 00172 , ustd_(stat.ustd_) 00173 , mad_(stat.mad_) 00174 , kurtosis_(stat.kurtosis_) 00175 , skewness_(stat.skewness_) 00176 , quartiles_(stat.quartiles_) 00177 , deciles_(stat.deciles_) 00178 , viceciles_(stat.viceciles_) 00179 , percentiles_(stat.percentiles_) 00180 { ;} 00181 00183 ~Univariate() { ;} 00184 00186 Univariate& operator=( const Univariate& stat) 00187 { 00188 nbSamples_ = stat.nbSamples_; 00189 nbAvailable_ = stat.nbAvailable_; 00190 nbMiss_ = stat.nbMiss_; 00191 V_ = stat.V_; 00192 W_ = stat.W_; 00193 weighted_ = stat.weighted_; 00194 sorted_ = stat.sorted_; 00195 comporder_ = stat.comporder_; 00196 compstat_ = stat.compstat_; 00197 sumweights_ = stat.sumweights_; 00198 sum2weights_ = stat.sum2weights_; 00199 min_ = stat.min_; 00200 max_ = stat.max_; 00201 amax_ = stat.amax_; 00202 mean_ = stat.mean_; 00203 median_ = stat.median_; 00204 var_ = stat.var_; 00205 uvar_ = stat.uvar_; 00206 std_ = stat.std_; 00207 ustd_ = stat.ustd_; 00208 mad_ = stat.mad_; 00209 kurtosis_ = stat.kurtosis_; 00210 skewness_ = stat.skewness_; 00211 quartiles_ = stat.quartiles_; 00212 deciles_ = stat.deciles_; 00213 viceciles_ = stat.viceciles_; 00214 percentiles_ = stat.percentiles_; 00215 00216 return *this; 00217 } 00218 00223 void setData( ITContainer1D<Real, TContainer1D> const& V, bool sorted = false) 00224 { 00225 // initialize dimensions 00226 nbSamples_ = V.size(); 00227 nbAvailable_ = V.size(); 00228 nbMiss_ = 0; 00229 // set variable and weights 00230 V_ = V.asLeaf(); 00231 W_.clear(); // no weights 00232 // initialize data 00233 weighted_ = false; 00234 sorted_ = sorted; 00235 comporder_ = false; 00236 compstat_ = false; 00237 sumweights_ = 0.0; 00238 sum2weights_ = 0.0; 00239 mean_ = Arithmetic<Real>::NA(); 00240 median_ = Arithmetic<Real>::NA(); 00241 var_ = Arithmetic<Real>::NA(); 00242 uvar_ = Arithmetic<Real>::NA(); 00243 std_ = Arithmetic<Real>::NA(); 00244 ustd_ = Arithmetic<Real>::NA(); 00245 mad_ = Arithmetic<Real>::NA(); 00246 kurtosis_ = Arithmetic<Real>::NA(); 00247 skewness_ = Arithmetic<Real>::NA(); 00248 quartiles_ = Arithmetic<Real>::NA(); 00249 deciles_ = Arithmetic<Real>::NA(); 00250 viceciles_ = Arithmetic<Real>::NA(); 00251 percentiles_ = Arithmetic<Real>::NA(); 00252 00253 initializeVariable(); 00254 compOrderStatistics(); 00255 compStatistics(); 00256 } 00257 00263 void setData( ITContainer1D<Real, TContainer1D> const& V 00264 , ITContainer1D<Real, TContainer1D> const& W 00265 , bool sorted = false 00266 ) 00267 { 00268 // check weights size 00269 if ((V.range() != W.range())) 00270 { throw runtime_error("Univariate<Real>::setData(V, W, sorted) " 00271 "V and W have not the same range"); 00272 } 00273 // initialize dimensions 00274 nbSamples_ = V.size(); 00275 nbAvailable_ = V.size(); 00276 nbMiss_ = 0; 00277 // set variable and weights 00278 V_ = V.asLeaf(); 00279 W_ = W.asLeaf(); 00280 // initialize data 00281 weighted_ = true; 00282 sorted_ = sorted; 00283 comporder_ = false; 00284 compstat_ = false; 00285 sumweights_ = 0.0; 00286 sum2weights_ = 0.0; 00287 mean_ = Arithmetic<Real>::NA(); 00288 median_ = Arithmetic<Real>::NA(); 00289 var_ = Arithmetic<Real>::NA(); 00290 uvar_ = Arithmetic<Real>::NA(); 00291 std_ = Arithmetic<Real>::NA(); 00292 ustd_ = Arithmetic<Real>::NA(); 00293 mad_ = Arithmetic<Real>::NA(); 00294 kurtosis_ = Arithmetic<Real>::NA(); 00295 skewness_ = Arithmetic<Real>::NA(); 00296 quartiles_ = Arithmetic<Real>::NA(); 00297 deciles_ = Arithmetic<Real>::NA(); 00298 viceciles_ = Arithmetic<Real>::NA(); 00299 percentiles_ = Arithmetic<Real>::NA(); 00300 00301 initializeVariableAndWeights(); 00302 compOrderStatistics(); 00303 compWeightedStatistics(); 00304 } 00305 00307 inline const Integer nbSamples() const {return nbSamples_;} 00309 inline const Integer nbAvailableSamples() const {return nbAvailable_;} 00311 inline const Integer nbMissingSamples() const {return nbMiss_;} 00312 00314 inline const Real min() const {return min_;} 00316 inline const Real max() const {return max_;} 00318 inline const Real aMax() const {return amax_;} 00319 00321 inline const Real mean() const {return mean_;} 00323 inline const Real median() const {return median_;} 00324 00326 inline const Real variance() const {return var_;} 00328 inline const Real unbiasedVariance() const {return uvar_;} 00330 inline const Real std() const {return std_;} 00332 inline const Real unbiasedStd() const {return ustd_;} 00334 inline const Real mad() const { return( mad_);} 00335 00337 inline const Real kurtosis() const {return kurtosis_;} 00339 inline const Real skewness() const {return skewness_;} 00340 00342 inline TContainer1D const& quartiles() const {return quartiles_;} 00344 inline TContainer1D const& deciles() const {return deciles_;} 00346 inline TContainer1D const& viceciles() const { return viceciles_;} 00348 inline TContainer1D const& percentiles() const { return percentiles_;} 00349 00350 private: 00354 void initializeVariable() 00355 { 00356 // initialize the extreme values 00357 min_ = Arithmetic<Real>::max(); 00358 max_ = -Arithmetic<Real>::max(); 00359 // discard missing values 00360 for (Integer i=V_.last(); i>=V_.first(); i--) 00361 { 00362 if ( Arithmetic<Real>::isNA(V_[i]) ) 00363 { 00364 // if NA 00365 nbAvailable_--; // decrease nbAvailable_ 00366 nbMiss_++; // increase nbMiss_ 00367 V_.erase(i); // Delete the current row 00368 } 00369 else 00370 { 00371 min_ = STK::min(V_[i], min_); // update min_ 00372 max_ = STK::max(V_[i], max_); // update max_ 00373 } 00374 } 00375 // all weights are 1/n. 00376 sumweights_ = 1.0; 00377 sum2weights_= 1./(Real)nbAvailable_; 00378 00379 // no samples 00380 if ((nbAvailable_ == 0)) 00381 { 00382 min_ = Arithmetic<Real>::NA(); 00383 max_ = Arithmetic<Real>::NA(); 00384 amax_ = Arithmetic<Real>::NA(); 00385 } 00386 else // one or more samples : compute amax 00387 amax_ = STK::max(abs(min_), abs(max_)); 00388 } 00389 00393 void initializeVariableAndWeights() 00394 { 00395 // initialize the extreme values 00396 min_ = Arithmetic<Real>::max(); 00397 max_ = -Arithmetic<Real>::max(); 00398 00399 // discard missing values or values without weights 00400 for (Integer i=V_.last(); i>=V_.first(); i--) 00401 { 00402 if ( Arithmetic<Real>::isNA(V_[i])||Arithmetic<Real>::isNA(W_[i])) 00403 { 00404 V_.erase(i); // Delete the current row 00405 W_.erase(i); // for the weights too 00406 nbAvailable_--; // decrease number of samples 00407 nbMiss_++; // increase number of missing values 00408 } 00409 else 00410 { 00411 min_ = STK::min(V_[i], min_); // update min_ 00412 max_ = STK::max(V_[i], max_); // update max_ 00413 sumweights_ += (W_[i] = abs((Real)W_[i]));// sum absolute weights 00414 sum2weights_ += W_[i] * W_[i]; // sum squared weights 00415 } 00416 } 00417 // no samples 00418 if ((nbAvailable_ == 0)) 00419 { 00420 min_ = Arithmetic<Real>::NA(); 00421 max_ = Arithmetic<Real>::NA(); 00422 amax_ = Arithmetic<Real>::NA(); 00423 } 00424 else // one or more samples : compute amax 00425 amax_ = STK::max(abs(min_), abs(max_)); 00426 } 00427 00436 void compStatistics() 00437 { 00438 // If there is no samples 00439 if (nbAvailable_ == 0) 00440 { 00441 mean_ = Arithmetic<Real>::NA(); 00442 var_ = Arithmetic<Real>::NA(); 00443 uvar_ = Arithmetic<Real>::NA(); 00444 std_ = Arithmetic<Real>::NA(); 00445 ustd_ = Arithmetic<Real>::NA(); 00446 mad_ = Arithmetic<Real>::NA(); 00447 kurtosis_ = Arithmetic<Real>::NA(); 00448 skewness_ = Arithmetic<Real>::NA(); 00449 return; 00450 } 00451 00452 // One observation 00453 if (nbAvailable_ == 1) 00454 { 00455 mean_ = V_.front(); 00456 var_ = Arithmetic<Real>::NA(); 00457 std_ = 0.0; 00458 ustd_ = Arithmetic<Real>::NA(); 00459 mad_ = 0.0; 00460 kurtosis_ = 0.0; 00461 skewness_ = 0.0; 00462 return; 00463 } 00464 // get indexes 00465 const Integer first = V_.first(), last = V_.last(); 00466 // get nbAvailable observation in Real 00467 const Real n = (Real)nbAvailable_; 00468 // first pass : compute the mean 00469 // scale the samples for preventing overflows 00470 mean_ = 0.; 00471 { 00472 if (amax_) // if the absolute maximal value is greater than 0 00473 { 00474 // sum samples with scaling 00475 for (Integer i=first; i<=last; i++) 00476 mean_ += V_[i]/amax_; 00477 // divide by the number of available observation 00478 mean_ /= n; 00479 // unscale 00480 mean_ *= amax_; 00481 } 00482 } 00483 // second pass : compute the variance, skewness and kurtosis 00484 // initialize values 00485 Real sum = 0.0, dev1, dev2; 00486 var_ = 0.0; 00487 mad_ = 0.0; 00488 kurtosis_ = 0.0; 00489 skewness_ = 0.0; 00490 for (Integer i=1; i<=nbAvailable_; i++) 00491 { 00492 sum += (dev1 = V_[i]-mean_); // deviation from the mean 00493 var_ += (dev2 = dev1*dev1); // squared deviation 00494 mad_ += abs( dev1 ); // absolute deviation 00495 skewness_ += dev2 * dev1; // cubic deviation 00496 kurtosis_ += dev2 * dev2; // squared squared deviation 00497 } 00498 mad_ *= sum2weights_; 00499 uvar_ = (var_ - sum*sum/n)/(n - 1.); 00500 var_ = (var_ - sum*sum/n)/n; 00501 std_ = sqrt((double)var_); 00502 ustd_ = sqrt((double)uvar_); 00503 // if there is variance 00504 if (var_) 00505 { 00506 skewness_ /= n; 00507 kurtosis_ /= n; 00508 00509 skewness_ /= (var_ * std_); 00510 kurtosis_ /= (var_ * var_); 00511 kurtosis_ -= 3.0; 00512 } 00513 else 00514 { 00515 skewness_ = 0.0; 00516 kurtosis_ = 0.0; 00517 } 00518 // Statistics are computed 00519 compstat_ = true; 00520 } 00521 00523 void compWeightedStatistics() 00524 { 00525 // If there is no samples 00526 if (nbAvailable_ == 0) 00527 { 00528 mean_ = Arithmetic<Real>::NA(); 00529 var_ = Arithmetic<Real>::NA(); 00530 uvar_ = Arithmetic<Real>::NA(); 00531 std_ = Arithmetic<Real>::NA(); 00532 ustd_ = Arithmetic<Real>::NA(); 00533 mad_ = Arithmetic<Real>::NA(); 00534 kurtosis_ = Arithmetic<Real>::NA(); 00535 skewness_ = Arithmetic<Real>::NA(); 00536 return; 00537 } 00538 00539 // One observation or pathological weights 00540 if ((nbAvailable_ == 1)||(sumweights_*sumweights_ <= sum2weights_)) 00541 { 00542 mean_ = V_.front(); 00543 var_ = 0.; 00544 uvar_ = Arithmetic<Real>::NA(); 00545 std_ = 0.; 00546 ustd_ = Arithmetic<Real>::NA(); 00547 mad_ = 0.; 00548 kurtosis_ = 0.; 00549 skewness_ = 0.; 00550 return; 00551 } 00552 // get indexes 00553 const Integer first = V_.first(), last = V_.last(); 00554 // first pass : get the mean 00555 // scale the samples for preventing overflows 00556 mean_ = 0.; 00557 if (amax_) // if the maximal value is greater than 0 00558 { 00559 // sum samples 00560 for (Integer i=first; i<=last; i++) 00561 mean_ += (W_[i]*V_[i])/amax_; // compute the mean with scaling 00562 mean_ /= sumweights_; // weight the sum 00563 mean_ *= amax_; // and unscale 00564 } 00565 // second pass : compute the variance, skewness and kurtosis 00566 // initialize some values 00567 Real sum = 0.0, dev1, dev2; 00568 var_ = 0.0; 00569 mad_ = 0.0; 00570 kurtosis_ = 0.0; 00571 skewness_ = 0.0; 00572 for (Integer i=1; i<=nbAvailable_; i++) 00573 { 00574 Real weight = W_[i]; 00575 sum += weight * (dev1 = V_[i]-mean_); // deviation from the mean 00576 var_ += weight * (dev2 = dev1*dev1); // squared value 00577 mad_ += weight * abs(dev1); // absolute deviation from mean 00578 skewness_ += weight * dev2 * dev1; 00579 kurtosis_ += weight * dev2 * dev2; 00580 } 00581 mad_ /= sumweights_; 00582 uvar_ = (var_ - sum*sum/sumweights_)/(sumweights_ - sum2weights_/sumweights_); 00583 var_ = (var_ - sum*sum)/(sumweights_); 00584 std_ = sqrt((double)var_); 00585 ustd_ = sqrt((double)uvar_); 00586 // if there is variance 00587 if (var_) 00588 { 00589 skewness_ /= (sumweights_ * var_ * std_); 00590 kurtosis_ /= (sumweights_ * var_ * var_); 00591 kurtosis_ -= 3.0; 00592 } 00593 else 00594 { 00595 skewness_ = 0.0; 00596 kurtosis_ = 0.0; 00597 } 00598 // Stat are computed 00599 compstat_ = true; 00600 } 00601 00603 void compOrderStatistics() 00604 { 00605 // no samples 00606 if (nbAvailable_ == 0) 00607 { 00608 median_ = Arithmetic<Real>::NA(); 00609 quartiles_ = Arithmetic<Real>::NA(); 00610 deciles_ = Arithmetic<Real>::NA(); 00611 viceciles_ = Arithmetic<Real>::NA(); 00612 percentiles_ = Arithmetic<Real>::NA(); 00613 return; 00614 } 00615 00616 // One observation 00617 if (nbAvailable_ == 1) 00618 { 00619 median_ = max_; 00620 quartiles_ = max_; 00621 deciles_ = max_; 00622 viceciles_ = max_; 00623 percentiles_ = max_; 00624 return; 00625 } 00626 // sort values 00627 if (!sorted_) 00628 { 00629 // if the Variable is not weighted, we can sort it directly 00630 if (!weighted_) heapSort(V_); 00631 else // otherwise we have to sort V_and W_ using indirection 00632 { 00633 Array1D<Integer> I; // auxiliary Array for indirection 00634 heapSort(I, V_); 00635 applySort(V_, I); 00636 applySort(W_, I); 00637 } 00638 sorted_ = true; 00639 } 00640 // Find the Quantiles of the distribution 00641 compQuantiles(quartiles_); 00642 compQuantiles(deciles_); 00643 compQuantiles(viceciles_); 00644 compQuantiles(percentiles_); 00645 // get the median 00646 median_ = quartiles_[2]; 00647 } 00648 00655 void compQuantiles(TContainer1D& T) 00656 { 00657 #ifdef STK_DEBUG 00658 if (!sorted_) 00659 { throw runtime_error("Univariate<Real>::comptiles(T) " 00660 "V_ is not sorted"); 00661 } 00662 #endif 00663 // in case of 00664 T.shift(1); 00665 // number of quantiles 00666 Integer nt = T.last(), shift = V_.first()-1; 00667 Real n1 = Real(nbAvailable_+1), nt1 = Real(nt+1); 00668 00669 for (Integer k=1; k<=nt; k++) 00670 { 00671 // find index of the k-th quantile 00672 Real find = Real(k)*n1/nt1; // compute the index in Real 00673 Integer tind = STK::max(Integer(1), Integer(find)); // in Integer 00674 Integer tind1 = STK::min(nbAvailable_, tind+1); // next 00675 Real aux = find - Real(tind); // lower ponderation 00676 00677 // nbAvailable_+1 not perfectly divisible ? weighting... 00678 aux ? T[k] = aux * V_[shift+tind] + (1.0-aux)*V_[shift+tind1] 00679 : T[k] = V_[shift+tind]; 00680 } 00681 } 00682 00683 protected: 00684 // dimensions 00685 Integer nbSamples_; 00686 Integer nbAvailable_; 00687 Integer nbMiss_; 00688 00689 // containers 00690 TContainer1D V_; 00691 TContainer1D W_; 00692 00693 // Some flag about the internal state of the object 00694 bool weighted_; 00695 bool sorted_; 00696 bool comporder_; 00697 bool compstat_; 00698 00699 // statistics 00700 Real sumweights_; 00701 Real sum2weights_; 00702 Real min_; 00703 Real max_; 00704 Real amax_; 00705 00706 Real mean_; 00707 Real median_; 00708 Real var_; 00709 Real uvar_; 00710 Real std_; 00711 Real ustd_; 00712 Real mad_; 00713 Real kurtosis_; 00714 Real skewness_; 00715 00716 TContainer1D quartiles_; 00717 TContainer1D deciles_; 00718 TContainer1D viceciles_; 00719 TContainer1D percentiles_; 00720 }; 00721 00729 template<class TContainer1D> 00730 Real mean( ITContainer1D<Real, TContainer1D> const& V) 00731 { 00732 // no samples 00733 if (V.empty()) 00734 { return Arithmetic<Real>::NA();} 00735 00736 // get dimensions 00737 Integer nobs = V.size(); 00738 // sum the samples 00739 Real sum = 0.0; 00740 for (Integer i=V.first(); i<=V.last(); i++) 00741 { if (Arithmetic<Real>::isFinite(V[i])) 00742 sum += V[i]; 00743 else 00744 nobs--; 00745 } 00746 // compute the mean 00747 if (nobs) return sum /= (Real)nobs; 00748 else return Arithmetic<Real>::NA(); 00749 } 00750 00761 template<class TContainer1D> 00762 Real mean( ITContainer1D<Real, TContainer1D> const& V 00763 , ITContainer1D<Real, TContainer1D> const& W 00764 ) 00765 { 00766 // no samples 00767 if (V.empty()) { return Arithmetic<Real>::NA();} 00768 00769 // get dimensions 00770 Real sum = 0.0; 00771 // if the weight are not of the same size, ignore them 00772 if (V.range() != W.range()) return mean(V); 00773 00774 // sum the weighted samples 00775 Real nweight = 0.0; 00776 for (Integer i=V.first(); i<=V.last(); i++) 00777 { if ( (Arithmetic<Real>::isFinite(V[i])) 00778 &&(Arithmetic<Real>::isFinite(W[i])) 00779 ) 00780 { 00781 Real weight = abs(W[i]); 00782 nweight += weight; 00783 sum += weight * V[i]; 00784 } 00785 } 00786 // compute the mean 00787 return (nweight) ? sum /= nweight : 0.; 00788 } 00789 00800 template<class TContainer1D> 00801 Real varianceWithFixedMean( ITContainer1D<Real, TContainer1D> const& V 00802 , Real const& mu 00803 , bool unbiased = false 00804 ) 00805 { 00806 // no samples 00807 if (V.empty()) { return Arithmetic<Real>::NA();} 00808 00809 // get dimensions 00810 const Integer first = V.first(), last = V.last(); 00811 Integer nobs = V.size(); 00812 // sum 00813 Real sum = 0.0, var = 0.0, dev; 00814 for (Integer i=first; i<=last; i++) 00815 { 00816 if (Arithmetic<Real>::isFinite(V[i])) 00817 { 00818 sum += (dev = V[i] - mu); // deviation from the mean 00819 var += (dev*dev); // squared value 00820 } 00821 else nobs--; 00822 } 00823 // compute the variance 00824 if (unbiased) 00825 { 00826 return (nobs > 1) ? (var - (sum*sum)/(Real)nobs)/(Real)(nobs -1) 00827 : Arithmetic<Real>::NA(); 00828 00829 } 00830 // variance 00831 return (nobs > 0) ? (var - (sum*sum)/(Real)nobs)/(Real)(nobs) 00832 : Arithmetic<Real>::NA(); 00833 } 00834 00845 template<class TContainer1D> 00846 Real varianceWithFixedMean( ITContainer1D<Real, TContainer1D> const& V 00847 , ITContainer1D<Real, TContainer1D> const& W 00848 , Real const& mu 00849 , bool unbiased = false 00850 ) 00851 { 00852 // no samples 00853 if (V.empty()) { return Arithmetic<Real>::NA();} 00854 00855 // if the weight are not of the same size, ignore them 00856 if (V.range() != W.range()) return varianceWithFixedMean(V, mu); 00857 00858 // get dimensions 00859 const Integer first = V.first(), last = V.last(); 00860 // sum 00861 Real dev, sum = 0.0, var = 0.0, nweight = 0.0, nweight2 = 0.0; 00862 for (Integer i=first; i<=last; i++) 00863 { if ( Arithmetic<Real>::isFinite(V[i]) && Arithmetic<Real>::isFinite(W[i]) ) 00864 { 00865 Real weight = abs(W[i]); 00866 nweight += weight; 00867 nweight2 += weight * weight; 00868 sum += weight*(dev = V[i]-mu); // deviation from the mean 00869 var += weight*(dev*dev); // squared value 00870 } 00871 } 00872 // compute the variance 00873 if (unbiased) 00874 { 00875 return (nweight*nweight - nweight2 > 0.) ? (var - sum*sum/nweight)/(nweight - nweight2/nweight) 00876 : Arithmetic<Real>::NA(); 00877 00878 } 00879 return (nweight) ? (var - sum*sum)/(nweight) : Arithmetic<Real>::NA(); 00880 } 00881 00891 template<class TContainer1D> 00892 Real variance( ITContainer1D<Real, TContainer1D> const& V, bool unbiased = false) 00893 { 00894 // no samples 00895 if (V.empty()) { return Arithmetic<Real>::NA();} 00896 00897 // Compute the mean 00898 Real mu = mean<TContainer1D>(V); 00899 00900 // get dimensions 00901 const Integer first = V.first(), last = V.last(); 00902 Integer nobs = V.size(); 00903 // sum 00904 Real sum = 0.0, var = 0.0, dev; 00905 for (Integer i=first; i<=last; i++) 00906 { 00907 if (Arithmetic<Real>::isFinite(V[i])) 00908 { 00909 sum += (dev = V[i] - mu); // deviation from the mean 00910 var += (dev*dev); // squared value 00911 } 00912 else nobs--; 00913 } 00914 // compute the variance 00915 if (unbiased) 00916 { 00917 return (nobs > 1) ? (var - (sum*sum)/(Real)nobs)/(Real)(nobs -1) 00918 : Arithmetic<Real>::NA(); 00919 00920 } 00921 return (nobs > 0) ? (var - (sum*sum)/(Real)nobs)/(Real)(nobs) 00922 : Arithmetic<Real>::NA(); 00923 } 00924 00939 template<class TContainer1D> 00940 Real variance( ITContainer1D<Real, TContainer1D> const& V 00941 , ITContainer1D<Real, TContainer1D> const& W 00942 , bool unbiased = false 00943 ) 00944 { 00945 // no samples 00946 if (V.empty()) { return Arithmetic<Real>::NA();} 00947 00948 // Compute the mean if necessary 00949 Real mu = mean(V, W); 00950 00951 // if the weight are not of the same size, ignore them 00952 if (V.range() != W.range()) return varianceWithFixedMean(V, mu); 00953 00954 // get dimensions 00955 const Integer first = V.first(), last = V.last(); 00956 // sum 00957 Real dev, sum = 0.0, var = 0.0, nweight = 0.0, nweight2 = 0.0; 00958 for (Integer i=first; i<=last; i++) 00959 { if ( Arithmetic<Real>::isFinite(V[i]) && Arithmetic<Real>::isFinite(W[i]) ) 00960 { 00961 Real weight = abs(W[i]); 00962 nweight += weight; 00963 nweight2 += weight * weight; 00964 sum += weight*(dev = V[i]-mu); // deviation from the mean 00965 var += weight*(dev*dev); // squared value 00966 } 00967 } 00968 // compute the variance 00969 if (unbiased) 00970 { 00971 return (nweight*nweight - nweight2 > 0.) ? (var - sum*sum/nweight)/(nweight - nweight2/nweight) 00972 : Arithmetic<Real>::NA(); 00973 00974 } 00975 return (nweight) ? (var - sum*sum)/(nweight) : Arithmetic<Real>::NA(); 00976 } 00977 00978 00979 } // namespace Stat 00980 00981 } // namespace STK 00982 00983 #endif /*STK_STAT_UNIVARIATEREAL_H*/