STK++ 1.0
STK_DataFrame.cpp
Go to the documentation of this file.
00001 /*--------------------------------------------------------------------*/
00002 /*     Copyright (C) 2004-2007  Serge Iovleff
00003 
00004     This program is free software; you can redistribute it and/or modify
00005     it under the terms of the GNU Lesser General Public License as
00006     published by the Free Software Foundation; either version 2 of the
00007     License, or (at your option) any later version.
00008 
00009     This program is distributed in the hope that it will be useful,
00010     but WITHOUT ANY WARRANTY; without even the implied warranty of
00011     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012     GNU Lesser General Public License for more details.
00013 
00014     You should have received a copy of the GNU Lesser General Public
00015     License along with this program; if not, write to the
00016     Free Software Foundation, Inc.,
00017     59 Temple Place,
00018     Suite 330,
00019     Boston, MA 02111-1307
00020     USA
00021 
00022     Contact : Serge.Iovleff@stkpp.org
00023 */
00024 
00025 /*
00026  * Project:  stkpp::DManager
00027  * Purpose:  Implement the DataFrame (Table) class.
00028  * Author:   Serge Iovleff, serge.iovleff@stkpp.org
00029  *
00030 **/
00031 
00036 #include "../include/STK_DataFrame.h"
00037 #include "../include/STK_ExportToCsv.h"
00038 
00039 namespace STK
00040 {
00041 /* Default constructor . */
00042 DataFrame::DataFrame() : _BaseList()
00043                        , IContainer2D(Range(), Range())
00044 { ;}
00045 
00046 /* Copy constructor                                                         */
00047 DataFrame::DataFrame( DataFrame const& T, bool ref)
00048                     : _BaseList(T)
00049                     , IContainer2D(T)
00050 {
00051   // the adress of the variables are copied in List1D
00052   // but we need to copy explicitly the data
00053   for (Integer j=first(); j<=last(); j++) // for all columns
00054     if (T[j])                            // if there is data
00055       at(j) = T[j]->clone(ref); // set the adress of a clone
00056 }
00057 
00058 /* destructor.                                                              */
00059 DataFrame::~DataFrame()
00060 {
00061   // free the rows as the destructor of _BaseList will not free the mem
00062   freeRows();
00063 }
00064 
00065 /* clear the object.                                                  */
00066 void DataFrame::clear()
00067 {
00068   freeMem();
00069   // set default range for list
00070   _BaseList::setRange();
00071   // set default range for container2D
00072   IContainer2D::setRange();
00073 }
00074 
00075 /* Operator = : overwrite the DataFrame with T. */
00076 DataFrame& DataFrame::operator=(DataFrame const& T)
00077 {
00078   // Resize if necessary.
00079   if (sizeHo() != T.sizeHo()) _BaseList::resize(T.rangeHo());
00080 
00081   // Copy without overlapping.
00082   if ((T.first()>=first()))
00083   {
00084     for (Integer jt=T.first(), j=first(); jt<=T.last(); j++, jt++)
00085     {
00086       // clear old mem if any
00087       if (at(j)) delete at(j);
00088       // if there is a variable, create a copy
00089       if (T[jt]) at(j) = T[jt]->clone();
00090       else       at(j) = (IVariable*)NULL;
00091     }
00092   }
00093   else
00094   {
00095     for (Integer jt=T.last(), j=last(); jt>=T.first(); j--, jt--)
00096     {
00097       // clear old mem if any
00098       if (at(j)) delete at(j);
00099       // if there is a variable, create a copy
00100       if (T[jt]) at(j) = T[jt]->clone();
00101       else       at(j) = (IVariable*)NULL;
00102     }
00103   }
00104   return *this;
00105 }
00106 
00107 /* New beginning index for the object. */
00108 void DataFrame::shift(Integer const& cbeg)
00109 {
00110   // list1D shift
00111   _BaseList::shift(cbeg);
00112   // IContainer2D shift for Col
00113   setFirstHo(cbeg);
00114 }
00115 
00116 /* New beginning index for the object. */
00117 void DataFrame::shift(Integer const& rbeg, Integer const& cbeg)
00118 {
00119   // if there is something to do
00120   if ((rbeg - firstRow() != 0)||(cbeg - firstCol() != 0))
00121   {
00122     // list1D shift
00123     shift(cbeg);
00124     // For each column update Variable
00125     for (Integer j=first(); j<=last(); j++)
00126       if (at(j)) { at(j)->shift(rbeg);}
00127     // update range of the rows
00128     setFirstVe(rbeg);
00129   }
00130 }
00131 
00132 
00133 /* Del last column of the container.                                     */
00134 void DataFrame::popBackCols(Integer const& n)
00135 {
00136   // if n<=0 nothing to do
00137   if (n<=0) return;
00138   // if there is columns to erase
00139   if (sizeHo()<n)
00140   { throw out_of_range("DataFrame::popBackCols(n) "
00141                        "sizeHo() < n");
00142   }
00143   // for all columns, delete variables
00144   for (Integer j=lastCol() - n +1; j<=lastCol(); j++)
00145     if (at(j)) delete at(j);
00146   // popBack() of List1D
00147   _BaseList::popBack(n);
00148   // update IContainer2D
00149   decLastHo(n);
00150   // if it was the last elt, free mem
00151   if (this->sizeHo() == 0) freeMem();
00152 }
00153 
00154 /* Delete n columns at the nuber pos of the container. */
00155 void DataFrame::eraseCols( Integer const& pos, Integer const& n)
00156 {
00157   // if n<=0 nothing to do
00158   if (n<=0) return;
00159   // check conditions
00160   if (pos<first())
00161   { throw out_of_range("DataFrame::eraseCols(pos, n) "
00162                        "pos < first()");
00163   }
00164   if (pos>last())
00165   { throw out_of_range("DataFrame::eraseCols(pos, n) "
00166                        "pos > last()");
00167   }
00168   if (last() < pos+n-1)
00169   { throw out_of_range("DataFrame::eraseCols(pos, n) "
00170                        "last() < pos+n-1");
00171   }
00172   // for all columns, delete variables
00173   for (Integer j=pos+n-1; j>=pos; j--)
00174     if (at(j)) { delete at(j);}
00175   // delete elements of the List1D
00176   erase(pos, n);
00177   // update rangeHo_
00178   decLastHo(n);
00179   // if it was the last col, free mem
00180   if (this->sizeHo() == 0) freeMem();
00181 }
00182 
00183 /* Insert variable V at the position i to the container.              */
00184 void DataFrame::insertVariable(Integer const& pos, IVariable* const & V)
00185 {
00186   // List1D
00187   List1D<IVariable*>::insertElt(pos);
00188   at(pos) = V;
00189   // the variable have to be in the same range
00190   at(pos)->shift(firstRow());
00191   // update horizontal range (the number of column)
00192   incLastHo(1);
00193 
00194   // update rows with NA values
00195   Integer inc = sizeVe() - V->size();
00196   if (inc == 0) return; // same size
00197   if (inc > 0) // V has less rows
00198   { // put NA values to the inserted column
00199     at(pos)->pushBackNAValues(inc);
00200   }
00201   else
00202   { // put NA values to the other columns
00203     for (Integer i=this->firstCol(); i <pos; i++)
00204       if (at(i)) { at(i)->pushBackNAValues(-inc);}
00205     for (Integer i=pos+1; i <=this->lastCol(); i++)
00206       if (at(i)) { at(i)->pushBackNAValues(-inc);}
00207     // update LastVe
00208     incLastVe(-inc);
00209   }
00210 }
00211 
00212 /* Merge this with a dataframe (horizontally). */
00213 void DataFrame::pushBackVariable( IVariable* const &V)
00214 {
00215   // List1D
00216   push_back(V);
00217   // update horizontal range (the number of column)
00218   incLastHo(1);
00219   // adjust the first index of the inserted variable
00220   at(lastCol())->shift(firstRow());
00221   // update rows with NA values
00222   Integer inc = sizeVe() - V->size();
00223   if (inc == 0) return; // same size
00224   if (inc > 0) //V has less rows
00225   { // put NA values to the inserted columns
00226     at(lastCol())->pushBackNAValues(inc);
00227   }
00228   else
00229   { // put NA values to the oter columns
00230     for (Integer i=this->first(); i <lastCol(); i++)
00231       if (at(i)) { at(i)->pushBackNAValues(-inc);}
00232     // update LastVe
00233     incLastVe(-inc);
00234   }
00235 }
00236 
00237 /* Insert the DatatFrame D at the column pos to the container.           */
00238 void DataFrame::insertDataFrame( Integer const& pos, const DataFrame& D)
00239 {
00240   // List1D
00241   insertElt(pos, D.sizeHo());
00242   // insert all columns of D
00243   for (Integer i = D.firstCol(), icol = pos; i <=D.lastCol(); i++, icol++)
00244   {
00245     if (D.at(i))
00246     {
00247       at(icol) = D.at(i)->clone();
00248       at(icol)->shift(firstRow());
00249     }
00250   }
00251   // update LastHo
00252   incLastHo(D.sizeHo());
00253   // update rows with NA values
00254   Integer inc = sizeVe() - D.sizeVe();
00255   if (inc == 0) return; // same size
00256   if (inc > 0) // D has less rows
00257   { // put NA values to the inserted columns
00258     for (Integer i= pos+D.sizeHo()-1; i >=pos; i--)
00259       if (at(i)) { at(i)->pushBackNAValues(inc);}
00260   }
00261   else
00262   { // put NA values to the oter columns
00263     for (Integer i=this->firstCol(); i <pos; i++)
00264       if (at(i)) { at(i)->pushBackNAValues(-inc);}
00265     for (Integer i=pos+D.sizeHo(); i <=this->last(); i++)
00266       if (at(i)) { at(i)->pushBackNAValues(-inc);}
00267     // update LastVe
00268     incLastVe(-inc);
00269   }
00270 }
00271 
00272 /* Merge this with a dataframe (horizontally).
00273 */
00274 void DataFrame::pushBackDataFrame( DataFrame const &D)
00275 {
00276   // compute pos
00277   Integer pos(last()+1);
00278   // List1D
00279   pushBack(D.sizeHo());
00280   // insert all columns of D
00281   for (Integer i = D.first(), icol = pos; i <=D.last(); i++, icol++)
00282   {
00283     if (D.at(i))
00284     {
00285       at(icol) = D.at(i)->clone();
00286       at(icol)->shift(firstRow());
00287     }
00288   }
00289   // update LastHo
00290   incLastHo(D.sizeHo());
00291   // update rows with NA values
00292   Integer inc = sizeVe() - D.sizeVe();
00293   if (inc == 0) return; // same size
00294   if (inc > 0) // D has less rows
00295   { // put NA values to the inserted columns
00296     for (Integer i= last(); i >=pos; i--)
00297       if (at(i)) { at(i)->pushBackNAValues(inc);}
00298   }
00299   else
00300   { // put NA values to the oter columns
00301     for (Integer i=this->first(); i <pos; i++)
00302       if (at(i)) { at(i)->pushBackNAValues(-inc);}
00303     // update LastVe
00304     incLastVe(-inc);
00305   }
00306 }
00307 
00308 /* Add columns to the container.                                         */
00309 void DataFrame::pushBackCols(Integer const& n)
00310 {
00311   // if n<=0 nothing to do
00312   if (n <= 0) return;
00313   // add n columns to list1D
00314   insert(Range(last()+1, last()+n), (IVariable*)NULL);
00315   // update IContainer2D
00316   incLastHo(n);
00317 }
00318 
00319 /* Insert columns at the specified position to the container.            */
00320 void DataFrame::insertCols( Integer const& pos, Integer const& n)
00321 {
00322   if (n <= 0) return;        // if n<=0 nothing to do
00323 #ifdef STK_BOUNDS_CHECK
00324   // check conditions
00325   if (pos<first())
00326   { throw out_of_range("DataFrame::insertCols(pos, n) "
00327                             "pos<first()");
00328   }
00329   if (pos>last())
00330   { throw out_of_range("Dataframe::insertCols(pos, n) "
00331                             "pos>last()");
00332   }
00333 #endif
00334   // insert n elements in list1D
00335   insert(Range(pos, pos+n-1), (IVariable*)NULL);
00336   // update IContainer2D
00337   incLastHo(n);
00338 }
00339 
00340 /* Add n rows to the container.                                       */
00341 void DataFrame::pushBackRows(Integer const& n)
00342 {
00343   // if n<=0 nothing to do
00344   if (n<=0) return;
00345   // for each column append row
00346   for (Integer j=first(); j<=last(); j++)
00347   {
00348     if (at(j))
00349     { at(j)->pushBack(n);}
00350   }
00351   // update range of the container
00352   incRangeVe(n);
00353 }
00354 
00355 /* Insert n rows at the ith position of the container.                */
00356 void DataFrame::insertRows( Integer const& pos, Integer const& n)
00357 {
00358   // if n<=0 nothing to do
00359   if (n<=0) return;
00360 #ifdef STK_BOUNDS_CHECK
00361   if (firstRow() > pos)
00362   { throw out_of_range("DataFrame::insertRows(pos, n) "
00363                             "firstRow() > pos");
00364   }
00365   if (lastRow()+1 < pos)
00366   { throw out_of_range("DataFrame::insertRows(pos, n) "
00367                             "lastRow()+1 < pos");
00368   }
00369 #endif
00370   // insert rows to each variables
00371   for (Integer j=first(); j<=last(); j++)
00372   {
00373     // if there is a variable
00374     if (at(j))
00375     { at(j)->insertElt(pos, n);}
00376   }
00377   // update rangeVe_
00378   incLastVe(n);
00379 }
00380 
00381 /* Dell last row of the container.                                    */
00382 void DataFrame::popBackRows(Integer const& n)
00383 {
00384   if (sizeVe() < n)
00385   { throw out_of_range("DataFrame::popBackRows(n) "
00386                             "sizeVe() < n");
00387   }
00388   // del last row to each variable
00389   for (Integer j=first(); j<=last(); j++)
00390     if (at(j)) { at(j)->popBack(n);}
00391   // update rangeVe_
00392   decLastVe(n);
00393 }
00394 
00395 /* Dell n rows at the ith position to the container.                  */
00396 void DataFrame::eraseRows( Integer const& pos, Integer const& n)
00397 {
00398   // if n<=0 nothing to do
00399   if (n<=0) return;
00400 #ifdef STK_BOUNDS_CHECK
00401   if (firstRow() > pos)
00402   { throw out_of_range("DataFrame::eraseRows(pos, n) "
00403                             "firstRow() > pos");
00404   }
00405   if (lastRow() < pos)
00406   { throw out_of_range("DataFrame::eraseRows(pos, n) "
00407                             "lastRow() < pos");
00408   }
00409   if (lastRow() < pos+n-1)
00410   { throw out_of_range("DataFrame::eraseRows(pos, n) "
00411                             "lastRow() < pos+n-1");
00412   }
00413 #endif
00414   // for each variable erase elts
00415   for (Integer j=first(); j<=last(); j++)
00416     if (at(j)) { at(j)->erase(pos, n);}
00417   // update rangeVe_
00418   decLastVe(n);
00419 }
00420 
00421 /* Protected function for memory deallocation.                       */
00422 void DataFrame::freeMem()
00423 {
00424   // liberate variables
00425   freeRows();
00426   // call base freeMem
00427   _BaseList::freeMem();
00428   // set range to default
00429   setRangeVe();
00430   setRangeHo();
00431 }
00432 
00433 /* Protected function for rows memory deallocation.                  */
00434 void DataFrame::freeRows()
00435 {
00436   // for all columns
00437   for (Integer j=first(); j<=last(); j++)
00438     if (at(j))          // if there is mem allocated
00439     {
00440       delete at(j);     // erase
00441       at(j) = 0;        // set default
00442     }
00443   // set default range
00444   setRangeVe();
00445 }
00446 
00447 // write a selection
00448 void DataFrame::writeDataFrame( ostream& os, Integer const& left
00449                               , Integer const& right
00450                               ) const
00451 {
00452   // Export  to csv the DataFrame
00453   ExportToCsv csv(*this);
00454   // get the csv
00455   ReadWriteCsv* pData = csv.p_readWriteCsv();
00456   // set delimiters to blank
00457   pData->setDelimiters(STRING_BLANK);
00458   // write the csv
00459   pData->writeSelection(os, firstRow(), lastRow(), left, right);
00460 }
00461 
00462 /* Print a DataFrame.                                                 */
00463 ostream& operator<< (ostream& s, const DataFrame& V)
00464 {
00465   s << std::right;
00466   V.writeDataFrame(s, V.firstCol(), V.lastCol());
00467 
00468   return s;
00469 }
00470 
00471 } // Namespace STK