|
STK++ 1.0
|
00001 /*--------------------------------------------------------------------*/ 00002 /* Copyright (C) 2004-2007 Serge Iovleff 00003 00004 This program is free software; you can redistribute it and/or modify 00005 it under the terms of the GNU Lesser General Public License as 00006 published by the Free Software Foundation; either version 2 of the 00007 License, or (at your option) any later version. 00008 00009 This program is distributed in the hope that it will be useful, 00010 but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 GNU Lesser General Public License for more details. 00013 00014 You should have received a copy of the GNU Lesser General Public 00015 License along with this program; if not, write to the 00016 Free Software Foundation, Inc., 00017 59 Temple Place, 00018 Suite 330, 00019 Boston, MA 02111-1307 00020 USA 00021 00022 Contact : Serge.Iovleff@stkpp.org 00023 */ 00024 00025 /* 00026 * Project: stkpp::DManager 00027 * Purpose: Implementation of the class ReadWriteCsv.h. 00028 * Author: Serge Iovleff, serge.iovleff@stkpp.org 00029 * 00030 **/ 00031 00036 // required STL headers 00037 #include <iostream> 00038 #include <sstream> 00039 #include <fstream> 00040 #include <iomanip> 00041 00042 #include "../include/STK_ReadWriteCsv.h" 00043 #include "../include/STK_DManager_Util.h" 00044 00045 namespace STK 00046 { 00047 00051 static const std::string ERRORCODES[] = 00052 { 00053 "In ReadWriteCsv UNKNOWN ERROR", 00054 "In ReadWriteCsv::variable() An unknown error occurred!", 00055 "In ReadWriteCsv Variable name not found!", 00056 "In ReadWriteCsv Filename name not found!", 00057 "In ReadWriteCsv File not found!", 00058 "In ReadWriteCsv The Number of Headers is different" 00059 " from the Number of Data Columns!" 00060 }; 00061 00062 // using declarations 00063 using namespace Csv; 00064 00069 static Integer maxLength(const Variable<String>& V) 00070 { 00071 if (V.empty()) return 0; 00072 00073 // initialize 00074 Integer maxlength = -Arithmetic<Integer>::max(); 00075 // loop over the values 00076 for (Integer i=V.first(); i<=V.last(); i++) 00077 { 00078 if (!Arithmetic<String>::isNA(V[i])) // update 00079 maxlength = max(maxlength, (Integer(V[i].size()))); 00080 } 00081 return maxlength; 00082 } 00083 00084 00093 static Integer CountCols( String const& line 00094 , String const& delimiters 00095 , Variable<String>& typeDelimiter 00096 ) 00097 { 00098 // number of fields in the file 00099 Integer numField = 0; 00100 // no delimiters 00101 typeDelimiter.resize(); 00102 // Find first delimiter 00103 String::size_type idx = line.find_first_of(delimiters); 00104 // if the position of the delimiter char is found (there is a position) 00105 while (idx != line.npos) 00106 { 00107 // add a column 00108 ++numField; 00109 // save delimiter 00110 typeDelimiter.push_back(line.substr(idx,1)); 00111 // find next delimiter 00112 idx = line.find_first_of(delimiters, idx+1); 00113 } 00114 // last delimiter is the end of line 00115 typeDelimiter.push_back(_T("\n")); 00116 // return the number of fields 00117 return ++numField; 00118 } 00119 00120 // The default constructor. Instantiates an instance of CDataFile and 00121 // initialize data members to default values. 00122 // Instantiates an instance of CDataFile with the 00123 // specified read flags. 00124 ReadWriteCsv::ReadWriteCsv( bool read_names) 00125 : file_name_("") 00126 , with_names_(read_names) 00127 , delimiter_(DEFAULT_DELIMITER) 00128 , reserve_(DEFAULT_RESERVE) 00129 , msg_error_("") 00130 { } 00131 00132 // Misc. constructor. Instantiates an instance of ReadWriteCsv 00133 // with specified read flags. 00134 ReadWriteCsv::ReadWriteCsv( std::string const& file_name 00135 , bool read_names 00136 , String const& delimiter 00137 , Integer const& reserve 00138 ) 00139 : file_name_(file_name) 00140 , with_names_(read_names) 00141 , delimiter_(delimiter) 00142 , reserve_(reserve) 00143 , msg_error_("") 00144 { } 00145 00146 // Copy constructor. Instantiates an instance of ReadWriteCsv with the 00147 // contents of another ReadWriteCsv. 00148 ReadWriteCsv::ReadWriteCsv(ReadWriteCsv const& df) 00149 { *this = df;} 00150 00151 // destructor 00152 ReadWriteCsv::~ReadWriteCsv() 00153 { clear();} 00154 00155 // sets one ReadWriteCsv equal to another ReadWriteCsv. 00156 ReadWriteCsv& ReadWriteCsv::operator=(ReadWriteCsv const& df) 00157 { 00158 delimiter_ = df.delimiter_; 00159 reserve_ = df.reserve_; 00160 with_names_ = df.with_names_; 00161 file_name_ = df.file_name_; 00162 msg_error_ = df.msg_error_; 00163 source_file_names_ = df.source_file_names_; 00164 str_data_ = df.str_data_; 00165 00166 return *this; 00167 } 00168 00169 // adds one ReadWriteCsv to another ReadWriteCsv 00170 ReadWriteCsv& ReadWriteCsv::operator+=(ReadWriteCsv const& df) 00171 { 00172 for ( Integer i=df.source_file_names_.first() 00173 ; i<=df.source_file_names_.last() 00174 ; i++) 00175 source_file_names_.push_back(df.source_file_names_[i]); 00176 00177 for ( Integer i=df.str_data_.first() 00178 ; i<=df.str_data_.last() 00179 ; i++) 00180 str_data_.push_back(df.str_data_[i]); 00181 00182 return *this; 00183 } 00184 00185 // adds ReadWriteCsv(s) together 00186 ReadWriteCsv ReadWriteCsv::operator+(ReadWriteCsv const& df) const 00187 { // copy this, add df and return the result 00188 return ReadWriteCsv((*this)) += df; 00189 } 00190 00191 // try to set a value at the (icol, irow) place 00192 bool ReadWriteCsv::setData( Integer const& icol 00193 , Integer const& irow 00194 , String const& value) 00195 { 00196 try 00197 { 00198 str_data_.at(icol).at(irow) = value; 00199 return true; 00200 } 00201 catch(const Exception& e) { msg_error_ = e.error(); } 00202 catch(...) { msg_error_ = ERRORCODES[0]; } 00203 return false; 00204 } 00205 00206 bool ReadWriteCsv::appendData(Integer const& icol, String const& value) 00207 { 00208 try 00209 { 00210 if (Arithmetic<String>::isNA(value)) 00211 str_data_[icol].push_back(Arithmetic<String>::NA()); 00212 else 00213 str_data_[icol].push_back(value); 00214 return true; 00215 } 00216 catch(const Exception& e) { msg_error_ = e.error(); } 00217 catch(...) { msg_error_ = ERRORCODES[0]; } 00218 return false; 00219 } 00220 00221 // Deletes the variable whose index is icol from a ReadWriteCsv. 00222 // Returns true if successful, false if an error is encountered. 00223 bool ReadWriteCsv::eraseColumn(Integer const& icol) 00224 { 00225 try 00226 { 00227 // delete the variable from source_file_names_ 00228 source_file_names_.erase(icol), 00229 00230 // delete the variable from str_data_ 00231 str_data_.erase(icol); 00232 00233 return true; 00234 } 00235 catch(const Exception& e) { msg_error_ = e.error(); } 00236 catch(...) { msg_error_ = ERRORCODES[0]; } 00237 return false; 00238 } 00239 00240 Integer ReadWriteCsv::name( Integer const& icol, String& rStr) const 00241 { 00242 try 00243 { 00244 rStr = str_data_.at(icol).name(); 00245 return static_cast<Integer> (rStr.length()); 00246 } 00247 catch(const Exception& e) { msg_error_ = e.error(); } 00248 catch(...) { msg_error_ = ERRORCODES[1]; } 00249 return -1; 00250 } 00251 00252 /* Set the variable name @c name at the specified index. 00253 * @param icol index of the variable 00254 * @param name name of the variable to set 00255 * @return @c true if successful, @c false if an error is encountered. 00256 **/ 00257 bool ReadWriteCsv::setName( Integer const& icol, String const& name) 00258 { 00259 try 00260 { 00261 str_data_.at(icol).setName(name); 00262 return true; 00263 } 00264 catch(const Exception& e) { msg_error_ = e.error(); } 00265 catch(...) { msg_error_ = ERRORCODES[1]; } 00266 return false; 00267 } 00268 00269 Integer ReadWriteCsv::largestNumberOfRows() const 00270 { 00271 Integer retVal = 0; 00272 00273 for (Integer i=str_data_.first(); i<=str_data_.last(); i++) 00274 retVal = max(retVal, str_data_[i].size()); 00275 00276 return retVal; 00277 } 00278 00279 /* Returns the largest number of end index of samples. 00280 **/ 00281 Integer ReadWriteCsv::lastVe() const 00282 { 00283 Integer retVal = Arithmetic<Integer> ::min(); 00284 00285 const Integer first = str_data_.first(), last = str_data_.last(); 00286 for (Integer i=first; i<=last; i++) 00287 { 00288 //stk_cout << str_data_.at(i); 00289 retVal = max(retVal, lastVe(i)); 00290 } 00291 // return the maximal number of row 00292 return retVal; 00293 } 00294 00295 /* Returns the lower number of end index of samples. 00296 **/ 00297 Integer ReadWriteCsv::firstVe() const 00298 { 00299 Integer retVal = Arithmetic<Integer> ::max(); 00300 00301 const Integer first = str_data_.first(), last = str_data_.last(); 00302 for (Integer i=first; i<=last; i++) 00303 retVal = min(retVal, firstVe(i)); 00304 00305 return retVal; 00306 } 00307 00308 // Clears all data in the ReadWriteCsv 00309 void ReadWriteCsv::clear() 00310 { 00311 msg_error_ = ""; 00312 source_file_names_.clear(); 00313 str_data_.clear(); 00314 } 00315 00316 // Returns the length of the String if successful. 00317 // Returns -1 if an error is encountered. 00318 Integer ReadWriteCsv::data( Integer const& icol 00319 , Integer const& irow 00320 , String &lpStr) const 00321 { 00322 Integer retVal = 0; 00323 00324 try 00325 { 00326 lpStr = str_data_.at(icol).at(irow); 00327 retVal = static_cast<Integer> (lpStr.length()); 00328 } 00329 catch(const Exception& e) 00330 { 00331 msg_error_ = e.error(); 00332 retVal = -1; 00333 } 00334 catch(...) // other Exceptions 00335 { 00336 msg_error_ = ERRORCODES[1]; 00337 retVal = -1; 00338 } 00339 return retVal; 00340 } 00341 00342 // Returns the length of the String if successful. 00343 // Returns -1 if an error is encountered. 00344 Integer ReadWriteCsv::data( String const& variable_name 00345 , Integer const& irow 00346 , String& rStr) const 00347 { 00348 Integer retVal = 0; 00349 Integer iVar = lookupVariableIndex(variable_name); // find col 00350 00351 if(iVar != -1) 00352 retVal = data(iVar, irow, rStr); 00353 else 00354 { 00355 msg_error_ = ERRORCODES[5]; 00356 retVal = -1; 00357 } 00358 return retVal; 00359 } 00360 00361 00362 // Returns the new size of rVector if successful. 00363 // Returns -1 if an error is encountered. 00364 Integer ReadWriteCsv::data( Integer const& icol, Variable<String>& rVector) const 00365 { 00366 Integer retVal = 0; 00367 00368 try 00369 { 00370 rVector = str_data_.at(icol); 00371 retVal = rVector.size(); 00372 } 00373 catch(const Exception& e) 00374 { 00375 msg_error_ = e.error(); 00376 retVal = -1; 00377 } 00378 catch(...) 00379 { 00380 msg_error_ = ERRORCODES[2]; 00381 retVal = -1; 00382 } 00383 return retVal; 00384 } 00385 00386 // Returns the new size of rVector if successful. 00387 // Returns -1 if an error is encountered. 00388 Integer ReadWriteCsv::data( String const& variable_name 00389 , Variable<String>& rVector) const 00390 { 00391 Integer index = lookupVariableIndex(variable_name); 00392 00393 if(index != -1) 00394 return data(index, rVector); 00395 else 00396 msg_error_ = ERRORCODES[5]; 00397 return -1; 00398 } 00399 00400 // Returns the index of the first instance of the specified variable 00401 // found AFTER iStartingIndex. 00402 // Returns -1 if the variable is not found. 00403 Integer ReadWriteCsv::colIndex( String const& variable_name 00404 , Integer const& iStartingIndex 00405 ) const 00406 { 00407 return lookupVariableIndex(variable_name, iStartingIndex); 00408 } 00409 00410 // Returns the index of the specified variable. 00411 // Returns -1 if the variable is not found. 00412 Integer ReadWriteCsv::colIndex( String const& variable_name 00413 , std::string const& sourceFilename 00414 , Integer const& iStartingIndex 00415 ) const 00416 { 00417 Integer it = source_file_names_.first(); 00418 while (it <= source_file_names_.last()) 00419 { if (source_file_names_[it] == sourceFilename) break; 00420 it++; 00421 } 00422 00423 if(it == source_file_names_.last()+1) // sourceFilename was not found 00424 { 00425 msg_error_ = ERRORCODES[6]; 00426 return -1; 00427 } 00428 00429 Integer offset = source_file_names_.first(); 00430 while(it != source_file_names_.first()) 00431 { 00432 offset++; 00433 it--; 00434 } 00435 00436 return lookupVariableIndex(variable_name, iStartingIndex+offset); 00437 } 00438 00439 // Returns the index of the first variable name that matches szName. 00440 // Returns -1 if szName is not found. 00441 Integer ReadWriteCsv::lookupVariableIndex( String const& variable_name 00442 , Integer const& offset /*=0*/) const 00443 { 00444 Integer it = str_data_.first() + offset; 00445 while (it <= str_data_.last()) 00446 { if (str_data_[it].name() == variable_name) break; 00447 it++; 00448 } 00449 00450 if(it == str_data_.last()+1) // variable Name was not found 00451 return -1; 00452 00453 Integer retVal = str_data_.first(); 00454 while(it != str_data_.first()) 00455 { 00456 retVal++; 00457 it--; 00458 } 00459 return retVal; 00460 } 00461 00462 // Attempts to add a variable with the name specified by data 00463 // and the values contained in data. Returns true if successful, 00464 // false if an error is encountered. 00465 bool ReadWriteCsv::push_back( const Variable<String>& data) 00466 { 00467 try 00468 { 00469 source_file_names_.push_back(file_name_); 00470 str_data_.push_back(data); 00471 str_data_.back().reserve(reserve_); 00472 return true; 00473 } 00474 catch(const Exception& e) { msg_error_ = e.error(); } 00475 catch(...) { msg_error_ = ERRORCODES[0]; } 00476 return false; 00477 } 00478 00479 // Attempts to add a variable with the values contained in data. 00480 // Returns true if successful, 00481 // false if an error is encountered. 00482 bool ReadWriteCsv::push_front( const Variable<String>& data) 00483 { 00484 try 00485 { 00486 source_file_names_.push_front(file_name_); 00487 str_data_.push_front(data); 00488 return true; 00489 } 00490 catch(const Exception& e) { msg_error_ = e.error(); } 00491 catch(...) { msg_error_ = ERRORCODES[0]; } 00492 return false; 00493 } 00494 00495 // Reads the default file. 00496 // Returns true if successful, false if an error occurred. 00497 bool ReadWriteCsv::read() 00498 { 00499 return read(file_name_); 00500 } 00501 // Reads the specified file. 00502 // Returns true if successful, false if an error occurred. 00503 bool ReadWriteCsv::read(std::string const& file_name) 00504 { 00505 try 00506 { 00507 // update file_name 00508 file_name_ = file_name; 00509 // input file stream 00510 ifstream inFile; 00511 // open file 00512 inFile.open(file_name.c_str()); 00513 // check error 00514 if (inFile.rdstate() & std::ios::failbit) 00515 { 00516 inFile.close(); 00517 msg_error_ = ERRORCODES[4]; 00518 msg_error_ += "\nFile: " + file_name; 00519 return false; 00520 } 00521 // read file 00522 inFile >> *this; 00523 // close file 00524 inFile.close(); 00525 return true; 00526 } 00527 catch(const Exception& e) 00528 { 00529 msg_error_ = e.error(); 00530 msg_error_ += "\nIn ReadWriteCsv::read(" + file_name + ")"; 00531 } 00532 catch(...) 00533 { 00534 msg_error_ = ERRORCODES[0]; 00535 msg_error_ += "\nIn ReadWriteCsv::read(" + file_name + ")"; 00536 } 00537 00538 return false; 00539 } 00540 00541 // write the ReadWriteCsv in a file 00542 bool ReadWriteCsv::write( const std::string &file_name) const 00543 { 00544 file_name_ = file_name; 00545 try 00546 { 00547 ofstream os(file_name.c_str()); 00548 writeSelection( os 00549 , firstVe() 00550 , lastVe() 00551 , str_data_.first() 00552 , str_data_.last() 00553 ); 00554 os.close(); 00555 return true; 00556 } 00557 catch(const Exception& e) { msg_error_ = e.error(); } 00558 catch(...) { msg_error_ = ERRORCODES[0]; } 00559 return false; 00560 } 00561 00562 // write a selection 00563 void ReadWriteCsv::write( ostream& os) const 00564 { 00565 writeSelection(os, firstVe(), lastVe(), first(), last()); 00566 } 00567 00568 // write a selection 00569 void ReadWriteCsv::writeSelection( ostream& os 00570 , Integer const& top 00571 , Integer const& bottom 00572 , Integer const& left 00573 , Integer const& right) const 00574 { 00575 // create a vector for the format of the output 00576 Array1D<Integer> format(Range(left, right), 0); 00577 // for each var, find the largest size 00578 for(Integer iVar=left; iVar<=right; iVar++) 00579 { 00580 format.at(iVar) = maxLength(str_data_.at(iVar)); 00581 if (with_names_) 00582 format.at(iVar) = max( format.at(iVar) 00583 , (Integer )str_data_.at(iVar).name().size()); 00584 } 00585 // write if needed names variables 00586 if (with_names_) 00587 for(Integer iVar=left; iVar<=right; iVar++) 00588 { 00589 os << std::setw(format[iVar]) << std::right 00590 << ConstProxy<String>(str_data_.at(iVar).name()) 00591 << ((iVar==right) ? _T('\n') : delimiter_.at(0)); 00592 } 00593 00594 // write data 00595 for(Integer irow = top; irow<=bottom; irow++) 00596 for(Integer iVar = left; iVar<=right; iVar++) 00597 { 00598 try 00599 { 00600 os << std::setw(format[iVar]) << std::right 00601 << ConstProxy<String>(str_data_.at(iVar).at(irow)); 00602 } 00603 catch(...) 00604 { 00605 // if an error occur, we put NA value 00606 os << std::setw(format[iVar]) << std::right << STRING_NA; 00607 } 00608 os << ((iVar==right) ? _T('\n') : delimiter_.at(0)); 00609 } 00610 } 00611 00612 // reads the data from the stream and returns the stream when done. 00613 istream& operator>>(istream& is, ReadWriteCsv& df) 00614 { 00615 try 00616 { 00617 // clear previous ReadCvs if we don't want to append 00618 if (!(Csv::RF_APPEND_DATA)) df.clear(); 00619 // compute number of existing variables 00620 Integer colOffset = df.size(); 00621 // initialize the initial number of variables to 0 00622 Integer nbVars = 0; 00623 00624 // aux variable for handling delimiters 00625 Variable<String> typeDelimiter; 00626 // set filname 00627 df.source_file_names_.push_back(df.file_name_); 00628 // load file in memory 00629 stringstream inBuffer; 00630 inBuffer << is.rdbuf(); 00631 00632 // If the names are at the top line 00633 if (df.with_names_) 00634 { 00635 // get current line 00636 String lineBuffer; 00637 // Count the number of names of the first line 00638 Integer numField; 00639 do 00640 { 00641 // get current line in strBuffer 00642 std::getline(inBuffer, lineBuffer); 00643 DManager::removeCharBeforeAndAfter(lineBuffer, CHAR_BLANK); 00644 (lineBuffer.size() == 0) ? 00645 numField = 0 00646 : numField = CountCols( lineBuffer, df.delimiter_, typeDelimiter); 00647 } 00648 while ((numField == 0)&&(!inBuffer.eof())); 00649 // break if we get the end of file 00650 if (inBuffer.eof()) return is; 00651 00652 // Declare an input string stream 00653 istringstream instream; 00654 // Reset from possible previous errors. 00655 instream.clear(); 00656 // Use strBuffer as source of input. 00657 instream.str(lineBuffer); 00658 // Loop over the columns 00659 for(Integer icol=1; icol<=numField; icol++) 00660 { 00661 // Append a Col 00662 df.push_back(Variable<String>()); 00663 df.str_data_.at(icol+colOffset).setName( DManager::getField( instream 00664 , typeDelimiter.elt(icol).at(0)) 00665 ); 00666 } 00667 // Update the number of var 00668 nbVars = STK::max(nbVars, numField); 00669 } 00670 00671 // Read data : loop for all rows 00672 for (Integer irow=1; !inBuffer.eof(); irow++) 00673 { 00674 // current line 00675 String lineBuffer; 00676 // number of fields in the line 00677 Integer numField; 00678 // loop until we encounter a line 00679 do 00680 { 00681 // get current line in strBuffer 00682 std::getline(inBuffer, lineBuffer); 00683 DManager::removeCharBeforeAndAfter(lineBuffer, CHAR_BLANK); 00684 } 00685 while ((lineBuffer.size() == 0)&&(!inBuffer.eof())); 00686 // pass empty line 00687 if (lineBuffer.size() == 0) continue; 00688 numField = CountCols( lineBuffer, df.delimiter_, typeDelimiter); 00689 // Declare an input string stream 00690 istringstream instream; 00691 // Reset from possible previous errors. 00692 instream.clear(); 00693 // Use lineBuffer as source of input. 00694 instream.str(lineBuffer); 00695 String fieldValue; 00696 // first loop on the exisiting cols with data 00697 Integer attCols = min(numField, nbVars); 00698 for (Integer icol=1; icol<=attCols; icol++) 00699 { 00700 // Read field 00701 fieldValue = DManager::getField( instream, typeDelimiter.elt(icol).at(0)); 00702 // append Data to the columnn 00703 df.appendData(icol+colOffset, fieldValue); 00704 } 00705 00706 // second loop on the existing cols without data 00707 // will be passed if (numField < nbVars) 00708 for (Integer icol=attCols+1; icol<=nbVars; icol++) 00709 df.str_data_.at(icol+colOffset).push_back(Arithmetic<String>::NA()); 00710 00711 // loop on the non-exisiting cols 00712 // will be passed if (numField > nbVars) 00713 for (Integer icol=nbVars+1; icol<=numField; icol++) 00714 { 00715 // we create each column with NA values 00716 df.push_back(Variable<String>( irow-1 00717 , Arithmetic<String>::NA() 00718 , IVariable::giveName(icol+colOffset) 00719 ) 00720 ); 00721 // Read field 00722 fieldValue = DManager::getField( instream, typeDelimiter.elt(icol).at(0)); 00723 df.appendData(icol+colOffset, fieldValue); 00724 } 00725 // Update the number of variables 00726 nbVars = STK::max(nbVars, numField); 00727 // break if we get the end of the file 00728 if (inBuffer.eof()) break; 00729 } // irow loop 00730 } 00731 catch(const Exception& e) 00732 { 00733 df.msg_error_ = e.error(); 00734 throw e; 00735 } 00736 catch(...) 00737 { 00738 df.msg_error_ = ERRORCODES[0]; 00739 throw Exception(); 00740 } 00741 00742 return is; 00743 } 00744 00745 ostream& operator<<(ostream& os, ReadWriteCsv const& df) 00746 { 00747 try 00748 { 00749 df.writeSelection( os 00750 , df.firstVe() 00751 , df.lastVe() 00752 , df.str_data_.first() 00753 , df.str_data_.last() 00754 ); 00755 } 00756 // catch and re-throw any Exceptions 00757 catch(const Exception& e) { throw e; } 00758 catch(...) { throw Exception(); } 00759 return os; 00760 } 00761 00762 } // Namespace STK