SequenceReader/SequenceReaderFasta.cpp

Go to the documentation of this file.
00001 
00002 // #######################################################################
00003 
00004 // SSAHA : Sequence Search and Alignment by Hashing Algorithm
00005 // Version 3.2, released 1st March 2004
00006 // Copyright (c) Genome Research 2002
00007 
00008 // SSAHA is free software; you can redistribute it and/or modify 
00009 // it under the terms of version 2 of the GNU General Public Licence
00010 // as published by the Free Software Foundation.
00011  
00012 // This program is distributed in the hope that it will be useful,
00013 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00014 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015 // GNU General Public Licence for more details.
00016  
00017 // You should have received a copy of the GNU General Public Licence
00018 // along with this program; if not, write to the Free Software
00019 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
00020 // or see the on-line version at http://www.gnu.org/copyleft/gpl.txt
00021 
00022 // #######################################################################
00023 
00024 // Module Name  : SequenceReaderFasta
00025 // File Name    : SequenceReaderFasta.cpp
00026 // Language     : C++
00027 // Module Author: Anthony J. Cox (ac2@sanger.ac.uk)
00028 
00029 // Description:
00030 
00031 // Includes:
00032 
00033 #include "SequenceEncoder.h"
00034 #include "SequenceReader.h"
00035 #include "SequenceReaderFasta.h"
00036 #include <strstream>
00037 #include <fstream>
00038 
00039 
00040 // ### Function Definitions ###
00041 
00042 // Name:
00043 // Arguments:
00044 // TYPE  NAME  IN/OUT COMMENT
00045 // Returns: TYPE COMMENT
00046 
00047   SequenceReaderFasta::SequenceReaderFasta
00048   ( const char* fileName, SequenceEncoder* pEncoder,
00049     ostream& monitoringStream )
00050    : SequenceReaderFile ( fileName, '>', '>', pEncoder->clone(),
00051                          monitoringStream )
00052   {
00053     monitoringStream_ << "constructing SequenceReaderFasta"  << endl;
00054   } // ~constructor
00055 
00056   SequenceReaderFasta::SequenceReaderFasta
00057   ( const char* fileName,
00058     ostream& monitoringStream )
00059    : SequenceReaderFile ( fileName, '>', '>', new SequenceEncoderDNA(12),
00060                          monitoringStream )
00061   {
00062     monitoringStream_ << "constructing SequenceReaderFasta" << endl;
00063   } // ~constructor
00064 
00065   SequenceReaderFastaProtein::SequenceReaderFastaProtein
00066   ( const char* fileName, ostream& monitoringStream ) 
00067   : SequenceReaderFasta ( fileName, new SequenceEncoderProtein(5), 
00068                           monitoringStream )
00069   {
00070     monitoringStream_ << "constructing SequenceReaderFastaProtein" << endl;
00071   } // ~constructor
00072 
00073   SequenceReaderFile::SequenceReaderFile
00074   ( const char* fileName,
00075     char seqStartChar,
00076     char seqStopChar,
00077     SequenceEncoder* pEncoder,
00078     ostream& monitoringStream )
00079     : SequenceReader( monitoringStream ),
00080       seqStartChar_( seqStartChar ),
00081       seqStopChar_( seqStopChar ),
00082       //      lastSourceSeqNum_( 0 ),
00083       pInputFileStream_( new ifstream( fileName ) ),
00084       fileName_( fileName ),
00085       pEncoder_( pEncoder )
00086   {
00087 
00088     monitoringStream_ << "constructing SequenceReaderFile" << this << endl;
00089     if ( pInputFileStream_->fail() )
00090     {
00091       throw SSAHAException
00092       (   (string)"SequenceReaderFile - unable to open input file "
00093         + (string)fileName );
00094 
00095     } // ~if
00096     monitoringStream_ << "SequenceReaderFile::connected to file "
00097                       << fileName << endl;
00098 
00099   } // ~constructor
00100 
00101   SequenceReaderFile::SequenceReaderFile
00102   ( istream& inputStream,
00103     char seqStartChar,
00104     char seqStopChar,
00105     SequenceEncoder* pEncoder,
00106     ostream& monitoringStream )
00107     : SequenceReader( monitoringStream ),
00108       seqStartChar_( seqStartChar ),
00109       seqStopChar_( seqStopChar ),
00110       //   lastSourceSeqNum_( 0 ),
00111       pInputFileStream_( &inputStream ),
00112       //      fileName_( fileName ),
00113       pEncoder_( pEncoder )
00114   {
00115 
00116     monitoringStream_ << "constructing SequenceReaderFile" << this << endl;
00117     if ( pInputFileStream_->fail() )
00118     {
00119       throw SSAHAException
00120       ( "SequenceReaderFile - unable to open input stream " );
00121 
00122     } // ~if
00123     monitoringStream_ << "SequenceReaderFile::connected to input file stream."
00124                       << endl;
00125 
00126   } // ~constructor
00127 
00128 
00129 
00130     
00131 
00132   SequenceReaderFile::SequenceReaderFile( const SequenceReaderFile& rhs) 
00133   : SequenceReader( rhs.monitoringStream_ ),
00134     seqStartChar_( rhs.seqStartChar_ ),
00135     seqStopChar_( rhs.seqStopChar_ ),
00136     pInputFileStream_( new ifstream( rhs.fileName_.c_str() ) ),
00137     fileName_( rhs.fileName_.c_str() ),
00138     seqPositions_( rhs.seqPositions_ ),
00139     //   lastSourceSeqNum_(0),
00140     pEncoder_( rhs.pEncoder_->clone() )
00141   {
00142     monitoringStream_ << "copy constructing SequenceReaderFile" << this 
00143                       << endl;
00144     if (pInputFileStream_->fail())
00145     {
00146        throw SSAHAException
00147              (   (string)"SequenceReaderFile - unable to open input file " 
00148                + (string)fileName_ );
00149 
00150     } // ~if
00151 
00152     monitoringStream_ << "SequenceReaderFile:SequenceReader: "
00153                       << "connected to file " << fileName_ << endl;
00154 
00155   } // ~copy constructor
00156 
00157   SequenceReaderFile::~SequenceReaderFile()
00158   {
00159     monitoringStream_ << "destructing SequenceReaderFile" << this << endl;
00160     delete pEncoder_;
00161     ifstream*  pf(dynamic_cast<ifstream*>(pInputFileStream_));
00162     if (pf!=NULL) 
00163     {
00164         pf->close();
00165         delete pInputFileStream_;
00166     }
00167   } // ~destructor
00168 
00169   // Function Name: changeMode
00170   // Arguments: const SequenceReaderMode&
00171   // Makes a copy of mode and uses it to handle mismatch character reads
00172   void SequenceReaderFile::changeMode( SequenceReaderMode* pMode )
00173   {
00174     pEncoder_->changeMode( pMode );
00175   }
00176 
00177   // Function Name: rewind
00178   // Arguments: void
00179   // Returns:   void
00180   // Rewind to the start of the data file, so that getNextSequence will
00181   // return the first sequence in the file. NB Also clears any error flags
00182   // that are set on pInputFileStream_ 
00183   void SequenceReaderFile::rewind( void ) 
00184   {
00185     // rewind to start of file
00186     pInputFileStream_->clear();
00187     pInputFileStream_->seekg( 0 );
00188     lastSequenceNumber_ = 0;
00189   } // ~SequenceReaderFile::rewind( void )
00190 
00191   // Function Name: findSequence
00192   // Arguments: SequenceNumber (in)
00193   // Returns:   void
00194   // Winds the input file stream to the start of sequence number seqNum. 
00195   // Throws an exception if seqNum exceeds the number of sequences in
00196   // the file.
00197   bool SequenceReaderFile::findSequence( SequenceNumber seqNum )
00198   {
00199     
00200     //    cout << "SRF::findSeq start" << pInputFileStream_->tellg() << endl; 
00201     pInputFileStream_->clear();
00202     if ( seqNum ==0 )
00203     { 
00204       monitoringStream_ 
00205         << "SRF::findSeq: Requested sequence number (0) is not valid (must start at 1)." 
00206         << endl;
00207       return false;
00208     } // ~if
00209     else if ( seqNum <= seqPositions_.size() )
00210     {
00211       pInputFileStream_->seekg(seqPositions_[ seqNum - 1 ],ios::beg);
00212     } // ~if
00213     else if ( allSequencesRead_ == false )
00214     { // need to find more sequences in the file
00215 
00216       if ( seqPositions_.size() != 0 )
00217       {
00218         pInputFileStream_->seekg
00219         (seqPositions_.back(), ios::beg);
00220         pInputFileStream_->getline
00221         ( inputBuffer_ , inputBufferSize_, '\n' );
00222       } // ~if
00223    
00224       while(1)
00225       {
00226         //        pInputFileStream_->getline
00227         //      ( inputBuffer_ , inputBufferSize_, '\n' );
00228               //     getline( *pInputFileStream_, inputBuffer_, '\n' );
00229         if (pInputFileStream_->peek() == EOF) 
00230         {
00231           numSequencesInFile_ = seqPositions_.size();
00232           //      cout << "SRF::findSeq: asr = true" << endl;
00233           allSequencesRead_ = true;
00234           monitoringStream_ 
00235           << "SRF::findSeq(EOF): Requested sequence number (" << seqNum 
00236           << ")\nis outside range of sequences in file (1 to " 
00237           << numSequencesInFile_ << ")." << endl;
00238           //          throw NumberOutOfRange();
00239           // cout << "SRF::findSeq end" << pInputFileStream_->tellg() << endl; 
00240           return false;
00241         } // ~if
00242         else if (pInputFileStream_->peek() == seqStartChar_) 
00243         {
00244           seqPositions_.push_back(pInputFileStream_->tellg());
00245           if( seqPositions_.size() == seqNum ) break;
00246         } // ~else if
00247         pInputFileStream_->getline
00248         ( inputBuffer_ , inputBufferSize_, '\n' );
00249       } // ~while
00250       //      while ( seqPositions_.size() != seqNum );
00251 
00252     lastSequenceNumber_ = seqNum - 1;
00253     
00254     } // ~else if
00255     else
00256     { 
00257       monitoringStream_ 
00258         << "SRF::findSeq(asr): Requested sequence number (" << seqNum 
00259         << ")\nis outside range of sequences in file (1 to " 
00260         << numSequencesInFile_ << ")." << endl;
00261       //      throw NumberOutOfRange();
00262 
00263       //      if (seqNum == numSequencesInFile_+1)
00264       //     {
00265       //        cout << "SRF::findSeq - flicking to end" << endl; 
00266       //        pInputFileStream_->seekg( 0, ios::end );
00267       //  }
00268       //  cout << "SRF::findSeq end" << pInputFileStream_->tellg() << endl; 
00269       return false;
00270     } // ~else
00271 
00272     lastSequenceNumber_ = seqNum - 1;
00273     //  cout << "SRF::findSeq end" << pInputFileStream_->tellg() << endl; 
00274     return true;
00275   } // ~SequenceReaderFile::findSequence( SequenceNumber seqNum )
00276 
00277 
00278   // Function Name: getNextSequence
00279   // Arguments: WordSequence& (out), int (in)
00280   // Returns:   bool
00281   // Read the next set of sequence information from the file and parse it
00282   // into WordSequence format
00283   int SequenceReaderFile::getNextSequence
00284   ( WordSequence& nextSeq, int wordLength )
00285   {
00286     DEBUG_L2( "SequenceReaderFile::getNextSequence" );
00287     //   cout << "SRF::getNext start" << pInputFileStream_->tellg() << endl; 
00288     //    char firstOfLine;
00289     // `Interesting' standard library quirk: even though we are exclusively
00290     // reading chars firstOfLine must be an int (cos EOF is an int not a char)
00291     int firstOfLine;
00292     pInputFileStream_->clear();
00293     firstOfLine = pInputFileStream_->peek();
00294     if ( firstOfLine == EOF )
00295     {
00296       monitoringStream_ << "End of file has been reached." << endl;
00297       if ( allSequencesRead_ == false )
00298       {
00299         monitoringStream_ << "Setting numSequencesInFile to " 
00300                           << lastSequenceNumber_ << "." << endl;
00301         numSequencesInFile_ = lastSequenceNumber_;
00302         //  cout << "SRF::getNextSeq: asr = true" << endl;
00303         //  cout << pInputFileStream_->tellg() << endl;
00304           assert(!pInputFileStream_->fail());
00305           
00306         allSequencesRead_ = true;
00307       } // ~if
00308       // cout << "SRF::getNext end" << pInputFileStream_->tellg() << endl; 
00309       return -1;
00310     } // ~if
00311 
00312     std::streampos startPos( pInputFileStream_->tellg() );     
00313 
00314     if ( (char)firstOfLine == seqStartChar_ )
00315     { // then everything until the next '\n' is side info 
00316       //      pInputFileStream_->getline( sideInfoBuffer_, sideInfoBufferSize_, '\n' );
00317       getline( *pInputFileStream_, sideInfoBuffer_, '\n' );
00318       DEBUG_L2( "Read " << pInputFileStream_->gcount()
00319                         << " bytes of side info to buffer" );
00320     } // ~if
00321     else
00322     {
00323       monitoringStream_ 
00324         << "Error: file not in expected format (expected \"" 
00325         << seqStartChar_ << "\", received \"" << (char) firstOfLine
00326         << "\" instead).\n";
00327       throw SSAHAException("Data in wrong format!");
00328       //      sideInfoBuffer_="No side info for sequence.";
00329     } // ~else
00330 
00331     //    int basesInSequence(0); // this accumulates to the total number of
00332     // bases in the sequence (may be spread across multiple lines)
00333     //    Word thisWord(0);
00334 
00335     pEncoder_->setWordLength(wordLength);
00336     pEncoder_->linkSeq(nextSeq);
00337 
00338     while (1==1)
00339     {
00340 
00341       int numChars; // this is the number of bases on a line of FASTA file
00342                     // info (i.e. from one '\n' to the next) 
00343       firstOfLine = pInputFileStream_->peek();
00344 
00345       if (((char)firstOfLine == seqStopChar_) || ( firstOfLine == EOF )) break;
00346       pInputFileStream_->getline( inputBuffer_, inputBufferSize_, '\n' );
00347       //          getline( *pInputFileStream_, inputBuffer_, '\n' );
00348 
00349       DEBUG_L2( "Read " << pInputFileStream_->gcount()
00350                         << " bytes of sequence data to buffer" );
00351 
00352 
00353           numChars = pInputFileStream_->gcount() - 1; 
00354           // the '-1' accounts for '\n' at end of read
00355           //          numChars = inputBuffer_.size();
00356 
00357       pEncoder_->encode( inputBuffer_, numChars );
00358 
00359     }; // ~while
00360 
00361     pEncoder_->unlinkSeq();
00362 
00363     //    shiftLastWord( nextSeq, basesInSequence, wordLength );
00364 
00365     // If the last word is not completely full (as will be the case if
00366     // wordLength does not divide numBases) then shift the word so the
00367     // valid bases occupy the most significant bits of the word.
00368     //    nextSeq.numBasesInLast = basesInSequence % wordLength;
00369     //    thisWord = ( numBasesInLast == 0 ) ? 0 
00370     //     : thisWord << 2 * ( wordLength - numBasesInLast - 1);
00371     //   nextSeq.push_back(thisWord);
00372     //    if ( nextSeq.numBasesInLast != 0 )  
00373     //    nextSeq.back() <<= 2 * ( wordLength - nextSeq.numBasesInLast - 1);
00374 
00375     //    DEBUG_L2(    "There were " << basesInSequence
00376     //        << " base pairs in this sequence." );
00377       
00378     //    DEBUG_L2(    "Last word (" << printWord( nextSeq.back(), wordLength )
00379     //        << ") contains " << nextSeq.numBasesInLast
00380     //        << " valid base pairs" );
00381 
00382     DEBUG_L2("Finished reading sequence " << lastSequenceNumber_ );
00383 
00384     lastSequenceNumber_++;
00385 
00386     if ( lastSequenceNumber_ > seqPositions_.size() )
00387     {
00388       seqPositions_.push_back( startPos );
00389     } // ~if
00390     //    cout << "SRF::getNext end" << pInputFileStream_->tellg() << endl; 
00391 
00392     return nextSeq.getNumBasesInLast(); 
00393 
00394   } // ~SequenceReaderFile::getNextSequence
00395 
00396   // Function Name: getSequence
00397   // Arguments: WordSequence& (out), int (in), int (in)
00398   // Returns:   bool
00399   // Read the sequenceNumber-th set of sequence information from the file and 
00400   // parse it into WordSequence format
00401   int SequenceReaderFile::getSequence 
00402   ( WordSequence& nextSeq, SequenceNumber sequenceNumber, int wordLength )
00403   {
00404     DEBUG_L2( "SequenceReaderFile::getSequence" );
00405 
00406     if (!findSequence( sequenceNumber )) return -1;
00407     else return getNextSequence( nextSeq, wordLength );
00408 
00409   } // ~SequenceReaderFile::getSequence 
00410 
00411   // Function Name: getLastSequenceName
00412   // Arguments: string& (out)
00413   // Returns:   void
00414   // Fills the string with the name of the last sequence read 
00415   void SequenceReaderFile::getLastSequenceName( string& seqName ) const
00416   {
00417     DEBUG_L2( "SequenceReaderFile::getLastSequenceName" );
00418     string::size_type nameEnd = sideInfoBuffer_.find_first_of(' ');
00419     if ( nameEnd == string::npos ) nameEnd = sideInfoBuffer_.size();
00420     seqName = sideInfoBuffer_.substr( 1, nameEnd-1 );
00421     DEBUG_L2( "Last sequence name: " << seqName );
00422   } // ~SequenceReaderFile::getLastSequenceName
00423 
00424   // Function Name: printName
00425   // Arguments: ostream& (out), SequenceNumber (in)
00426   // Returns:   void
00427   // Sends the name of the requested sequence to the output stream.
00428   bool SequenceReaderFile::printName
00429   ( ostream& os, SequenceNumber seqNum )
00430   {
00431     DEBUG_L3( "SequenceReaderFile::printName" );
00432 
00433     if (!findSequence( seqNum )) return false;
00434 
00435 
00436     string firstLine;
00437 
00438     getline(*pInputFileStream_,firstLine,'\n');
00439 
00440     string::size_type nameEnd = firstLine.find_first_of(' ');
00441     if ( nameEnd == string::npos ) nameEnd = firstLine.size();
00442 
00443     os << firstLine.substr( 1, nameEnd ); 
00444 
00445     findSequence( seqNum );
00446 
00447     return true;
00448   } // ~SequenceReaderFile::printName
00449 
00450 
00451   // Function Name: printSideInfo
00452   // Arguments: ostream& (out), SequenceNumber (in)
00453   // Returns:   void
00454   // Sends the side info (e.g. clone name) for the requested sequence 
00455   // to the output stream.
00456   bool SequenceReaderFile::printSideInfo
00457   ( ostream& os, SequenceNumber seqNum )
00458   {
00459     DEBUG_L2( "SequenceReaderFile::printSideInfo" );
00460  
00461     if (!findSequence( seqNum )) return false;
00462 
00463     string firstLine;
00464 
00465     getline(*pInputFileStream_,firstLine,'\n');
00466 
00467     string::size_type nameEnd = firstLine.find_first_of(' ');
00468 
00469     if ( nameEnd == string::npos ) os << "No side info for this sequence.";
00470     else os << firstLine.substr(firstLine.find_first_of(' ') ); 
00471 
00472     findSequence( seqNum );
00473 
00474     return true;
00475   } // ~SequenceReaderFile::printSideInfo
00476 
00477   // Function Name: printSource
00478   // Arguments: ostream& (out), SequenceNumber (in)
00479   // Returns:   void
00480   // Send to the output stream the source data (in general, ASCII) from which 
00481   // the requested sequence was decoded.
00482   bool SequenceReaderFile::printSource
00483   ( ostream& os, SequenceNumber seqNum )
00484   {
00485     DEBUG_L2( "SequenceReaderFile::getSequenceSource" );
00486 
00487     if (!findSequence( seqNum )) return false;
00488 
00489     int firstOfLine((int)'x');
00490     while( ( (char)firstOfLine != seqStartChar_ ) && ( firstOfLine != EOF ) )
00491     {
00492       pInputFileStream_->getline( inputBuffer_, inputBufferSize_, '\n' );
00493       os << inputBuffer_ << endl;
00494       firstOfLine = pInputFileStream_->peek();
00495     } // ~while
00496 
00497     findSequence( seqNum );
00498 
00499     return true;
00500   } // ~SequenceReaderFile::printSource
00501 
00502   // Function Name: computeNumSequencesInFile
00503   // Arguments: void
00504   // Returns:   int
00505   // Returns the number of sequences in the file (will be done by lazy 
00506   // initialization, i.e. will only be calculated if asked for. NB this
00507   // will lose the current place in the file)
00508   SequenceNumber SequenceReaderFile::computeNumSequencesInFile( void ) 
00509   {
00510     DEBUG_L2( "SequenceReaderFile::computeNumSequencesInFile" );
00511 
00512     SequenceNumber numSeqs = 0;
00513 
00514     // rewind to start of file
00515     pInputFileStream_->clear();
00516     pInputFileStream_->seekg( 0 );
00517     seqPositions_.clear();
00518 
00519     while ( pInputFileStream_->peek() != EOF )
00520     {
00521       if (pInputFileStream_->peek() == seqStartChar_) 
00522       {
00523         numSeqs++;
00524         seqPositions_.push_back(pInputFileStream_->tellg());
00525       } // ~if
00526               pInputFileStream_->getline
00527                ( inputBuffer_ , inputBufferSize_, '\n' );
00528               //     getline( *pInputFileStream_, inputBuffer_, '\n' );
00529     } // ~while
00530 
00531     lastSequenceNumber_ = numSeqs;
00532 
00533     DEBUG_L2( "Found " << numSeqs << " sequences in the file." );
00534 
00535     return numSeqs;
00536 
00537   } // ~SequenceReaderFile::computeNumSequencesInFile( void ) const
00538 
00539   // Function Name: getBitsPerSymbol
00540   // Arguments: none
00541   // Returns:   int
00542   // Returns number of bits per symbol used in encoding
00543 int SequenceReaderFile::getBitsPerSymbol ( void ) const
00544 {
00545   return pEncoder_->getBitsPerSymbol();
00546 } // ~SequenceReaderFile::getBitsPerSymbol ( void ) const
00547 
00548   // Function Name: getSourceDataType
00549   // Arguments: none
00550   // Returns:   SourceDataType
00551   // Returns type of data being encoded (protein or DNA)
00552 SourceDataType SequenceReaderFile::getSourceDataType( void ) const
00553 {
00554   return pEncoder_->getSourceDataType();
00555 }
00556 
00557 
00558 // Function Name: extractSource
00559 // This extracts the source data for bases seqStart to seqEnd inclusive
00560 // of sequence seqNum and places it in source
00561 void SequenceReaderFile::extractSource
00562 ( char** pSource, //vector<char>& source, 
00563   SequenceNumber seqNum,
00564   SequenceOffset seqStart,
00565   SequenceOffset seqEnd )
00566 {
00567 
00568   if (seqNum!= lastSourceSeqNum_)
00569   {
00570 
00571     SequenceReaderState* pState(saveState());
00572 
00573     if (!findSequence( seqNum )) 
00574     {
00575 
00576       throw SSAHAException
00577         ("Could not find start of seq in SequenceReaderFile::extractSource");
00578     } // ~if
00579 
00580     //    cout << "moved to " << pInputFileStream_->tellg() << endl;
00581 
00582     // Check that this is a sensible place for sequence start
00583     //    int firstOfLine(pInputFileStream_->peek());
00584     //  if (((char)firstOfLine)!=seqStartChar_)
00585     //   {
00586     //   throw SSAHAException
00587     //  ("Did not find expected start char in SequenceReaderFile::extractSource");
00588     //  } // ~if    
00589 
00590 
00591     extractToCache( pInputFileStream_ );
00592     lastSourceSeqNum_=seqNum;
00593 
00594     // return to original position in file
00595     restoreState(pState);
00596 
00597   } // ~if
00598 
00599   // check for japes and tomfoolery
00600 
00601   if ( seqStart>seqEnd)
00602   {
00603       throw SSAHAException
00604         ("Requested seq start exceeds requested seq end in SequenceReaderFile::extractSource");
00605   } // ~if
00606   else if (seqEnd>lastSourceSeq_.size() )
00607   {
00608       throw SSAHAException
00609         ("Requested last byte exceeds end of seq in SequenceReaderFile::extractSource");
00610   } // ~else if
00611 
00612   *pSource = &lastSourceSeq_[seqStart-1];
00613   // copy relevant bit of sequence across (TBD this is a waste, just return ptr)
00614   //  vector<char>::size_type currentSize(source.size());
00615   //  source.resize(currentSize+seqEnd-seqStart+1);
00616   //  memcpy( (char*)&source[currentSize], (char*)&lastSourceSeq_[seqStart-1], seqEnd-seqStart+1);
00617   //  cout << "SRF::extract end" << pInputFileStream_->tellg() << endl; 
00618 
00619 } // ~SequenceReaderFile::extractSource
00620 
00621 
00622 
00623 void SequenceReaderFile::saveIndexImp
00624 ( ostream& fileFile, 
00625   ostream& indexFile, 
00626   int& fileNumber )
00627 {
00628   computeNumSequencesInFile(); // ensure have scanned to end of file
00629   fileFile << fileName_ << endl;
00630   SeqIndexInfo* pIndex = new SeqIndexInfo[seqPositions_.size()];
00631   for (int i(0) ; i < seqPositions_.size() ; i++)
00632   {
00633     pIndex[i].fileNum=fileNumber;
00634     pIndex[i].seqPos=seqPositions_[i];
00635   } // ~for
00636 
00637   indexFile.write( (const char*)pIndex, 
00638                    seqPositions_.size()*sizeof(SeqIndexInfo) );
00639   delete [] pIndex;
00640 } // ~SequenceReaderFile::saveIndex
00641 
00642 
00643 
00644 // End of file SequenceReaderFasta.cpp
00645 
00646 
00647 
00648 

Generated on Fri Dec 21 13:12:16 2007 for ssaha by  doxygen 1.5.2