SequenceReaderFile Class Reference

#include <SequenceReaderFasta.h>

Inheritance diagram for SequenceReaderFile:

Inheritance graph
[legend]
Collaboration diagram for SequenceReaderFile:

Collaboration graph
[legend]
List of all members.

Public Member Functions

 SequenceReaderFile (const char *fileName, char seqStartChar, char seqStopChar, SequenceEncoder *pEncoder, ostream &monitoringStream=cerr)
 SequenceReaderFile (istream &inputStream, char seqStartChar, char seqStopChar, SequenceEncoder *pEncoder, ostream &monitoringStream=cerr)
 SequenceReaderFile (const SequenceReaderFile &rhs)
virtual ~SequenceReaderFile ()
virtual SequenceReader * clone (void)
virtual void changeMode (SequenceReaderMode *pMode)
void rewind (void)
virtual bool findSequence (SequenceNumber seqNum)
virtual int getNextSequence (WordSequence &nextSeq, int wordLength)
virtual int getSequence (WordSequence &nextSeq, SequenceNumber sequenceNumber, int wordLength)
virtual void getLastSequenceName (string &seqName) const
virtual int getBitsPerSymbol (void) const
virtual SourceDataType getSourceDataType (void) const
virtual bool printName (ostream &os, SequenceNumber seqNum)
virtual bool printSideInfo (ostream &os, SequenceNumber seqNum)
virtual bool printSource (ostream &os, SequenceNumber seqNum)
virtual void extractSource (char **pSource, SequenceNumber seqNum, SequenceOffset seqStart, SequenceOffset seqEnd)
virtual void saveIndexImp (ostream &fileFile, ostream &indexFile, int &fileNumber)
virtual SequenceReaderState * saveState (void) const
virtual void restoreState (SequenceReaderState *pState)

Protected Types

enum  Constants { sideInfoBufferSize_ = 20000, inputBufferSize_ = 20000 }

Protected Member Functions

virtual SequenceNumber computeNumSequencesInFile (void)

Protected Attributes

char inputBuffer_ [inputBufferSize_]
string sideInfoBuffer_
char seqStartChar_
char seqStopChar_
istream * pInputFileStream_
string fileName_
SequenceEncoder * pEncoder_
vector< std::streampos > seqPositions_

Private Member Functions

SequenceReaderFile & operator= (const SequenceReaderFile &)

Detailed Description

Definition at line 67 of file SequenceReaderFasta.h.


Member Enumeration Documentation

enum SequenceReaderFile::Constants [protected]

Enumerator:
sideInfoBufferSize_ 
inputBufferSize_ 

Definition at line 253 of file SequenceReaderFasta.h.

00253 { sideInfoBufferSize_ = 20000, inputBufferSize_ = 20000 };


Constructor & Destructor Documentation

SequenceReaderFile::SequenceReaderFile ( const char *  fileName,
char  seqStartChar,
char  seqStopChar,
SequenceEncoder *  pEncoder,
ostream &  monitoringStream = cerr 
)

Definition at line 74 of file SequenceReaderFasta.cpp.

Referenced by clone().

00079     : SequenceReader( monitoringStream ),
00080       seqStartChar_( seqStartChar ),
00081       seqStopChar_( seqStopChar ),
00082       //      lastSourceSeqNum_( 0 ),
00083       pInputFileStream_( new ifstream( fileName ) ),
00084       fileName_( fileName ),
00085       pEncoder_( pEncoder )
00086   {
00087 
00088     monitoringStream_ << "constructing SequenceReaderFile" << this << endl;
00089     if ( pInputFileStream_->fail() )
00090     {
00091       throw SSAHAException
00092       (   (string)"SequenceReaderFile - unable to open input file "
00093         + (string)fileName );
00094 
00095     } // ~if
00096     monitoringStream_ << "SequenceReaderFile::connected to file "
00097                       << fileName << endl;
00098 
00099   } // ~constructor

Here is the caller graph for this function:

SequenceReaderFile::SequenceReaderFile ( istream &  inputStream,
char  seqStartChar,
char  seqStopChar,
SequenceEncoder *  pEncoder,
ostream &  monitoringStream = cerr 
)

Definition at line 102 of file SequenceReaderFasta.cpp.

00107     : SequenceReader( monitoringStream ),
00108       seqStartChar_( seqStartChar ),
00109       seqStopChar_( seqStopChar ),
00110       //   lastSourceSeqNum_( 0 ),
00111       pInputFileStream_( &inputStream ),
00112       //      fileName_( fileName ),
00113       pEncoder_( pEncoder )
00114   {
00115 
00116     monitoringStream_ << "constructing SequenceReaderFile" << this << endl;
00117     if ( pInputFileStream_->fail() )
00118     {
00119       throw SSAHAException
00120       ( "SequenceReaderFile - unable to open input stream " );
00121 
00122     } // ~if
00123     monitoringStream_ << "SequenceReaderFile::connected to input file stream."
00124                       << endl;
00125 
00126   } // ~constructor

SequenceReaderFile::SequenceReaderFile ( const SequenceReaderFile &  rhs  ) 

Definition at line 132 of file SequenceReaderFasta.cpp.

References fileName_, SequenceReader::monitoringStream_, and pInputFileStream_.

00133   : SequenceReader( rhs.monitoringStream_ ),
00134     seqStartChar_( rhs.seqStartChar_ ),
00135     seqStopChar_( rhs.seqStopChar_ ),
00136     pInputFileStream_( new ifstream( rhs.fileName_.c_str() ) ),
00137     fileName_( rhs.fileName_.c_str() ),
00138     seqPositions_( rhs.seqPositions_ ),
00139     //   lastSourceSeqNum_(0),
00140     pEncoder_( rhs.pEncoder_->clone() )
00141   {
00142     monitoringStream_ << "copy constructing SequenceReaderFile" << this 
00143                       << endl;
00144     if (pInputFileStream_->fail())
00145     {
00146        throw SSAHAException
00147              (   (string)"SequenceReaderFile - unable to open input file " 
00148                + (string)fileName_ );
00149 
00150     } // ~if
00151 
00152     monitoringStream_ << "SequenceReaderFile:SequenceReader: "
00153                       << "connected to file " << fileName_ << endl;
00154 
00155   } // ~copy constructor

SequenceReaderFile::~SequenceReaderFile (  )  [virtual]

Definition at line 157 of file SequenceReaderFasta.cpp.

References SequenceReader::monitoringStream_, pEncoder_, and pInputFileStream_.

00158   {
00159     monitoringStream_ << "destructing SequenceReaderFile" << this << endl;
00160     delete pEncoder_;
00161     ifstream*  pf(dynamic_cast<ifstream*>(pInputFileStream_));
00162     if (pf!=NULL) 
00163     {
00164         pf->close();
00165         delete pInputFileStream_;
00166     }
00167   } // ~destructor


Member Function Documentation

virtual SequenceReader* SequenceReaderFile::clone ( void   )  [inline, virtual]

Implements SequenceReader.

Reimplemented in SequenceReaderFastq.

Definition at line 107 of file SequenceReaderFasta.h.

References SequenceReaderFile().

Referenced by main().

00108   { return new SequenceReaderFile( *this ); }

Here is the call graph for this function:

Here is the caller graph for this function:

void SequenceReaderFile::changeMode ( SequenceReaderMode *  pMode  )  [virtual]

Implements SequenceReader.

Definition at line 172 of file SequenceReaderFasta.cpp.

References SequenceEncoder::changeMode(), and pEncoder_.

Referenced by main().

00173   {
00174     pEncoder_->changeMode( pMode );
00175   }

Here is the call graph for this function:

Here is the caller graph for this function:

void SequenceReaderFile::rewind ( void   )  [virtual]

Implements SequenceReader.

Definition at line 183 of file SequenceReaderFasta.cpp.

References SequenceReader::lastSequenceNumber_, and pInputFileStream_.

Referenced by main().

00184   {
00185     // rewind to start of file
00186     pInputFileStream_->clear();
00187     pInputFileStream_->seekg( 0 );
00188     lastSequenceNumber_ = 0;
00189   } // ~SequenceReaderFile::rewind( void )

Here is the caller graph for this function:

bool SequenceReaderFile::findSequence ( SequenceNumber  seqNum  )  [virtual]

Implements SequenceReader.

Definition at line 197 of file SequenceReaderFasta.cpp.

References SequenceReader::allSequencesRead_, inputBuffer_, inputBufferSize_, SequenceReader::lastSequenceNumber_, SequenceReader::monitoringStream_, SequenceReader::numSequencesInFile_, pInputFileStream_, seqPositions_, and seqStartChar_.

Referenced by extractSource(), getSequence(), main(), printName(), printSideInfo(), and printSource().

00198   {
00199     
00200     //    cout << "SRF::findSeq start" << pInputFileStream_->tellg() << endl; 
00201     pInputFileStream_->clear();
00202     if ( seqNum ==0 )
00203     { 
00204       monitoringStream_ 
00205         << "SRF::findSeq: Requested sequence number (0) is not valid (must start at 1)." 
00206         << endl;
00207       return false;
00208     } // ~if
00209     else if ( seqNum <= seqPositions_.size() )
00210     {
00211       pInputFileStream_->seekg(seqPositions_[ seqNum - 1 ],ios::beg);
00212     } // ~if
00213     else if ( allSequencesRead_ == false )
00214     { // need to find more sequences in the file
00215 
00216       if ( seqPositions_.size() != 0 )
00217       {
00218         pInputFileStream_->seekg
00219         (seqPositions_.back(), ios::beg);
00220         pInputFileStream_->getline
00221         ( inputBuffer_ , inputBufferSize_, '\n' );
00222       } // ~if
00223    
00224       while(1)
00225       {
00226         //        pInputFileStream_->getline
00227         //      ( inputBuffer_ , inputBufferSize_, '\n' );
00228               //     getline( *pInputFileStream_, inputBuffer_, '\n' );
00229         if (pInputFileStream_->peek() == EOF) 
00230         {
00231           numSequencesInFile_ = seqPositions_.size();
00232           //      cout << "SRF::findSeq: asr = true" << endl;
00233           allSequencesRead_ = true;
00234           monitoringStream_ 
00235           << "SRF::findSeq(EOF): Requested sequence number (" << seqNum 
00236           << ")\nis outside range of sequences in file (1 to " 
00237           << numSequencesInFile_ << ")." << endl;
00238           //          throw NumberOutOfRange();
00239           // cout << "SRF::findSeq end" << pInputFileStream_->tellg() << endl; 
00240           return false;
00241         } // ~if
00242         else if (pInputFileStream_->peek() == seqStartChar_) 
00243         {
00244           seqPositions_.push_back(pInputFileStream_->tellg());
00245           if( seqPositions_.size() == seqNum ) break;
00246         } // ~else if
00247         pInputFileStream_->getline
00248         ( inputBuffer_ , inputBufferSize_, '\n' );
00249       } // ~while
00250       //      while ( seqPositions_.size() != seqNum );
00251 
00252     lastSequenceNumber_ = seqNum - 1;
00253     
00254     } // ~else if
00255     else
00256     { 
00257       monitoringStream_ 
00258         << "SRF::findSeq(asr): Requested sequence number (" << seqNum 
00259         << ")\nis outside range of sequences in file (1 to " 
00260         << numSequencesInFile_ << ")." << endl;
00261       //      throw NumberOutOfRange();
00262 
00263       //      if (seqNum == numSequencesInFile_+1)
00264       //     {
00265       //        cout << "SRF::findSeq - flicking to end" << endl; 
00266       //        pInputFileStream_->seekg( 0, ios::end );
00267       //  }
00268       //  cout << "SRF::findSeq end" << pInputFileStream_->tellg() << endl; 
00269       return false;
00270     } // ~else
00271 
00272     lastSequenceNumber_ = seqNum - 1;
00273     //  cout << "SRF::findSeq end" << pInputFileStream_->tellg() << endl; 
00274     return true;
00275   } // ~SequenceReaderFile::findSequence( SequenceNumber seqNum )

Here is the caller graph for this function:

int SequenceReaderFile::getNextSequence ( WordSequence &  nextSeq,
int  wordLength 
) [virtual]

Implements SequenceReader.

Reimplemented in SequenceReaderFastq.

Definition at line 284 of file SequenceReaderFasta.cpp.

References SequenceReader::allSequencesRead_, DEBUG_L2, SequenceEncoder::encode(), WordSequence::getNumBasesInLast(), inputBuffer_, inputBufferSize_, SequenceReader::lastSequenceNumber_, SequenceEncoder::linkSeq(), SequenceReader::monitoringStream_, SequenceReader::numSequencesInFile_, pEncoder_, pInputFileStream_, seqPositions_, seqStartChar_, seqStopChar_, SequenceEncoder::setWordLength(), sideInfoBuffer_, and SequenceEncoder::unlinkSeq().

Referenced by SequenceReaderFastq::getNextSequence(), getSequence(), and main().

00285   {
00286     DEBUG_L2( "SequenceReaderFile::getNextSequence" );
00287     //   cout << "SRF::getNext start" << pInputFileStream_->tellg() << endl; 
00288     //    char firstOfLine;
00289     // `Interesting' standard library quirk: even though we are exclusively
00290     // reading chars firstOfLine must be an int (cos EOF is an int not a char)
00291     int firstOfLine;
00292     pInputFileStream_->clear();
00293     firstOfLine = pInputFileStream_->peek();
00294     if ( firstOfLine == EOF )
00295     {
00296       monitoringStream_ << "End of file has been reached." << endl;
00297       if ( allSequencesRead_ == false )
00298       {
00299         monitoringStream_ << "Setting numSequencesInFile to " 
00300                           << lastSequenceNumber_ << "." << endl;
00301         numSequencesInFile_ = lastSequenceNumber_;
00302         //  cout << "SRF::getNextSeq: asr = true" << endl;
00303         //  cout << pInputFileStream_->tellg() << endl;
00304           assert(!pInputFileStream_->fail());
00305           
00306         allSequencesRead_ = true;
00307       } // ~if
00308       // cout << "SRF::getNext end" << pInputFileStream_->tellg() << endl; 
00309       return -1;
00310     } // ~if
00311 
00312     std::streampos startPos( pInputFileStream_->tellg() );     
00313 
00314     if ( (char)firstOfLine == seqStartChar_ )
00315     { // then everything until the next '\n' is side info 
00316       //      pInputFileStream_->getline( sideInfoBuffer_, sideInfoBufferSize_, '\n' );
00317       getline( *pInputFileStream_, sideInfoBuffer_, '\n' );
00318       DEBUG_L2( "Read " << pInputFileStream_->gcount()
00319                         << " bytes of side info to buffer" );
00320     } // ~if
00321     else
00322     {
00323       monitoringStream_ 
00324         << "Error: file not in expected format (expected \"" 
00325         << seqStartChar_ << "\", received \"" << (char) firstOfLine
00326         << "\" instead).\n";
00327       throw SSAHAException("Data in wrong format!");
00328       //      sideInfoBuffer_="No side info for sequence.";
00329     } // ~else
00330 
00331     //    int basesInSequence(0); // this accumulates to the total number of
00332     // bases in the sequence (may be spread across multiple lines)
00333     //    Word thisWord(0);
00334 
00335     pEncoder_->setWordLength(wordLength);
00336     pEncoder_->linkSeq(nextSeq);
00337 
00338     while (1==1)
00339     {
00340 
00341       int numChars; // this is the number of bases on a line of FASTA file
00342                     // info (i.e. from one '\n' to the next) 
00343       firstOfLine = pInputFileStream_->peek();
00344 
00345       if (((char)firstOfLine == seqStopChar_) || ( firstOfLine == EOF )) break;
00346       pInputFileStream_->getline( inputBuffer_, inputBufferSize_, '\n' );
00347       //          getline( *pInputFileStream_, inputBuffer_, '\n' );
00348 
00349       DEBUG_L2( "Read " << pInputFileStream_->gcount()
00350                         << " bytes of sequence data to buffer" );
00351 
00352 
00353           numChars = pInputFileStream_->gcount() - 1; 
00354           // the '-1' accounts for '\n' at end of read
00355           //          numChars = inputBuffer_.size();
00356 
00357       pEncoder_->encode( inputBuffer_, numChars );
00358 
00359     }; // ~while
00360 
00361     pEncoder_->unlinkSeq();
00362 
00363     //    shiftLastWord( nextSeq, basesInSequence, wordLength );
00364 
00365     // If the last word is not completely full (as will be the case if
00366     // wordLength does not divide numBases) then shift the word so the
00367     // valid bases occupy the most significant bits of the word.
00368     //    nextSeq.numBasesInLast = basesInSequence % wordLength;
00369     //    thisWord = ( numBasesInLast == 0 ) ? 0 
00370     //     : thisWord << 2 * ( wordLength - numBasesInLast - 1);
00371     //   nextSeq.push_back(thisWord);
00372     //    if ( nextSeq.numBasesInLast != 0 )  
00373     //    nextSeq.back() <<= 2 * ( wordLength - nextSeq.numBasesInLast - 1);
00374 
00375     //    DEBUG_L2(    "There were " << basesInSequence
00376     //        << " base pairs in this sequence." );
00377       
00378     //    DEBUG_L2(    "Last word (" << printWord( nextSeq.back(), wordLength )
00379     //        << ") contains " << nextSeq.numBasesInLast
00380     //        << " valid base pairs" );
00381 
00382     DEBUG_L2("Finished reading sequence " << lastSequenceNumber_ );
00383 
00384     lastSequenceNumber_++;
00385 
00386     if ( lastSequenceNumber_ > seqPositions_.size() )
00387     {
00388       seqPositions_.push_back( startPos );
00389     } // ~if
00390     //    cout << "SRF::getNext end" << pInputFileStream_->tellg() << endl; 
00391 
00392     return nextSeq.getNumBasesInLast(); 
00393 
00394   } // ~SequenceReaderFile::getNextSequence

Here is the call graph for this function:

Here is the caller graph for this function:

int SequenceReaderFile::getSequence ( WordSequence &  nextSeq,
SequenceNumber  sequenceNumber,
int  wordLength 
) [virtual]

Implements SequenceReader.

Definition at line 402 of file SequenceReaderFasta.cpp.

References DEBUG_L2, findSequence(), and getNextSequence().

Referenced by main().

00403   {
00404     DEBUG_L2( "SequenceReaderFile::getSequence" );
00405 
00406     if (!findSequence( sequenceNumber )) return -1;
00407     else return getNextSequence( nextSeq, wordLength );
00408 
00409   } // ~SequenceReaderFile::getSequence 

Here is the call graph for this function:

Here is the caller graph for this function:

void SequenceReaderFile::getLastSequenceName ( string &  seqName  )  const [virtual]

Implements SequenceReader.

Definition at line 415 of file SequenceReaderFasta.cpp.

References DEBUG_L2, and sideInfoBuffer_.

Referenced by main().

00416   {
00417     DEBUG_L2( "SequenceReaderFile::getLastSequenceName" );
00418     string::size_type nameEnd = sideInfoBuffer_.find_first_of(' ');
00419     if ( nameEnd == string::npos ) nameEnd = sideInfoBuffer_.size();
00420     seqName = sideInfoBuffer_.substr( 1, nameEnd-1 );
00421     DEBUG_L2( "Last sequence name: " << seqName );
00422   } // ~SequenceReaderFile::getLastSequenceName

Here is the caller graph for this function:

int SequenceReaderFile::getBitsPerSymbol ( void   )  const [virtual]

Implements SequenceReader.

Definition at line 543 of file SequenceReaderFasta.cpp.

References SequenceEncoder::getBitsPerSymbol(), and pEncoder_.

Referenced by main().

00544 {
00545   return pEncoder_->getBitsPerSymbol();
00546 } // ~SequenceReaderFile::getBitsPerSymbol ( void ) const

Here is the call graph for this function:

Here is the caller graph for this function:

SourceDataType SequenceReaderFile::getSourceDataType ( void   )  const [virtual]

Implements SequenceReader.

Definition at line 552 of file SequenceReaderFasta.cpp.

References SequenceEncoder::getSourceDataType(), and pEncoder_.

00553 {
00554   return pEncoder_->getSourceDataType();
00555 }

Here is the call graph for this function:

bool SequenceReaderFile::printName ( ostream &  os,
SequenceNumber  seqNum 
) [virtual]

Implements SequenceReader.

Definition at line 429 of file SequenceReaderFasta.cpp.

References DEBUG_L3, findSequence(), and pInputFileStream_.

00430   {
00431     DEBUG_L3( "SequenceReaderFile::printName" );
00432 
00433     if (!findSequence( seqNum )) return false;
00434 
00435 
00436     string firstLine;
00437 
00438     getline(*pInputFileStream_,firstLine,'\n');
00439 
00440     string::size_type nameEnd = firstLine.find_first_of(' ');
00441     if ( nameEnd == string::npos ) nameEnd = firstLine.size();
00442 
00443     os << firstLine.substr( 1, nameEnd ); 
00444 
00445     findSequence( seqNum );
00446 
00447     return true;
00448   } // ~SequenceReaderFile::printName

Here is the call graph for this function:

bool SequenceReaderFile::printSideInfo ( ostream &  os,
SequenceNumber  seqNum 
) [virtual]

Implements SequenceReader.

Definition at line 457 of file SequenceReaderFasta.cpp.

References DEBUG_L2, findSequence(), and pInputFileStream_.

00458   {
00459     DEBUG_L2( "SequenceReaderFile::printSideInfo" );
00460  
00461     if (!findSequence( seqNum )) return false;
00462 
00463     string firstLine;
00464 
00465     getline(*pInputFileStream_,firstLine,'\n');
00466 
00467     string::size_type nameEnd = firstLine.find_first_of(' ');
00468 
00469     if ( nameEnd == string::npos ) os << "No side info for this sequence.";
00470     else os << firstLine.substr(firstLine.find_first_of(' ') ); 
00471 
00472     findSequence( seqNum );
00473 
00474     return true;
00475   } // ~SequenceReaderFile::printSideInfo

Here is the call graph for this function:

bool SequenceReaderFile::printSource ( ostream &  os,
SequenceNumber  seqNum 
) [virtual]

Implements SequenceReader.

Definition at line 483 of file SequenceReaderFasta.cpp.

References DEBUG_L2, findSequence(), inputBuffer_, inputBufferSize_, pInputFileStream_, and seqStartChar_.

00484   {
00485     DEBUG_L2( "SequenceReaderFile::getSequenceSource" );
00486 
00487     if (!findSequence( seqNum )) return false;
00488 
00489     int firstOfLine((int)'x');
00490     while( ( (char)firstOfLine != seqStartChar_ ) && ( firstOfLine != EOF ) )
00491     {
00492       pInputFileStream_->getline( inputBuffer_, inputBufferSize_, '\n' );
00493       os << inputBuffer_ << endl;
00494       firstOfLine = pInputFileStream_->peek();
00495     } // ~while
00496 
00497     findSequence( seqNum );
00498 
00499     return true;
00500   } // ~SequenceReaderFile::printSource

Here is the call graph for this function:

void SequenceReaderFile::extractSource ( char **  pSource,
SequenceNumber  seqNum,
SequenceOffset  seqStart,
SequenceOffset  seqEnd 
) [virtual]

Reimplemented from SourceReader.

Definition at line 562 of file SequenceReaderFasta.cpp.

References SourceReader::extractToCache(), findSequence(), SourceReader::lastSourceSeq_, SourceReader::lastSourceSeqNum_, pInputFileStream_, restoreState(), and saveState().

Referenced by main().

00566 {
00567 
00568   if (seqNum!= lastSourceSeqNum_)
00569   {
00570 
00571     SequenceReaderState* pState(saveState());
00572 
00573     if (!findSequence( seqNum )) 
00574     {
00575 
00576       throw SSAHAException
00577         ("Could not find start of seq in SequenceReaderFile::extractSource");
00578     } // ~if
00579 
00580     //    cout << "moved to " << pInputFileStream_->tellg() << endl;
00581 
00582     // Check that this is a sensible place for sequence start
00583     //    int firstOfLine(pInputFileStream_->peek());
00584     //  if (((char)firstOfLine)!=seqStartChar_)
00585     //   {
00586     //   throw SSAHAException
00587     //  ("Did not find expected start char in SequenceReaderFile::extractSource");
00588     //  } // ~if    
00589 
00590 
00591     extractToCache( pInputFileStream_ );
00592     lastSourceSeqNum_=seqNum;
00593 
00594     // return to original position in file
00595     restoreState(pState);
00596 
00597   } // ~if
00598 
00599   // check for japes and tomfoolery
00600 
00601   if ( seqStart>seqEnd)
00602   {
00603       throw SSAHAException
00604         ("Requested seq start exceeds requested seq end in SequenceReaderFile::extractSource");
00605   } // ~if
00606   else if (seqEnd>lastSourceSeq_.size() )
00607   {
00608       throw SSAHAException
00609         ("Requested last byte exceeds end of seq in SequenceReaderFile::extractSource");
00610   } // ~else if
00611 
00612   *pSource = &lastSourceSeq_[seqStart-1];
00613   // copy relevant bit of sequence across (TBD this is a waste, just return ptr)
00614   //  vector<char>::size_type currentSize(source.size());
00615   //  source.resize(currentSize+seqEnd-seqStart+1);
00616   //  memcpy( (char*)&source[currentSize], (char*)&lastSourceSeq_[seqStart-1], seqEnd-seqStart+1);
00617   //  cout << "SRF::extract end" << pInputFileStream_->tellg() << endl; 
00618 
00619 } // ~SequenceReaderFile::extractSource

Here is the call graph for this function:

Here is the caller graph for this function:

void SequenceReaderFile::saveIndexImp ( ostream &  fileFile,
ostream &  indexFile,
int &  fileNumber 
) [virtual]

Reimplemented from SourceReader.

Definition at line 624 of file SequenceReaderFasta.cpp.

References computeNumSequencesInFile(), fileName_, and seqPositions_.

00627 {
00628   computeNumSequencesInFile(); // ensure have scanned to end of file
00629   fileFile << fileName_ << endl;
00630   SeqIndexInfo* pIndex = new SeqIndexInfo[seqPositions_.size()];
00631   for (int i(0) ; i < seqPositions_.size() ; i++)
00632   {
00633     pIndex[i].fileNum=fileNumber;
00634     pIndex[i].seqPos=seqPositions_[i];
00635   } // ~for
00636 
00637   indexFile.write( (const char*)pIndex, 
00638                    seqPositions_.size()*sizeof(SeqIndexInfo) );
00639   delete [] pIndex;
00640 } // ~SequenceReaderFile::saveIndex

Here is the call graph for this function:

virtual SequenceReaderState* SequenceReaderFile::saveState ( void   )  const [inline, virtual]

Reimplemented from SequenceReader.

Definition at line 210 of file SequenceReaderFasta.h.

References SequenceReader::lastSequenceNumber_, and pInputFileStream_.

Referenced by extractSource().

00211   {
00212     return new SequenceReaderFileState
00213       ( lastSequenceNumber_, pInputFileStream_->tellg() );
00214   }

Here is the caller graph for this function:

virtual void SequenceReaderFile::restoreState ( SequenceReaderState *  pState  )  [inline, virtual]

Reimplemented from SequenceReader.

Definition at line 221 of file SequenceReaderFasta.h.

References SequenceReaderFileState::filePos_, SequenceReaderState::lastSequenceNumber_, SequenceReader::lastSequenceNumber_, and pInputFileStream_.

Referenced by extractSource().

00222   {
00223     SequenceReaderFileState* p
00224       (dynamic_cast<SequenceReaderFileState*>(pState));
00225     assert(p!=NULL);
00226     lastSequenceNumber_ = p->lastSequenceNumber_;
00227     pInputFileStream_->seekg( p->filePos_, ios::beg );
00228     delete pState;
00229   }

Here is the caller graph for this function:

SequenceNumber SequenceReaderFile::computeNumSequencesInFile ( void   )  [protected, virtual]

Implements SequenceReader.

Definition at line 508 of file SequenceReaderFasta.cpp.

References DEBUG_L2, inputBuffer_, inputBufferSize_, SequenceReader::lastSequenceNumber_, pInputFileStream_, seqPositions_, and seqStartChar_.

Referenced by saveIndexImp().

00509   {
00510     DEBUG_L2( "SequenceReaderFile::computeNumSequencesInFile" );
00511 
00512     SequenceNumber numSeqs = 0;
00513 
00514     // rewind to start of file
00515     pInputFileStream_->clear();
00516     pInputFileStream_->seekg( 0 );
00517     seqPositions_.clear();
00518 
00519     while ( pInputFileStream_->peek() != EOF )
00520     {
00521       if (pInputFileStream_->peek() == seqStartChar_) 
00522       {
00523         numSeqs++;
00524         seqPositions_.push_back(pInputFileStream_->tellg());
00525       } // ~if
00526               pInputFileStream_->getline
00527                ( inputBuffer_ , inputBufferSize_, '\n' );
00528               //     getline( *pInputFileStream_, inputBuffer_, '\n' );
00529     } // ~while
00530 
00531     lastSequenceNumber_ = numSeqs;
00532 
00533     DEBUG_L2( "Found " << numSeqs << " sequences in the file." );
00534 
00535     return numSeqs;
00536 
00537   } // ~SequenceReaderFile::computeNumSequencesInFile( void ) const

Here is the caller graph for this function:

SequenceReaderFile& SequenceReaderFile::operator= ( const SequenceReaderFile &   )  [private]


Member Data Documentation

char SequenceReaderFile::inputBuffer_[inputBufferSize_] [protected]

Definition at line 254 of file SequenceReaderFasta.h.

Referenced by computeNumSequencesInFile(), findSequence(), SequenceReaderFastq::getNextSequence(), getNextSequence(), and printSource().

string SequenceReaderFile::sideInfoBuffer_ [protected]

Definition at line 257 of file SequenceReaderFasta.h.

Referenced by getLastSequenceName(), and getNextSequence().

char SequenceReaderFile::seqStartChar_ [protected]

Definition at line 258 of file SequenceReaderFasta.h.

Referenced by computeNumSequencesInFile(), findSequence(), SequenceReaderFastq::getNextSequence(), getNextSequence(), and printSource().

char SequenceReaderFile::seqStopChar_ [protected]

Definition at line 259 of file SequenceReaderFasta.h.

Referenced by getNextSequence().

istream* SequenceReaderFile::pInputFileStream_ [protected]

Definition at line 261 of file SequenceReaderFasta.h.

Referenced by computeNumSequencesInFile(), extractSource(), findSequence(), SequenceReaderFastq::getNextSequence(), getNextSequence(), printName(), printSideInfo(), printSource(), restoreState(), rewind(), saveState(), SequenceReaderFile(), and ~SequenceReaderFile().

string SequenceReaderFile::fileName_ [protected]

Definition at line 264 of file SequenceReaderFasta.h.

Referenced by saveIndexImp(), and SequenceReaderFile().

SequenceEncoder* SequenceReaderFile::pEncoder_ [protected]

Definition at line 265 of file SequenceReaderFasta.h.

Referenced by changeMode(), getBitsPerSymbol(), getNextSequence(), getSourceDataType(), and ~SequenceReaderFile().

vector<std::streampos> SequenceReaderFile::seqPositions_ [protected]

Definition at line 267 of file SequenceReaderFasta.h.

Referenced by computeNumSequencesInFile(), findSequence(), getNextSequence(), and saveIndexImp().


The documentation for this class was generated from the following files:
SequenceReaderFasta.h
SequenceReaderFasta.cpp
Generated on Fri Dec 21 13:21:00 2007 for ssaha by  doxygen 1.5.2