SequenceReader/SequenceReaderLocal.cpp

Go to the documentation of this file.
00001 
00002 // #######################################################################
00003 
00004 // SSAHA : Sequence Search and Alignment by Hashing Algorithm
00005 // Version 3.2, released 1st March 2004
00006 // Copyright (c) Genome Research 2002
00007 
00008 // SSAHA is free software; you can redistribute it and/or modify 
00009 // it under the terms of version 2 of the GNU General Public Licence
00010 // as published by the Free Software Foundation.
00011  
00012 // This program is distributed in the hope that it will be useful,
00013 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00014 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015 // GNU General Public Licence for more details.
00016  
00017 // You should have received a copy of the GNU General Public Licence
00018 // along with this program; if not, write to the Free Software
00019 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
00020 // or see the on-line version at http://www.gnu.org/copyleft/gpl.txt
00021 
00022 // #######################################################################
00023 
00024 // Module Name  : SequenceReaderLocal
00025 // File Name    : SequenceReaderLocal.cpp
00026 // Language     : C++
00027 // Module Author: Anthony J. Cox (ac2@sanger.ac.uk)
00028 
00029 // Description:
00030 
00031 // Includes:
00032 
00033 #include "SequenceReaderLocal.h"
00034 #include "SequenceReader.h"
00035 
00036 // ### Function Definitions ###
00037 
00038 // Name:
00039 // Arguments:
00040 // TYPE  NAME  IN/OUT COMMENT
00041 // Returns: TYPE COMMENT
00042 
00043   // Function Name: Constructor
00044   // Arguments: ostream&
00045   // Takes the data from seqFile and places it in seqData_, seqBasesInLast_
00046   // and seqNames_
00047 SequenceReaderLocal::SequenceReaderLocal
00048 ( SequenceReader& seqFile, int wordLength, ostream& monitoringStream ) :
00049 sourceData_( seqFile.getSourceDataType() ),
00050 wordLength_( wordLength ),
00051 bitsPerSymbol_( seqFile.getBitsPerSymbol() ),
00052 SequenceReader( monitoringStream )
00053 {
00054   monitoringStream_ 
00055     << "constructing SequenceReaderLocal from SequenceReader" << endl;
00056     seqFile.rewind();
00057     const pair< WordSequence, std::string> dummy; 
00058 
00059     seqData_.push_back(dummy);
00060     //    seqNames_.push_back();
00061 
00062     while(  seqFile.getNextSequence( seqData_.back().first, wordLength_ ) != -1 ) 
00063     {      
00064       seqFile.getLastSequenceName( seqData_.back().second );
00065       seqData_.push_back(dummy);
00066     } // ~while
00067       seqData_.pop_back();
00068     //    numSequencesInFile_ = seqFile.getNumSequencesInFile();
00069     rewind();
00070     seqFile.rewind();
00071 } // ~constructor
00072 
00073 SequenceReaderLocal::SequenceReaderLocal
00074 ( int wordLength, int bitsPerSymbol, ostream& monitoringStream ) :
00075 sourceData_( gUnknownData ),
00076 wordLength_( wordLength ),
00077 bitsPerSymbol_( bitsPerSymbol ),
00078 SequenceReader( monitoringStream )
00079 {
00080   monitoringStream_ 
00081     << "constructing empty SequenceReaderLocal" << endl;
00082 }
00083 
00084 
00085   // Function Name: Copy constructor
00086   // Arguments:
00087   // NB This is potentially slow. 
00088 // %%%% not properly implemented - don't try to copy! %%%%%
00089 SequenceReaderLocal::SequenceReaderLocal( const SequenceReaderLocal& rhs )
00090 {
00091   monitoringStream_ << "copy constructing SequenceReaderLocal" << endl;
00092 } // ~destructor
00093 
00094   // Function Name: Destructor
00095   // Arguments:
00096 SequenceReaderLocal::~SequenceReaderLocal()
00097 {
00098   monitoringStream_ << "destructing SequenceReaderLocal" << endl;
00099 } // ~destructor
00100 
00101   // Function Name: getNextSequence
00102   // Arguments: WordSequence& (out), int (in)
00103   // Returns:   int
00104   // Read the next set of sequence information from the file and parse it
00105   // into WordSequence format. Returns -1 if there has been a problem with
00106   // reading the sequence, else returns the number of valid base pairs 
00107   // contained within the final word of the sequence.
00108   int SequenceReaderLocal::getNextSequence( WordSequence& nextSeq, int wordLength )
00109   {
00110     if ( lastSequenceNumber_ == getNumSequencesInFile() )
00111     {
00112       monitoringStream_ << "End of file has been reached." << endl;
00113       return (-1);
00114     } // if
00115     if ( wordLength != wordLength_ )
00116     {
00117       monitoringStream_ 
00118       << "Error: sequence data word length is fixed (" 
00119       << wordLength_ << " base pairs)." << endl;
00120       throw SSAHAException("Wrong word length for SequenceReaderLocal");
00121     } // if
00122     // NB sequences are numbered 1 ... n but seqData_ is numbered
00123     // 0 ... n-1
00124     lastSequenceNumber_++;
00125     nextSeq = seqData_[lastSequenceNumber_-1].first; 
00126     //    nextSeq.numBasesInLast
00127     //   = seqData_[lastSequenceNumber_-1].first.numBasesInLast;
00128     return (seqData_[lastSequenceNumber_-1].first.getNumBasesInLast());
00129   } // ~SequenceReaderLocal::getNextSequence
00130 
00131   // Function Name: getSequence
00132   // Arguments: WordSequence& (out), SequenceNumber (in), int (in)
00133   // Returns:   bool
00134   // Read the sequenceNumber-th set of sequence information from the file and 
00135   // parse it into WordSequence format
00136   int SequenceReaderLocal::getSequence
00137   ( WordSequence& nextSeq, SequenceNumber sequenceNumber, int wordLength )
00138   {
00139 
00140     if ( sequenceNumber > numSequencesInFile_ )
00141     {
00142       monitoringStream_ << "End of file has been reached." << endl;
00143       return (-1);
00144     } // if
00145     if ( wordLength != wordLength_ )
00146     {
00147       monitoringStream_ 
00148       << "Error: sequence data word length is fixed (" 
00149       << wordLength_ << " base pairs)." << endl;
00150       throw SSAHAException("Wrong word length for SequenceReaderLocal");
00151     } // if
00152     // NB sequences are numbered 1 ... n but seqData_ is numbered
00153     // 0 ... n-1
00154     nextSeq = seqData_[sequenceNumber - 1].first; 
00155     //    nextSeq.numBasesInLast=seqData_[sequenceNumber-1].first.numBasesInLast;
00156     return nextSeq.getNumBasesInLast();
00157   } // ~getSequence
00158 
00159   // Function Name: findSequence
00160   // Arguments: SequenceNumber (in)
00161   // Returns:   void
00162   // Winds the input file stream to the start of sequence number seqNum. 
00163   // Returns false if seqNum exceeds the number of sequences in
00164   // the file.
00165   bool SequenceReaderLocal::findSequence( SequenceNumber seqNum )
00166   {
00167 
00168     if (( seqNum <= seqData_.size()) && ( seqNum != 0 ))
00169     {
00170       lastSequenceNumber_ = seqNum - 1;
00171       return true;
00172     } // ~if
00173     else
00174     {
00175       return false;
00176     } // ~else
00177   }
00178 
00179 
00180   // Function Name: getLastSequenceName
00181   // Arguments: string& (out)
00182   // Returns:   void
00183   // Fills the string with the name of the last sequence read 
00184   void SequenceReaderLocal::getLastSequenceName( string& seqName ) const
00185   {
00186     seqName = seqData_[lastSequenceNumber_ - 1].second;
00187   }
00188 
00189   // Function Name: getSequenceName
00190   // Arguments: string& (out), SequenceNumber (in)
00191   // Returns:   void
00192   // Fills a string with the name of the requested sequence
00193   bool SequenceReaderLocal::printName( ostream& os, SequenceNumber seqNum) 
00194   {
00195     if ( seqNum > numSequencesInFile_ )
00196     {
00197       monitoringStream_ << "Error: requested sequence number (" << seqNum
00198                         << ") exceeds num seqs in file ("
00199                         << numSequencesInFile_ << ")." << endl;
00200       return false;
00201     } // if
00202     os <<  seqData_[seqNum - 1].second << endl;
00203     return true;
00204   }  
00205 
00206   // Function Name: printSideInfo
00207   // Arguments: string& (out), SequenceNumber (in)
00208   // Returns:   void
00209   // Fills a string with the name of the requested sequence
00210   bool SequenceReaderLocal::printSideInfo
00211   ( ostream& os, SequenceNumber seqNum )
00212   {
00213     return false;
00214   }
00215 
00216   // Function Name: printSource
00217   // Arguments: string& (out), SequenceNumber (in)
00218   // Returns:   void
00219   // Fills a string with the name of the requested sequence
00220   bool SequenceReaderLocal::printSource
00221   ( ostream& os, SequenceNumber seqNum )
00222   {
00223     return false;
00224   }
00225 
00226 
00227 
00228 // End of file SequenceReaderLocal.cpp
00229 

Generated on Fri Dec 21 13:12:16 2007 for ssaha by  doxygen 1.5.2