QueryManager/MatchStore.h

Go to the documentation of this file.
00001 /*  Last edited: Apr  4 10:15 2002 (ac2) */
00002 
00003 // #######################################################################
00004 
00005 // SSAHA : Sequence Search and Alignment by Hashing Algorithm
00006 // Version 3.2, released 1st March 2004
00007 // Copyright (c) Genome Research 2002
00008 
00009 // SSAHA is free software; you can redistribute it and/or modify 
00010 // it under the terms of version 2 of the GNU General Public Licence
00011 // as published by the Free Software Foundation.
00012  
00013 // This program is distributed in the hope that it will be useful,
00014 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00015 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016 // GNU General Public Licence for more details.
00017  
00018 // You should have received a copy of the GNU General Public Licence
00019 // along with this program; if not, write to the Free Software
00020 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
00021 // or see the on-line version at http://www.gnu.org/copyleft/gpl.txt
00022 
00023 // #######################################################################
00024 
00025 // Module Name  : MatchStore
00026 // File Name    : MatchStore.h
00027 // Language     : C++
00028 // Module Author: Anthony J. Cox (ac2@sanger.ac.uk)
00029 
00030 // Include guard:
00031 #ifndef INCLUDED_MatchStore
00032 #define INCLUDED_MatchStore
00033 
00034 // Description:
00035 
00036 // Includes:
00037 #include "GlobalDefinitions.h"
00038 #include <string>
00039 #include <algorithm>
00040 #include <deque>
00041 
00042 // NB it is good practise for #include statements in header files to be
00043 // replaced by forward declarations if at all possible
00044 
00045 // ### Class Declarations ###
00046 
00047 
00048 class HashTableGeneric;
00049 class SourceReader;
00050 
00051 
00052 // Match and subclasses
00053 
00054 class Match
00055 {
00056  public:
00057   virtual ~Match() {}
00058 
00059   virtual SequenceNumber getSubjectNum( void ) const=0;
00060   //  virtual string         getSubjectName( void ) const=0;
00061   virtual const char*         getSubjectName( void ) const=0;
00062   virtual SequenceOffset getSubjectStart( void ) const=0;
00063   virtual SequenceOffset getSubjectEnd( void ) const=0;
00064 
00065   virtual SequenceNumber getQueryNum( void ) const=0;
00066   virtual string         getQueryName( void ) const=0;
00067   virtual SequenceOffset getQueryStart( void ) const=0;
00068   virtual SequenceOffset getQueryEnd( void ) const=0;
00069   
00070   virtual int  getQuerySize( void ) const=0;
00071   virtual int  getNumBases(void ) const=0;
00072   virtual bool isQueryForward( void ) const=0;
00073   virtual bool isSubjectForward( void ) const=0;
00074   
00075   virtual void           print( void ) const=0;
00076  
00077 
00078 }; // ~class Match
00079 
00080 
00081 // Struct Name: MatchImp
00082 // Description: This contains details of a gapped alignment between 
00083 // the query sequence under analysis and a sequence from the subject database. 
00084 
00085 class MatchStoreImp;
00086 
00087 class MatchImp : public Match
00088 {
00089  public:
00090 
00091   virtual ~MatchImp() {}
00092 
00093   virtual SequenceNumber getSubjectNum( void ) const 
00094     { return subjectNum_; }
00095   //  virtual inline string  getSubjectName( void ) const;
00096   virtual inline const char*  getSubjectName( void ) const;
00097   virtual SequenceOffset getSubjectStart( void ) const 
00098     { return subjectStart_; }
00099   virtual SequenceOffset getSubjectEnd( void ) const 
00100     { return subjectEnd_; }
00101 
00102   virtual inline SequenceNumber getQueryNum( void ) const;
00103   virtual inline string         getQueryName( void ) const;
00104   virtual SequenceOffset getQueryStart( void ) const 
00105     { return queryStart_; }
00106   virtual SequenceOffset getQueryEnd( void ) const 
00107     { return queryEnd_; }
00108   
00109   virtual inline int  getQuerySize( void ) const; 
00110   virtual int  getNumBases(void ) const 
00111     { return numBases_; }
00112   virtual bool isQueryForward( void ) const 
00113     { return isQueryForward_; }
00114   virtual bool isSubjectForward( void ) const 
00115     { return isSubjectForward_; }
00116   
00117   virtual void           print( void ) const {}
00118   //
00119 
00120 
00121   MatchImp(  MatchStoreImp* myStore,
00122              SequenceNumber subjectNum,
00123              SequenceOffset numBases,
00124              SequenceOffset queryStart,
00125              SequenceOffset queryEnd,
00126              SequenceOffset subjectStart,
00127              SequenceOffset subjectEnd,
00128              bool isQueryForward,
00129              bool isSubjectForward ):
00130     myStore_( myStore ),
00131     subjectNum_( subjectNum ),  
00132     numBases_( numBases ),  
00133     queryStart_( queryStart ),
00134     queryEnd_( queryEnd ),
00135     subjectStart_( subjectStart ),
00136     subjectEnd_( subjectEnd ),
00137     isQueryForward_( isQueryForward ),
00138     isSubjectForward_( isSubjectForward ){}
00139 
00140   SequenceNumber subjectNum_;
00141   SequenceOffset numBases_;
00142 
00143   SequenceOffset queryStart_;
00144   SequenceOffset queryEnd_;
00145   SequenceOffset subjectStart_;
00146   SequenceOffset subjectEnd_;
00147 
00148   bool isQueryForward_;
00149   bool isSubjectForward_;
00150 
00151   MatchStoreImp* myStore_;
00152 
00153 };
00154 
00155 
00156 // MatchStore and subclasses
00157 
00158 class MatchStore : public vector<Match*>
00159 {
00160  public:
00161   virtual ~MatchStore() {}
00162 
00163   virtual void clear( void ) {}
00164 
00165 
00166   virtual void printResult( ostream& outputStream = cout ) const {}
00167 
00168   virtual void addMatch(  SequenceNumber subjectNum,
00169                           SequenceOffset numBases,
00170                           SequenceOffset queryStart,
00171                           SequenceOffset queryEnd,
00172                           SequenceOffset subjectStart,
00173                           SequenceOffset subjectEnd,
00174                           bool isQueryForward,
00175                           bool isSubjectForward ) {}
00176 };
00177 
00178 class HitListVector;
00179 class NameReader;
00180 
00181 class MatchStoreImp : public MatchStore
00182 {
00183   friend class MatchImp;
00184   public:
00185   MatchStoreImp( string queryName,
00186                  int        queryNum,
00187                  int        queryBases,
00188                  const NameReader& reader) :
00189   queryName_ ( queryName ),
00190   queryNum_( queryNum ),
00191   queryBases_( queryBases ), 
00192   reader_( reader )
00193     {
00194       //      reserve(1000);
00195       //     imps_.reserve(1000);
00196     }
00197 
00198   ~MatchStoreImp();
00199   
00200   virtual void addMatch(  SequenceNumber subjectNum,
00201                           SequenceOffset numBases,
00202                           SequenceOffset queryStart,
00203                           SequenceOffset queryEnd,
00204                           SequenceOffset subjectStart,
00205                           SequenceOffset subjectEnd,
00206                           bool isQueryForward,
00207                           bool isSubjectForward );
00208 
00209   // Function Name: printResult
00210   // Arguments: const QueryResult& (in), ostream& (out)
00211   // Convert the contents of result to ASCII and send to the specified
00212   // output stream
00213   void printResult
00214   ( ostream& outputStream = cout ) const;
00215 
00216   virtual void clear( void ); 
00217 
00218   void setup( void );
00219 
00220  protected:
00221   vector<MatchImp*> imps_;
00222 
00223   string     queryName_;
00224   int        queryNum_;
00225   int        queryBases_;
00226   //  HashTable& subjectTable_;
00227   //  mutable map< SequenceNumber, string > names_;
00228   const NameReader& reader_;
00229 
00230 }; // ~MatchStore
00231 
00232 
00233 
00234 // MatchTask and subclasses
00235 
00236 class MatchTask
00237 {
00238  public:
00239   virtual ~MatchTask() {}
00240   virtual void operator()( MatchStore& store ) =0;
00241 };
00242 
00243 template <class TASK1, class TASK2> class CombineTask : public MatchTask
00244 {
00245   public:
00246     CombineTask(TASK1& t1, TASK2& t2) : t1_(t1), t2_(t2) {}
00247     CombineTask( void ) : t1_imp(), t2_imp(), t1_(t1_imp), t2_(t2_imp) {}
00248     virtual void operator()( MatchStore& store )
00249 {
00250     t1_(store);
00251     t2_(store);
00252   }
00253   private:
00254   TASK1 t1_imp;
00255   TASK2 t2_imp;
00256   TASK1& t1_;
00257   TASK2& t2_;
00258 };
00259 
00260 // CombineTaskVirtual uses virtual function polymorphism instead of templates:
00261 // more versatile as you don't have to know exact type of task1 and
00262 // task2 at compile time. Cost of virtual function likely to be negligible
00263 // wrt to tasks themselves (sorting, printing etc)
00264 
00265 class CombineTaskVirtual : public MatchTask
00266 {
00267  public:
00268   CombineTaskVirtual(MatchTask& t1, MatchTask& t2) : t1_(t1), t2_(t2) {}
00269   virtual void operator()( MatchStore& store )
00270   {
00271     t1_(store);
00272     t2_(store);
00273   }
00274   private:
00275   MatchTask& t1_;
00276   MatchTask& t2_;
00277 };
00278 
00279 // prints out all matches
00280 class MatchTaskPrint : public MatchTask
00281 {
00282  public: 
00283   MatchTaskPrint( ostream& outputStream = cout ) :
00284     outputStream_( outputStream ) {}
00285   virtual void operator()( MatchStore& store );
00286  private:
00287   ostream& outputStream_;
00288 
00289 };
00290 
00291 // prints out all matches in `parser friendly format'
00292 class MatchTaskPrintTabbed : public MatchTask
00293 {
00294  public: 
00295   MatchTaskPrintTabbed( ostream& outputStream = cout ) :
00296     outputStream_( outputStream ) {}
00297   virtual void operator()( MatchStore& store );
00298  private:
00299   ostream& outputStream_;
00300 };
00301 
00302 // same as MatchTaskPrint, except reverses frame of query
00303 // coordinates for reverse matches
00304 class MatchTaskPrintReverse : public MatchTask
00305 {
00306  public: 
00307   MatchTaskPrintReverse( ostream& outputStream = cout ) :
00308     outputStream_( outputStream ) {}
00309   virtual void operator()( MatchStore& store );
00310  private:
00311   ostream& outputStream_;
00312 
00313 };
00314 
00315 // same as MatchTaskPrintTabbed, except reverses frame of query
00316 // coordinates for reverse matches
00317 class MatchTaskPrintTabbedReverse : public MatchTask
00318 {
00319  public: 
00320   MatchTaskPrintTabbedReverse( ostream& outputStream = cout ) :
00321     outputStream_( outputStream ) {}
00322   virtual void operator()( MatchStore& store );
00323  private:
00324   ostream& outputStream_;
00325 };
00326 
00327 
00328 class SortByMatchSize 
00329 {
00330 public:
00331   SortByMatchSize() {}
00332   bool operator()( const Match* p1, const Match* p2 ) const
00333   { 
00334     // We want the largest match at the top
00335     return ( (p1->getNumBases()) > (p2->getNumBases() ) );
00336   }
00337 };
00338 
00339 class SortBySubjectName 
00340 {
00341  public:
00342   SortBySubjectName() {}
00343   bool operator()( const Match* p1, const Match* p2 ) const
00344   { 
00345     return ( (p1->getSubjectName()) < (p2->getSubjectName() ) );
00346   }
00347 };
00348 
00349 class SortByQueryStart
00350 {
00351  public:
00352   SortByQueryStart() {}
00353   bool operator()( const Match* p1, const Match* p2 ) const
00354   { 
00355     return ( (p1->getQueryStart()) < (p2->getQueryStart() ) );
00356   }
00357 };
00358 
00359 class SortByDiff
00360 {
00361  public:
00362   SortByDiff() {}
00363   bool operator()( const Match* p1, const Match* p2 ) const
00364   { 
00365     return (   (p1->getQueryStart()-p1->getSubjectStart())
00366              > (p2->getQueryStart()-p2->getSubjectStart())  );
00367   }
00368 };
00369 
00370 class SortByPercentMatch
00371 {
00372  public:
00373   SortByPercentMatch() {}
00374   bool operator()( const Match* p1, const Match* p2 ) const
00375   { 
00376     return (   (p1->getNumBases()*(p2->getQueryEnd()-p2->getQueryStart()+1))
00377              > (p2->getNumBases()*(p1->getQueryEnd()-p1->getQueryStart()+1)));
00378   }
00379 };
00380   
00381 class SortNull
00382 {
00383 public:
00384   SortNull() {}
00385   bool operator()( const Match* p1, const Match* p2) const 
00386   {
00387     return false;
00388   }
00389 };
00390 
00391 #ifdef OLD_SORTBYMATCH_DEFINITION
00392 class SortByMatch 
00393 {
00394 public:
00395   SortByMatch() {}
00396   bool operator()( const Match* p1, const Match* p2 ) const
00397   { 
00398     return (    (p1->getNumBases()) > (p2->getNumBases() )
00399              || (    (p1->getNumBases()) == (p2->getNumBases() )
00400                   && (p1->getSubjectName()) < (p2->getSubjectName() ) )
00401              || (    (p1->getSubjectName()) == (p2->getSubjectName() )
00402                   && (p1->getQueryStart()) == (p2->getQueryStart() ) ) );
00403   }
00404 };
00405 #endif
00406 
00407 // Sorting by subject name is very slow, so sort by subject number instead
00408 // TC 25.3.2
00409 class SortByMatch 
00410 {
00411 public:
00412   SortByMatch() {}
00413   bool operator()( const Match* p1, const Match* p2 ) const
00414   { 
00415     return (    (p1->getNumBases()) > (p2->getNumBases() )
00416              || (    (p1->getNumBases()) == (p2->getNumBases() )
00417                   && (p1->getSubjectNum()) < (p2->getSubjectNum() ) )
00418              || (    (p1->getSubjectNum()) == (p2->getSubjectNum() )
00419                   && (p1->getQueryStart()) == (p2->getQueryStart() ) ) );
00420   }
00421 };
00422 
00423 
00424 template < class C1, class C2, class C3=SortNull > 
00425 class CombineSort
00426 {
00427 public:
00428   CombineSort() : c1_(), c2_(), c3_() {}
00429   bool operator()( const Match* p1, const Match* p2) const
00430   {
00431     return (    c1_(p1,p2)
00432              || ( (!c1_(p2,p1)) && c2_(p1,p2) ) 
00433              || ( (!c2_(p2,p1)) && c3_(p1,p2) ) );
00434   }
00435   C1 c1_;
00436   C2 c2_;
00437   C3 c3_;
00438 };
00439 
00440 template <class SORTER> class MatchTaskSort : public MatchTask
00441 {
00442 public:
00443   MatchTaskSort
00444   ( 
00445     unsigned int maxToSort = 1<<30,
00446     double partialThreshold = 0.0
00447   ) : sorter_(), maxToSort_(maxToSort), partialThreshold_(partialThreshold) {}
00448   // TBD should be virtual???
00449   void operator()(MatchStore& store )
00450   {
00451     if (    ( store.size() <= maxToSort_)
00452          || (  ((double)maxToSort_) / ((double)store.size())
00453                > partialThreshold_  )  )
00454       sort( store.begin(), store.end(), sorter_ );
00455     else 
00456     {
00457       partial_sort( store.begin(), 
00458                     store.begin()+maxToSort_, 
00459                     store.end(), 
00460                     sorter_ );
00461     }
00462     store.resize(min((unsigned int)store.size(),maxToSort_));
00463   }
00464 private:
00465   unsigned int maxToSort_;
00466   double partialThreshold_;
00467   SORTER sorter_;
00468 }; // class MatchSort
00469 
00470 #ifdef MOVED_TO_ALIGNERDOTH
00471 // Declarations for MatchTaskAlign
00472 
00473 typedef int ScoreType;
00474 typedef ScoreType ScoreTable[65536];
00475 typedef unsigned char uchar;
00476 typedef unsigned char PathType;
00477 
00478 typedef unsigned char AlignBreakType;
00479 
00480 // alignment of two sequences is described by a vector of AlignInfo 
00481 // Meant for sequences that contain large stretches of matches
00482 // punctuated by the occasional AlignBreak
00483 
00484 struct AlignInfo
00485 {
00486   AlignInfo( void ) : numMatches(0), breakType(0) {}
00487   int numMatches;
00488   AlignBreakType breakType;
00489 };
00490 
00491 
00492 //void fillScoreTable( ScoreTable& table );
00493 
00494 class MatchTaskAlign : public MatchTask
00495 {
00496 public:
00497   MatchTaskAlign( SourceReader& querySource, 
00498                   SourceReader& subjectSource,
00499                   int numCols,
00500                   ostream& outputStream,
00501                   bool reverseQueryCoords, 
00502                   bool doAlignment );
00503   virtual ~MatchTaskAlign();
00504 
00505   virtual void operator()(MatchStore& store);
00506 
00507   // TBD rest of these could be made private
00508 
00509   void align( const char* pQuery, int queryStart, int queryEnd,
00510               const char* pSubject, int subjectStart, int subjectEnd );
00511   void createAlignment
00512   ( deque<AlignInfo>& alignment,
00513     const char* pQuery, int queryStart, int queryEnd,
00514     const char* pSubject, int subjectStart, int subjectEnd );
00515   void formatAlignment
00516   ( deque<AlignInfo>& alignment,
00517     const char* pQuery, int queryStart, int queryEnd,
00518     const char* pSubject, int subjectStart, int subjectEnd );
00519 
00520   bool matchChar( const char a, const char b )
00521   {
00522     return (tolower(a)==tolower(b));
00523   }
00524 
00525   virtual void outputAlignmentLine( void );
00526 
00527   void reverseQueryCoords( bool yesOrNo= true) 
00528   { 
00529     reverseQueryCoords_ = yesOrNo;
00530   }
00531 
00532   void doAlignment( bool yesOrNo= true) 
00533   { 
00534     doAlignment_ = yesOrNo;
00535   }
00536  
00537 protected:
00538 
00539   ostream& outputStream_;
00540   SourceReader& querySource_;
00541   SourceReader& subjectSource_;
00542   // reverseQueryCoords_ - if `true' reverses orientation of
00543   // query coordinates, so they are as seen in the forward frame
00544   // Basically set to true for ssaha executable but false in 
00545   // ssahaClient, because in that case any necessary reversals
00546   // are done in ssahaServer
00547   bool reverseQueryCoords_;
00548   // doAlignment_ - if `true' prints match description line plus
00549   // graphical alignment for each match, if `false' just prints
00550   // match description line
00551   bool doAlignment_;
00552 
00553   char* pBufSeq1;
00554   char* pBufAlign;
00555   char* pBufSeq2;
00556   int numCols_;
00557 
00558   ScoreTable* pTable_;
00559 
00560 
00561 };
00562 
00563 // Class Name: MatchTaskAlignDNA
00564 // Description: Aligns using DNA score table
00565 class MatchTaskAlignDNA : public MatchTaskAlign
00566 {
00567  public:
00568   MatchTaskAlignDNA( SourceReader& querySource, 
00569                      SourceReader& subjectSource,
00570                      int numCols=80,
00571                      ostream& outputStream=cout,
00572                      bool reverseQueryCoords=true,
00573                      bool doAlignment=true);
00574  protected:
00575   void fillScoreTable( void );
00576 
00577   static bool isTableFilled_;
00578   static ScoreTable table_;
00579 };
00580 
00581 //#ifdef DATA_SOURCES_FOR_BLOSUM62_MATRIX
00582 The below is cut and pasted from
00583 http://www.ncbi.nlm.nih.gov/IEB/ToolBox/C_DOC/lxr/source/data/BLOSUM62
00584 
00585 -- quote --
00586 
00587   1 #  Matrix made by matblas from blosum62.iij
00588   2 #  * column uses minimum score
00589   3 #  BLOSUM Clustered Scoring Matrix in 1/2 Bit Units
00590   4 #  Blocks Database = /data/blocks_5.0/blocks.dat
00591   5 #  Cluster Percentage: >= 62
00592   6 #  Entropy =   0.6979, Expected =  -0.5209
00593   7    A  R  N  D  C  Q  E  G  H  I  L  K  M  F  P  S  T  W  Y  V  B  Z  X  *
00594   8 A  4 -1 -2 -2  0 -1 -1  0 -2 -1 -1 -1 -1 -2 -1  1  0 -3 -2  0 -2 -1  0 -4 
00595   9 R -1  5  0 -2 -3  1  0 -2  0 -3 -2  2 -1 -3 -2 -1 -1 -3 -2 -3 -1  0 -1 -4 
00596  10 N -2  0  6  1 -3  0  0  0  1 -3 -3  0 -2 -3 -2  1  0 -4 -2 -3  3  0 -1 -4 
00597  11 D -2 -2  1  6 -3  0  2 -1 -1 -3 -4 -1 -3 -3 -1  0 -1 -4 -3 -3  4  1 -1 -4 
00598  12 C  0 -3 -3 -3  9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1 -3 -3 -2 -4 
00599  13 Q -1  1  0  0 -3  5  2 -2  0 -3 -2  1  0 -3 -1  0 -1 -2 -1 -2  0  3 -1 -4 
00600  14 E -1  0  0  2 -4  2  5 -2  0 -3 -3  1 -2 -3 -1  0 -1 -3 -2 -2  1  4 -1 -4 
00601  15 G  0 -2  0 -1 -3 -2 -2  6 -2 -4 -4 -2 -3 -3 -2  0 -2 -2 -3 -3 -1 -2 -1 -4 
00602  16 H -2  0  1 -1 -3  0  0 -2  8 -3 -3 -1 -2 -1 -2 -1 -2 -2  2 -3  0  0 -1 -4 
00603  17 I -1 -3 -3 -3 -1 -3 -3 -4 -3  4  2 -3  1  0 -3 -2 -1 -3 -1  3 -3 -3 -1 -4 
00604  18 L -1 -2 -3 -4 -1 -2 -3 -4 -3  2  4 -2  2  0 -3 -2 -1 -2 -1  1 -4 -3 -1 -4 
00605  19 K -1  2  0 -1 -3  1  1 -2 -1 -3 -2  5 -1 -3 -1  0 -1 -3 -2 -2  0  1 -1 -4 
00606  20 M -1 -1 -2 -3 -1  0 -2 -3 -2  1  2 -1  5  0 -2 -1 -1 -1 -1  1 -3 -1 -1 -4 
00607  21 F -2 -3 -3 -3 -2 -3 -3 -3 -1  0  0 -3  0  6 -4 -2 -2  1  3 -1 -3 -3 -1 -4 
00608  22 P -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4  7 -1 -1 -4 -3 -2 -2 -1 -2 -4 
00609  23 S  1 -1  1  0 -1  0  0  0 -1 -2 -2  0 -1 -2 -1  4  1 -3 -2 -2  0  0  0 -4 
00610  24 T  0 -1  0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1  1  5 -2 -2  0 -1 -1  0 -4 
00611  25 W -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1  1 -4 -3 -2 11  2 -3 -4 -3 -2 -4 
00612  26 Y -2 -2 -2 -3 -2 -1 -2 -3  2 -1 -1 -2 -1  3 -3 -2 -2  2  7 -1 -3 -2 -1 -4 
00613  27 V  0 -3 -3 -3 -1 -2 -2 -3 -3  3  1 -2  1 -1 -2 -2  0 -3 -1  4 -3 -2 -1 -4 
00614  28 B -2 -1  3  4 -3  0  1 -1  0 -3 -4  0 -3 -3 -2  0 -1 -4 -3 -3  4  1 -1 -4 
00615  29 Z -1  0  0  1 -3  3  4 -2  0 -3 -3  1 -1 -3 -1  0 -1 -3 -2 -2  1  4 -1 -4 
00616  30 X  0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2  0  0 -2 -1 -1 -1 -1 -1 -4 
00617  31 * -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4  1 
00618 
00619 -- unquote --
00620 
00621 In addition:
00622 Score of any character with - is -10
00623 Score of - with - is -1
00624 These are the gap open and gap extension penalties used with BLOSUM62 in
00625 Huang/Zhang, CABIOS 12(6) 1996, pp. 497-506
00626 
00627 //#endif
00628 
00629 static const char blosumResidues[] 
00630 = "ARNDCQEGHILKMFPSTWYVBZX";
00631 // 12345678901234567890123
00632 
00633 static const ScoreType blosumScores[] =
00634 {
00635 4, // A
00636 -1, 5, // R
00637 -2, 0, 6, // N
00638 -2, -2, 1, 6, // D
00639 0, -3, -3, -3, 9,// C
00640 -1, 1, 0, 0, -3, 5,// Q
00641 -1, 0, 0, 2, -4, 2, 5,// E
00642 0, -2, 0, -1, -3, -2, -2, 6, // G
00643 -2, 0, 1, -1, -3, 0, 0, -2, 8, // H
00644 -1, -3, -3, -3, -1, -3, -3, -4, -3, 4, // I
00645 -1, -2, -3, -4, -1, -2, -3, -4, -3, 2, 4, // L
00646 -1, 2, 0, -1, -3, 1, 1, -2, -1, -3, -2, 5, // K
00647 -1, -1, -2, -3, -1, 0, -2, -3, -2, 1, 2, -1, 5, // M
00648 -2, -3, -3, -3, -2, -3, -3, -3, -1, 0, 0, -3, 0, 6, // F
00649 -1, -2, -2, -1, -3, -1, -1, -2, -2, -3, -3, -1, -2, -4, 7, // P
00650 1, -1, 1, 0, -1, 0, 0, 0, -1, -2, -2, 0, -1, -2, -1, 4, // S
00651 0, -1, 0, -1, -1, -1, -1, -2, -2, -1, -1, -1, -1, -2, -1, 1, 5, // T
00652 -3, -3, -4, -4, -2, -2, -3, -2, -2, -3, -2, -3, -1, 1, -4, -3, -2, 11, // W
00653 -2, -2, -2, -3, -2, -1, -2, -3, 2, -1, -1, -2, -1, 3, -3, -2, -2, 2, 7,// Y
00654 0, -3, -3, -3, -1, -2, -2, -3, -3, 3, 1, -2, 1, -1, -2, -2, 0, -3, -1, 4,// V
00655 -2, -1, 3, 4, -3, 0, 1, -1, 0, -3, -4, 0, -3, -3, -2, 0, -1, -4, -3, -3, 4,// B
00656 -1, 0, 0, 1, -3, 3, 4, -2, 0, -3, -3, 1, -1, -3, -1, 0, -1, -3, -2, -2, 1, 4, // Z
00657 0, -1, -1, -1, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, 0, 0, -2, -1, -1, -1, -1, -1 //X
00658 };
00659 
00660 static const int numBlosumResidues = 23;
00661 
00662 // Class Name: MatchTaskAlignProtein
00663 // Description: Aligns protein using score table based on BLOSUM62
00664 class MatchTaskAlignProtein : public MatchTaskAlign
00665 {
00666  public:
00667   MatchTaskAlignProtein( SourceReader& querySource, 
00668                          SourceReader& subjectSource,
00669                          int numCols=80,
00670                          ostream& outputStream=cout );
00671  protected:
00672   void fillScoreTable( void );
00673 
00674   static bool isTableFilled_;
00675   static ScoreTable table_;
00676 };
00677 
00678 #endif
00679 
00680 
00681 
00682 
00683 
00684 
00685 
00686 // Class Name :
00687 // Description: 
00688 
00689 // ### Function Declarations ###
00690 
00691 // Name:
00692 // Arguments:
00693 // TYPE  NAME  IN/OUT COMMENT
00694 // Returns: TYPE COMMENT
00695 
00696 // End of include guard:
00697 #endif
00698 
00699 // End of file MatchStore.h
00700 
00701 
00702 
00703 
00704 

Generated on Fri Dec 21 13:12:16 2007 for ssaha by  doxygen 1.5.2