QueryManager/MatchStoreUngapped.cpp

Go to the documentation of this file.
00001 
00002 // #######################################################################
00003 
00004 // SSAHA : Sequence Search and Alignment by Hashing Algorithm
00005 // Version 3.2, released 1st March 2004
00006 // Copyright (c) Genome Research 2002
00007 
00008 // SSAHA is free software; you can redistribute it and/or modify 
00009 // it under the terms of version 2 of the GNU General Public Licence
00010 // as published by the Free Software Foundation.
00011  
00012 // This program is distributed in the hope that it will be useful,
00013 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00014 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015 // GNU General Public Licence for more details.
00016  
00017 // You should have received a copy of the GNU General Public Licence
00018 // along with this program; if not, write to the Free Software
00019 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
00020 // or see the on-line version at http://www.gnu.org/copyleft/gpl.txt
00021 
00022 // #######################################################################
00023 
00024 // Module Name  : MatchStoreUngapped
00025 // File Name    : MatchStoreUngapped.cpp
00026 // Language     : C++
00027 // Module Author: Anthony J. Cox (ac2@sanger.ac.uk)
00028 
00029 // Description:
00030 
00031 // Includes:
00032 
00033 #include "MatchStoreUngapped.h"
00034 #include "MatchStoreGapped.h"
00035 #include "SequenceReader.h"
00036 #include "HashTable.h"
00037 #include "GlobalDefinitions.h"
00038 #include <algorithm>
00039 
00040 // ### Function Definitions ###
00041 
00042 // Name:
00043 // Arguments:
00044 // TYPE  NAME  IN/OUT COMMENT
00045 // Returns: TYPE COMMENT
00046 
00047 
00048   // Function Name: findMatch
00049   // Arguments: vector<Match>& (out), int (in), int (in), int (in)
00050   // Returns: true if more than minToPrint bases were found in total.
00051   // Sorts its member HitInfo instances in the following way:
00052   // 1 (highest priority) by subjectNum: thus the hits for each subject
00053   // sequence are grouped together
00054   // 2 by diff: for a given subject sequence, hits which have the same 'diff' 
00055   // value are part of the same ungapped alignment between the subject
00056   // sequence and the query sequence.
00057   // 3 (lowest priority) by queryPos
00058   // After sorting, the routine looks for a region of hits which have the same
00059   // subjectNum and diff values but for which the queryPos value differs
00060   // by stepLength. Each such region denotes a consecutive series of matching 
00061   // base pairs between the query and subject, and the details are encoded in
00062   // an instance of AdjacentMatchGap 
00063   void MatchAlgorithmUngapped::generateMatches
00064     //  ( HitListVector& hitList, MatchStore& store )
00065   ( HitListVector& hitList, MatchAdder& addMatch ) 
00066   {
00067 
00068     //    HitListVector hitList;
00069     //    subjectTable.matchSequence( querySeq, hitList, numRepeats_ );
00070     //   subjectTable.setNumRepeats(numRepeats_);
00071     //  subjectTable.matchSequence( querySeq, hitList );
00072 
00073     if (dynamic_cast<MatchAdderCodonCodon*>(&addMatch)
00074         ||dynamic_cast<MatchAdderCodonProtein*>(&addMatch))
00075     {
00076       wordLength_ *= gNumReadingFrames; // %%%%%%%%%%%%%% these need to be
00077       stepLength_ *= gNumReadingFrames; // reset to do proteins again %%%
00078     }
00079 
00080     sort( hitList.begin(), hitList.end() );
00081 
00082     // for (HitListVector::iterator i(hitList.begin()); i != hitList.end();++i)
00083     //  DEBUG_L2( "hit "<<i->subjectNum<<" "<< i->diff << " " << i->queryPos );
00084 
00085     if ( hitList.size() == 0 ) return;
00086 
00087     HitListVector::const_iterator i ( hitList.begin() );
00088 
00089     int adjacentBases = wordLength_;
00090     SequenceNumber adjacentStart = i->queryPos;
00091     int basesToAdd;
00092 
00093     HitListVector::const_iterator lastHit( i );
00094     ++i;
00095 
00096     for (  ; i != hitList.end() ; ++i )
00097     {
00098 
00099       if (    ( i->subjectNum == lastHit->subjectNum )
00100            && ( i->diff == lastHit->diff ) )
00101       {
00102         /*
00103         if ( i->queryPos - lastHit->queryPos == stepLength_ )
00104           {
00105             adjacentBases += stepLength_;
00106           } // ~if
00107         */
00108         basesToAdd = i->queryPos - lastHit->queryPos;
00109         if (basesToAdd <= stepLength_) 
00110           // %%%% chg to stepLength + x to handle gaps in query
00111           {
00112             adjacentBases += basesToAdd;
00113           } // ~if
00114         
00115         else
00116         {
00117           if ( adjacentBases>=minToProcess_)
00118           {
00119             DEBUG_L2(lastHit->subjectNum << " " << 
00120                           adjacentBases << " " <<
00121                           adjacentStart << " " <<
00122                           adjacentStart + adjacentBases -1 << " " <<
00123                           adjacentStart + lastHit->diff << " " <<
00124                           adjacentStart + lastHit->diff + adjacentBases -1 );
00125 
00126             
00127             addMatch(     lastHit->subjectNum,
00128                           adjacentBases,
00129                           adjacentStart,
00130                           adjacentStart + adjacentBases -1,
00131                           adjacentStart + lastHit->diff,
00132                           adjacentStart + lastHit->diff + adjacentBases -1 );
00133           }
00134           adjacentStart = i->queryPos;
00135           adjacentBases = wordLength_;
00136         } // ~else
00137       }
00138       else
00139       {
00140         if( adjacentBases>=minToProcess_)
00141           {
00142             DEBUG_L2(lastHit->subjectNum << " " << 
00143                           adjacentBases << " " <<
00144                           adjacentStart << " " <<
00145                           adjacentStart + adjacentBases -1 << " " <<
00146                           adjacentStart + lastHit->diff << " " <<
00147                           adjacentStart + lastHit->diff + adjacentBases -1 );
00148           
00149                 addMatch(         lastHit->subjectNum,
00150                           adjacentBases,
00151                           adjacentStart,
00152                           adjacentStart + adjacentBases -1,
00153                           adjacentStart + lastHit->diff,
00154                           adjacentStart + lastHit->diff + adjacentBases -1 );
00155           }
00156            adjacentStart = i->queryPos;
00157            adjacentBases = wordLength_;
00158 
00159       } // ~else
00160       ++lastHit;
00161     } // ~for
00162     if (adjacentBases>=minToProcess_)
00163       {
00164             DEBUG_L2(lastHit->subjectNum << " " << 
00165                           adjacentBases << " " <<
00166                           adjacentStart << " " <<
00167                           adjacentStart + adjacentBases -1 << " " <<
00168                           adjacentStart + lastHit->diff << " " <<
00169                           adjacentStart + lastHit->diff + adjacentBases -1 );
00170 
00171         addMatch(                 lastHit->subjectNum,
00172                           adjacentBases,
00173                           adjacentStart,
00174                           adjacentStart + adjacentBases -1,
00175                           adjacentStart + lastHit->diff,
00176                           adjacentStart + lastHit->diff + adjacentBases -1 );
00177       }
00178     return;
00179 
00180   } // ~findMatch
00181 
00182 
00183 
00184 
00185 
00186 
00187 
00188 
00189 // End of file MatchStoreUngapped.cpp
00190 
00191 

Generated on Fri Dec 21 13:12:16 2007 for ssaha by  doxygen 1.5.2