QueryManager/MatchStore.cpp

Go to the documentation of this file.
00001 
00002 // #######################################################################
00003 
00004 // SSAHA : Sequence Search and Alignment by Hashing Algorithm
00005 // Version 3.2, released 1st March 2004
00006 // Copyright (c) Genome Research 2002
00007 
00008 // SSAHA is free software; you can redistribute it and/or modify 
00009 // it under the terms of version 2 of the GNU General Public Licence
00010 // as published by the Free Software Foundation.
00011  
00012 // This program is distributed in the hope that it will be useful,
00013 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00014 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015 // GNU General Public Licence for more details.
00016  
00017 // You should have received a copy of the GNU General Public Licence
00018 // along with this program; if not, write to the Free Software
00019 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
00020 // or see the on-line version at http://www.gnu.org/copyleft/gpl.txt
00021 
00022 // #######################################################################
00023 
00024 // Module Name  : MatchStore
00025 // File Name    : MatchStore.cpp
00026 // Language     : C++
00027 // Module Author: Anthony J. Cox (ac2@sanger.ac.uk)
00028 
00029 // Description:
00030 
00031 // Includes:
00032 
00033 #include "MatchStore.h"
00034 #include "HashTableGeneric.h"
00035 #include "QueryManager.h"
00036 #include "MatchStoreGapped.h"
00037 #include "SequenceReader.h"
00038 #include <iomanip> // for setprecision() etc...
00039 
00040 // Match member function definitions
00041 
00042 
00043 const char* MatchImp::getSubjectName( void ) const
00044 {
00045   //  cout << "names_" << myStore_->names_.size() << endl; // %%%
00046   //  assert(myStore_->names_.find(subjectNum_)!=myStore_->names_.end());
00047   //  return myStore_->names_[ subjectNum_ ];
00048 
00049   return myStore_->reader_.getSequenceName(subjectNum_);
00050 
00051   //  string name;
00052   //  myStore_->subjectTable_.getSequenceName( name, subjectNum_ );
00053   //  return name;
00054 }
00055 
00056 SequenceNumber MatchImp::getQueryNum( void ) const
00057 {
00058   return myStore_->queryNum_;
00059 }
00060 
00061 string MatchImp::getQueryName( void ) const
00062 {
00063   return myStore_->queryName_;
00064 }
00065 
00066 int  MatchImp::getQuerySize( void ) const
00067 {
00068   return myStore_->queryBases_;
00069 }
00070 
00071 // MatchStore member functions definitions
00072 
00073 
00074   // Function Name: printResult
00075   // Arguments: const QueryResult& (in), ostream& (out)
00076   // Convert the contents of result to ASCII and send to the specified
00077   // output stream
00078 void MatchStoreImp::printResult
00079 ( ostream& outputStream  ) const
00080 { 
00081   
00082   if (empty()) return;
00083 
00084   outputStream << endl << "Matches For Query " 
00085                << queryNum_
00086                << " (" << queryBases_
00087                << " bases): " << queryName_
00088                << "\n\n";    
00089  
00090   for( vector<MatchImp*>::const_iterator i(imps_.begin());i!=imps_.end();++i)
00091   {
00092     //    MatchImp* i(*j);
00093 
00094     outputStream << (((*i)->isQueryForward_)?"F":"R")
00095                  <<" " << (*i)->subjectNum_
00096                  << "\t: " << (*i)->getSubjectName()// names_[(*i)->subjectNum_]
00097                  << "\tBases: " << (*i)->numBases_
00098                  << "\tQ: " << (*i)->queryStart_ 
00099                  << " to " << (*i)->queryEnd_
00100                  << "\tS: " << (*i)->subjectStart_ 
00101                  << " to " << (*i)->subjectEnd_ 
00102                  << "\n"; 
00103 
00104   }  // ~for i
00105    
00106   outputStream << endl;
00107 
00108 } // ~QueryManager::printResult
00109 
00110 MatchStoreImp::~MatchStoreImp()
00111 {
00112   for (vector<MatchImp*>::iterator i(imps_.begin()) ; i!= imps_.end() ; ++i )
00113     delete (*i);
00114 
00115 }
00116 
00117 void MatchStoreImp::clear( void )
00118 { 
00119     
00120   for (vector<MatchImp*>::iterator i(imps_.begin()) ; i!= imps_.end() ; ++i )
00121     delete (*i);
00122   vector<Match*>::clear();
00123   imps_.clear();
00124   //    matchImps_.clear();
00125   queryBases_ = 0;
00126 }
00127 
00128 
00129 
00130 
00131 void MatchStoreImp::addMatch( 
00132                           SequenceNumber subjectNum,
00133                           SequenceOffset numBases,
00134                           SequenceOffset queryStart,
00135                           SequenceOffset queryEnd,
00136                           SequenceOffset subjectStart,
00137                           SequenceOffset subjectEnd,
00138                           bool isQueryForward,
00139                           bool isSubjectForward )
00140 {
00141   DEBUG_L2(             subjectNum << " " << 
00142                         numBases << " " << 
00143                         queryStart << " " << 
00144                         queryEnd << " " << 
00145                         subjectStart << " " << 
00146                         subjectEnd << " " << 
00147                         isQueryForward );
00148 
00149   imps_.push_back( new MatchImp ( this,
00150                                   subjectNum, 
00151                                   numBases, 
00152                                   queryStart, 
00153                                   queryEnd, 
00154                                   subjectStart, 
00155                                   subjectEnd, 
00156                                   isQueryForward,
00157                                   isSubjectForward) );
00158   //  push_back(static_cast<Match*>(imps_.back()));
00159   //  names_.insert(pair<SequenceNumber, string>(subjectNum, subjectName));
00160   //  cout << "inserting subject num" << subjectNum << " " << subjectName
00161   //     << endl;
00162   //  push_back(&matchImps_.back());
00163   //  MatchStore::iterator i(&back());
00164   //  DEBUG_L1( matchImps_.size() << " " << size() << " " 
00165   //    << i << " " << *i << " "  );
00166   //  DEBUG_L1( (*i)->getQueryNum() << " " << (*i)->getQueryName() << " " 
00167   //    << (*i)->getQueryStart() << " " << (*i)->getQueryEnd() << " " 
00168   //    << (*i)->getSubjectNum() << " " << (*i)->getSubjectName() << " " 
00169   //    << (*i)->getSubjectStart() << " " << (*i)->getSubjectEnd() ); 
00170   
00171 
00172 }
00173 
00174 void MatchStoreImp::setup( void )
00175 {
00176   vector<Match*>::clear();
00177   for ( vector<MatchImp*>::iterator i(imps_.begin());i!=imps_.end();++i)
00178     push_back( static_cast<Match*>(*i));
00179 }
00180 
00181 
00182 // MatchTask member function definitions
00183 
00184 
00185 void MatchTaskPrint::operator()( MatchStore& store )
00186 {
00187   if (store.empty()) return;
00188 
00189   vector<Match*>::const_iterator i(store.begin());
00190 
00191   outputStream_ << endl << "Matches For Query " 
00192                << (*i)->getQueryNum()
00193                << " (" << (*i)->getQuerySize()
00194                << " bases): " << (*i)->getQueryName()
00195                << "\n\n";    
00196 
00197   outputStream_ << setprecision(2) << setiosflags(ios::fixed);
00198 
00199   for( ;i!=store.end();++i)
00200   {
00201 
00202     outputStream_ << (((*i)->isQueryForward() )?"F":"R")
00203                   << (((*i)->isSubjectForward() )?"F":"R")
00204                   << " " << (*i)->getSubjectNum() 
00205                   << "\t: " << (*i)->getSubjectName()
00206                   << "\tScore: " << (*i)->getNumBases()
00207                   << "\tQ: " << (*i)->getQueryStart()  
00208                   << " to " << (*i)->getQueryEnd() 
00209                   << "\tS: " << (*i)->getSubjectStart()  
00210                   << " to " << (*i)->getSubjectEnd()  
00211                   << "\t" << 100.0*((*i)->getNumBases()) / 
00212       ((*i)->getQueryEnd()-(*i)->getQueryStart()+1)
00213                   << "\%\n"; 
00214 
00215   }  // ~for i
00216    
00217   outputStream_ << endl;
00218 
00219 
00220 }
00221 
00222 void MatchTaskPrintTabbed::operator()( MatchStore& store )
00223 {
00224   if (store.empty()) return;
00225 
00226   outputStream_ << setprecision(2) << setiosflags(ios::fixed);
00227   for (MatchStore::iterator i(store.begin()); i!=store.end() ; ++i )
00228   outputStream_ 
00229     << ((*i)->isQueryForward()?"F":"R")
00230     << ((*i)->isSubjectForward()?"F":"R") << "\t"
00231     << (*i)->getQueryName() << "\t"
00232     << (*i)->getQueryStart() << "\t" 
00233     << (*i)->getQueryEnd() << "\t"
00234     << (*i)->getSubjectName() << "\t"
00235     << (*i)->getSubjectStart() << "\t"
00236     << (*i)->getSubjectEnd() << "\t"
00237     << (*i)->getNumBases() << "\t" 
00238     << 100.0*((*i)->getNumBases()) / 
00239        ((*i)->getQueryEnd()-(*i)->getQueryStart()+1)
00240     << endl;
00241 
00242 }
00243 void MatchTaskPrintReverse::operator()( MatchStore& store )
00244 {
00245   if (store.empty()) return;
00246 
00247   vector<Match*>::const_iterator i(store.begin());
00248 
00249   outputStream_ << endl << "Matches For Query " 
00250                << (*i)->getQueryNum()
00251                << " (" << (*i)->getQuerySize()
00252                << " bases): " << (*i)->getQueryName()
00253                << "\n\n";    
00254 
00255   outputStream_ << setprecision(2) << setiosflags(ios::fixed);
00256 
00257   for( ;i!=store.end();++i)
00258   {
00259     outputStream_ << (((*i)->isQueryForward() )?"F":"R")
00260                   << (((*i)->isSubjectForward() )?"F":"R")
00261                   <<" " << (*i)->getSubjectNum() 
00262                   << "\t: " << (*i)->getSubjectName()
00263                   << "\tScore: " << (*i)->getNumBases()
00264                   << "\tQ: " 
00265                   << ( (*i)->isQueryForward() ? (*i)->getQueryStart()  
00266                      : (*i)->getQuerySize()-(*i)->getQueryEnd()+1 )     
00267                   << " to " 
00268                   << ( (*i)->isQueryForward() ? (*i)->getQueryEnd()  
00269                      : (*i)->getQuerySize()-(*i)->getQueryStart()+1 )     
00270                   << "\tS: " << (*i)->getSubjectStart()  
00271                   << " to " << (*i)->getSubjectEnd()  
00272                   << "\t" << 100.0*((*i)->getNumBases()) / 
00273                     ((*i)->getQueryEnd()-(*i)->getQueryStart()+1)
00274                   << "\%\n"; 
00275 
00276   }  // ~for i
00277    
00278   outputStream_ << endl;
00279 
00280 
00281 }
00282 
00283 void MatchTaskPrintTabbedReverse::operator()( MatchStore& store )
00284 {
00285   if (store.empty()) return;
00286   outputStream_ << setprecision(2) << setiosflags(ios::fixed);
00287   for (MatchStore::iterator i(store.begin()); i!=store.end() ; ++i )
00288   outputStream_ 
00289     << ((*i)->isQueryForward()?"F":"R") 
00290     << ((*i)->isSubjectForward()?"F":"R") << "\t"
00291     << (*i)->getQueryName() << "\t"
00292     << (   (*i)->isQueryForward() ? (*i)->getQueryStart()  
00293          : (*i)->getQuerySize()-(*i)->getQueryEnd()+1 ) << "\t"     
00294     << (   (*i)->isQueryForward() ? (*i)->getQueryEnd()  
00295          : (*i)->getQuerySize()-(*i)->getQueryStart()+1 ) << "\t"     
00296     << (*i)->getSubjectName() << "\t"
00297     << (*i)->getSubjectStart() << "\t"
00298     << (*i)->getSubjectEnd() << "\t"
00299     << (*i)->getNumBases() << "\t" 
00300     << 100.0*((*i)->getNumBases()) / 
00301        ((*i)->getQueryEnd()-(*i)->getQueryStart()+1)
00302     << endl;
00303 
00304 }
00305 
00306 #ifdef MOVED_TO_ALIGNERDOTCPP
00307 // Definitions for MatchTaskAlign
00308 
00309 
00310 PathType fromW = (PathType)1;
00311 PathType fromN = (PathType)2;
00312 PathType fromSW = (PathType)4;
00313 PathType fromMismatch = (PathType)8;
00314 
00315 // Next two definitions mean that the absolute value of each
00316 // score in a score table need to be either less than 1024 
00317 // or a power or two
00318 ScoreType veryBadScoreIndeed = -67108864;
00319 //ScoreType matchMask = 1024; // set this bit if characters match
00320 
00321 ScoreType matchScore = 1; // | matchMask;
00322 ScoreType mismatchScore= -2;
00323 ScoreType gapStartScore = -4;
00324 ScoreType gapContScore = -3;
00325 ScoreType nScore = 0; // same as crossmatch defaults
00326 //ScoreType veryBadScoreIndeed = -100000000;
00327 
00328 
00329 
00330 
00331 const AlignBreakType alignBreakNull = 0;
00332 const AlignBreakType alignBreakMismatch = 1;
00333 const AlignBreakType alignBreakGapQuery = 2;
00334 const AlignBreakType alignBreakGapSubject = 3;
00335 
00336 MatchTaskAlign::MatchTaskAlign
00337 ( SourceReader& querySource, 
00338   SourceReader& subjectSource,
00339   int numCols,
00340   ostream& outputStream, 
00341   bool reverseQueryCoords,
00342   bool doAlignment ):
00343   querySource_(querySource), 
00344   subjectSource_(subjectSource),
00345   numCols_(numCols),
00346   reverseQueryCoords_(reverseQueryCoords),
00347   doAlignment_(doAlignment),
00348   outputStream_(outputStream)
00349 {
00350   pBufSeq1= new char [numCols+1];
00351   pBufSeq2= new char [numCols+1];
00352   pBufAlign= new char [numCols+1];
00353   pBufSeq1[numCols]='\0';
00354   pBufSeq2[numCols]='\0';
00355   pBufAlign[numCols]='\0';
00356 } // ~MatchTaskAlign::MatchTaskAlign
00357 
00358 
00359 MatchTaskAlign::~MatchTaskAlign()
00360 {
00361   delete [] pBufSeq1;
00362   delete [] pBufSeq2;
00363   delete [] pBufAlign;
00364 } // ~MatchTaskAlign::~MatchTaskAlign
00365 
00366 
00367 void MatchTaskAlign::operator()(MatchStore& store)
00368 {
00369   //  cout << "MTA::()" << endl;
00370 
00371   if (store.empty()) return;
00372 
00373   SequenceOffset effectiveQueryStart, effectiveQueryEnd;
00374 
00375 
00376   vector<Match*>::const_iterator i(store.begin());
00377 
00378   //  outputStream_ << endl << "Matches For Query " 
00379   //       << (*i)->getQueryNum()
00380   //       << " (" << (*i)->getQuerySize()
00381   //       << " bases): " << (*i)->getQueryName()
00382   //       << "\n\n";    
00383 
00384   outputStream_ << setprecision(2) << setiosflags(ios::fixed);
00385 
00386   //  vector<char> queryData, subjectData;
00387 
00388   for( ;i!=store.end();++i)
00389   {
00390     
00391     if( reverseQueryCoords_ && (!(*i)->isQueryForward()) )
00392     {
00393       effectiveQueryStart = (*i)->getQuerySize()-(*i)->getQueryEnd()+1;
00394       effectiveQueryEnd = (*i)->getQuerySize()-(*i)->getQueryStart()+1;
00395     } // ~if
00396     else
00397     {
00398       effectiveQueryStart = (*i)->getQueryStart();
00399       effectiveQueryEnd = (*i)->getQueryEnd();
00400     } // ~else
00401 
00402     outputStream_ 
00403         << ((*i)->isQueryForward()?"F":"R") 
00404         << ((*i)->isSubjectForward()?"F":"R") << "\t"
00405         << (*i)->getQueryName() << "\t"
00406         << effectiveQueryStart << "\t"     
00407         << effectiveQueryEnd << "\t"     
00408         << (*i)->getSubjectName() << "\t"
00409         << (*i)->getSubjectStart() << "\t"
00410         << (*i)->getSubjectEnd() << "\t"
00411         << (*i)->getNumBases() << "\t" 
00412         << 100.0*((*i)->getNumBases()) / 
00413       ((*i)->getQueryEnd()-(*i)->getQueryStart()+1)
00414         << endl;
00415 
00416     // suppress output of graphical alignments if required
00417     if( !doAlignment_ ) continue;
00418 
00419 
00420     char* pSubject;
00421     char* pQuery;
00422   
00423     //    cout << "extracting subject" << endl;
00424     subjectSource_.extractSource( &pSubject, //subjectData, 
00425                                   (*i)->getSubjectNum(),
00426                                   (*i)->getSubjectStart(),
00427                                   (*i)->getSubjectEnd() );
00428 
00429     if (pSubject[0]=='\0') continue; // did not get subject data from server
00430 
00431     //    cout << "extracting query" << endl;
00432     if ((*i)->isQueryForward())
00433     {
00434       querySource_.extractSource
00435         ( &pQuery, //queryData, 
00436           (*i)->getQueryNum(),
00437           effectiveQueryStart,
00438           effectiveQueryEnd );
00439     } // ~if
00440     else
00441     {
00442       querySource_.extractSourceReverse
00443       ( &pQuery, //queryData, 
00444         (*i)->getQueryNum(),
00445         effectiveQueryStart,  
00446         effectiveQueryEnd );
00447     } // ~else
00448     //   cout << "all extractions done" << endl;
00449 
00450     align( pQuery,//(const char*) &queryData[0], 
00451            effectiveQueryStart,  
00452            effectiveQueryEnd,
00453            pSubject,//(const char*) &subjectData[0],
00454            (*i)->getSubjectStart(),
00455            (*i)->getSubjectEnd() );
00456 
00457     //    queryData.clear();
00458     //    subjectData.clear();
00459   } // ~for i
00460 
00461 
00462 } // ~MatchTaskAlign::operator()(MatchStore& store)
00463 
00464 
00465 void MatchTaskAlign::align
00466 ( const char* pQuery, int queryStart, int queryEnd,
00467   const char* pSubject, int subjectStart, int subjectEnd )
00468 {
00469 
00470   deque<AlignInfo> alignment;
00471   createAlignment
00472     ( alignment, 
00473       pQuery, queryStart, queryEnd, 
00474       pSubject, subjectStart, subjectEnd );
00475   formatAlignment
00476     ( alignment, 
00477       pQuery, queryStart, queryEnd, 
00478       pSubject, subjectStart, subjectEnd );
00479 
00480 } // ~void MatchTaskAlign::align
00481 
00482 
00483 
00484 
00485 void MatchTaskAlign::createAlignment
00486 ( 
00487  deque<AlignInfo>& alignment,
00488  const char* pQuery, int queryStart, int queryEnd,
00489  const char* pSubject, int subjectStart, int subjectEnd )
00490 {
00491 
00492   const char* p1;
00493   const char* p2;
00494 
00495   int querySize(queryEnd-queryStart+1);
00496   int subjectSize(subjectEnd-subjectStart+1);
00497 
00498   int bandWidth;
00499   int bandLength;
00500 
00501   AlignBreakType gapInLargest, gapInSmallest;
00502 
00503   // ensure *p1 <= *p2
00504   if (querySize<=subjectSize)
00505   {
00506     p1=pQuery; p2=pSubject;
00507     bandWidth  = subjectSize-querySize+1;
00508     bandLength = querySize;
00509     gapInSmallest=alignBreakGapQuery;
00510     gapInLargest=alignBreakGapSubject;
00511   } // ~if
00512   else
00513   {
00514     p1=pSubject; p2=pQuery;
00515     bandWidth  = querySize-subjectSize+1;
00516     bandLength = subjectSize;
00517     gapInSmallest=alignBreakGapSubject;
00518     gapInLargest=alignBreakGapQuery;
00519   } // ~else
00520 
00521   //  const char* p = pQuery;
00522   //  for (int i(queryStart); i<= queryEnd ; i++ ) cout << *p++;
00523   //  cout << endl;
00524   //  p = pSubject;
00525   //  for (int i(subjectStart); i<= subjectEnd ; i++ ) cout << *p++;
00526   // cout << endl;
00527 
00528   //  fillScoreTable(table);  // TBD do this in constructor
00529 
00530   vector<vector<PathType> > path;
00531 
00532   // set up data structure to store path
00533   path.resize( bandWidth+1 );
00534 
00535   // zeroth vector is kept null, to make indexing easier
00536   for (int i(1) ; i<=bandWidth; i++ ) path[i].resize( bandLength );
00537 
00538   // set up storage for current and previous column of scores
00539   vector<ScoreType> scoresCurrent, scoresLast;
00540   scoresCurrent.resize(bandWidth+2); // %%
00541   scoresLast.resize(bandWidth+2); // %%%
00542 
00543   // These stop the traceback going 'off track'
00544   scoresCurrent.front()=veryBadScoreIndeed;
00545   scoresCurrent.back()=veryBadScoreIndeed;
00546 
00547   scoresLast.front()=veryBadScoreIndeed;
00548   scoresLast.back()=veryBadScoreIndeed;
00549 
00550 
00551   vector<ScoreType>* pLast(&scoresLast);
00552   vector<ScoreType>* pCurrent(&scoresCurrent);
00553   vector<ScoreType>* pTemp;
00554 
00555   // set up storage to decide maximum score
00556 
00557   vector<pair<ScoreType, PathType> > scoresPossible;
00558 
00559   //  scoresPossible.resize(4); 
00560   scoresPossible.resize(3); 
00561 
00562   int i,j;
00563 
00564   uchar a[2];
00565   unsigned short* b = (unsigned short*) &a[0];
00566 
00567   ScoreType bestScore, thisScore;
00568   bool isMatch;
00569 
00570   for (i=0; i<bandLength; i++)
00571   {
00572     for (j=1 ; j <= bandWidth ; j++ ) // %%
00573     {
00574       //            cout << "j: " << j << " i: " << i << endl;
00575       path[j][i]=0; // TBD probably not needed
00576 
00577       // fill in direction indicators
00578       //    scoresPossible[0].second=0;
00579 
00580       scoresPossible[0].second=fromW;
00581       scoresPossible[1].second=fromN;
00582       scoresPossible[2].second=fromSW;
00583 
00584       // fill in corresponding scores
00585 
00586       //      scoresPossible[0].first=0;
00587 
00588       //      cout << p2[i+j-1] << p1[i] << endl;
00589 
00590       a[0]=p2[i+j-1];
00591       a[1]='-';
00592       scoresPossible[1].first=(*pCurrent)[j-1]+(*pTable_)[*b];
00593 
00594       a[1]=p1[i];
00595       //      isMatch=(*pTable_)[*b]&matchMask;
00596       isMatch=(tolower(a[0])==tolower(a[1]));
00597 
00598       //      scoresPossible[0].first=(*pLast)[j]+(*pTable_)[*b]-(isMatch*matchMask);
00599       scoresPossible[0].first=(*pLast)[j]+(*pTable_)[*b];
00600       //  cout << a[0] << a[1] << " " << (*pTable_)[*b]  << endl;
00601       // flag mismatches
00602       //      path[j][i] |= ((*pTable_)[*b]!=matchScore)*fromMismatch;
00603       path[j][i] |= (!isMatch)*fromMismatch;
00604 
00605       a[0]='-';
00606 
00607       scoresPossible[2].first=(*pLast)[j+1]+(*pTable_)[*b];
00608 
00609       //        cout << "scores 0/W/N/SW: " << scoresPossible[0].first << " " 
00610       //    << scoresPossible[1].first << " " 
00611       //    << scoresPossible[2].first << endl;
00612 
00613       sort(scoresPossible.begin(),scoresPossible.end());
00614 
00615       bestScore = scoresPossible.back().first;
00616 
00617       //      cout << "best score: " << bestScore << endl;
00618 
00619       (*pCurrent)[j]=bestScore;
00620 
00621 
00622       path[j][i] |=     
00623         scoresPossible[2].second; // * (bestScore!=0);
00624 
00625       path[j][i] |=  
00626         scoresPossible[1].second 
00627         * (scoresPossible[1].first==bestScore);
00628         //      * (bestScore!=0);
00629 
00630       path[j][i] |= 
00631         scoresPossible[0].second 
00632         * (scoresPossible[0].first==bestScore);
00633         //      * (bestScore!=0);
00634 
00635       //      path[j][i] |= 
00636       //        scoresPossible[0].second 
00637       //        * (scoresPossible[0].first==bestScore)
00638       //        * (bestScore!=0);
00639 
00640       //      cout << "path entry: " << (int)path[j][i] << endl;
00641 
00642     } // ~for j
00643     pTemp=pLast;
00644     pLast=pCurrent;
00645     pCurrent=pTemp;
00646 
00647   } // ~for i
00648 
00649   //    cout << bandWidth << " " << bandLength << endl;
00650 
00651      for (j=1 ; j <= bandWidth ; j++ )
00652      {
00653        //    cout << "j: ";
00654      for (i=0; i<bandLength; i++)
00655      {
00656        //       cout << (int)path[j][i] << " - ";
00657      } // ~for i
00658      //   cout << endl;
00659     } // ~for j
00660 
00661 
00662      cout << "alignment score: " << (*pLast)[bandWidth] << endl; 
00663 
00664   // et finalement, le traceback
00665 
00666   i= bandLength-1; j=bandWidth;
00667 
00668   alignment.push_front();
00669   alignment.front().breakType=alignBreakNull;
00670 
00671   while ((i>=0)&&(j>=1))
00672   {
00673     //      cout << j << " " << i << "... " << endl;
00674     if ((path[j][i]&fromW)&&(i>=0))
00675     {
00676       if (path[j][i]&fromMismatch)
00677       {
00678         //      cout << " mismatch" << endl;
00679         alignment.push_front();
00680         alignment.front().breakType=alignBreakMismatch;
00681       } // ~if
00682       else 
00683       {
00684         //      cout << " match" << endl;
00685         alignment.front().numMatches++;
00686       } // ~else
00687       i--;
00688     } // ~if
00689     else if ((path[j][i]&fromN)&&(j>=1))
00690     {
00691       // cout << " gap in smallest" << endl;
00692       j--;
00693       alignment.push_front();
00694       alignment.front().breakType=gapInSmallest; 
00695     } // ~else if
00696     else if ((path[j][i]&fromSW)&&(j<bandWidth)&&(i>0))
00697     {
00698       //    cout << " gap in largest" << endl;
00699       i--; j++;
00700       alignment.push_front();
00701       alignment.front().breakType=gapInLargest;
00702     } // ~else if
00703     //    else assert(1==0) //if (i>0)
00704       //    {
00705       //   cout << " mismatch" << endl;
00706       //  i--;
00707       //  alignment.push_front();
00708       // alignment.front().breakType=alignBreakMismatch;
00709       // }
00710     else assert(1==0); // stuck! - shouldn't happen!
00711 
00712   } // ~while
00713   //  cout << "final i,j=" << i << ", " << j << endl;
00714   for ( ; j > 1 ; j-- )
00715   {
00716       alignment.push_front();
00717       alignment.front().breakType=gapInSmallest; 
00718   }
00719 } // ~void MatchTaskAlign::createAlignment
00720 
00721 
00722 void MatchTaskAlign::formatAlignment
00723 ( 
00724  deque<AlignInfo>& alignment,
00725  const char* pQuery, int queryStart, int queryEnd,
00726  const char* pSubject, int subjectStart, int subjectEnd )
00727 {
00728   //  cout << "format alignment" << endl;
00729 
00730   int queryNext(queryStart);
00731   int subjectNext(subjectStart);
00732 
00733   char* pCursorSeq1(&pBufSeq1[numCols_]); // triggers reset to start of line
00734   char* pCursorAlign(NULL);
00735   char* pCursorSeq2(NULL);
00736 
00737   *pBufSeq1='\0'; // ensures 3 null strings are printed at first line break 
00738   *pBufSeq2='\0';
00739   *pBufAlign='\0';
00740 
00741   deque<AlignInfo>::iterator i(alignment.begin());
00742 
00743   while( i!=alignment.end() ) 
00744   {
00745     //    cout << queryNext << " " << subjectNext << endl;
00746     if (pCursorSeq1==&pBufSeq1[numCols_])
00747     {
00748       //      cout << "about to write line" << endl;
00749       if (*pBufSeq1!='\0')
00750         outputAlignmentLine();
00751         //      cout << pBufSeq1 << endl << pBufAlign << endl << pBufSeq2 << endl<< endl;
00752       sprintf( pBufSeq1, "Q:%9.9d ", queryNext );
00753       sprintf( pBufAlign, "            " ); // should be 12 spaces
00754       sprintf( pBufSeq2, "S:%9.9d ", subjectNext );
00755       pCursorSeq1 = &pBufSeq1[12];
00756       pCursorSeq2 = &pBufSeq2[12];
00757       pCursorAlign = &pBufAlign[12];
00758 
00759     } // ~if
00760     
00761     if (i->numMatches>0)
00762     { // TBD do these en masse using memcpy 
00763     *(pCursorSeq1++)=*(pQuery++);
00764     *(pCursorAlign++)='|';
00765     *(pCursorSeq2++)=*(pSubject++);
00766     queryNext++;
00767     subjectNext++;
00768     i->numMatches--;
00769     }
00770     else
00771     {
00772       if (i->breakType==alignBreakMismatch)
00773       {
00774         *(pCursorSeq1++)=*(pQuery++);
00775         *(pCursorAlign++)='x';
00776         *(pCursorSeq2++)=*(pSubject++);
00777         queryNext++;
00778         subjectNext++;
00779       } // ~if
00780       else if (i->breakType==alignBreakGapQuery)
00781       {
00782         *(pCursorSeq1++)='-';
00783         *(pCursorAlign++)=' ';
00784         *(pCursorSeq2++)=*(pSubject++);
00785         //      queryNext++;
00786         subjectNext++;
00787       } // ~else if
00788       else if (i->breakType==alignBreakGapSubject)
00789       {
00790         *(pCursorSeq1++)=*(pQuery++);
00791         *(pCursorAlign++)=' ';
00792         *(pCursorSeq2++)='-';
00793         queryNext++;
00794         //      subjectNext++;
00795       } // ~else if
00796       i++;
00797     } // ~else
00798 
00799   } // ~while
00800 
00801   // check for unfinished lines
00802   //  if (pCursorSeq1!=&pBufSeq1[numCols_-1])
00803   //  {
00804   //  *pCursorSeq1++='\n';
00805   //  *pCursorSeq2++='\n';
00806   //  *pCursorAlign++='\n';
00807   *pCursorSeq1='\0';
00808   *pCursorSeq2='\0';
00809   *pCursorAlign='\0';
00810   //  }
00811   //  if (pCursorSeq1!=&pBufSeq1[numCols_])
00812   //  {
00813   //  }
00814   //  cout << pBufSeq1 << endl << pBufAlign << endl << pBufSeq2 << endl << endl;
00815   outputAlignmentLine();
00816 
00817 } // ~void MatchTaskAlign::formatAlignment
00818 
00819 void MatchTaskAlign::outputAlignmentLine( void )
00820 {
00821   outputStream_ << pBufSeq1 << endl << pBufAlign << endl 
00822                 << pBufSeq2 << endl << endl;
00823 } // ~void MatchTaskAlign::outputAlignmentLine( void )
00824 
00825 
00826 
00827 // Definitions for MatchTaskAlignDNA
00828 
00829 bool MatchTaskAlignDNA::isTableFilled_ = false;
00830 ScoreTable MatchTaskAlignDNA::table_;
00831 
00832 
00833 MatchTaskAlignDNA::MatchTaskAlignDNA
00834 ( SourceReader& querySource, 
00835   SourceReader& subjectSource,
00836   int numCols=80,
00837   ostream& outputStream=cout,
00838   bool reverseQueryCoords=true,
00839   bool doAlignment=true ) : 
00840   MatchTaskAlign( querySource, 
00841                   subjectSource, 
00842                   numCols, 
00843                   outputStream,
00844                   reverseQueryCoords,
00845                   doAlignment )
00846 {
00847   pTable_ = &table_;
00848   if (!isTableFilled_)
00849   {
00850     fillScoreTable();
00851     isTableFilled_=true;
00852   }
00853 
00854 } // ~MatchTaskAlignDNA::MatchTaskAlignDNA
00855 
00856 
00857 void MatchTaskAlignDNA::fillScoreTable( void )
00858 {
00859 
00860   uchar a[2];
00861   unsigned short* b = (unsigned short*) &a[0];
00862   //  a[0]=0; a[1]=0;
00863 
00864   // TBD PAM/BLOSUM for proteins
00865 
00866   const char base[] = {"agct"};
00867 
00868   ScoreType thisScore;
00869   for (int i(0); i < 4; i++ )
00870   {
00871     for (int j(0); j < 4; j++ )
00872     {
00873       thisScore = ((i==j) ? matchScore: mismatchScore);
00874       a[0]=base[i]; a[1] = base[j]; table_[ *b ] = thisScore;
00875       //      a[0]=base[i]; a[1] = toupper(base[j]); table_[ *b ] = thisScore;
00876       a[1] = toupper(base[j]); table_[ *b ] = thisScore;
00877       a[0]=toupper(base[i]); a[1] = base[j]; table_[ *b ] = thisScore;
00878       //      a[0]=toupper(base[i]); a[1] = toupper(base[j]); table_[ *b ] = thisScore;
00879       a[1] = toupper(base[j]); table_[ *b ] = thisScore;
00880     } // ~for j
00881 
00882     a[0]=base[i];  
00883     a[1] = 'n'; table_[ *b ] = nScore;
00884     a[1] = 'N'; table_[ *b ] = nScore;
00885     a[1] = '-'; table_[ *b ] = gapStartScore;
00886 
00887     a[0]=toupper(base[i]);  
00888     a[1] = 'n'; table_[ *b ] = nScore;
00889     a[1] = 'N'; table_[ *b ] = nScore;
00890     a[1] = '-'; table_[ *b ] = gapStartScore;
00891 
00892     a[1]=base[i];  
00893     a[0] = 'n'; table_[ *b ] = nScore;
00894     a[0] = 'N'; table_[ *b ] = nScore;
00895     a[0] = '-'; table_[ *b ] = gapStartScore;
00896 
00897     a[1]=toupper(base[i]);  
00898     a[0] = 'n'; table_[ *b ] = nScore;
00899     a[0] = 'N'; table_[ *b ] = nScore;
00900     a[0] = '-'; table_[ *b ] = gapStartScore;
00901     
00902 
00903 
00904   } // ~for i;
00905 
00906     a[0]='n'; a[1] = 'n'; table_[ *b ] = nScore;
00907     a[0]='n'; a[1] = 'N'; table_[ *b ] = nScore;
00908     a[0]='N'; a[1] = 'n'; table_[ *b ] = nScore;
00909     a[0]='N'; a[1] = 'N'; table_[ *b ] = nScore;
00910 
00911     a[0]='-'; a[1] = 'n'; table_[ *b ] = gapStartScore;
00912     a[0]='-'; a[1] = 'N'; table_[ *b ] = gapStartScore;
00913     a[0]='n'; a[1] = '-'; table_[ *b ] = gapStartScore;
00914     a[0]='N'; a[1] = '-'; table_[ *b ] = gapStartScore;
00915 
00916     a[0]='-'; a[1] = '-'; table_[ *b ] = gapContScore;
00917 
00918 } // ~void MatchTaskAlignDNA::fillScoreTable( void )
00919 
00920 // Definitions for MatchTaskAlignProtein
00921  
00922 bool MatchTaskAlignProtein::isTableFilled_ = false;
00923 ScoreTable MatchTaskAlignProtein::table_;
00924 
00925 MatchTaskAlignProtein::MatchTaskAlignProtein
00926 ( SourceReader& querySource, 
00927   SourceReader& subjectSource,
00928   int numCols=80,
00929   ostream& outputStream=cout ) : 
00930   MatchTaskAlign( querySource, subjectSource, numCols, outputStream, false, true )
00931 {
00932   pTable_ = &table_;
00933   if (!isTableFilled_)
00934   {
00935     fillScoreTable();
00936     isTableFilled_=true;
00937   }
00938 
00939 } // ~MatchTaskAlignProtein::MatchTaskAlignProtein
00940 
00941 ScoreType blosumScoreStopCodon= -4; // as used by NCBI - see URL in .h file
00942 ScoreType blosumScoreGapStart= -10;
00943 ScoreType blosumScoreGapExtend=-1;
00944 
00945 
00946 void MatchTaskAlignProtein::fillScoreTable( void )
00947 {
00948 
00949   uchar a[2];
00950   unsigned short* b = (unsigned short*) &a[0];
00951 
00952   const ScoreType* pScore = blosumScores;
00953 
00954 
00955   for (int i(0); i < numBlosumResidues; i++ )
00956   {
00957     for (int j(0); j < i; j++ )
00958     {
00959       //      cout << i << " " << j << endl; 
00960 
00961       // a[0] = uc, a[1] = uc
00962       a[0]=blosumResidues[i]; a[1]=blosumResidues[j]; table_[ *b ] = *pScore;
00963       //      cout << a[0] << a[1] << " " table_[ *b ] << endl;
00964       // a[0] = uc, a[1] = lc
00965       a[1]=tolower(a[1]); table_[ *b ] = *pScore;
00966       // a[0] = lc, a[1] = lc
00967       a[0]=tolower(a[0]); table_[ *b ] = *pScore;
00968       // a[0] = lc, a[1] = uc
00969       a[1]=toupper(a[1]); table_[ *b ] = *pScore;
00970 
00971       a[0]=blosumResidues[j]; a[1]=blosumResidues[i]; table_[ *b ] = *pScore;
00972       a[1]=tolower(a[1]); table_[ *b ] = *pScore;
00973       a[0]=tolower(a[0]); table_[ *b ] = *pScore;
00974       a[1]=toupper(a[1]); table_[ *b ] = *pScore;
00975      
00976       ++pScore;
00977     } // ~for j
00978 
00979     a[0] = blosumResidues[i];
00980     a[1] = blosumResidues[i]; table_[ *b ] = (*pScore); // a0=uc, a1=uc
00981     a[1] = tolower(a[1]); table_[ *b ] = (*pScore);  // a0=uc, a1=lc
00982     a[1] = '*'; table_[ *b ] = blosumScoreStopCodon; // a0=uc, a1=*
00983     a[1] = '-'; table_[ *b ] = blosumScoreGapStart;  // a0=uc, a1=-
00984 
00985     a[0] = tolower(a[0]); 
00986     a[1] = blosumResidues[i]; table_[ *b ] = (*pScore); // a0=lc, a1=uc
00987     a[1] = tolower(a[1]); table_[ *b ] = (*pScore);  // a0=lc, a1=lc
00988     a[1] = '*'; table_[ *b ] = blosumScoreStopCodon; // a0=lc, a1=*
00989     a[1] = '-'; table_[ *b ] = blosumScoreGapStart;  // a0=lc, a1=-
00990 
00991     a[1] = blosumResidues[i];
00992     a[0] = '*'; table_[ *b ] = blosumScoreStopCodon; // a0=*, a1=uc
00993     a[0] = '-'; table_[ *b ] = blosumScoreGapStart;  // a0=-, a1=uc
00994 
00995     a[1] = tolower(a[1]);
00996     a[0] = '*'; table_[ *b ] = blosumScoreStopCodon; // a0=*, a1=lc
00997     a[0] = '-'; table_[ *b ] = blosumScoreGapStart;  // a0=-, a1=lc
00998 
00999     ++pScore;
01000   } // ~for i
01001 
01002   a[0] = '*'; a[1] = '*'; table_[ *b ] = 1;
01003   a[0] = '*'; a[1] = '-'; table_[ *b ] = blosumScoreGapStart;
01004   a[0] = '-'; a[1] = '*'; table_[ *b ] = blosumScoreGapStart;
01005   a[0] = '-'; a[1] = '-'; table_[ *b ] = blosumScoreGapExtend;
01006 
01007 } // ~void MatchTaskAlignProtein::fillScoreTable( void )
01008 #endif
01009 
01010 // ### Function Definitions ###
01011 
01012 // Name:
01013 // Arguments:
01014 // TYPE  NAME  IN/OUT COMMENT
01015 // Returns: TYPE COMMENT
01016 
01017 // End of file MatchStore.cpp
01018 

Generated on Fri Dec 21 13:12:16 2007 for ssaha by  doxygen 1.5.2