00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033 #include "MatchAligner.h"
00034 #include "SequenceReader.h"
00035 #include "SequenceEncoder.h"
00036 #include <iomanip>
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049 MatchTaskAlign::MatchTaskAlign
00050 ( SourceReader& querySource,
00051 SourceReader& subjectSource,
00052 MatchAligner* pAlign,
00053 bool reverseQueryCoords,
00054 bool doAlignment,
00055 ostream& outputStream ):
00056 querySource_(querySource),
00057 subjectSource_(subjectSource),
00058 pAlign_(pAlign),
00059
00060 reverseQueryCoords_(reverseQueryCoords),
00061 doAlignment_(doAlignment),
00062 outputStream_(outputStream)
00063 {
00064 }
00065
00066
00067 MatchTaskAlign::~MatchTaskAlign()
00068 {
00069 delete pAlign_;
00070 }
00071
00072
00073 void MatchTaskAlign::operator()(MatchStore& store)
00074 {
00075
00076
00077 if (store.empty()) return;
00078
00079 SequenceOffset effectiveQueryStart, effectiveQueryEnd;
00080
00081
00082 vector<Match*>::const_iterator i(store.begin());
00083
00084
00085
00086
00087
00088
00089
00090 outputStream_ << setprecision(2) << setiosflags(ios::fixed);
00091
00092
00093
00094 for( ;i!=store.end();++i)
00095 {
00096
00097 if( reverseQueryCoords_ && (!(*i)->isQueryForward()) )
00098 {
00099 effectiveQueryStart = (*i)->getQuerySize()-(*i)->getQueryEnd()+1;
00100 effectiveQueryEnd = (*i)->getQuerySize()-(*i)->getQueryStart()+1;
00101 }
00102 else
00103 {
00104 effectiveQueryStart = (*i)->getQueryStart();
00105 effectiveQueryEnd = (*i)->getQueryEnd();
00106 }
00107
00108 outputStream_
00109 << ((*i)->isQueryForward()?"F":"R")
00110 << ((*i)->isSubjectForward()?"F":"R") << "\t"
00111 << (*i)->getQueryName() << "\t"
00112 << effectiveQueryStart << "\t"
00113 << effectiveQueryEnd << "\t"
00114 << (*i)->getSubjectName() << "\t"
00115 << (*i)->getSubjectStart() << "\t"
00116 << (*i)->getSubjectEnd() << "\t"
00117 << (*i)->getNumBases() << "\t"
00118 << 100.0*((*i)->getNumBases()) /
00119 ((*i)->getQueryEnd()-(*i)->getQueryStart()+1)
00120 << endl;
00121
00122
00123 if( !doAlignment_ ) continue;
00124
00125
00126 char* pSubject;
00127 char* pQuery;
00128
00129
00130
00131 if ((*i)->isSubjectForward())
00132 {
00133
00134 subjectSource_.extractSource( &pSubject,
00135 (*i)->getSubjectNum(),
00136 (*i)->getSubjectStart(),
00137 (*i)->getSubjectEnd() );
00138 }
00139 else
00140 {
00141
00142 subjectSource_.extractSourceReverse( &pSubject,
00143 (*i)->getSubjectNum(),
00144 (*i)->getSubjectStart(),
00145 (*i)->getSubjectEnd() );
00146 }
00147
00148 if (pSubject[0]=='\0') continue;
00149
00150 if ((*i)->isQueryForward())
00151 {
00152
00153 querySource_.extractSource
00154 ( &pQuery,
00155 (*i)->getQueryNum(),
00156 effectiveQueryStart,
00157 effectiveQueryEnd );
00158 }
00159 else
00160 {
00161
00162 querySource_.extractSourceReverse
00163 ( &pQuery,
00164 (*i)->getQueryNum(),
00165 effectiveQueryStart,
00166 effectiveQueryEnd );
00167 }
00168
00169
00170 (*pAlign_)( pQuery,
00171 effectiveQueryStart,
00172 effectiveQueryEnd,
00173 pSubject,
00174 (*i)->getSubjectStart(),
00175 (*i)->getSubjectEnd() );
00176
00177
00178
00179 }
00180
00181
00182 }
00183
00184
00185
00186
00187
00188 StaticScoreTable MatchAligner::tableDNA_(&makeScoreTableDNA);
00189 StaticScoreTable MatchAligner::tableBlosum62_(&makeScoreTableBlosum62);
00190
00191
00192 MatchAligner::MatchAligner
00193 ( int numCols,
00194 int bandExtension,
00195 ScoreTable* pTable,
00196 ostream& outputStream ) :
00197 numCols_(numCols),
00198 bandExtension_(bandExtension),
00199 pTable_(pTable),
00200 outputStream_(outputStream)
00201 {
00202 pBufSeq1_= new char [numCols+1];
00203 pBufSeq2_= new char [numCols+1];
00204 pBufAlign_= new char [numCols+1];
00205 pBufSeq1_[numCols]='\0';
00206 pBufSeq2_[numCols]='\0';
00207 pBufAlign_[numCols]='\0';
00208 }
00209
00210
00211 MatchAligner::~MatchAligner()
00212 {
00213 delete [] pBufSeq1_;
00214 delete [] pBufSeq2_;
00215 delete [] pBufAlign_;
00216 }
00217
00218
00219
00220 void MatchAligner::operator()
00221 ( const char* pQuery, int queryStart, int queryEnd,
00222 const char* pSubject, int subjectStart, int subjectEnd )
00223 {
00224
00225 Alignment alignment;
00226 createAlignment
00227 ( alignment,
00228 pQuery, queryStart, queryEnd,
00229 pSubject, subjectStart, subjectEnd );
00230
00231 formatAlignment
00232 ( alignment,
00233 pQuery, queryStart, queryEnd,
00234 pSubject, subjectStart, subjectEnd );
00235
00236 }
00237
00238
00239 void MatchAligner::createAlignment
00240 (
00241 Alignment& alignment,
00242 const char* pQuery, int queryStart, int queryEnd,
00243 const char* pSubject, int subjectStart, int subjectEnd )
00244 {
00245
00246 const char* p1;
00247 const char* p2;
00248
00249 int querySize(queryEnd-queryStart+1);
00250 int subjectSize(subjectEnd-subjectStart+1);
00251
00252
00253
00254 int p1Size, p2Size;
00255
00256
00257 AlignBreakType p1Gap, p2Gap;
00258
00259
00260 if (querySize<=subjectSize)
00261 {
00262 p1=pQuery; p2=pSubject;
00263 p1Size=querySize; p2Size = subjectSize;
00264
00265
00266 p1Gap=alignBreakGapQuery;
00267 p2Gap=alignBreakGapSubject;
00268 }
00269 else
00270 {
00271 p1=pSubject; p2=pQuery;
00272 p1Size= subjectSize; p2Size = querySize;
00273
00274
00275 p1Gap=alignBreakGapSubject;
00276 p2Gap=alignBreakGapQuery;
00277 }
00278
00279 PathMatrix<PathType> path;
00280 ColumnFillerBasic doMatrix( p1, p1Size, p2, p2Size, bandExtension_,
00281 *pTable_ );
00282
00283 alignment.totalScore_ = path.fillIn(doMatrix);
00284
00285 TraceBackerBasic doCell(p1Gap, p2Gap, alignment);
00286 path.traceBack(doCell);
00287
00288
00289
00290
00291 }
00292
00293
00294 void MatchAligner::formatAlignment
00295 (
00296 Alignment& alignment,
00297 const char* pQuery, int queryStart, int queryEnd,
00298 const char* pSubject, int subjectStart, int subjectEnd )
00299 {
00300
00301 outputStream_ << "Alignment score: " << alignment.totalScore_ << endl;
00302
00303 int queryNext(queryStart);
00304 int subjectNext(subjectStart);
00305
00306 pCursorSeq1_ = &pBufSeq1_[numCols_];
00307
00308 *pBufSeq1_='\0';
00309
00310
00311
00312 Alignment::iterator i(alignment.begin());
00313
00314 while( i!=alignment.end() )
00315 {
00316
00317
00318 if (i->numMatches>0)
00319 {
00320
00321
00322 outputAlignmentColumn
00323 ( *pQuery,
00324 ((tolower(*pQuery)==tolower(*pSubject))?'|':'x'),
00325 *pSubject,
00326 queryNext++,
00327 subjectNext++ );
00328 pQuery++;
00329 pSubject++;
00330
00331
00332
00333 i->numMatches--;
00334 }
00335 else
00336 {
00337 if (i->breakType==alignBreakMismatch)
00338 {
00339 outputAlignmentColumn
00340 (*(pQuery++), 'x', *(pSubject++), queryNext++, subjectNext++ );
00341
00342
00343 }
00344 else if (i->breakType==alignBreakGapQuery)
00345 {
00346 outputAlignmentColumn
00347 ( '-', ' ', *(pSubject++), queryNext, subjectNext++ );
00348
00349 }
00350 else if (i->breakType==alignBreakGapSubject)
00351 {
00352 outputAlignmentColumn
00353 (*(pQuery++), ' ', '-', queryNext++, subjectNext );
00354
00355 }
00356 i++;
00357 }
00358
00359 }
00360
00361 *pCursorSeq1_='\0';
00362 *pCursorSeq2_='\0';
00363 *pCursorAlign_='\0';
00364
00365 outputAlignmentLine();
00366
00367 }
00368
00369 void MatchAligner::outputAlignmentColumn
00370 ( const char queryChar, const char alignChar, const char subjectChar,
00371 int queryNext, int subjectNext)
00372 {
00373 if (pCursorSeq1_==&pBufSeq1_[numCols_])
00374 {
00375 if (*pBufSeq1_!='\0')
00376 outputAlignmentLine();
00377 sprintf( pBufSeq1_, "Q:%9.9d ", queryNext );
00378 sprintf( pBufAlign_, " " );
00379 sprintf( pBufSeq2_, "S:%9.9d ", subjectNext );
00380 pCursorSeq1_ = &pBufSeq1_[12];
00381 pCursorSeq2_ = &pBufSeq2_[12];
00382 pCursorAlign_ = &pBufAlign_[12];
00383
00384 }
00385 *(pCursorSeq1_++) = queryChar;
00386 *(pCursorAlign_++) = alignChar;
00387 *(pCursorSeq2_++) = subjectChar;
00388 }
00389
00390
00391
00392
00393 void MatchAligner::outputAlignmentLine( void )
00394 {
00395 outputStream_ << pBufSeq1_ << endl << pBufAlign_ << endl
00396 << pBufSeq2_ << endl << endl;
00397 }
00398
00399
00400
00401 void makeScoreTableDNA( ScoreTable& table )
00402 {
00403
00404 uchar a[2];
00405 unsigned short* b = (unsigned short*) &a[0];
00406
00407
00408 const char base[] = {"agct"};
00409
00410 ScoreType thisScore;
00411 for (int i(0); i < 4; i++ )
00412 {
00413 for (int j(0); j < 4; j++ )
00414 {
00415 thisScore = ((i==j) ? matchScoreDNA: mismatchScoreDNA);
00416 a[0]=base[i]; a[1] = base[j]; table[ *b ] = thisScore;
00417 a[1] = toupper(base[j]); table[ *b ] = thisScore;
00418 a[0]=toupper(base[i]); a[1] = base[j]; table[ *b ] = thisScore;
00419 a[1] = toupper(base[j]); table[ *b ] = thisScore;
00420 }
00421
00422 a[0]=base[i];
00423 a[1] = 'n'; table[ *b ] = nScoreDNA;
00424 a[1] = 'N'; table[ *b ] = nScoreDNA;
00425 a[1] = '-'; table[ *b ] = gapStartScoreDNA;
00426
00427 a[0]=toupper(base[i]);
00428 a[1] = 'n'; table[ *b ] = nScoreDNA;
00429 a[1] = 'N'; table[ *b ] = nScoreDNA;
00430 a[1] = '-'; table[ *b ] = gapStartScoreDNA;
00431
00432 a[1]=base[i];
00433 a[0] = 'n'; table[ *b ] = nScoreDNA;
00434 a[0] = 'N'; table[ *b ] = nScoreDNA;
00435 a[0] = '-'; table[ *b ] = gapStartScoreDNA;
00436
00437 a[1]=toupper(base[i]);
00438 a[0] = 'n'; table[ *b ] = nScoreDNA;
00439 a[0] = 'N'; table[ *b ] = nScoreDNA;
00440 a[0] = '-'; table[ *b ] = gapStartScoreDNA;
00441
00442
00443
00444 }
00445
00446 a[0]='n'; a[1] = 'n'; table[ *b ] = nScoreDNA;
00447 a[0]='n'; a[1] = 'N'; table[ *b ] = nScoreDNA;
00448 a[0]='N'; a[1] = 'n'; table[ *b ] = nScoreDNA;
00449 a[0]='N'; a[1] = 'N'; table[ *b ] = nScoreDNA;
00450
00451 a[0]='-'; a[1] = 'n'; table[ *b ] = gapStartScoreDNA;
00452 a[0]='-'; a[1] = 'N'; table[ *b ] = gapStartScoreDNA;
00453 a[0]='n'; a[1] = '-'; table[ *b ] = gapStartScoreDNA;
00454 a[0]='N'; a[1] = '-'; table[ *b ] = gapStartScoreDNA;
00455
00456 a[0]='-'; a[1] = '-'; table[ *b ] = gapExtendScoreDNA;
00457
00458 }
00459
00460
00461 void makeScoreTableBlosum62( ScoreTable& table )
00462 {
00463
00464 uchar a[2];
00465 unsigned short* b = (unsigned short*) &a[0];
00466
00467 const ScoreType* pScore = blosumScores;
00468
00469
00470 for (int i(0); i < numBlosumResidues; i++ )
00471 {
00472 for (int j(0); j < i; j++ )
00473 {
00474
00475
00476
00477 a[0]=blosumResidues[i]; a[1]=blosumResidues[j]; table[ *b ] = *pScore;
00478
00479
00480 a[1]=tolower(a[1]); table[ *b ] = *pScore;
00481
00482 a[0]=tolower(a[0]); table[ *b ] = *pScore;
00483
00484 a[1]=toupper(a[1]); table[ *b ] = *pScore;
00485
00486 a[0]=blosumResidues[j]; a[1]=blosumResidues[i]; table[ *b ] = *pScore;
00487 a[1]=tolower(a[1]); table[ *b ] = *pScore;
00488 a[0]=tolower(a[0]); table[ *b ] = *pScore;
00489 a[1]=toupper(a[1]); table[ *b ] = *pScore;
00490
00491 ++pScore;
00492 }
00493
00494 a[0] = blosumResidues[i];
00495 a[1] = blosumResidues[i]; table[ *b ] = (*pScore);
00496 a[1] = tolower(a[1]); table[ *b ] = (*pScore);
00497 a[1] = '*'; table[ *b ] = stopCodonScoreBLOSUM;
00498 a[1] = '-'; table[ *b ] = gapStartScoreBLOSUM;
00499
00500 a[0] = tolower(a[0]);
00501 a[1] = blosumResidues[i]; table[ *b ] = (*pScore);
00502 a[1] = tolower(a[1]); table[ *b ] = (*pScore);
00503 a[1] = '*'; table[ *b ] = stopCodonScoreBLOSUM;
00504 a[1] = '-'; table[ *b ] = gapStartScoreBLOSUM;
00505
00506 a[1] = blosumResidues[i];
00507 a[0] = '*'; table[ *b ] = stopCodonScoreBLOSUM;
00508 a[0] = '-'; table[ *b ] = gapStartScoreBLOSUM;
00509
00510 a[1] = tolower(a[1]);
00511 a[0] = '*'; table[ *b ] = stopCodonScoreBLOSUM;
00512 a[0] = '-'; table[ *b ] = gapStartScoreBLOSUM;
00513
00514 ++pScore;
00515 }
00516
00517 a[0] = '*'; a[1] = '*'; table[ *b ] = 1;
00518 a[0] = '*'; a[1] = '-'; table[ *b ] = gapStartScoreBLOSUM;
00519 a[0] = '-'; a[1] = '*'; table[ *b ] = gapStartScoreBLOSUM;
00520 a[0] = '-'; a[1] = '-'; table[ *b ] = gapExtendScoreBLOSUM;
00521
00522 }
00523
00524 MatchAlignerTranslated::MatchAlignerTranslated
00525 ( int numCols,
00526 int bandExtension,
00527 bool isQueryProtein,
00528 bool isSubjectProtein,
00529 ostream& outputStream ) :
00530 MatchAligner( numCols,
00531 bandExtension,
00532 tableBlosum62_.getTable(),
00533 outputStream ),
00534 isQueryProtein_(isQueryProtein),
00535 isSubjectProtein_(isSubjectProtein)
00536 {
00537
00538 }
00539
00540
00541 void MatchAlignerTranslated::codonize
00542 ( const char* pSeq,
00543 int seqSize,
00544 int finalFrame,
00545 vector<vector<char> >& translatedSeqs )
00546 {
00547 translatedSeqs.resize(gNumReadingFrames);
00548 translatedSeqs[0].resize(seqSize);
00549 translatedSeqs[1].resize(seqSize - (finalFrame==0));
00550 translatedSeqs[2].resize(seqSize - (finalFrame!=2));
00551
00552 const char* pChar;
00553 vector<char>::iterator i;
00554
00555 for (int j(0); j < gNumReadingFrames; j++ )
00556 {
00557 pChar = pSeq+j;
00558 for (i=translatedSeqs[j].begin();
00559 i!=translatedSeqs[j].end(); i++,
00560 pChar+=gNumReadingFrames )
00561 {
00562
00563
00564
00565
00566 if ( (ttDNA[ *(pChar) ]==nv)
00567 || (ttDNA[ *(pChar+1) ]==nv)
00568 || (ttDNA[ *(pChar+2) ]==nv) )
00569 {
00570 *i='X';
00571 }
00572 else
00573 {
00574 *i= gResidueNames[ ttCodon[ ttDNA[ *(pChar) ] << 4
00575 | ttDNA[ *(pChar+1) ] << 2
00576 | ttDNA[ *(pChar+2) ] ] ];
00577 }
00578
00579 }
00580
00581 }
00582
00583 }
00584
00585
00586 void MatchAlignerTranslated::createAlignment
00587 ( Alignment& alignment,
00588 const char* pQuery, int queryStart, int queryEnd,
00589 const char* pSubject, int subjectStart, int subjectEnd )
00590 {
00591 PathMatrix<PathType3D> path;
00592
00593 int queryFinalFrame(0);
00594 int subjectFinalFrame(0);
00595
00596 int querySize(queryEnd-queryStart+1);
00597 int subjectSize(subjectEnd-subjectStart+1);
00598
00599 vector<vector<char> > queryTranslated;
00600 vector<vector<char> > subjectTranslated;
00601 const char* pQueryTrans[gNumReadingFrames] =
00602 { pQuery, NULL, NULL };
00603 const char* pSubjectTrans[gNumReadingFrames] =
00604 { pSubject, NULL, NULL };
00605
00606
00607
00608 bool p1IsQuery;
00609
00610
00611 if (!isQueryProtein_)
00612 {
00613 queryFinalFrame = querySize % gNumReadingFrames;
00614 querySize /= gNumReadingFrames;
00615 codonize( pQuery, querySize, queryFinalFrame, queryTranslated );
00616 queryTranslated[1].push_back(0);
00617 queryTranslated[2].push_back(0);
00618 pQueryTrans[0] = static_cast<const char*>(&*queryTranslated[0].begin());
00619 pQueryTrans[1] = static_cast<const char*>(&*queryTranslated[1].begin());
00620 pQueryTrans[2] = static_cast<const char*>(&*queryTranslated[2].begin());
00621 }
00622
00623 if (!isSubjectProtein_)
00624 {
00625 subjectFinalFrame = subjectSize % gNumReadingFrames;
00626 subjectSize /= gNumReadingFrames;
00627 codonize( pSubject, subjectSize, subjectFinalFrame, subjectTranslated );
00628 subjectTranslated[1].push_back(0);
00629 subjectTranslated[2].push_back(0);
00630 pSubjectTrans[0]= static_cast<const char*>(&*subjectTranslated[0].begin());
00631 pSubjectTrans[1]= static_cast<const char*>(&*subjectTranslated[1].begin());
00632 pSubjectTrans[2]= static_cast<const char*>(&*subjectTranslated[2].begin());
00633 }
00634
00635 if (querySize<=subjectSize)
00636 {
00637 p1IsQuery = true;
00638 ColumnFiller3D doMatrix( pQueryTrans, querySize, queryFinalFrame,
00639 pSubjectTrans, subjectSize, subjectFinalFrame,
00640 bandExtension_, *pTable_ );
00641 alignment.totalScore_ = path.fillIn(doMatrix);
00642
00643 }
00644 else
00645 {
00646 p1IsQuery = false;
00647 ColumnFiller3D doMatrix( pSubjectTrans, subjectSize, subjectFinalFrame,
00648 pQueryTrans, querySize, queryFinalFrame,
00649 bandExtension_, *pTable_ );
00650 alignment.totalScore_ = path.fillIn(doMatrix);
00651
00652 }
00653
00654 TraceBacker3D doCell( queryFinalFrame, subjectFinalFrame,
00655 p1IsQuery, alignment );
00656 path.traceBack(doCell);
00657
00658
00659
00660 if (!isQueryProtein_)
00661 {
00662 queryTranslated[1].pop_back();
00663 queryTranslated[2].pop_back();
00664 }
00665 if (!isSubjectProtein_)
00666 {
00667 subjectTranslated[1].pop_back();
00668 subjectTranslated[2].pop_back();
00669 }
00670
00671 }
00672
00673
00674 MatchAlignerTranslatedProtein::MatchAlignerTranslatedProtein
00675 ( int isQueryProtein,
00676 int numCols,
00677 int bandExtension,
00678 ostream& outputStream ) :
00679 MatchAlignerTranslated( numCols,
00680 bandExtension,
00681 isQueryProtein,
00682 !isQueryProtein,
00683 outputStream )
00684 {
00685 }
00686
00687
00688
00689
00690 void MatchAlignerTranslatedProtein::formatAlignment
00691 ( Alignment& alignment,
00692 const char* pQuery, int queryStart, int queryEnd,
00693 const char* pSubject, int subjectStart, int subjectEnd )
00694 {
00695
00696 outputStream_ << "Alignment score: " << alignment.totalScore_ << endl;
00697
00698 int queryNext(queryStart), subjectNext(subjectStart);
00699 int* pProteinNext;
00700 int* pDNANext;
00701
00702 const char* pProteinChar;
00703 const char* pDNAChar;
00704
00705 char queryChar, subjectChar, matchChar, codonChar;
00706
00707 char* pProteinOut;
00708 char* pDNAOut;
00709
00710 AlignBreakType proteinGap, DNAGap, DNAFrameShift;
00711
00712
00713 if (isQueryProtein_)
00714 {
00715 pProteinNext = &queryNext;
00716 pDNANext = &subjectNext;
00717 pProteinChar = pQuery;
00718 pDNAChar = pSubject;
00719 pProteinOut = &queryChar;
00720 pDNAOut = &subjectChar;
00721 proteinGap = alignBreakGapQuery;
00722 DNAGap = alignBreakGapSubject;
00723 DNAFrameShift = alignBreakFrameShiftSubject;
00724 }
00725 else
00726 {
00727 pProteinNext = &subjectNext;
00728 pDNANext = &queryNext;
00729 pProteinChar = pSubject;
00730 pDNAChar = pQuery;
00731 pProteinOut = &subjectChar;
00732 pDNAOut = &queryChar;
00733 proteinGap = alignBreakGapSubject;
00734 DNAGap = alignBreakGapQuery;
00735 DNAFrameShift = alignBreakFrameShiftQuery;
00736 }
00737
00738 pCursorSeq1_ = &pBufSeq1_[numCols_];
00739
00740 *pBufSeq1_='\0';
00741
00742 Alignment::iterator i(alignment.begin());
00743
00744 while( i!=alignment.end() )
00745 {
00746
00747
00748 if (i->numMatches>0)
00749 {
00750 codonChar = getCodon( pDNAChar );
00751 matchChar = (codonChar==toupper(*pProteinChar)) ? '|' : 'x';
00752 codonChar = (matchChar=='|') ? '|' : codonChar;
00753
00754
00755
00756
00757
00758
00759
00760
00761 *pProteinOut = *(pProteinChar++);
00762 *pDNAOut = *(pDNAChar++);
00763
00764 outputAlignmentColumn
00765 (queryChar, codonChar, subjectChar, queryNext, subjectNext );
00766 (*pProteinNext)++;
00767 (*pDNANext)++;
00768
00769 *pProteinOut = '.';
00770 *pDNAOut = *(pDNAChar++);
00771 outputAlignmentColumn
00772 (queryChar, matchChar, subjectChar, queryNext, subjectNext );
00773 (*pDNANext)++;
00774
00775 *pDNAOut = *(pDNAChar++);
00776 outputAlignmentColumn
00777 (queryChar, matchChar, subjectChar, queryNext, subjectNext );
00778 (*pDNANext)++;
00779
00780
00781 i->numMatches--;
00782 }
00783 else
00784 {
00785 #ifdef XXX
00786 if (i->breakType==alignBreakMismatch)
00787 {
00788
00789
00790 *pProteinOut = *(pProteinChar++);
00791 *pDNAOut = *(pDNAChar++);
00792 outputAlignmentColumn
00793 (queryChar, 'x', subjectChar, queryNext++, subjectNext++ );
00794
00795 *pProteinOut = '.';
00796 *pDNAOut = *(pDNAChar++);
00797 outputAlignmentColumn
00798 (queryChar, 'x', subjectChar, queryNext++, subjectNext++ );
00799
00800 *pDNAOut = *(pDNAChar++);
00801 outputAlignmentColumn
00802 (queryChar, 'x', subjectChar, queryNext++, subjectNext++ );
00803
00804 }
00805 else
00806 #endif
00807 if (i->breakType==DNAFrameShift)
00808 {
00809 *pProteinOut = '-';
00810 *pDNAOut = *(pDNAChar++);
00811 outputAlignmentColumn
00812 (queryChar, ' ', subjectChar, queryNext, subjectNext );
00813 queryNext += 1 * (!isQueryProtein_);
00814 subjectNext += 1 * ( isQueryProtein_);
00815
00816 }
00817 else if (i->breakType==proteinGap)
00818 {
00819
00820 *pProteinOut = '-';
00821 *pDNAOut = *(pDNAChar++);
00822 outputAlignmentColumn
00823 (queryChar, ' ', subjectChar, queryNext, subjectNext );
00824 queryNext += 1 * (!isQueryProtein_);
00825 subjectNext += 1 * ( isQueryProtein_);
00826
00827 *pDNAOut = *(pDNAChar++);
00828 outputAlignmentColumn
00829 (queryChar, ' ', subjectChar, queryNext, subjectNext );
00830 queryNext += 1 * (!isQueryProtein_);
00831 subjectNext += 1 * ( isQueryProtein_);
00832
00833 *pDNAOut = *(pDNAChar++);
00834 outputAlignmentColumn
00835 (queryChar, ' ', subjectChar, queryNext, subjectNext );
00836 queryNext += 1 * (!isQueryProtein_);
00837 subjectNext += 1 * ( isQueryProtein_);
00838
00839 }
00840 else if (i->breakType==DNAGap)
00841 {
00842
00843 *pProteinOut = *(pProteinChar++);
00844 *pDNAOut = '-';
00845
00846 outputAlignmentColumn
00847 (queryChar, ' ', subjectChar, queryNext, subjectNext );
00848 queryNext += 1 * (isQueryProtein_);
00849 subjectNext += 1 * (!isQueryProtein_);
00850
00851 *pProteinOut = '.';
00852 outputAlignmentColumn
00853 (queryChar, ' ', subjectChar, queryNext, subjectNext );
00854 outputAlignmentColumn
00855 (queryChar, ' ', subjectChar, queryNext, subjectNext );
00856
00857 }
00858 i++;
00859 }
00860
00861 }
00862
00863 *pCursorSeq1_='\0';
00864 *pCursorSeq2_='\0';
00865 *pCursorAlign_='\0';
00866
00867 outputAlignmentLine();
00868
00869
00870 }
00871
00872
00873 void MatchAlignerTranslatedDNA::formatAlignment
00874 ( Alignment& alignment,
00875 const char* pQuery, int queryStart, int queryEnd,
00876 const char* pSubject, int subjectStart, int subjectEnd )
00877 {
00878
00879 outputStream_ << "Alignment score: " << alignment.totalScore_ << endl;
00880
00881 int queryNext(queryStart);
00882 int subjectNext(subjectStart);
00883 char codonChar;
00884
00885 pCursorSeq1_ = &pBufSeq1_[numCols_];
00886
00887 *pBufSeq1_='\0';
00888
00889
00890
00891 Alignment::iterator i(alignment.begin());
00892
00893 while( i!=alignment.end() )
00894 {
00895
00896
00897 if (i->numMatches>0)
00898 {
00899 codonChar = getCodon( pQuery );
00900 if ((codonChar!='X')&&(codonChar == getCodon(pSubject)))
00901 {
00902 outputAlignmentColumn
00903 (
00904 *(pQuery++),
00905 codonChar,
00906 *(pSubject++),
00907 queryNext++,
00908 subjectNext++
00909 );
00910 outputAlignmentColumn
00911 (*(pQuery++), '.', *(pSubject++), queryNext++, subjectNext++ );
00912 outputAlignmentColumn
00913 (*(pQuery++), '.', *(pSubject++), queryNext++, subjectNext++ );
00914 }
00915 else
00916 {
00917 outputAlignmentColumn
00918 ( *pQuery, matchChar(*pQuery,*pSubject) ? '|' : 'x', *pSubject, queryNext++, subjectNext++ );
00919 pQuery++; pSubject++;
00920 outputAlignmentColumn
00921 ( *pQuery, matchChar(*pQuery,*pSubject) ? '|' : 'x', *pSubject, queryNext++, subjectNext++ );
00922 pQuery++; pSubject++;
00923 outputAlignmentColumn
00924 ( *pQuery, matchChar(*pQuery,*pSubject) ? '|' : 'x', *pSubject, queryNext++, subjectNext++ );
00925 pQuery++; pSubject++;
00926 }
00927
00928 i->numMatches--;
00929 }
00930 else
00931 {
00932 #ifdef XXX
00933 if (i->breakType==alignBreakMismatch)
00934 {
00935 outputAlignmentColumn
00936 (*(pQuery++), 'x', *(pSubject++), queryNext++, subjectNext++ );
00937 outputAlignmentColumn
00938 (*(pQuery++), 'x', *(pSubject++), queryNext++, subjectNext++ );
00939 outputAlignmentColumn
00940 (*(pQuery++), 'x', *(pSubject++), queryNext++, subjectNext++ );
00941 }
00942 else
00943 #endif
00944 if (i->breakType==alignBreakFrameShiftQuery)
00945 {
00946 outputAlignmentColumn
00947 (*(pQuery++), ' ', '-', queryNext++, subjectNext );
00948
00949 }
00950 else if (i->breakType==alignBreakFrameShiftSubject)
00951 {
00952 outputAlignmentColumn
00953 ( '-', ' ', *(pSubject++), queryNext, subjectNext++ );
00954 }
00955 i++;
00956 }
00957
00958 }
00959
00960 *pCursorSeq1_='\0';
00961 *pCursorSeq2_='\0';
00962 *pCursorAlign_='\0';
00963
00964 outputAlignmentLine();
00965
00966
00967
00968 }
00969
00970
00971
00972
00973
00974
00975
00976 void print( vector<ScoreType>& v )
00977 {
00978 for( vector<ScoreType>::iterator i(v.begin());i!=v.end();++i)
00979 cout << "Score: " << *i << endl;
00980
00981 }
00982
00983
00984 void print( PathMatrix<PathType>& p )
00985 {
00986 for (int i(0); i< p.front().size(); i++)
00987 {
00988 for (vector<vector<PathType> >::iterator j(p.begin());
00989 j!=p.end();++j)
00990 {
00991 cout << (((*j)[i] & fromW)?'-':'.')
00992 << (((*j)[i] & fromSW)?'/':'.')
00993 << (((*j)[i] & fromN)?'|':'.')
00994 << "\t";
00995 }
00996 cout << endl;
00997 }
00998
00999 }
01000
01001
01002 ScoreType ColumnFillerBasic::operator()( PathMatrix<PathType>& matrix )
01003 {
01004 ScoreType lastScore;
01005 int i,j;
01006
01007 matrix.resize(bandLength_, vector<PathType>(colSize_) );
01008
01009
01010 matrix[0][bandExtension_]=fromFinished;
01011 v1_[bandExtension_]=0;
01012
01013
01014 for (j= bandExtension_+1; j< colSize_ ; j++)
01015 {
01016 v1_[j] = fillCell_( matrix[0][j],
01017 veryBadScoreIndeed,
01018 veryBadScoreIndeed,
01019 v1_[j-1]+gapScore_);
01020
01021 }
01022
01023 pLast_ = &v1_;
01024 pCurrent_ = &v2_;
01025
01026
01027
01028
01029 for ( i=1 ; i <= bandExtension_ ; i++ )
01030 {
01031
01032
01033 lastScore = fillCell_( matrix[i][bandExtension_-i],
01034 veryBadScoreIndeed,
01035 (*pLast_)[bandExtension_-i+1]+gapScore_,
01036 veryBadScoreIndeed );
01037 (*pCurrent_)[bandExtension_-i]=lastScore;
01038 for ( j=bandExtension_-i+1; j < colSize_-1 ; j++)
01039 {
01040 lastScore = fillCell_
01041 ( matrix[i][j],
01042 (*pLast_)[j]
01043 + getScore_(p1_[i-1],p2_[j-bandExtension_+i-1]),
01044
01045
01046 (*pLast_)[j+1] + gapScore_,
01047 lastScore + gapScore_ );
01048 (*pCurrent_)[j] = lastScore;
01049 }
01050
01051 (*pCurrent_)[j] = fillCell_
01052 ( matrix[i][j],
01053 (*pLast_)[j]
01054 + getScore_(p1_[i-1],p2_[j-bandExtension_+i-1]),
01055
01056
01057 veryBadScoreIndeed,
01058 lastScore + gapScore_ );
01059
01060 temp_ = pCurrent_; pCurrent_ = pLast_; pLast_ = temp_;
01061 }
01062
01063
01064
01065 for ( i=(bandExtension_+1) ;
01066 i < bandLength_ -bandExtension_;
01067 i++ )
01068 {
01069
01070
01071 lastScore = veryBadScoreIndeed;
01072
01073 for ( j=0; j < colSize_-1 ; j++)
01074 {
01075 lastScore = fillCell_
01076 ( matrix[i][j],
01077 (*pLast_)[j]
01078 + getScore_(p1_[i-1],p2_[j-bandExtension_+i-1]),
01079
01080
01081 (*pLast_)[j+1] + gapScore_,
01082 lastScore + gapScore_ );
01083 (*pCurrent_)[j]=lastScore;
01084 }
01085
01086 (*pCurrent_)[j] = fillCell_
01087 ( matrix[i][j],
01088 (*pLast_)[j]
01089 + getScore_(p1_[i-1],p2_[j-bandExtension_+i-1]),
01090
01091
01092 veryBadScoreIndeed,
01093 lastScore + gapScore_ );
01094
01095 temp_ = pCurrent_; pCurrent_ = pLast_; pLast_ = temp_;
01096
01097 }
01098
01099
01100
01101
01102 for ( i=bandLength_-bandExtension_ ; i < bandLength_ ; i++ )
01103 {
01104
01105
01106 lastScore = veryBadScoreIndeed;
01107
01108 for ( j=0; j < colSize_-1-i-bandExtension_+bandLength_ ; j++)
01109 {
01110 lastScore = fillCell_
01111 ( matrix[i][j],
01112 (*pLast_)[j]
01113 + getScore_(p1_[i-1],p2_[j-bandExtension_+i-1]),
01114
01115
01116
01117 (*pLast_)[j+1] + gapScore_,
01118 lastScore + gapScore_ );
01119 (*pCurrent_)[j]=lastScore;
01120 }
01121
01122 temp_ = pCurrent_; pCurrent_ = pLast_; pLast_ = temp_;
01123
01124 }
01125
01126
01127
01128 matrix.lastCell_.first
01129 = matrix.end()-1;
01130 matrix.lastCell_.second
01131 = matrix.lastCell_.first->begin() + (colSize_-bandExtension_-1);
01132
01133 return (*pLast_)[colSize_-bandExtension_-1];
01134
01135 }
01136
01137
01138 bool TraceBackerBasic::operator()
01139 ( PathMatrix<PathType>::CellIterator& current )
01140 {
01141
01142
01143 alignment_.push_front( AlignInfo() );
01144 alignment_.front().breakType=alignBreakNull;
01145
01146 if (*current.second&fromFinished)
01147 {
01148
01149 return false;
01150 }
01151 else if (*current.second&fromW)
01152 {
01153
01154 current.second
01155 = (current.first-1)->begin()
01156 + (current.second-current.first->begin());
01157 --current.first;
01158 alignment_.front().numMatches++;
01159 }
01160 else if (*current.second&fromN)
01161 {
01162
01163 assert(current.second!=current.first->begin());
01164 --current.second;
01165 alignment_.push_front( AlignInfo() );
01166 alignment_.front().breakType=p1Gap_;
01167 }
01168 else if (*current.second&fromSW)
01169 {
01170
01171 current.second
01172 = (current.first-1)->begin()
01173 + (current.second-current.first->begin());
01174 ++current.second;
01175 --current.first;
01176 alignment_.push_front( AlignInfo() );
01177 alignment_.front().breakType=p2Gap_;
01178 }
01179 else assert (1==0);
01180
01181 return true;
01182 }
01183
01184
01185
01186
01187
01188
01189
01190
01191 template<> ScoreType ColumnFiller3D::getScoreW<false>
01192 ( int j, int k, int l )
01193 {
01194
01195 return veryBadScoreIndeed;
01196 }
01197
01198 template<> ScoreType ColumnFiller3D::getScoreSW<false>
01199 ( int j, int k, int l )
01200 {
01201
01202 return veryBadScoreIndeed;
01203 }
01204
01205 template<> ScoreType ColumnFiller3D::getScoreN<false>
01206 ( int j, int k, int l )
01207 {
01208
01209 return veryBadScoreIndeed;
01210 }
01211
01212 template<> ScoreType ColumnFiller3D::getScoreChar<false>
01213 ( int i, int j, int k, int l)
01214 {
01215
01216
01217 return (ScoreType)0;
01218 }
01219
01220
01221 template<> void ColumnFiller3D::fillCell3D< false, false, false, false>
01222 ( PathMatrix<PathType3D>& matrix, int i, int j )
01223 {
01224
01225 for (int k(0); k < numFrames1_; k++ )
01226 {
01227 for (int l(0); l < numFrames2_; l++ )
01228 {
01229 (*pCurrent_)[j][k][l] = (k+l)*frameShiftScoreBLOSUM;
01230 matrix[i][j][k][l] |= fromPrevFrame1 * (k!=0);
01231 matrix[i][j][k][l] |= fromPrevFrame2 * (l!=0);
01232
01233
01234
01235 }
01236 }
01237 matrix[i][j][0][0] |= fromFinished;
01238 }
01239
01240 ColumnFiller3D::ColumnFiller3D
01241 ( const char** p1Trans, int p1Size, int p1FinalFrame,
01242 const char** p2Trans, int p2Size, int p2FinalFrame,
01243 int bandExtension,
01244 const ScoreTable& scoreTable ) :
01245 bandExtension_( (bandExtension<(p1Size/2))
01246 ? bandExtension
01247 : max( (p1Size/2)-1, 0 ) ),
01248
01249 bandWidth_(p2Size-p1Size+1),
01250 bandLength_(p1Size+1),
01251 colSize_(p2Size-p1Size+1+(2*bandExtension_)),
01252 fillCell_(),
01253 finalFrame1_(p1FinalFrame),
01254 finalFrame2_(p2FinalFrame),
01255 numFrames1_((p1Trans[1]==NULL)?1:gNumReadingFrames),
01256 numFrames2_((p2Trans[1]==NULL)?1:gNumReadingFrames),
01257 v1_(colSize_, veryBadScore3D ),
01258 v2_(colSize_, veryBadScore3D ),
01259 pLast_(&v1_),
01260 pCurrent_(&v2_),
01261 getScore_(scoreTable)
01262 {
01263
01264 p1_[0] = p1Trans[0];
01265 p1_[1] = p1Trans[1];
01266 p1_[2] = p1Trans[2];
01267 p2_[0] = p2Trans[0];
01268 p2_[1] = p2Trans[1];
01269 p2_[2] = p2Trans[2];
01270
01271
01272
01273
01274
01275
01276
01277
01278 }
01279
01280
01281
01282
01283
01284
01285
01286 ScoreType ColumnFiller3D::operator()( PathMatrix<PathType3D>& matrix )
01287 {
01288
01289
01290 int i,j,k,l;
01291
01292 matrix.resize(bandLength_, vector<PathType3D>(colSize_) );
01293
01294
01295
01296 pLast_ = &v2_;
01297 pCurrent_ = &v1_;
01298
01299
01300
01301
01302
01303 fillCell3D<false, false, false, false>( matrix, 0, bandExtension_);
01304
01305
01306
01307
01308 for (j = (bandExtension_+1); j< colSize_ ; j++)
01309 {
01310 fillCell3D<false, false, true, false>( matrix, 0, j);
01311 }
01312
01313 temp_ = pCurrent_; pCurrent_ = pLast_; pLast_ = temp_;
01314
01315
01316
01317 for ( i=1 ; i <= bandExtension_ ; i++ )
01318 {
01319
01320
01321 fillCell3D<false, true, false, false>( matrix, i, bandExtension_-i );
01322
01323
01324 for ( j=bandExtension_-i+1; j < colSize_-1 ; j++)
01325 {
01326
01327 fillCell3D<true, true, true, true>( matrix, i, j);
01328
01329 }
01330
01331
01332
01333
01334
01335
01336 fillCell3D<true, false, true, true>( matrix, i, j);
01337
01338
01339
01340 temp_ = pCurrent_; pCurrent_ = pLast_; pLast_ = temp_;
01341 }
01342
01343
01344
01345 if (colSize_==1)
01346 {
01347 for ( i=(bandExtension_+1) ;
01348 i < bandLength_ -bandExtension_;
01349 i++ )
01350 {
01351 fillCell3D<true, false, false, true>( matrix, i, 0);
01352 temp_ = pCurrent_; pCurrent_ = pLast_; pLast_ = temp_;
01353 }
01354 }
01355 else
01356 {
01357 for ( i=(bandExtension_+1) ;
01358 i < bandLength_ -bandExtension_;
01359 i++ )
01360 {
01361
01362
01363
01364 fillCell3D<true, true, false, true>( matrix, i, 0);
01365
01366 for ( j=1; j < colSize_-1 ; j++)
01367 {
01368
01369 fillCell3D<true, true, true, true>( matrix, i, j);
01370 }
01371
01372
01373 fillCell3D< true, false, true, true>(matrix, i, j);
01374 temp_ = pCurrent_; pCurrent_ = pLast_; pLast_ = temp_;
01375 }
01376 }
01377
01378
01379
01380
01381 for ( i=bandLength_-bandExtension_ ; i < bandLength_ ; i++ )
01382 {
01383
01384
01385 fillCell3D<true, true, false, true>( matrix, i, 0);
01386
01387 for ( j=1; j < colSize_-1-i-bandExtension_+bandLength_ ; j++)
01388 {
01389
01390 fillCell3D< true, true, true, true>( matrix, i, j);
01391 }
01392
01393 temp_ = pCurrent_; pCurrent_ = pLast_; pLast_ = temp_;
01394
01395 }
01396
01397
01398
01399 matrix.lastCell_.first
01400 = matrix.end()-1;
01401 matrix.lastCell_.second
01402 = matrix.lastCell_.first->begin()+ (colSize_-bandExtension_-1);
01403
01404 return (*pLast_)[colSize_-bandExtension_-1][finalFrame1_][finalFrame2_];
01405
01406 }
01407
01408
01409 bool TraceBacker3D::operator()
01410 ( PathMatrix<PathType3D>::CellIterator& current )
01411 {
01412
01413
01414
01415
01416
01417 alignment_.push_front( AlignInfo() );
01418 alignment_.front().breakType=alignBreakNull;
01419
01420 if (((*current.second)[k_][l_])&fromFinished)
01421 {
01422 return false;
01423 }
01424 else if (((*current.second)[k_][l_])&fromW)
01425 {
01426
01427 current.second
01428 = (current.first-1)->begin()
01429 + (current.second-current.first->begin());
01430 --current.first;
01431 alignment_.front().numMatches++;
01432 }
01433 else if (((*current.second)[k_][l_])&fromN)
01434 {
01435
01436 assert(current.second!=current.first->begin());
01437 --current.second;
01438 alignment_.push_front( AlignInfo() );
01439 alignment_.front().breakType=p1Gap_;
01440 }
01441 else if (((*current.second)[k_][l_])&fromSW)
01442 {
01443
01444 current.second
01445 = (current.first-1)->begin()
01446 + (current.second-current.first->begin());
01447 ++current.second;
01448 --current.first;
01449 alignment_.push_front( AlignInfo() );
01450 alignment_.front().breakType=p2Gap_;
01451 }
01452 else if (((*current.second)[k_][l_])&fromPrevFrame1)
01453 {
01454
01455 if (k_==0)
01456 {
01457
01458 current.second
01459 = (current.first-1)->begin()
01460 + (current.second-current.first->begin());
01461 ++current.second;
01462 --current.first;
01463 k_=2;
01464 }
01465 else k_--;
01466 alignment_.push_front( AlignInfo() );
01467 alignment_.front().breakType=p1FrameShift_;
01468 }
01469 else if (((*current.second)[k_][l_])&fromPrevFrame2)
01470 {
01471
01472 if (l_==0)
01473 {
01474
01475 assert(current.second!=current.first->begin());
01476 --current.second;
01477 l_=2;
01478 }
01479 else l_--;
01480 alignment_.push_front( AlignInfo() );
01481 alignment_.front().breakType=p2FrameShift_;
01482 }
01483 else assert (1==0);
01484
01485 return true;
01486
01487 }
01488
01489
01490
01491
01492
01493
01494
01495
01496
01497
01498
01499