include/search.h File Reference

This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Functions

void search_protein1hit (struct PSSMatrix PSSMatrix, struct sequenceData *sequenceData, uint4 numSequences, uint4 tickFrequency)
void search_protein2hit (struct PSSMatrix PSSMatrix, struct sequenceData *sequenceData, uint4 numSequences, uint4 tickFrequency)
void search_nucleotide (struct PSSMatrix PSSMatrix, struct sequenceData *sequenceData, uint4 numSequences, uint4 tickFrequency)
void search_nucleotide_longWord (struct PSSMatrix PSSMatrix, struct sequenceData *sequenceData, uint4 numSequences, uint4 tickFrequency)
void search_nucleotide_largeTable (struct PSSMatrix PSSMatrix, struct sequenceData *sequenceData, uint4 numSequences, uint4 tickFrequency)
void search_nucleotideIndex (unsigned char *address, unsigned char *endAddress, int4 tickFrequency, struct PSSMatrix PSSMatrix)
void search_proteinSsearch (struct PSSMatrix PSSMatrix, struct sequenceData *sequenceData, uint4 numSequences, uint4 tickFrequency)
void search_nucleotideSsearch (struct PSSMatrix PSSMatrix, struct sequenceData *sequenceData, uint4 numSequences, uint4 tickFrequency)


Function Documentation

void search_nucleotide ( struct PSSMatrix  PSSMatrix,
struct sequenceData sequenceData,
uint4  numSequences,
uint4  tickFrequency 
)

Definition at line 328 of file search.c.

References alignments_addUngappedExtension(), alignments_createNew(), alignments_currentAlignment, blast_numHits, blast_numTriggerExtensions, blast_numUngappedExtensions, PSSMatrix::bytePackedCodes, sequenceData::descriptionLength, sequenceData::descriptionStart, sequenceData::encodedLength, hitMatrix_diagonalMask, hitMatrix_furthest, hitMatrix_queryLength, int2, int4, nucleotideLookup, nucleotideLookup_additionalPositions, nucleotideLookup_bitLookup, nucleotideLookup_mask, parameters_outputType, parameters_tabular, parameters_verboseDloc, parameters_wordExtraLetters, parameters_wordSize, parameters_wordTableBytes, parameters_wordTableLetters, parameters_xml, PSSMatrix_packedLeftMatches, PSSMatrix_packedRightMatches, sequenceData::sequence, sequenceData::sequenceLength, uint4, ungappedExtension_bestScore, ungappedExtension_nucleotideExtend(), ungappedExtension_print(), and ungappedExtension_subjectEndReached.

Referenced by blast_search().

00330 {
00331         uint4 sequenceCount = 0, codeword;
00332         uint4 descriptionStart = 0, descriptionLength = 0, encodedLength;
00333     uint4 numPackedBytes, numRemaining;
00334         unsigned char *subject, *sequenceEnd, prevByte, nextByte, rightMatches, leftMatches;
00335     unsigned char previousCode, *subjectPosition, *address;
00336         int4 subjectLength, subjectOffset, wordLengthMinusOne;
00337         struct ungappedExtension* ungappedExtension;
00338         int2 *queryOffsets, tableEntry, queryOffset, singleQueryOffset[2];
00339     int4 previousByteDistance;
00340     int4 diagonal;
00341     unsigned char** lastHit;
00342 
00343     wordLengthMinusOne = parameters_wordSize - 1;
00344         previousByteDistance = (parameters_wordTableLetters + 4);
00345         singleQueryOffset[1] = 0;
00346 
00347     while (sequenceCount < numSequences)
00348     {
00349         descriptionLength = sequenceData[sequenceCount].descriptionLength;
00350         descriptionStart = sequenceData[sequenceCount].descriptionStart;
00351         subjectLength = sequenceData[sequenceCount].sequenceLength;
00352         encodedLength = sequenceData[sequenceCount].encodedLength;
00353                 address = subject = sequenceData[sequenceCount].sequence;
00354 
00355         // New sequence, new possible alignment
00356         alignments_currentAlignment = NULL;
00357 
00358         // Determine number of packed bytes and number of remaining letters
00359         numPackedBytes = subjectLength / 4;
00360                 numRemaining = subjectLength % 4;
00361 
00362         // Only process sequence if at least as long as the word length
00363         if (subjectLength >= parameters_wordSize)
00364         {
00365             sequenceEnd = subject + numPackedBytes;
00366 
00367             // Read first char and advance
00368             previousCode = *address;
00369             address++;
00370 
00371             // Traverse until end of sequence
00372             while (address < sequenceEnd)
00373             {
00374                 // Read next char and update codeword
00375                 codeword = *address | (previousCode << 8);
00376                 previousCode = *address;
00377                                 tableEntry = nucleotideLookup[codeword];
00378 
00379                 // Calculate subject offset
00380                 subjectOffset = address - subject;
00381 
00382                 #ifdef BITLOOKUP
00383                                 if (nucleotideLookup_bitLookup[codeword >> 5] &
00384                     (1 << (codeword & nucleotideLookup_mask)))
00385                                 #endif
00386                 if (tableEntry)
00387                 {
00388                         if (tableEntry > 0)
00389                     {
00390                         // Just one query position
00391                         singleQueryOffset[0] = queryOffset = tableEntry;
00392                                                 queryOffsets = singleQueryOffset;
00393                     }
00394                     else
00395                     {
00396                         // Multiple query positions
00397                                                 queryOffsets = nucleotideLookup_additionalPositions - tableEntry;
00398                         queryOffset = *queryOffsets;
00399                     }
00400 
00401                     subjectPosition = address + 1;
00402                     do
00403                     {
00404                         // Calculate number of matches to left and right of hit
00405                         nextByte = PSSMatrix.bytePackedCodes[queryOffset];
00406                         rightMatches = PSSMatrix_packedRightMatches[nextByte ^ *subjectPosition];
00407                         prevByte = PSSMatrix.bytePackedCodes[queryOffset - previousByteDistance];
00408                         leftMatches = PSSMatrix_packedLeftMatches[prevByte ^ *(address - parameters_wordTableBytes)];
00409 
00410                         if (rightMatches + leftMatches >= parameters_wordExtraLetters)
00411                         {
00412                             #ifndef NO_STAGE2
00413                             // Calculate subject offset
00414                             subjectOffset = subjectPosition - subject;
00415 
00416                             // Calculate the diagonal this hit is on
00417                             diagonal = (subjectOffset * 4 - queryOffset + hitMatrix_queryLength)
00418                                      & hitMatrix_diagonalMask;
00419 
00420                             // If we have not extended past this point on this diagonal
00421                             lastHit = hitMatrix_furthest + diagonal;
00422 
00423                             #ifdef VERBOSE
00424                             if (parameters_verboseDloc == descriptionStart)
00425                                 printf("Hit %d,%d\n", queryOffset, subjectOffset * 4);
00426                             #endif
00427 
00428                             if (*lastHit < address)
00429                             {
00430                                 // Perform ungapped extension
00431                                 ungappedExtension
00432                                         = ungappedExtension_nucleotideExtend(queryOffset,
00433                                       subjectOffset, PSSMatrix, subject, subjectLength);
00434 
00435                                 // Update furthest reached value for the diagonal
00436                                 *lastHit = ungappedExtension_subjectEndReached;
00437 
00438                                 #ifdef VERBOSE
00439                                 if (parameters_verboseDloc == descriptionStart)
00440                                     printf("UngappedExtension %d,%d Score=%d\n", queryOffset,
00441                                     subjectOffset * 4, ungappedExtension_bestScore);
00442                                 if (parameters_verboseDloc == descriptionStart && ungappedExtension)
00443                                         ungappedExtension_print(ungappedExtension);
00444                                 #endif
00445 
00446                                 // If extension scores high enough to trigger gapping
00447                                 if (ungappedExtension)
00448                                 {
00449                                     // Increment count of number of trigger extensions
00450                                     blast_numTriggerExtensions++;
00451 
00452                                     // Create new alignment if needed
00453                                     if (alignments_currentAlignment == NULL)
00454                                     {
00455                                         alignments_createNew(descriptionStart, descriptionLength, subject,
00456                                                              subjectLength, encodedLength);
00457                                     }
00458 
00459                                     // Add this extension to the alignment
00460                                     alignments_addUngappedExtension(ungappedExtension);
00461                                 }
00462 
00463                                 blast_numUngappedExtensions++;
00464                             }
00465                             #endif
00466                             blast_numHits++;
00467                         }
00468 
00469                         queryOffsets++;
00470                         queryOffset = *queryOffsets;
00471                                         }
00472                     while (queryOffset);
00473                 }
00474 
00475                 address++;
00476             }
00477         }
00478 
00479         sequenceCount++;
00480 
00481         // Every so often print status .
00482         if ((sequenceCount % tickFrequency == 0) &&
00483             parameters_outputType != parameters_xml && parameters_outputType != parameters_tabular)
00484         {
00485             #ifndef VERBOSE
00486             printf(".");
00487             fflush(stdout);
00488             #endif
00489         }
00490     }
00491 }

Here is the call graph for this function:

Here is the caller graph for this function:

void search_nucleotide_largeTable ( struct PSSMatrix  PSSMatrix,
struct sequenceData sequenceData,
uint4  numSequences,
uint4  tickFrequency 
)

Definition at line 683 of file search.c.

References alignments_addUngappedExtension(), alignments_createNew(), alignments_currentAlignment, blast_numHits, blast_numTriggerExtensions, blast_numUngappedExtensions, PSSMatrix::bytePackedCodes, sequenceData::descriptionLength, sequenceData::descriptionStart, sequenceData::encodedLength, hitMatrix_diagonalMask, hitMatrix_furthest, hitMatrix_queryLength, int4, nucleotideLookup_additionalPositions_large, nucleotideLookup_bitLookup, nucleotideLookup_large, nucleotideLookup_mask, parameters_outputType, parameters_tabular, parameters_verboseDloc, parameters_wordExtraLetters, parameters_wordSize, parameters_wordTableBytes, parameters_wordTableLetters, parameters_xml, PSSMatrix_packedLeftMatches, PSSMatrix_packedRightMatches, sequenceData::sequence, sequenceData::sequenceLength, uint4, ungappedExtension_bestScore, ungappedExtension_nucleotideExtend(), ungappedExtension_print(), and ungappedExtension_subjectEndReached.

Referenced by blast_search().

00685 {
00686         uint4 sequenceCount = 0, codeword;
00687         uint4 descriptionStart = 0, descriptionLength = 0, encodedLength;
00688     uint4 numPackedBytes, numRemaining;
00689         unsigned char *subject, *sequenceEnd, prevByte, nextByte, rightMatches, leftMatches;
00690     unsigned char previousCode, *subjectPosition, *address;
00691         int4 subjectLength, subjectOffset, wordLengthMinusOne;
00692         struct ungappedExtension* ungappedExtension;
00693         int4 *queryOffsets, tableEntry, queryOffset, singleQueryOffset[2];
00694     int4 previousByteDistance;
00695     int4 diagonal;
00696     unsigned char** lastHit;
00697 
00698     wordLengthMinusOne = parameters_wordSize - 1;
00699         previousByteDistance = (parameters_wordTableLetters + 4);
00700         singleQueryOffset[1] = 0;
00701 
00702     while (sequenceCount < numSequences)
00703     {
00704         descriptionLength = sequenceData[sequenceCount].descriptionLength;
00705         descriptionStart = sequenceData[sequenceCount].descriptionStart;
00706         subjectLength = sequenceData[sequenceCount].sequenceLength;
00707         encodedLength = sequenceData[sequenceCount].encodedLength;
00708                 address = subject = sequenceData[sequenceCount].sequence;
00709 
00710         // New sequence, new possible alignment
00711         alignments_currentAlignment = NULL;
00712 
00713         // Determine number of packed bytes and number of remaining letters
00714         numPackedBytes = subjectLength / 4;
00715                 numRemaining = subjectLength % 4;
00716 
00717         // Only process sequence if at least as long as the word length
00718         if (subjectLength >= parameters_wordSize)
00719         {
00720             sequenceEnd = subject + numPackedBytes;
00721 
00722             // Read first char and advance
00723             previousCode = *address;
00724             address++;
00725 
00726             // Traverse until end of sequence
00727             while (address < sequenceEnd)
00728             {
00729                 // Read next char and update codeword
00730                 codeword = *address | (previousCode << 8);
00731                 previousCode = *address;
00732                                 tableEntry = nucleotideLookup_large[codeword];
00733 
00734                 // Calculate subject offset
00735                 subjectOffset = address - subject;
00736 
00737                 #ifdef BITLOOKUP
00738                                 if (nucleotideLookup_bitLookup[codeword >> 5] &
00739                     (1 << (codeword & nucleotideLookup_mask)))
00740                                 #endif
00741                 if (tableEntry)
00742                 {
00743                         if (tableEntry > 0)
00744                     {
00745                         // Just one query position
00746                         singleQueryOffset[0] = queryOffset = tableEntry;
00747                                                 queryOffsets = singleQueryOffset;
00748                     }
00749                     else
00750                     {
00751                         // Multiple query positions
00752                                                 queryOffsets = nucleotideLookup_additionalPositions_large - tableEntry;
00753                         queryOffset = *queryOffsets;
00754                     }
00755 
00756                     subjectPosition = address + 1;
00757                     do
00758                     {
00759                         // Calculate number of matches to left and right of hit
00760                         nextByte = PSSMatrix.bytePackedCodes[queryOffset];
00761                         rightMatches = PSSMatrix_packedRightMatches[nextByte ^ *subjectPosition];
00762                         prevByte = PSSMatrix.bytePackedCodes[queryOffset - previousByteDistance];
00763                         leftMatches = PSSMatrix_packedLeftMatches[prevByte ^ *(address - parameters_wordTableBytes)];
00764 
00765                         if (rightMatches + leftMatches >= parameters_wordExtraLetters)
00766                         {
00767                             #ifndef NO_STAGE2
00768                             // Calculate subject offset
00769                             subjectOffset = subjectPosition - subject;
00770 
00771                             // Calculate the diagonal this hit is on
00772                             diagonal = (subjectOffset * 4 - queryOffset + hitMatrix_queryLength)
00773                                      & hitMatrix_diagonalMask;
00774 
00775                             // If we have not extended past this point on this diagonal
00776                             lastHit = hitMatrix_furthest + diagonal;
00777 
00778                             #ifdef VERBOSE
00779                             if (parameters_verboseDloc == descriptionStart)
00780                                 printf("Hit %d,%d\n", queryOffset, subjectOffset * 4);
00781                             #endif
00782                             if (*lastHit < address)
00783                             {
00784                                 // Perform ungapped extension
00785                                 ungappedExtension
00786                                         = ungappedExtension_nucleotideExtend(queryOffset,
00787                                       subjectOffset, PSSMatrix, subject, subjectLength);
00788 
00789                                 // Update furthest reached value for the diagonal
00790                                 *lastHit = ungappedExtension_subjectEndReached;
00791 
00792                                 #ifdef VERBOSE
00793                                 if (parameters_verboseDloc == descriptionStart)
00794                                     printf("UngappedExtension %d,%d Score=%d\n", queryOffset,
00795                                     subjectOffset * 4, ungappedExtension_bestScore);
00796                                 if (parameters_verboseDloc == descriptionStart && ungappedExtension)
00797                                         ungappedExtension_print(ungappedExtension);
00798                                 #endif
00799 
00800                                 // If extension scores high enough to trigger gapping
00801                                 if (ungappedExtension)
00802                                 {
00803                                     // Increment count of number of trigger extensions
00804                                     blast_numTriggerExtensions++;
00805 
00806                                     // Create new alignment if needed
00807                                     if (alignments_currentAlignment == NULL)
00808                                     {
00809                                         alignments_createNew(descriptionStart, descriptionLength, subject,
00810                                                              subjectLength, encodedLength);
00811                                     }
00812 
00813                                     // Add this extension to the alignment
00814                                     alignments_addUngappedExtension(ungappedExtension);
00815                                 }
00816 
00817                                 blast_numUngappedExtensions++;
00818                             }
00819                             #endif
00820                             blast_numHits++;
00821                         }
00822 
00823                         queryOffsets++;
00824                         queryOffset = *queryOffsets;
00825                                         }
00826                     while (queryOffset);
00827                 }
00828 
00829                 address++;
00830             }
00831         }
00832 
00833         sequenceCount++;
00834 
00835         // Every so often print status .
00836         if ((sequenceCount % tickFrequency == 0) &&
00837             parameters_outputType != parameters_xml && parameters_outputType != parameters_tabular)
00838         {
00839             #ifndef VERBOSE
00840             printf(".");
00841             fflush(stdout);
00842             #endif
00843         }
00844     }
00845 }

Here is the call graph for this function:

Here is the caller graph for this function:

void search_nucleotide_longWord ( struct PSSMatrix  PSSMatrix,
struct sequenceData sequenceData,
uint4  numSequences,
uint4  tickFrequency 
)

Definition at line 494 of file search.c.

References alignments_addUngappedExtension(), alignments_createNew(), alignments_currentAlignment, blast_numHits, blast_numTriggerExtensions, blast_numUngappedExtensions, PSSMatrix::bytePackedCodes, sequenceData::descriptionLength, sequenceData::descriptionStart, sequenceData::encodedLength, hitMatrix_diagonalMask, hitMatrix_furthest, hitMatrix_queryLength, int2, int4, nucleotideLookup, nucleotideLookup_additionalPositions, nucleotideLookup_bitLookup, nucleotideLookup_mask, parameters_outputType, parameters_tabular, parameters_verboseDloc, parameters_wordExtraBytes, parameters_wordExtraLetters, parameters_wordSize, parameters_wordTableBytes, parameters_wordTableLetters, parameters_xml, PSSMatrix_packedLeftMatches, PSSMatrix_packedRightMatches, sequenceData::sequence, sequenceData::sequenceLength, uint4, ungappedExtension_bestScore, ungappedExtension_nucleotideExtend(), ungappedExtension_print(), and ungappedExtension_subjectEndReached.

Referenced by blast_search().

00496 {
00497         uint4 sequenceCount = 0, codeword;
00498         uint4 descriptionStart = 0, descriptionLength = 0, encodedLength;
00499     uint4 numPackedBytes, numRemaining;
00500         unsigned char *subject, *sequenceEnd, prevByte, nextByte, rightMatches, leftMatches;
00501     unsigned char previousCode, *subjectPosition, *address;
00502         int4 subjectLength, subjectOffset, wordLengthMinusOne;
00503         struct ungappedExtension* ungappedExtension;
00504         int2 *queryOffsets, tableEntry, queryOffset, singleQueryOffset[2];
00505     int4 previousByteDistance;
00506     int4 diagonal, extraBytesNeeded;
00507     unsigned char** lastHit;
00508 
00509     wordLengthMinusOne = parameters_wordSize - 1;
00510         previousByteDistance = parameters_wordTableLetters + parameters_wordExtraBytes * 4 + 4;
00511         singleQueryOffset[1] = 0;
00512 
00513     while (sequenceCount < numSequences)
00514     {
00515         descriptionLength = sequenceData[sequenceCount].descriptionLength;
00516         descriptionStart = sequenceData[sequenceCount].descriptionStart;
00517         subjectLength = sequenceData[sequenceCount].sequenceLength;
00518         encodedLength = sequenceData[sequenceCount].encodedLength;
00519                 address = subject = sequenceData[sequenceCount].sequence;
00520 
00521         // Determine number of packed bytes and number of remaining letters
00522         numPackedBytes = subjectLength / 4;
00523                 numRemaining = subjectLength % 4;
00524 
00525         // Only process sequence if at least as long as the word length
00526         if (subjectLength >= parameters_wordSize)
00527         {
00528             sequenceEnd = subject + numPackedBytes;
00529 
00530             // Read first char and advance
00531             previousCode = *address;
00532             address++;
00533 
00534             // Traverse until end of sequence
00535             while (address < sequenceEnd)
00536             {
00537                 // Read next char and update codeword
00538                 codeword = *address | (previousCode << 8);
00539                 previousCode = *address;
00540                                 tableEntry = nucleotideLookup[codeword];
00541 
00542                 // Calculate subject offset
00543                 subjectOffset = address - subject;
00544 
00545                 #ifdef BITLOOKUP
00546                                 if (nucleotideLookup_bitLookup[codeword >> 5] &
00547                     (1 << (codeword & nucleotideLookup_mask)))
00548                                 #endif
00549                 if (tableEntry)
00550                 {
00551                         if (tableEntry > 0)
00552                     {
00553                         // Just one query position
00554                         singleQueryOffset[0] = queryOffset = tableEntry;
00555                                                 queryOffsets = singleQueryOffset;
00556                     }
00557                     else
00558                     {
00559                         // Multiple query positions
00560                                                 queryOffsets = nucleotideLookup_additionalPositions - tableEntry;
00561                         queryOffset = *queryOffsets;
00562                     }
00563 
00564                     subjectPosition = address + 1;
00565                     do
00566                     {
00567                                                 extraBytesNeeded = parameters_wordExtraBytes;
00568 
00569                                                 while (extraBytesNeeded)
00570                                                 {
00571                                                         // Check for matching bytes to right
00572                             if (*subjectPosition != PSSMatrix.bytePackedCodes[queryOffset])
00573                                 break;
00574 
00575 //                                                      printf("Match %d,%d\n", queryOffset, (subjectPosition - subject)*4);
00576 
00577                             extraBytesNeeded--;
00578                             subjectPosition++;
00579                             subjectOffset++;
00580                             queryOffset+=4;
00581                                                 }
00582 
00583                                                 if (!extraBytesNeeded)
00584                                                 {
00585                             // Calculate number of matches to left and right of hit
00586                             nextByte = PSSMatrix.bytePackedCodes[queryOffset];
00587                             rightMatches = PSSMatrix_packedRightMatches[nextByte ^ *(subjectPosition)];
00588                             prevByte = PSSMatrix.bytePackedCodes[queryOffset - previousByteDistance];
00589                             leftMatches = PSSMatrix_packedLeftMatches[prevByte ^ *(address - parameters_wordTableBytes)];
00590 
00591 //                            printf("prev at %d,%d\n", queryOffset - previousByteDistance);
00592 //                              printf("part matches=[%d,%d]\n", leftMatches, rightMatches);
00593 
00594                             if (rightMatches + leftMatches >= parameters_wordExtraLetters)
00595                             {
00596 //                              printf("Hit! dloc=%d\n", descriptionStart);
00597                                 #ifndef NO_STAGE2
00598                                 // Calculate subject offset
00599                                 subjectOffset = subjectPosition - subject;
00600 
00601                                 // Calculate the diagonal this hit is on
00602                                 diagonal = (subjectOffset * 4 - queryOffset + hitMatrix_queryLength)
00603                                         & hitMatrix_diagonalMask;
00604 
00605                                 // If we have not extended past this point on this diagonal
00606                                 lastHit = hitMatrix_furthest + diagonal;
00607 
00608                                 #ifdef VERBOSE
00609                                 if (parameters_verboseDloc == descriptionStart)
00610                                     printf("Hit %d,%d\n", queryOffset, subjectOffset * 4);
00611                                 #endif
00612                                 if (*lastHit < address)
00613                                 {
00614                                     // Perform ungapped extension
00615                                     ungappedExtension
00616                                         = ungappedExtension_nucleotideExtend(queryOffset,
00617                                         subjectOffset, PSSMatrix, subject, subjectLength);
00618 
00619                                     // Update furthest reached value for the diagonal
00620                                     *lastHit = ungappedExtension_subjectEndReached;
00621 
00622                                     #ifdef VERBOSE
00623                                     if (parameters_verboseDloc == descriptionStart)
00624                                         printf("UngappedExtension %d,%d Score=%d\n", queryOffset,
00625                                         subjectOffset * 4, ungappedExtension_bestScore);
00626                                     if (parameters_verboseDloc == descriptionStart && ungappedExtension)
00627                                         ungappedExtension_print(ungappedExtension);
00628                                     #endif
00629 
00630                                     // If extension scores high enough to trigger gapping
00631                                     if (ungappedExtension)
00632                                     {
00633                                         // Increment count of number of trigger extensions
00634                                         blast_numTriggerExtensions++;
00635 
00636                                         // Create new alignment if needed
00637                                         if (alignments_currentAlignment == NULL)
00638                                         {
00639                                             alignments_createNew(descriptionStart, descriptionLength, subject,
00640                                                                 subjectLength, encodedLength);
00641                                         }
00642 
00643                                         // Add this extension to the alignment
00644                                         alignments_addUngappedExtension(ungappedExtension);
00645                                     }
00646 
00647                                     blast_numUngappedExtensions++;
00648                                 }
00649                                 #endif
00650                                 blast_numHits++;
00651                             }
00652                         }
00653 
00654                         queryOffsets++;
00655                         queryOffset = *queryOffsets;
00656                                         }
00657                     while (queryOffset);
00658                 }
00659 
00660                 address++;
00661             }
00662         }
00663 
00664         sequenceCount++;
00665 
00666         // Every so often print status .
00667         if ((sequenceCount % tickFrequency == 0) &&
00668             parameters_outputType != parameters_xml && parameters_outputType != parameters_tabular)
00669         {
00670             #ifndef VERBOSE
00671             printf(".");
00672             fflush(stdout);
00673             #endif
00674         }
00675 
00676         // New sequence, new possible alignment
00677         alignments_currentAlignment = NULL;
00678     }
00679 }

Here is the call graph for this function:

Here is the caller graph for this function:

void search_nucleotideIndex ( unsigned char *  address,
unsigned char *  endAddress,
int4  tickFrequency,
struct PSSMatrix  PSSMatrix 
)

void search_nucleotideSsearch ( struct PSSMatrix  PSSMatrix,
struct sequenceData sequenceData,
uint4  numSequences,
uint4  tickFrequency 
)

Definition at line 1105 of file search.c.

References alignments_addFinalAlignment(), alignments_addGappedExtension(), alignments_createNew(), alignments_currentAlignment, alignments_isFinalAlignment(), dpResults::best, dpResults::bestScore, blast_gappedNominalCutoff, sequenceData::descriptionLength, sequenceData::descriptionStart, sequenceData::encodedLength, encoding_byteUnpack(), encoding_insertWilds(), gappedExtension_score(), int4, gappedExtension::nominalScore, parameters_outputType, parameters_tabular, parameters_xml, gappedExtension::queryEnd, coordinate::queryOffset, sequenceData::sequence, sequenceData::sequenceLength, smithWatermanScoring_score(), smithWatermanScoring_scoreReverse(), smithWatermanTraceback_build(), gappedExtension::subjectEnd, coordinate::subjectOffset, and uint4.

Referenced by blast_search().

01107 {
01108         uint4 sequenceCount = 0;
01109         uint4 descriptionStart = 0, descriptionLength = 0, encodedLength;
01110         unsigned char *subject, *unpackedSubject, *address;
01111         int4 subjectLength;
01112         struct gappedExtension* gappedExtension;
01113         struct dpResults dpResults, reverseDpResults;
01114 
01115     while (sequenceCount < numSequences)
01116     {
01117         descriptionLength = sequenceData[sequenceCount].descriptionLength;
01118         descriptionStart = sequenceData[sequenceCount].descriptionStart;
01119         subjectLength = sequenceData[sequenceCount].sequenceLength;
01120         encodedLength = sequenceData[sequenceCount].encodedLength;
01121                 address = subject = sequenceData[sequenceCount].sequence;
01122 
01123         // New sequence, new possible alignment
01124         alignments_currentAlignment = NULL;
01125 
01126         // Unpack the subject
01127         unpackedSubject = encoding_byteUnpack(subject, subjectLength);
01128 
01129         // Re-insert wildcards
01130         encoding_insertWilds(unpackedSubject, subject + ((subjectLength + 3) / 4),
01131                              subject + encodedLength);
01132 
01133                 // Perform SW score only
01134                 dpResults = smithWatermanScoring_score(PSSMatrix, subjectLength, unpackedSubject);
01135 
01136         // If above e-value cutoff
01137                 if (dpResults.bestScore >= blast_gappedNominalCutoff
01138             && alignments_isFinalAlignment(dpResults.bestScore))
01139                 {
01140                 // Perform SW alignment in the reverse direction, to find the start of the optimal alignment
01141                 reverseDpResults = smithWatermanScoring_scoreReverse(PSSMatrix, subjectLength,
01142                                unpackedSubject, dpResults.best);
01143 
01144             // Collect traceback information and store alignment
01145                         gappedExtension = smithWatermanTraceback_build(PSSMatrix, subjectLength, unpackedSubject,
01146                                                            reverseDpResults.best, dpResults.best);
01147 
01148             if (reverseDpResults.bestScore != dpResults.bestScore ||
01149                 dpResults.bestScore != gappedExtension->nominalScore)
01150             {
01151                 fprintf(stderr, "Error: Forward and reverse Smith-Waterman alignment scores do not match\n");
01152                 fprintf(stderr, "Forward Score=%d End=%d,%d\n", dpResults.bestScore,
01153                         dpResults.best.queryOffset, dpResults.best.subjectOffset);
01154                 fprintf(stderr, "Reverse Score=%d End=%d,%d\n", reverseDpResults.bestScore,
01155                         reverseDpResults.best.queryOffset, reverseDpResults.best.subjectOffset);
01156                 fprintf(stderr, "Traceback Score=%d End=%d,%d\n", gappedExtension->nominalScore,
01157                         gappedExtension->queryEnd, gappedExtension->subjectEnd);
01158 //                exit(-1);
01159             }
01160 
01161             gappedExtension_score(gappedExtension);
01162 
01163                         alignments_createNew(descriptionStart, descriptionLength, unpackedSubject, subjectLength, 0);
01164                         alignments_addGappedExtension(alignments_currentAlignment, gappedExtension);
01165                         alignments_addFinalAlignment(dpResults.bestScore, alignments_currentAlignment);
01166                 }
01167                 else
01168         {
01169                 free(unpackedSubject);
01170         }
01171 
01172         sequenceCount++;
01173 
01174         // Every so often print status .
01175         if ((sequenceCount % tickFrequency == 0) &&
01176             parameters_outputType != parameters_xml && parameters_outputType != parameters_tabular)
01177         {
01178             #ifndef VERBOSE
01179             printf(".");
01180             fflush(stdout);
01181             #endif
01182         }
01183     }
01184 }

Here is the call graph for this function:

Here is the caller graph for this function:

void search_protein1hit ( struct PSSMatrix  PSSMatrix,
struct sequenceData sequenceData,
uint4  numSequences,
uint4  tickFrequency 
)

Definition at line 11 of file search.c.

References alignments_addUngappedExtension(), alignments_createNew(), alignments_currentAlignment, blast_numHits, blast_numTriggerExtensions, blast_numUngappedExtensions, constants_max_int2, sequenceData::descriptionLength, sequenceData::descriptionStart, sequenceData::encodedLength, hitMatrix_furthest, int4, PSSMatrix::matrix, group::nextGroups, group::nextWords, parameters_outputType, parameters_tabular, parameters_wordSize, parameters_xml, sequenceData::sequence, sequenceData::sequenceLength, uint2, uint4, ungappedExtension_oneHitExtend(), ungappedExtension_subjectEndReached, wordLookupDFA_additionalQueryPositions, wordLookupDFA_groups, and wordLookupDFA_numCodes.

Referenced by alignments_expandCluster(), and blast_search().

00013 {
00014         uint4 sequenceCount = 0;
00015         uint4 descriptionStart = 0, descriptionLength = 0, encodedLength;
00016         unsigned char *subject, *sequenceEnd, *address;
00017         int4 subjectLength, subjectOffset, wordLengthMinusOne, count = 0;
00018         unsigned char currentWord, *currentBlock;
00019     struct group* currentGroup;
00020         uint2* queryOffsets, queryOffset;
00021         struct ungappedExtension* ungappedExtension;
00022     int4 diagonal;
00023     unsigned char** lastHit;
00024 
00025     wordLengthMinusOne = parameters_wordSize - 1;
00026 
00027     while (sequenceCount < numSequences)
00028     {
00029         descriptionLength = sequenceData[sequenceCount].descriptionLength;
00030         descriptionStart = sequenceData[sequenceCount].descriptionStart;
00031         subjectLength = sequenceData[sequenceCount].sequenceLength;
00032         encodedLength = sequenceData[sequenceCount].encodedLength;
00033                 address = subject = sequenceData[sequenceCount].sequence;
00034 
00035         // New sequence, new possible alignment
00036         alignments_currentAlignment = NULL;
00037 
00038         // Only process sequence if at least as long as the word length
00039         if (subjectLength >= parameters_wordSize)
00040         {
00041             // Start at 000 state in Finite State Automata
00042             currentGroup = wordLookupDFA_groups;
00043 
00044             // Read first wordLength - 1 chars and advance
00045             count = wordLengthMinusOne;
00046             while (count > 0)
00047             {
00048                 if (*address < wordLookupDFA_numCodes)
00049                     currentGroup = currentGroup->nextGroups + *address;
00050                 else
00051                     currentGroup = currentGroup->nextGroups;
00052 
00053                 address++;
00054                 count--;
00055             }
00056 
00057             // Read the rest of the codes, using the Finite State Automata defined
00058             // by wordLookupDFA to get the query positions of int4erest
00059             sequenceEnd = subject + subjectLength;
00060             while (address < sequenceEnd)
00061             {
00062                 currentBlock = currentGroup->nextWords;
00063 
00064                 // If current code is a regular letter
00065                 if (*address < wordLookupDFA_numCodes)
00066                 {
00067                     // Use it
00068                     currentWord = currentBlock[*address];
00069                     currentGroup = currentGroup->nextGroups + *address;
00070                 }
00071                 else
00072                 {
00073                     // Else check if we've reached end of the file
00074                     if (address >= sequenceEnd)
00075                         break;
00076 
00077                     // If not, we've read a wild code. Use first code instead
00078                     currentWord = *currentBlock;
00079                     currentGroup = currentGroup->nextGroups;
00080                 }
00081 
00082                 if (currentWord)
00083                 {
00084                     // Calculate subject offset
00085                     subjectOffset = address - subject;
00086 
00087                     // At least one query position, stored at an extenal address
00088                     queryOffsets = ((uint2*)currentBlock) - currentWord;
00089 
00090                     // If the zero flag is stored at the first query position
00091                     if (!*queryOffsets)
00092                     {
00093                         // Go to an outside address for additional positions
00094                         queryOffsets = wordLookupDFA_additionalQueryPositions
00095                                      + (*(queryOffsets + 1) * constants_max_int2) + *(queryOffsets + 2);
00096                     }
00097 
00098                     do
00099                     {
00100                         queryOffset = *queryOffsets;
00101 
00102                         #ifndef NO_STAGE2
00103                         // Calculate the diagonal this hit is on
00104                         diagonal = subjectOffset - queryOffset;
00105 
00106                         // If we have not extended past this point on this diagonal
00107                         lastHit = hitMatrix_furthest + diagonal;
00108                         if (*lastHit < address)
00109                         {
00110                             // Increment tally number of extensions
00111                             blast_numUngappedExtensions++;
00112 
00113                             // If only one hit triggered this extension
00114                             ungappedExtension
00115                                 = ungappedExtension_oneHitExtend(PSSMatrix.matrix + queryOffset,
00116                                                                  address, PSSMatrix, subject);
00117 
00118                             // Update furthest reached value for the diagonal
00119                             *lastHit = ungappedExtension_subjectEndReached;
00120 
00121                             // If extension scores high enough to trigger gapping
00122                             if (ungappedExtension)
00123                             {
00124                                 // Increment count of number of trigger extensions
00125                                 blast_numTriggerExtensions++;
00126 
00127                                 // Create new alignment if needed
00128                                 if (alignments_currentAlignment == NULL)
00129                                 {
00130                                     // Create new alignment object
00131                                     alignments_createNew(descriptionStart, descriptionLength,
00132                                                          subject, subjectLength, encodedLength);
00133                                 }
00134 
00135                                 // Add this extension to the alignment
00136                                 alignments_addUngappedExtension(ungappedExtension);
00137                             }
00138                         }
00139                         #endif
00140 
00141                         queryOffsets++; blast_numHits++;
00142                     }
00143                     while (*queryOffsets);
00144                 }
00145 
00146                 address++;
00147             }
00148         }
00149 
00150         sequenceCount++;
00151 
00152         // Every so often print status .
00153         if ((sequenceCount % tickFrequency == 0) &&
00154             parameters_outputType != parameters_xml && parameters_outputType != parameters_tabular)
00155         {
00156             #ifndef VERBOSE
00157             printf(".");
00158             fflush(stdout);
00159                         #endif
00160         }
00161     }
00162 }

Here is the call graph for this function:

Here is the caller graph for this function:

void search_protein2hit ( struct PSSMatrix  PSSMatrix,
struct sequenceData sequenceData,
uint4  numSequences,
uint4  tickFrequency 
)

Definition at line 165 of file search.c.

References alignments_addUngappedExtension(), alignments_createNew(), alignments_currentAlignment, blast_numHits, blast_numTriggerExtensions, blast_numUngappedExtensions, constants_max_int2, sequenceData::descriptionLength, sequenceData::descriptionStart, sequenceData::encodedLength, hitMatrix_furthest, int4, PSSMatrix::matrix, group::nextGroups, group::nextWords, parameters_A, parameters_outputType, parameters_overlap, parameters_tabular, parameters_wordSize, parameters_xml, sequenceData::sequence, sequenceData::sequenceLength, uint2, uint4, ungappedExtension_extend(), ungappedExtension_subjectEndReached, wordLookupDFA_additionalQueryPositions, wordLookupDFA_groups, and wordLookupDFA_numCodes.

Referenced by alignments_expandCluster(), and blast_search().

00167 {
00168         uint4 sequenceCount = 0;
00169         uint4 descriptionStart = 0, descriptionLength = 0, encodedLength;
00170         unsigned char *subject, *sequenceEnd;
00171         int4 subjectLength, subjectOffset, wordLengthMinusOne, count = 0;
00172         unsigned char currentWord, *currentBlock, *address;
00173     struct group* currentGroup;
00174         uint2* queryOffsets, queryOffset;
00175         struct ungappedExtension* ungappedExtension;
00176     int4 diagonal, distance;
00177     unsigned char** lastHit;
00178 
00179     wordLengthMinusOne = parameters_wordSize - 1;
00180 
00181     while (sequenceCount < numSequences)
00182     {
00183         descriptionLength = sequenceData[sequenceCount].descriptionLength;
00184         descriptionStart = sequenceData[sequenceCount].descriptionStart;
00185         subjectLength = sequenceData[sequenceCount].sequenceLength;
00186         encodedLength = sequenceData[sequenceCount].encodedLength;
00187                 address = subject = sequenceData[sequenceCount].sequence;
00188 
00189         // New sequence, new possible alignment
00190         alignments_currentAlignment = NULL;
00191 
00192         // Only process sequence if at least as long as the word length
00193         if (subjectLength >= parameters_wordSize)
00194         {
00195             // Start at 000 state in Finite State Automata
00196             currentGroup = wordLookupDFA_groups;
00197 
00198             // Read first wordLength - 1 chars and advance
00199             count = wordLengthMinusOne;
00200             while (count > 0)
00201             {
00202                 if (*address < wordLookupDFA_numCodes)
00203                     currentGroup = currentGroup->nextGroups + *address;
00204                 else
00205                     currentGroup = currentGroup->nextGroups;
00206 
00207                 address++;
00208                 count--;
00209             }
00210 
00211             // Read the rest of the codes, using the Finite State Automata defined
00212             // by wordLookupDFA to get the query positions of int4erest
00213             sequenceEnd = subject + subjectLength;
00214             while (address < sequenceEnd)
00215             {
00216                 currentBlock = currentGroup->nextWords;
00217 
00218                 // If current code is a regular letter
00219                 if (*address < wordLookupDFA_numCodes)
00220                 {
00221                     // Use it
00222                     currentWord = currentBlock[*address];
00223                     currentGroup = currentGroup->nextGroups + *address;
00224                 }
00225                 else
00226                 {
00227                     // Else check if we've reached end of the file
00228                     if (address >= sequenceEnd)
00229                         break;
00230 
00231                     // If not, we've read a wild code. Use first code instead
00232                     currentWord = currentBlock[0];
00233                     currentGroup = currentGroup->nextGroups;
00234                 }
00235 
00236                 if (currentWord)
00237                 {
00238                     // Calculate subject offset
00239                     subjectOffset = address - subject;
00240 
00241                     // If at least one query position, stored at an extenal address
00242                     queryOffsets = ((uint2*)currentBlock) - currentWord;
00243 
00244                     // If the zero flag is stored at the first query position
00245                     if (!*queryOffsets)
00246                     {
00247                         // Go to an outside address for additional positions
00248                         queryOffsets = wordLookupDFA_additionalQueryPositions
00249                                      + (*(queryOffsets + 1) * constants_max_int2) + *(queryOffsets + 2);
00250                     }
00251 
00252                     do
00253                     {
00254                         queryOffset = *queryOffsets;
00255 
00256                         #ifndef NO_STAGE2
00257                         // Calculate the diagonal this hit is on
00258                         diagonal = subjectOffset - queryOffset;
00259 
00260                         // Calculate distance since last hit
00261                         lastHit = hitMatrix_furthest + diagonal;
00262                         distance = address - *lastHit;
00263 
00264                         if (distance >= parameters_A)
00265                         {
00266                             // Too far apart, update furthest
00267                             *lastHit = address;
00268                         }
00269                         else if (distance >= parameters_overlap)
00270                         {
00271                             // Not overlaping - extension triggered
00272                             // Increment tally number of extensions
00273                             blast_numUngappedExtensions++;
00274 
00275                             // Perform ungapped extension start between query/subject start/end
00276                             // and extending outwards in each direction
00277                             ungappedExtension
00278                                 = ungappedExtension_extend(PSSMatrix.matrix + queryOffset,
00279                                                            address, *lastHit, PSSMatrix, subject);
00280 
00281                             // Update furthest reached value for the diagonal
00282                             *lastHit = ungappedExtension_subjectEndReached;
00283 
00284                             // If extension scores high enough to trigger gapping
00285                             if (ungappedExtension)
00286                             {
00287                                 // Increment count of number of trigger extensions
00288                                 blast_numTriggerExtensions++;
00289 
00290                                 // Create new alignment if needed
00291                                 if (alignments_currentAlignment == NULL)
00292                                 {
00293                                     // Create new alignment object using subject with wilds
00294                                     alignments_createNew(descriptionStart, descriptionLength, subject,
00295                                                          subjectLength, encodedLength);
00296                                 }
00297 
00298                                 // Add this extension to the alignment
00299                                 alignments_addUngappedExtension(ungappedExtension);
00300                             }
00301                         }
00302                         #endif
00303 
00304                         queryOffsets++; blast_numHits++;
00305                     }
00306                     while (*queryOffsets);
00307                 }
00308 
00309                 address++;
00310             }
00311         }
00312 
00313         sequenceCount++;
00314 
00315         // Every so often print status .
00316         if ((sequenceCount % tickFrequency == 0) &&
00317             parameters_outputType != parameters_xml && parameters_outputType != parameters_tabular)
00318         {
00319             #ifndef VERBOSE
00320             printf(".");
00321             fflush(stdout);
00322                         #endif
00323         }
00324     }
00325 }

Here is the call graph for this function:

Here is the caller graph for this function:

void search_proteinSsearch ( struct PSSMatrix  PSSMatrix,
struct sequenceData sequenceData,
uint4  numSequences,
uint4  tickFrequency 
)

Definition at line 1032 of file search.c.

References alignments_addFinalAlignment(), alignments_addGappedExtension(), alignments_createNew(), alignments_currentAlignment, alignments_isFinalAlignment(), alignments_sortFinalAlignments(), dpResults::best, dpResults::bestScore, blast_gappedNominalCutoff, sequenceData::descriptionLength, sequenceData::descriptionStart, sequenceData::encodedLength, gappedExtension_score(), int4, gappedExtension::nominalScore, parameters_outputType, parameters_tabular, parameters_xml, sequenceData::sequence, sequenceData::sequenceLength, smithWatermanScoring_score(), smithWatermanScoring_scoreReverse(), smithWatermanTraceback_build(), and uint4.

Referenced by blast_search().

01034 {
01035         uint4 sequenceCount = 0;
01036         uint4 descriptionStart = 0, descriptionLength = 0, encodedLength;
01037         unsigned char *subject, *address;
01038         int4 subjectLength;
01039         struct gappedExtension* gappedExtension;
01040         struct dpResults dpResults, reverseDpResults;
01041 
01042     while (sequenceCount < numSequences)
01043     {
01044         descriptionLength = sequenceData[sequenceCount].descriptionLength;
01045         descriptionStart = sequenceData[sequenceCount].descriptionStart;
01046         subjectLength = sequenceData[sequenceCount].sequenceLength;
01047         encodedLength = sequenceData[sequenceCount].encodedLength;
01048                 address = subject = sequenceData[sequenceCount].sequence;
01049 
01050         // New sequence, new possible alignment
01051         alignments_currentAlignment = NULL;
01052 
01053         // Perform SW score only
01054                 dpResults = smithWatermanScoring_score(PSSMatrix, subjectLength, subject);
01055 
01056 //        printf("%d\n", dpResults.bestScore);
01057 
01058         // If above e-value cutoff
01059                 if (dpResults.bestScore >= blast_gappedNominalCutoff
01060             && alignments_isFinalAlignment(dpResults.bestScore))
01061                 {
01062             // Perform SW alignment in the reverse direction, to find the start of the optimal alignment
01063                 reverseDpResults = smithWatermanScoring_scoreReverse(PSSMatrix, subjectLength, subject,
01064                                                                  dpResults.best);
01065 
01066             // Collect traceback information and store alignment
01067                         gappedExtension = smithWatermanTraceback_build(PSSMatrix, subjectLength, subject,
01068                                                            reverseDpResults.best, dpResults.best);
01069 
01070             if (reverseDpResults.bestScore != dpResults.bestScore ||
01071                 dpResults.bestScore != gappedExtension->nominalScore)
01072             {
01073                 fprintf(stderr, "Error: Forward and reverse Smith-Waterman alignment scores do not match\n");
01074                 exit(-1);
01075             }
01076 
01077             gappedExtension_score(gappedExtension);
01078 
01079                         alignments_createNew(descriptionStart, descriptionLength, subject, subjectLength, encodedLength);
01080                         alignments_addGappedExtension(alignments_currentAlignment, gappedExtension);
01081                         alignments_addFinalAlignment(dpResults.bestScore, alignments_currentAlignment);
01082                 }
01083 
01084         // Advance to next sequence
01085         address += encodedLength - 1;
01086 
01087         sequenceCount++;
01088 
01089         // Every so often print status .
01090         if ((sequenceCount % tickFrequency == 0) &&
01091             parameters_outputType != parameters_xml && parameters_outputType != parameters_tabular)
01092         {
01093             #ifndef VERBOSE
01094             printf(".");
01095             fflush(stdout);
01096                         #endif
01097         }
01098     }
01099 
01100         // Sort the final alignments by refined score
01101         alignments_sortFinalAlignments();
01102 }

Here is the call graph for this function:

Here is the caller graph for this function:


Generated on Wed Dec 19 20:51:57 2007 for fsa-blast by  doxygen 1.5.2