This graph shows which files directly or indirectly include this file:

Go to the source code of this file.
Functions | |
| void | search_protein1hit (struct PSSMatrix PSSMatrix, struct sequenceData *sequenceData, uint4 numSequences, uint4 tickFrequency) |
| void | search_protein2hit (struct PSSMatrix PSSMatrix, struct sequenceData *sequenceData, uint4 numSequences, uint4 tickFrequency) |
| void | search_nucleotide (struct PSSMatrix PSSMatrix, struct sequenceData *sequenceData, uint4 numSequences, uint4 tickFrequency) |
| void | search_nucleotide_longWord (struct PSSMatrix PSSMatrix, struct sequenceData *sequenceData, uint4 numSequences, uint4 tickFrequency) |
| void | search_nucleotide_largeTable (struct PSSMatrix PSSMatrix, struct sequenceData *sequenceData, uint4 numSequences, uint4 tickFrequency) |
| void | search_nucleotideIndex (unsigned char *address, unsigned char *endAddress, int4 tickFrequency, struct PSSMatrix PSSMatrix) |
| void | search_proteinSsearch (struct PSSMatrix PSSMatrix, struct sequenceData *sequenceData, uint4 numSequences, uint4 tickFrequency) |
| void | search_nucleotideSsearch (struct PSSMatrix PSSMatrix, struct sequenceData *sequenceData, uint4 numSequences, uint4 tickFrequency) |
| void search_nucleotide | ( | struct PSSMatrix | PSSMatrix, | |
| struct sequenceData * | sequenceData, | |||
| uint4 | numSequences, | |||
| uint4 | tickFrequency | |||
| ) |
Definition at line 328 of file search.c.
References alignments_addUngappedExtension(), alignments_createNew(), alignments_currentAlignment, blast_numHits, blast_numTriggerExtensions, blast_numUngappedExtensions, PSSMatrix::bytePackedCodes, sequenceData::descriptionLength, sequenceData::descriptionStart, sequenceData::encodedLength, hitMatrix_diagonalMask, hitMatrix_furthest, hitMatrix_queryLength, int2, int4, nucleotideLookup, nucleotideLookup_additionalPositions, nucleotideLookup_bitLookup, nucleotideLookup_mask, parameters_outputType, parameters_tabular, parameters_verboseDloc, parameters_wordExtraLetters, parameters_wordSize, parameters_wordTableBytes, parameters_wordTableLetters, parameters_xml, PSSMatrix_packedLeftMatches, PSSMatrix_packedRightMatches, sequenceData::sequence, sequenceData::sequenceLength, uint4, ungappedExtension_bestScore, ungappedExtension_nucleotideExtend(), ungappedExtension_print(), and ungappedExtension_subjectEndReached.
Referenced by blast_search().
00330 { 00331 uint4 sequenceCount = 0, codeword; 00332 uint4 descriptionStart = 0, descriptionLength = 0, encodedLength; 00333 uint4 numPackedBytes, numRemaining; 00334 unsigned char *subject, *sequenceEnd, prevByte, nextByte, rightMatches, leftMatches; 00335 unsigned char previousCode, *subjectPosition, *address; 00336 int4 subjectLength, subjectOffset, wordLengthMinusOne; 00337 struct ungappedExtension* ungappedExtension; 00338 int2 *queryOffsets, tableEntry, queryOffset, singleQueryOffset[2]; 00339 int4 previousByteDistance; 00340 int4 diagonal; 00341 unsigned char** lastHit; 00342 00343 wordLengthMinusOne = parameters_wordSize - 1; 00344 previousByteDistance = (parameters_wordTableLetters + 4); 00345 singleQueryOffset[1] = 0; 00346 00347 while (sequenceCount < numSequences) 00348 { 00349 descriptionLength = sequenceData[sequenceCount].descriptionLength; 00350 descriptionStart = sequenceData[sequenceCount].descriptionStart; 00351 subjectLength = sequenceData[sequenceCount].sequenceLength; 00352 encodedLength = sequenceData[sequenceCount].encodedLength; 00353 address = subject = sequenceData[sequenceCount].sequence; 00354 00355 // New sequence, new possible alignment 00356 alignments_currentAlignment = NULL; 00357 00358 // Determine number of packed bytes and number of remaining letters 00359 numPackedBytes = subjectLength / 4; 00360 numRemaining = subjectLength % 4; 00361 00362 // Only process sequence if at least as long as the word length 00363 if (subjectLength >= parameters_wordSize) 00364 { 00365 sequenceEnd = subject + numPackedBytes; 00366 00367 // Read first char and advance 00368 previousCode = *address; 00369 address++; 00370 00371 // Traverse until end of sequence 00372 while (address < sequenceEnd) 00373 { 00374 // Read next char and update codeword 00375 codeword = *address | (previousCode << 8); 00376 previousCode = *address; 00377 tableEntry = nucleotideLookup[codeword]; 00378 00379 // Calculate subject offset 00380 subjectOffset = address - subject; 00381 00382 #ifdef BITLOOKUP 00383 if (nucleotideLookup_bitLookup[codeword >> 5] & 00384 (1 << (codeword & nucleotideLookup_mask))) 00385 #endif 00386 if (tableEntry) 00387 { 00388 if (tableEntry > 0) 00389 { 00390 // Just one query position 00391 singleQueryOffset[0] = queryOffset = tableEntry; 00392 queryOffsets = singleQueryOffset; 00393 } 00394 else 00395 { 00396 // Multiple query positions 00397 queryOffsets = nucleotideLookup_additionalPositions - tableEntry; 00398 queryOffset = *queryOffsets; 00399 } 00400 00401 subjectPosition = address + 1; 00402 do 00403 { 00404 // Calculate number of matches to left and right of hit 00405 nextByte = PSSMatrix.bytePackedCodes[queryOffset]; 00406 rightMatches = PSSMatrix_packedRightMatches[nextByte ^ *subjectPosition]; 00407 prevByte = PSSMatrix.bytePackedCodes[queryOffset - previousByteDistance]; 00408 leftMatches = PSSMatrix_packedLeftMatches[prevByte ^ *(address - parameters_wordTableBytes)]; 00409 00410 if (rightMatches + leftMatches >= parameters_wordExtraLetters) 00411 { 00412 #ifndef NO_STAGE2 00413 // Calculate subject offset 00414 subjectOffset = subjectPosition - subject; 00415 00416 // Calculate the diagonal this hit is on 00417 diagonal = (subjectOffset * 4 - queryOffset + hitMatrix_queryLength) 00418 & hitMatrix_diagonalMask; 00419 00420 // If we have not extended past this point on this diagonal 00421 lastHit = hitMatrix_furthest + diagonal; 00422 00423 #ifdef VERBOSE 00424 if (parameters_verboseDloc == descriptionStart) 00425 printf("Hit %d,%d\n", queryOffset, subjectOffset * 4); 00426 #endif 00427 00428 if (*lastHit < address) 00429 { 00430 // Perform ungapped extension 00431 ungappedExtension 00432 = ungappedExtension_nucleotideExtend(queryOffset, 00433 subjectOffset, PSSMatrix, subject, subjectLength); 00434 00435 // Update furthest reached value for the diagonal 00436 *lastHit = ungappedExtension_subjectEndReached; 00437 00438 #ifdef VERBOSE 00439 if (parameters_verboseDloc == descriptionStart) 00440 printf("UngappedExtension %d,%d Score=%d\n", queryOffset, 00441 subjectOffset * 4, ungappedExtension_bestScore); 00442 if (parameters_verboseDloc == descriptionStart && ungappedExtension) 00443 ungappedExtension_print(ungappedExtension); 00444 #endif 00445 00446 // If extension scores high enough to trigger gapping 00447 if (ungappedExtension) 00448 { 00449 // Increment count of number of trigger extensions 00450 blast_numTriggerExtensions++; 00451 00452 // Create new alignment if needed 00453 if (alignments_currentAlignment == NULL) 00454 { 00455 alignments_createNew(descriptionStart, descriptionLength, subject, 00456 subjectLength, encodedLength); 00457 } 00458 00459 // Add this extension to the alignment 00460 alignments_addUngappedExtension(ungappedExtension); 00461 } 00462 00463 blast_numUngappedExtensions++; 00464 } 00465 #endif 00466 blast_numHits++; 00467 } 00468 00469 queryOffsets++; 00470 queryOffset = *queryOffsets; 00471 } 00472 while (queryOffset); 00473 } 00474 00475 address++; 00476 } 00477 } 00478 00479 sequenceCount++; 00480 00481 // Every so often print status . 00482 if ((sequenceCount % tickFrequency == 0) && 00483 parameters_outputType != parameters_xml && parameters_outputType != parameters_tabular) 00484 { 00485 #ifndef VERBOSE 00486 printf("."); 00487 fflush(stdout); 00488 #endif 00489 } 00490 } 00491 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void search_nucleotide_largeTable | ( | struct PSSMatrix | PSSMatrix, | |
| struct sequenceData * | sequenceData, | |||
| uint4 | numSequences, | |||
| uint4 | tickFrequency | |||
| ) |
Definition at line 683 of file search.c.
References alignments_addUngappedExtension(), alignments_createNew(), alignments_currentAlignment, blast_numHits, blast_numTriggerExtensions, blast_numUngappedExtensions, PSSMatrix::bytePackedCodes, sequenceData::descriptionLength, sequenceData::descriptionStart, sequenceData::encodedLength, hitMatrix_diagonalMask, hitMatrix_furthest, hitMatrix_queryLength, int4, nucleotideLookup_additionalPositions_large, nucleotideLookup_bitLookup, nucleotideLookup_large, nucleotideLookup_mask, parameters_outputType, parameters_tabular, parameters_verboseDloc, parameters_wordExtraLetters, parameters_wordSize, parameters_wordTableBytes, parameters_wordTableLetters, parameters_xml, PSSMatrix_packedLeftMatches, PSSMatrix_packedRightMatches, sequenceData::sequence, sequenceData::sequenceLength, uint4, ungappedExtension_bestScore, ungappedExtension_nucleotideExtend(), ungappedExtension_print(), and ungappedExtension_subjectEndReached.
Referenced by blast_search().
00685 { 00686 uint4 sequenceCount = 0, codeword; 00687 uint4 descriptionStart = 0, descriptionLength = 0, encodedLength; 00688 uint4 numPackedBytes, numRemaining; 00689 unsigned char *subject, *sequenceEnd, prevByte, nextByte, rightMatches, leftMatches; 00690 unsigned char previousCode, *subjectPosition, *address; 00691 int4 subjectLength, subjectOffset, wordLengthMinusOne; 00692 struct ungappedExtension* ungappedExtension; 00693 int4 *queryOffsets, tableEntry, queryOffset, singleQueryOffset[2]; 00694 int4 previousByteDistance; 00695 int4 diagonal; 00696 unsigned char** lastHit; 00697 00698 wordLengthMinusOne = parameters_wordSize - 1; 00699 previousByteDistance = (parameters_wordTableLetters + 4); 00700 singleQueryOffset[1] = 0; 00701 00702 while (sequenceCount < numSequences) 00703 { 00704 descriptionLength = sequenceData[sequenceCount].descriptionLength; 00705 descriptionStart = sequenceData[sequenceCount].descriptionStart; 00706 subjectLength = sequenceData[sequenceCount].sequenceLength; 00707 encodedLength = sequenceData[sequenceCount].encodedLength; 00708 address = subject = sequenceData[sequenceCount].sequence; 00709 00710 // New sequence, new possible alignment 00711 alignments_currentAlignment = NULL; 00712 00713 // Determine number of packed bytes and number of remaining letters 00714 numPackedBytes = subjectLength / 4; 00715 numRemaining = subjectLength % 4; 00716 00717 // Only process sequence if at least as long as the word length 00718 if (subjectLength >= parameters_wordSize) 00719 { 00720 sequenceEnd = subject + numPackedBytes; 00721 00722 // Read first char and advance 00723 previousCode = *address; 00724 address++; 00725 00726 // Traverse until end of sequence 00727 while (address < sequenceEnd) 00728 { 00729 // Read next char and update codeword 00730 codeword = *address | (previousCode << 8); 00731 previousCode = *address; 00732 tableEntry = nucleotideLookup_large[codeword]; 00733 00734 // Calculate subject offset 00735 subjectOffset = address - subject; 00736 00737 #ifdef BITLOOKUP 00738 if (nucleotideLookup_bitLookup[codeword >> 5] & 00739 (1 << (codeword & nucleotideLookup_mask))) 00740 #endif 00741 if (tableEntry) 00742 { 00743 if (tableEntry > 0) 00744 { 00745 // Just one query position 00746 singleQueryOffset[0] = queryOffset = tableEntry; 00747 queryOffsets = singleQueryOffset; 00748 } 00749 else 00750 { 00751 // Multiple query positions 00752 queryOffsets = nucleotideLookup_additionalPositions_large - tableEntry; 00753 queryOffset = *queryOffsets; 00754 } 00755 00756 subjectPosition = address + 1; 00757 do 00758 { 00759 // Calculate number of matches to left and right of hit 00760 nextByte = PSSMatrix.bytePackedCodes[queryOffset]; 00761 rightMatches = PSSMatrix_packedRightMatches[nextByte ^ *subjectPosition]; 00762 prevByte = PSSMatrix.bytePackedCodes[queryOffset - previousByteDistance]; 00763 leftMatches = PSSMatrix_packedLeftMatches[prevByte ^ *(address - parameters_wordTableBytes)]; 00764 00765 if (rightMatches + leftMatches >= parameters_wordExtraLetters) 00766 { 00767 #ifndef NO_STAGE2 00768 // Calculate subject offset 00769 subjectOffset = subjectPosition - subject; 00770 00771 // Calculate the diagonal this hit is on 00772 diagonal = (subjectOffset * 4 - queryOffset + hitMatrix_queryLength) 00773 & hitMatrix_diagonalMask; 00774 00775 // If we have not extended past this point on this diagonal 00776 lastHit = hitMatrix_furthest + diagonal; 00777 00778 #ifdef VERBOSE 00779 if (parameters_verboseDloc == descriptionStart) 00780 printf("Hit %d,%d\n", queryOffset, subjectOffset * 4); 00781 #endif 00782 if (*lastHit < address) 00783 { 00784 // Perform ungapped extension 00785 ungappedExtension 00786 = ungappedExtension_nucleotideExtend(queryOffset, 00787 subjectOffset, PSSMatrix, subject, subjectLength); 00788 00789 // Update furthest reached value for the diagonal 00790 *lastHit = ungappedExtension_subjectEndReached; 00791 00792 #ifdef VERBOSE 00793 if (parameters_verboseDloc == descriptionStart) 00794 printf("UngappedExtension %d,%d Score=%d\n", queryOffset, 00795 subjectOffset * 4, ungappedExtension_bestScore); 00796 if (parameters_verboseDloc == descriptionStart && ungappedExtension) 00797 ungappedExtension_print(ungappedExtension); 00798 #endif 00799 00800 // If extension scores high enough to trigger gapping 00801 if (ungappedExtension) 00802 { 00803 // Increment count of number of trigger extensions 00804 blast_numTriggerExtensions++; 00805 00806 // Create new alignment if needed 00807 if (alignments_currentAlignment == NULL) 00808 { 00809 alignments_createNew(descriptionStart, descriptionLength, subject, 00810 subjectLength, encodedLength); 00811 } 00812 00813 // Add this extension to the alignment 00814 alignments_addUngappedExtension(ungappedExtension); 00815 } 00816 00817 blast_numUngappedExtensions++; 00818 } 00819 #endif 00820 blast_numHits++; 00821 } 00822 00823 queryOffsets++; 00824 queryOffset = *queryOffsets; 00825 } 00826 while (queryOffset); 00827 } 00828 00829 address++; 00830 } 00831 } 00832 00833 sequenceCount++; 00834 00835 // Every so often print status . 00836 if ((sequenceCount % tickFrequency == 0) && 00837 parameters_outputType != parameters_xml && parameters_outputType != parameters_tabular) 00838 { 00839 #ifndef VERBOSE 00840 printf("."); 00841 fflush(stdout); 00842 #endif 00843 } 00844 } 00845 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void search_nucleotide_longWord | ( | struct PSSMatrix | PSSMatrix, | |
| struct sequenceData * | sequenceData, | |||
| uint4 | numSequences, | |||
| uint4 | tickFrequency | |||
| ) |
Definition at line 494 of file search.c.
References alignments_addUngappedExtension(), alignments_createNew(), alignments_currentAlignment, blast_numHits, blast_numTriggerExtensions, blast_numUngappedExtensions, PSSMatrix::bytePackedCodes, sequenceData::descriptionLength, sequenceData::descriptionStart, sequenceData::encodedLength, hitMatrix_diagonalMask, hitMatrix_furthest, hitMatrix_queryLength, int2, int4, nucleotideLookup, nucleotideLookup_additionalPositions, nucleotideLookup_bitLookup, nucleotideLookup_mask, parameters_outputType, parameters_tabular, parameters_verboseDloc, parameters_wordExtraBytes, parameters_wordExtraLetters, parameters_wordSize, parameters_wordTableBytes, parameters_wordTableLetters, parameters_xml, PSSMatrix_packedLeftMatches, PSSMatrix_packedRightMatches, sequenceData::sequence, sequenceData::sequenceLength, uint4, ungappedExtension_bestScore, ungappedExtension_nucleotideExtend(), ungappedExtension_print(), and ungappedExtension_subjectEndReached.
Referenced by blast_search().
00496 { 00497 uint4 sequenceCount = 0, codeword; 00498 uint4 descriptionStart = 0, descriptionLength = 0, encodedLength; 00499 uint4 numPackedBytes, numRemaining; 00500 unsigned char *subject, *sequenceEnd, prevByte, nextByte, rightMatches, leftMatches; 00501 unsigned char previousCode, *subjectPosition, *address; 00502 int4 subjectLength, subjectOffset, wordLengthMinusOne; 00503 struct ungappedExtension* ungappedExtension; 00504 int2 *queryOffsets, tableEntry, queryOffset, singleQueryOffset[2]; 00505 int4 previousByteDistance; 00506 int4 diagonal, extraBytesNeeded; 00507 unsigned char** lastHit; 00508 00509 wordLengthMinusOne = parameters_wordSize - 1; 00510 previousByteDistance = parameters_wordTableLetters + parameters_wordExtraBytes * 4 + 4; 00511 singleQueryOffset[1] = 0; 00512 00513 while (sequenceCount < numSequences) 00514 { 00515 descriptionLength = sequenceData[sequenceCount].descriptionLength; 00516 descriptionStart = sequenceData[sequenceCount].descriptionStart; 00517 subjectLength = sequenceData[sequenceCount].sequenceLength; 00518 encodedLength = sequenceData[sequenceCount].encodedLength; 00519 address = subject = sequenceData[sequenceCount].sequence; 00520 00521 // Determine number of packed bytes and number of remaining letters 00522 numPackedBytes = subjectLength / 4; 00523 numRemaining = subjectLength % 4; 00524 00525 // Only process sequence if at least as long as the word length 00526 if (subjectLength >= parameters_wordSize) 00527 { 00528 sequenceEnd = subject + numPackedBytes; 00529 00530 // Read first char and advance 00531 previousCode = *address; 00532 address++; 00533 00534 // Traverse until end of sequence 00535 while (address < sequenceEnd) 00536 { 00537 // Read next char and update codeword 00538 codeword = *address | (previousCode << 8); 00539 previousCode = *address; 00540 tableEntry = nucleotideLookup[codeword]; 00541 00542 // Calculate subject offset 00543 subjectOffset = address - subject; 00544 00545 #ifdef BITLOOKUP 00546 if (nucleotideLookup_bitLookup[codeword >> 5] & 00547 (1 << (codeword & nucleotideLookup_mask))) 00548 #endif 00549 if (tableEntry) 00550 { 00551 if (tableEntry > 0) 00552 { 00553 // Just one query position 00554 singleQueryOffset[0] = queryOffset = tableEntry; 00555 queryOffsets = singleQueryOffset; 00556 } 00557 else 00558 { 00559 // Multiple query positions 00560 queryOffsets = nucleotideLookup_additionalPositions - tableEntry; 00561 queryOffset = *queryOffsets; 00562 } 00563 00564 subjectPosition = address + 1; 00565 do 00566 { 00567 extraBytesNeeded = parameters_wordExtraBytes; 00568 00569 while (extraBytesNeeded) 00570 { 00571 // Check for matching bytes to right 00572 if (*subjectPosition != PSSMatrix.bytePackedCodes[queryOffset]) 00573 break; 00574 00575 // printf("Match %d,%d\n", queryOffset, (subjectPosition - subject)*4); 00576 00577 extraBytesNeeded--; 00578 subjectPosition++; 00579 subjectOffset++; 00580 queryOffset+=4; 00581 } 00582 00583 if (!extraBytesNeeded) 00584 { 00585 // Calculate number of matches to left and right of hit 00586 nextByte = PSSMatrix.bytePackedCodes[queryOffset]; 00587 rightMatches = PSSMatrix_packedRightMatches[nextByte ^ *(subjectPosition)]; 00588 prevByte = PSSMatrix.bytePackedCodes[queryOffset - previousByteDistance]; 00589 leftMatches = PSSMatrix_packedLeftMatches[prevByte ^ *(address - parameters_wordTableBytes)]; 00590 00591 // printf("prev at %d,%d\n", queryOffset - previousByteDistance); 00592 // printf("part matches=[%d,%d]\n", leftMatches, rightMatches); 00593 00594 if (rightMatches + leftMatches >= parameters_wordExtraLetters) 00595 { 00596 // printf("Hit! dloc=%d\n", descriptionStart); 00597 #ifndef NO_STAGE2 00598 // Calculate subject offset 00599 subjectOffset = subjectPosition - subject; 00600 00601 // Calculate the diagonal this hit is on 00602 diagonal = (subjectOffset * 4 - queryOffset + hitMatrix_queryLength) 00603 & hitMatrix_diagonalMask; 00604 00605 // If we have not extended past this point on this diagonal 00606 lastHit = hitMatrix_furthest + diagonal; 00607 00608 #ifdef VERBOSE 00609 if (parameters_verboseDloc == descriptionStart) 00610 printf("Hit %d,%d\n", queryOffset, subjectOffset * 4); 00611 #endif 00612 if (*lastHit < address) 00613 { 00614 // Perform ungapped extension 00615 ungappedExtension 00616 = ungappedExtension_nucleotideExtend(queryOffset, 00617 subjectOffset, PSSMatrix, subject, subjectLength); 00618 00619 // Update furthest reached value for the diagonal 00620 *lastHit = ungappedExtension_subjectEndReached; 00621 00622 #ifdef VERBOSE 00623 if (parameters_verboseDloc == descriptionStart) 00624 printf("UngappedExtension %d,%d Score=%d\n", queryOffset, 00625 subjectOffset * 4, ungappedExtension_bestScore); 00626 if (parameters_verboseDloc == descriptionStart && ungappedExtension) 00627 ungappedExtension_print(ungappedExtension); 00628 #endif 00629 00630 // If extension scores high enough to trigger gapping 00631 if (ungappedExtension) 00632 { 00633 // Increment count of number of trigger extensions 00634 blast_numTriggerExtensions++; 00635 00636 // Create new alignment if needed 00637 if (alignments_currentAlignment == NULL) 00638 { 00639 alignments_createNew(descriptionStart, descriptionLength, subject, 00640 subjectLength, encodedLength); 00641 } 00642 00643 // Add this extension to the alignment 00644 alignments_addUngappedExtension(ungappedExtension); 00645 } 00646 00647 blast_numUngappedExtensions++; 00648 } 00649 #endif 00650 blast_numHits++; 00651 } 00652 } 00653 00654 queryOffsets++; 00655 queryOffset = *queryOffsets; 00656 } 00657 while (queryOffset); 00658 } 00659 00660 address++; 00661 } 00662 } 00663 00664 sequenceCount++; 00665 00666 // Every so often print status . 00667 if ((sequenceCount % tickFrequency == 0) && 00668 parameters_outputType != parameters_xml && parameters_outputType != parameters_tabular) 00669 { 00670 #ifndef VERBOSE 00671 printf("."); 00672 fflush(stdout); 00673 #endif 00674 } 00675 00676 // New sequence, new possible alignment 00677 alignments_currentAlignment = NULL; 00678 } 00679 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void search_nucleotideIndex | ( | unsigned char * | address, | |
| unsigned char * | endAddress, | |||
| int4 | tickFrequency, | |||
| struct PSSMatrix | PSSMatrix | |||
| ) |
| void search_nucleotideSsearch | ( | struct PSSMatrix | PSSMatrix, | |
| struct sequenceData * | sequenceData, | |||
| uint4 | numSequences, | |||
| uint4 | tickFrequency | |||
| ) |
Definition at line 1105 of file search.c.
References alignments_addFinalAlignment(), alignments_addGappedExtension(), alignments_createNew(), alignments_currentAlignment, alignments_isFinalAlignment(), dpResults::best, dpResults::bestScore, blast_gappedNominalCutoff, sequenceData::descriptionLength, sequenceData::descriptionStart, sequenceData::encodedLength, encoding_byteUnpack(), encoding_insertWilds(), gappedExtension_score(), int4, gappedExtension::nominalScore, parameters_outputType, parameters_tabular, parameters_xml, gappedExtension::queryEnd, coordinate::queryOffset, sequenceData::sequence, sequenceData::sequenceLength, smithWatermanScoring_score(), smithWatermanScoring_scoreReverse(), smithWatermanTraceback_build(), gappedExtension::subjectEnd, coordinate::subjectOffset, and uint4.
Referenced by blast_search().
01107 { 01108 uint4 sequenceCount = 0; 01109 uint4 descriptionStart = 0, descriptionLength = 0, encodedLength; 01110 unsigned char *subject, *unpackedSubject, *address; 01111 int4 subjectLength; 01112 struct gappedExtension* gappedExtension; 01113 struct dpResults dpResults, reverseDpResults; 01114 01115 while (sequenceCount < numSequences) 01116 { 01117 descriptionLength = sequenceData[sequenceCount].descriptionLength; 01118 descriptionStart = sequenceData[sequenceCount].descriptionStart; 01119 subjectLength = sequenceData[sequenceCount].sequenceLength; 01120 encodedLength = sequenceData[sequenceCount].encodedLength; 01121 address = subject = sequenceData[sequenceCount].sequence; 01122 01123 // New sequence, new possible alignment 01124 alignments_currentAlignment = NULL; 01125 01126 // Unpack the subject 01127 unpackedSubject = encoding_byteUnpack(subject, subjectLength); 01128 01129 // Re-insert wildcards 01130 encoding_insertWilds(unpackedSubject, subject + ((subjectLength + 3) / 4), 01131 subject + encodedLength); 01132 01133 // Perform SW score only 01134 dpResults = smithWatermanScoring_score(PSSMatrix, subjectLength, unpackedSubject); 01135 01136 // If above e-value cutoff 01137 if (dpResults.bestScore >= blast_gappedNominalCutoff 01138 && alignments_isFinalAlignment(dpResults.bestScore)) 01139 { 01140 // Perform SW alignment in the reverse direction, to find the start of the optimal alignment 01141 reverseDpResults = smithWatermanScoring_scoreReverse(PSSMatrix, subjectLength, 01142 unpackedSubject, dpResults.best); 01143 01144 // Collect traceback information and store alignment 01145 gappedExtension = smithWatermanTraceback_build(PSSMatrix, subjectLength, unpackedSubject, 01146 reverseDpResults.best, dpResults.best); 01147 01148 if (reverseDpResults.bestScore != dpResults.bestScore || 01149 dpResults.bestScore != gappedExtension->nominalScore) 01150 { 01151 fprintf(stderr, "Error: Forward and reverse Smith-Waterman alignment scores do not match\n"); 01152 fprintf(stderr, "Forward Score=%d End=%d,%d\n", dpResults.bestScore, 01153 dpResults.best.queryOffset, dpResults.best.subjectOffset); 01154 fprintf(stderr, "Reverse Score=%d End=%d,%d\n", reverseDpResults.bestScore, 01155 reverseDpResults.best.queryOffset, reverseDpResults.best.subjectOffset); 01156 fprintf(stderr, "Traceback Score=%d End=%d,%d\n", gappedExtension->nominalScore, 01157 gappedExtension->queryEnd, gappedExtension->subjectEnd); 01158 // exit(-1); 01159 } 01160 01161 gappedExtension_score(gappedExtension); 01162 01163 alignments_createNew(descriptionStart, descriptionLength, unpackedSubject, subjectLength, 0); 01164 alignments_addGappedExtension(alignments_currentAlignment, gappedExtension); 01165 alignments_addFinalAlignment(dpResults.bestScore, alignments_currentAlignment); 01166 } 01167 else 01168 { 01169 free(unpackedSubject); 01170 } 01171 01172 sequenceCount++; 01173 01174 // Every so often print status . 01175 if ((sequenceCount % tickFrequency == 0) && 01176 parameters_outputType != parameters_xml && parameters_outputType != parameters_tabular) 01177 { 01178 #ifndef VERBOSE 01179 printf("."); 01180 fflush(stdout); 01181 #endif 01182 } 01183 } 01184 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void search_protein1hit | ( | struct PSSMatrix | PSSMatrix, | |
| struct sequenceData * | sequenceData, | |||
| uint4 | numSequences, | |||
| uint4 | tickFrequency | |||
| ) |
Definition at line 11 of file search.c.
References alignments_addUngappedExtension(), alignments_createNew(), alignments_currentAlignment, blast_numHits, blast_numTriggerExtensions, blast_numUngappedExtensions, constants_max_int2, sequenceData::descriptionLength, sequenceData::descriptionStart, sequenceData::encodedLength, hitMatrix_furthest, int4, PSSMatrix::matrix, group::nextGroups, group::nextWords, parameters_outputType, parameters_tabular, parameters_wordSize, parameters_xml, sequenceData::sequence, sequenceData::sequenceLength, uint2, uint4, ungappedExtension_oneHitExtend(), ungappedExtension_subjectEndReached, wordLookupDFA_additionalQueryPositions, wordLookupDFA_groups, and wordLookupDFA_numCodes.
Referenced by alignments_expandCluster(), and blast_search().
00013 { 00014 uint4 sequenceCount = 0; 00015 uint4 descriptionStart = 0, descriptionLength = 0, encodedLength; 00016 unsigned char *subject, *sequenceEnd, *address; 00017 int4 subjectLength, subjectOffset, wordLengthMinusOne, count = 0; 00018 unsigned char currentWord, *currentBlock; 00019 struct group* currentGroup; 00020 uint2* queryOffsets, queryOffset; 00021 struct ungappedExtension* ungappedExtension; 00022 int4 diagonal; 00023 unsigned char** lastHit; 00024 00025 wordLengthMinusOne = parameters_wordSize - 1; 00026 00027 while (sequenceCount < numSequences) 00028 { 00029 descriptionLength = sequenceData[sequenceCount].descriptionLength; 00030 descriptionStart = sequenceData[sequenceCount].descriptionStart; 00031 subjectLength = sequenceData[sequenceCount].sequenceLength; 00032 encodedLength = sequenceData[sequenceCount].encodedLength; 00033 address = subject = sequenceData[sequenceCount].sequence; 00034 00035 // New sequence, new possible alignment 00036 alignments_currentAlignment = NULL; 00037 00038 // Only process sequence if at least as long as the word length 00039 if (subjectLength >= parameters_wordSize) 00040 { 00041 // Start at 000 state in Finite State Automata 00042 currentGroup = wordLookupDFA_groups; 00043 00044 // Read first wordLength - 1 chars and advance 00045 count = wordLengthMinusOne; 00046 while (count > 0) 00047 { 00048 if (*address < wordLookupDFA_numCodes) 00049 currentGroup = currentGroup->nextGroups + *address; 00050 else 00051 currentGroup = currentGroup->nextGroups; 00052 00053 address++; 00054 count--; 00055 } 00056 00057 // Read the rest of the codes, using the Finite State Automata defined 00058 // by wordLookupDFA to get the query positions of int4erest 00059 sequenceEnd = subject + subjectLength; 00060 while (address < sequenceEnd) 00061 { 00062 currentBlock = currentGroup->nextWords; 00063 00064 // If current code is a regular letter 00065 if (*address < wordLookupDFA_numCodes) 00066 { 00067 // Use it 00068 currentWord = currentBlock[*address]; 00069 currentGroup = currentGroup->nextGroups + *address; 00070 } 00071 else 00072 { 00073 // Else check if we've reached end of the file 00074 if (address >= sequenceEnd) 00075 break; 00076 00077 // If not, we've read a wild code. Use first code instead 00078 currentWord = *currentBlock; 00079 currentGroup = currentGroup->nextGroups; 00080 } 00081 00082 if (currentWord) 00083 { 00084 // Calculate subject offset 00085 subjectOffset = address - subject; 00086 00087 // At least one query position, stored at an extenal address 00088 queryOffsets = ((uint2*)currentBlock) - currentWord; 00089 00090 // If the zero flag is stored at the first query position 00091 if (!*queryOffsets) 00092 { 00093 // Go to an outside address for additional positions 00094 queryOffsets = wordLookupDFA_additionalQueryPositions 00095 + (*(queryOffsets + 1) * constants_max_int2) + *(queryOffsets + 2); 00096 } 00097 00098 do 00099 { 00100 queryOffset = *queryOffsets; 00101 00102 #ifndef NO_STAGE2 00103 // Calculate the diagonal this hit is on 00104 diagonal = subjectOffset - queryOffset; 00105 00106 // If we have not extended past this point on this diagonal 00107 lastHit = hitMatrix_furthest + diagonal; 00108 if (*lastHit < address) 00109 { 00110 // Increment tally number of extensions 00111 blast_numUngappedExtensions++; 00112 00113 // If only one hit triggered this extension 00114 ungappedExtension 00115 = ungappedExtension_oneHitExtend(PSSMatrix.matrix + queryOffset, 00116 address, PSSMatrix, subject); 00117 00118 // Update furthest reached value for the diagonal 00119 *lastHit = ungappedExtension_subjectEndReached; 00120 00121 // If extension scores high enough to trigger gapping 00122 if (ungappedExtension) 00123 { 00124 // Increment count of number of trigger extensions 00125 blast_numTriggerExtensions++; 00126 00127 // Create new alignment if needed 00128 if (alignments_currentAlignment == NULL) 00129 { 00130 // Create new alignment object 00131 alignments_createNew(descriptionStart, descriptionLength, 00132 subject, subjectLength, encodedLength); 00133 } 00134 00135 // Add this extension to the alignment 00136 alignments_addUngappedExtension(ungappedExtension); 00137 } 00138 } 00139 #endif 00140 00141 queryOffsets++; blast_numHits++; 00142 } 00143 while (*queryOffsets); 00144 } 00145 00146 address++; 00147 } 00148 } 00149 00150 sequenceCount++; 00151 00152 // Every so often print status . 00153 if ((sequenceCount % tickFrequency == 0) && 00154 parameters_outputType != parameters_xml && parameters_outputType != parameters_tabular) 00155 { 00156 #ifndef VERBOSE 00157 printf("."); 00158 fflush(stdout); 00159 #endif 00160 } 00161 } 00162 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void search_protein2hit | ( | struct PSSMatrix | PSSMatrix, | |
| struct sequenceData * | sequenceData, | |||
| uint4 | numSequences, | |||
| uint4 | tickFrequency | |||
| ) |
Definition at line 165 of file search.c.
References alignments_addUngappedExtension(), alignments_createNew(), alignments_currentAlignment, blast_numHits, blast_numTriggerExtensions, blast_numUngappedExtensions, constants_max_int2, sequenceData::descriptionLength, sequenceData::descriptionStart, sequenceData::encodedLength, hitMatrix_furthest, int4, PSSMatrix::matrix, group::nextGroups, group::nextWords, parameters_A, parameters_outputType, parameters_overlap, parameters_tabular, parameters_wordSize, parameters_xml, sequenceData::sequence, sequenceData::sequenceLength, uint2, uint4, ungappedExtension_extend(), ungappedExtension_subjectEndReached, wordLookupDFA_additionalQueryPositions, wordLookupDFA_groups, and wordLookupDFA_numCodes.
Referenced by alignments_expandCluster(), and blast_search().
00167 { 00168 uint4 sequenceCount = 0; 00169 uint4 descriptionStart = 0, descriptionLength = 0, encodedLength; 00170 unsigned char *subject, *sequenceEnd; 00171 int4 subjectLength, subjectOffset, wordLengthMinusOne, count = 0; 00172 unsigned char currentWord, *currentBlock, *address; 00173 struct group* currentGroup; 00174 uint2* queryOffsets, queryOffset; 00175 struct ungappedExtension* ungappedExtension; 00176 int4 diagonal, distance; 00177 unsigned char** lastHit; 00178 00179 wordLengthMinusOne = parameters_wordSize - 1; 00180 00181 while (sequenceCount < numSequences) 00182 { 00183 descriptionLength = sequenceData[sequenceCount].descriptionLength; 00184 descriptionStart = sequenceData[sequenceCount].descriptionStart; 00185 subjectLength = sequenceData[sequenceCount].sequenceLength; 00186 encodedLength = sequenceData[sequenceCount].encodedLength; 00187 address = subject = sequenceData[sequenceCount].sequence; 00188 00189 // New sequence, new possible alignment 00190 alignments_currentAlignment = NULL; 00191 00192 // Only process sequence if at least as long as the word length 00193 if (subjectLength >= parameters_wordSize) 00194 { 00195 // Start at 000 state in Finite State Automata 00196 currentGroup = wordLookupDFA_groups; 00197 00198 // Read first wordLength - 1 chars and advance 00199 count = wordLengthMinusOne; 00200 while (count > 0) 00201 { 00202 if (*address < wordLookupDFA_numCodes) 00203 currentGroup = currentGroup->nextGroups + *address; 00204 else 00205 currentGroup = currentGroup->nextGroups; 00206 00207 address++; 00208 count--; 00209 } 00210 00211 // Read the rest of the codes, using the Finite State Automata defined 00212 // by wordLookupDFA to get the query positions of int4erest 00213 sequenceEnd = subject + subjectLength; 00214 while (address < sequenceEnd) 00215 { 00216 currentBlock = currentGroup->nextWords; 00217 00218 // If current code is a regular letter 00219 if (*address < wordLookupDFA_numCodes) 00220 { 00221 // Use it 00222 currentWord = currentBlock[*address]; 00223 currentGroup = currentGroup->nextGroups + *address; 00224 } 00225 else 00226 { 00227 // Else check if we've reached end of the file 00228 if (address >= sequenceEnd) 00229 break; 00230 00231 // If not, we've read a wild code. Use first code instead 00232 currentWord = currentBlock[0]; 00233 currentGroup = currentGroup->nextGroups; 00234 } 00235 00236 if (currentWord) 00237 { 00238 // Calculate subject offset 00239 subjectOffset = address - subject; 00240 00241 // If at least one query position, stored at an extenal address 00242 queryOffsets = ((uint2*)currentBlock) - currentWord; 00243 00244 // If the zero flag is stored at the first query position 00245 if (!*queryOffsets) 00246 { 00247 // Go to an outside address for additional positions 00248 queryOffsets = wordLookupDFA_additionalQueryPositions 00249 + (*(queryOffsets + 1) * constants_max_int2) + *(queryOffsets + 2); 00250 } 00251 00252 do 00253 { 00254 queryOffset = *queryOffsets; 00255 00256 #ifndef NO_STAGE2 00257 // Calculate the diagonal this hit is on 00258 diagonal = subjectOffset - queryOffset; 00259 00260 // Calculate distance since last hit 00261 lastHit = hitMatrix_furthest + diagonal; 00262 distance = address - *lastHit; 00263 00264 if (distance >= parameters_A) 00265 { 00266 // Too far apart, update furthest 00267 *lastHit = address; 00268 } 00269 else if (distance >= parameters_overlap) 00270 { 00271 // Not overlaping - extension triggered 00272 // Increment tally number of extensions 00273 blast_numUngappedExtensions++; 00274 00275 // Perform ungapped extension start between query/subject start/end 00276 // and extending outwards in each direction 00277 ungappedExtension 00278 = ungappedExtension_extend(PSSMatrix.matrix + queryOffset, 00279 address, *lastHit, PSSMatrix, subject); 00280 00281 // Update furthest reached value for the diagonal 00282 *lastHit = ungappedExtension_subjectEndReached; 00283 00284 // If extension scores high enough to trigger gapping 00285 if (ungappedExtension) 00286 { 00287 // Increment count of number of trigger extensions 00288 blast_numTriggerExtensions++; 00289 00290 // Create new alignment if needed 00291 if (alignments_currentAlignment == NULL) 00292 { 00293 // Create new alignment object using subject with wilds 00294 alignments_createNew(descriptionStart, descriptionLength, subject, 00295 subjectLength, encodedLength); 00296 } 00297 00298 // Add this extension to the alignment 00299 alignments_addUngappedExtension(ungappedExtension); 00300 } 00301 } 00302 #endif 00303 00304 queryOffsets++; blast_numHits++; 00305 } 00306 while (*queryOffsets); 00307 } 00308 00309 address++; 00310 } 00311 } 00312 00313 sequenceCount++; 00314 00315 // Every so often print status . 00316 if ((sequenceCount % tickFrequency == 0) && 00317 parameters_outputType != parameters_xml && parameters_outputType != parameters_tabular) 00318 { 00319 #ifndef VERBOSE 00320 printf("."); 00321 fflush(stdout); 00322 #endif 00323 } 00324 } 00325 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void search_proteinSsearch | ( | struct PSSMatrix | PSSMatrix, | |
| struct sequenceData * | sequenceData, | |||
| uint4 | numSequences, | |||
| uint4 | tickFrequency | |||
| ) |
Definition at line 1032 of file search.c.
References alignments_addFinalAlignment(), alignments_addGappedExtension(), alignments_createNew(), alignments_currentAlignment, alignments_isFinalAlignment(), alignments_sortFinalAlignments(), dpResults::best, dpResults::bestScore, blast_gappedNominalCutoff, sequenceData::descriptionLength, sequenceData::descriptionStart, sequenceData::encodedLength, gappedExtension_score(), int4, gappedExtension::nominalScore, parameters_outputType, parameters_tabular, parameters_xml, sequenceData::sequence, sequenceData::sequenceLength, smithWatermanScoring_score(), smithWatermanScoring_scoreReverse(), smithWatermanTraceback_build(), and uint4.
Referenced by blast_search().
01034 { 01035 uint4 sequenceCount = 0; 01036 uint4 descriptionStart = 0, descriptionLength = 0, encodedLength; 01037 unsigned char *subject, *address; 01038 int4 subjectLength; 01039 struct gappedExtension* gappedExtension; 01040 struct dpResults dpResults, reverseDpResults; 01041 01042 while (sequenceCount < numSequences) 01043 { 01044 descriptionLength = sequenceData[sequenceCount].descriptionLength; 01045 descriptionStart = sequenceData[sequenceCount].descriptionStart; 01046 subjectLength = sequenceData[sequenceCount].sequenceLength; 01047 encodedLength = sequenceData[sequenceCount].encodedLength; 01048 address = subject = sequenceData[sequenceCount].sequence; 01049 01050 // New sequence, new possible alignment 01051 alignments_currentAlignment = NULL; 01052 01053 // Perform SW score only 01054 dpResults = smithWatermanScoring_score(PSSMatrix, subjectLength, subject); 01055 01056 // printf("%d\n", dpResults.bestScore); 01057 01058 // If above e-value cutoff 01059 if (dpResults.bestScore >= blast_gappedNominalCutoff 01060 && alignments_isFinalAlignment(dpResults.bestScore)) 01061 { 01062 // Perform SW alignment in the reverse direction, to find the start of the optimal alignment 01063 reverseDpResults = smithWatermanScoring_scoreReverse(PSSMatrix, subjectLength, subject, 01064 dpResults.best); 01065 01066 // Collect traceback information and store alignment 01067 gappedExtension = smithWatermanTraceback_build(PSSMatrix, subjectLength, subject, 01068 reverseDpResults.best, dpResults.best); 01069 01070 if (reverseDpResults.bestScore != dpResults.bestScore || 01071 dpResults.bestScore != gappedExtension->nominalScore) 01072 { 01073 fprintf(stderr, "Error: Forward and reverse Smith-Waterman alignment scores do not match\n"); 01074 exit(-1); 01075 } 01076 01077 gappedExtension_score(gappedExtension); 01078 01079 alignments_createNew(descriptionStart, descriptionLength, subject, subjectLength, encodedLength); 01080 alignments_addGappedExtension(alignments_currentAlignment, gappedExtension); 01081 alignments_addFinalAlignment(dpResults.bestScore, alignments_currentAlignment); 01082 } 01083 01084 // Advance to next sequence 01085 address += encodedLength - 1; 01086 01087 sequenceCount++; 01088 01089 // Every so often print status . 01090 if ((sequenceCount % tickFrequency == 0) && 01091 parameters_outputType != parameters_xml && parameters_outputType != parameters_tabular) 01092 { 01093 #ifndef VERBOSE 01094 printf("."); 01095 fflush(stdout); 01096 #endif 01097 } 01098 } 01099 01100 // Sort the final alignments by refined score 01101 alignments_sortFinalAlignments(); 01102 }
Here is the call graph for this function:

Here is the caller graph for this function:

1.5.2