#include "common.h"#include "dnautil.h"#include "nt4.h"#include "dnaseq.h"#include "crudeali.h"Include dependency graph for crudeali.c:

Go to the source code of this file.
Data Structures | |
| struct | crudeHit |
| struct | crudeExon |
| struct | crudeGene |
| struct | probeTile |
| struct | fastProber |
Defines | |
| #define | maxTileSize 16 |
| #define | wobbleMask 0xF3CF |
| #define | tileHashBits 16 |
| #define | tileHashSize (1<<tileHashBits) |
| #define | tileHashMask (tileHashSize-1) |
| #define | tileHashFunc(tile) ((tile)&tileHashMask) |
| #define | maxHitsAtOnce 120000 |
Functions | |
| static int | lumpHitsIntoExons (struct crudeHit *hits, int hitCount, struct crudeExon *exons) |
| static int | lumpExonsIntoGenes (struct crudeExon *exons, int exonCount, struct crudeGene *genes, struct nt4Seq *target, boolean isRc) |
| static int | copyExonsIntoGenes (struct crudeExon *exons, int exonCount, struct crudeGene *genes, struct nt4Seq *target, boolean isRc) |
| static boolean | makeGoodTile (DNA *dna, bits32 *retTile16, bits16 *retTile8, int tileSize) |
| static struct probeTile ** | makeTileHash () |
| static struct fastProber * | makeFastProber (DNA *probeDna, int probeSize) |
| static void | freeFastProber (struct fastProber **pFp) |
| static int | makeIndividualHits16 (struct probeTile **hash, bits32 *bases, int baseOffset, int baseWordCount, struct crudeHit *hits, int maxHitCount, int hitCount) |
| static int | makeHits16 (struct fastProber *fp, struct nt4Seq *target, struct crudeHit *hits, int maxHitCount) |
| void | squeezeHits8 (struct fastProber *fp, short *compactDiagBuf, int compactDiagBufByteSize, int startToff, int lastCompactOff, int lastCompareOff, struct crudeHit *hits, int lastCompactedHit, int hitCount, int *retLastCompactedHit, int *retHitCount) |
| static int | makeHits8 (struct fastProber *fp, struct nt4Seq *target, struct crudeHit *hits, int maxHitCount) |
| static int | cmpHitsTargetFirst (const void *va, const void *vb) |
| static int | cmpExonsTargetFirst (const void *va, const void *vb) |
| static int | makeHits (struct fastProber *fp, struct nt4Seq *target, struct crudeHit *hits, int maxHitCount) |
| static int | findCrudeGenes (struct fastProber *fp, struct nt4Seq *target, struct crudeGene *genes, int maxGeneCount, boolean isRc) |
| static int | cmpGenesByScore (const void *va, const void *vb) |
| static int | countGenesBetter (struct crudeGene *genes, int geneCount, int cutoff) |
| static int | filterPoorGenes (struct crudeGene *genes, int geneCount) |
| static int | chromIx (struct nt4Seq *one, struct nt4Seq **chrome, int chromeCount) |
| static int | countPrettyGood (struct crudeGene *crudeGenes, int geneCount, int fraction, int minScore) |
| void | initWormDnaMatchScores () |
| void | scoreNoninsertingExtensions (struct crudeGene *crudeGene, DNA *probe, int probeSize) |
| crudeAli * | crudeAliFind (DNA *probe, int probeSize, struct nt4Seq **chrome, int chromeCount, int tileSize, int minScore) |
Variables | |
| static char const | rcsid [] = "$Id: crudeali.c,v 1.5 2006/05/09 01:05:55 markd Exp $" |
| static int | caTileSize = 16 |
| static boolean | caIsCdna = TRUE |
| static int | wormDnaMatchScores [256][256] |
| #define maxHitsAtOnce 120000 |
| #define maxTileSize 16 |
Definition at line 19 of file crudeali.c.
| #define tileHashBits 16 |
| #define tileHashFunc | ( | tile | ) | ((tile)&tileHashMask) |
Definition at line 283 of file crudeali.c.
Referenced by makeFastProber(), makeHits16(), and makeIndividualHits16().
| #define tileHashMask (tileHashSize-1) |
Definition at line 282 of file crudeali.c.
| #define tileHashSize (1<<tileHashBits) |
| #define wobbleMask 0xF3CF |
Definition at line 663 of file crudeali.c.
References FALSE.
Referenced by cdaLoadOne(), cdaWriteOne(), crudeAliFind(), and gdfReadOneGene().
00664 { 00665 int i; 00666 for (i=0; i<chromeCount; ++i) 00667 { 00668 if (one == chrome[i]) 00669 return i; 00670 } 00671 assert(FALSE); 00672 return -1; 00673 }
Here is the caller graph for this function:

| static int cmpExonsTargetFirst | ( | const void * | va, | |
| const void * | vb | |||
| ) | [static] |
Definition at line 548 of file crudeali.c.
References crudeExon::probeStart, and crudeExon::targetStart.
Referenced by findCrudeGenes().
00551 { 00552 struct crudeExon *a = (struct crudeExon *)va; 00553 struct crudeExon *b = (struct crudeExon *)vb; 00554 long diff; 00555 if ((diff = a->targetStart - b->targetStart) != 0) 00556 return diff; 00557 return a->probeStart - b->probeStart; 00558 }
Here is the caller graph for this function:

| static int cmpGenesByScore | ( | const void * | va, | |
| const void * | vb | |||
| ) | [static] |
Definition at line 611 of file crudeali.c.
References crudeGene::score.
Referenced by crudeAliFind(), and filterPoorGenes().
00613 { 00614 struct crudeGene *a = (struct crudeGene *)va; 00615 struct crudeGene *b = (struct crudeGene *)vb; 00616 return b->score - a->score; 00617 }
Here is the caller graph for this function:

| static int cmpHitsTargetFirst | ( | const void * | va, | |
| const void * | vb | |||
| ) | [static] |
Definition at line 536 of file crudeali.c.
References crudeHit::probeOffset, and crudeHit::targetOffset.
Referenced by findCrudeGenes().
00539 { 00540 struct crudeHit *a = (struct crudeHit *)va; 00541 struct crudeHit *b = (struct crudeHit *)vb; 00542 long diff; 00543 if ((diff = a->targetOffset - b->targetOffset) != 0) 00544 return diff; 00545 return a->probeOffset - b->probeOffset; 00546 }
Here is the caller graph for this function:

| static int copyExonsIntoGenes | ( | struct crudeExon * | exons, | |
| int | exonCount, | |||
| struct crudeGene * | genes, | |||
| struct nt4Seq * | target, | |||
| boolean | isRc | |||
| ) | [static] |
Definition at line 212 of file crudeali.c.
References crudeExon::hitCount, crudeGene::isRc, crudeExon::probeEnd, crudeGene::probeEnd, crudeExon::probeStart, crudeGene::probeStart, crudeGene::score, crudeGene::target, crudeExon::targetEnd, crudeGene::targetEnd, crudeExon::targetStart, and crudeGene::targetStart.
Referenced by findCrudeGenes().
00215 { 00216 int i; 00217 for (i=0; i<exonCount; ++i) 00218 { 00219 genes[i].target = target; 00220 genes[i].isRc = isRc; 00221 genes[i].probeStart = exons[i].probeStart; 00222 genes[i].probeEnd = exons[i].probeEnd; 00223 genes[i].targetStart = exons[i].targetStart; 00224 genes[i].targetEnd = exons[i].targetEnd; 00225 genes[i].score = exons[i].hitCount * exons[i].hitCount; 00226 } 00227 return exonCount; 00228 }
Here is the caller graph for this function:

| static int countGenesBetter | ( | struct crudeGene * | genes, | |
| int | geneCount, | |||
| int | cutoff | |||
| ) | [static] |
Definition at line 619 of file crudeali.c.
Referenced by crudeAliFind(), and filterPoorGenes().
00621 { 00622 int i; 00623 for (i=0; i<geneCount; ++i) 00624 { 00625 if (genes[i].score <= cutoff) 00626 break; 00627 } 00628 return i; 00629 }
Here is the caller graph for this function:

| static int countPrettyGood | ( | struct crudeGene * | crudeGenes, | |
| int | geneCount, | |||
| int | fraction, | |||
| int | minScore | |||
| ) | [static] |
Definition at line 675 of file crudeali.c.
References crudeGene::score.
Referenced by crudeAliFind().
00678 { 00679 int count = 0; 00680 int i; 00681 int threshold = crudeGenes->score; /* Top score */ 00682 00683 threshold = (threshold + fraction/2)/fraction; 00684 if (threshold < minScore) 00685 threshold = minScore; 00686 for (i=0; i<geneCount; ++i) 00687 { 00688 if (crudeGenes[i].score >= threshold) 00689 ++count; 00690 else 00691 break; 00692 } 00693 return count; 00694 }
Here is the caller graph for this function:

| struct crudeAli* crudeAliFind | ( | DNA * | probe, | |
| int | probeSize, | |||
| struct nt4Seq ** | chrome, | |||
| int | chromeCount, | |||
| int | tileSize, | |||
| int | minScore | |||
| ) | [read] |
Definition at line 794 of file crudeali.c.
References AllocVar, caIsCdna, caTileSize, chromIx(), cmpGenesByScore(), countGenesBetter(), countPrettyGood(), filterPoorGenes(), findCrudeGenes(), freeFastProber(), freeMem(), initWormDnaMatchScores(), makeFastProber(), maxHitsAtOnce, needMem(), reverseComplement(), scoreNoninsertingExtensions(), slAddHead, and slReverse().
Referenced by xenAlignBig(), and xenAlignWorm().
00797 { 00798 int oneGeneCount; 00799 int i; 00800 int chromeIx; 00801 int geneCount = 0; 00802 int maxGeneCount = 2*maxHitsAtOnce; 00803 int isRc; 00804 struct nt4Seq *target; 00805 struct crudeGene *crudeGenes = needMem(maxGeneCount*sizeof(*crudeGenes)); 00806 struct fastProber *fps[2]; 00807 struct crudeAli *aliList = NULL, *ali; 00808 int goodGeneCount; 00809 00810 initWormDnaMatchScores(); 00811 caTileSize = tileSize; 00812 caIsCdna = (tileSize >= 14); 00813 fps[0] = makeFastProber(probe, probeSize); 00814 reverseComplement(probe, probeSize); 00815 fps[1] = makeFastProber(probe, probeSize); 00816 reverseComplement(probe, probeSize); 00817 for (chromeIx = 0; chromeIx < chromeCount; ++chromeIx) 00818 { 00819 target = chrome[chromeIx]; 00820 for (isRc = 0; isRc <= 1; ++isRc) 00821 { 00822 /* Get crude idea of genes on one strand. */ 00823 oneGeneCount = findCrudeGenes(fps[isRc], target, 00824 crudeGenes+geneCount, maxGeneCount-geneCount, isRc); 00825 00826 /* Increment gene count and if necessary compact list. */ 00827 geneCount += oneGeneCount; 00828 if (maxGeneCount - geneCount < maxHitsAtOnce) 00829 geneCount = filterPoorGenes(crudeGenes, geneCount); 00830 } 00831 if (maxGeneCount - geneCount < maxHitsAtOnce) 00832 break; 00833 } 00834 00835 /* Sort genes by initial promise and get rid of some trash. */ 00836 { 00837 int bestScore, worstScore; 00838 qsort(crudeGenes, geneCount, sizeof(crudeGenes[0]), cmpGenesByScore); 00839 bestScore = crudeGenes[0].score; 00840 worstScore = crudeGenes[geneCount-1].score; 00841 if (bestScore > worstScore) 00842 { 00843 int cutOff = bestScore/10; 00844 goodGeneCount = geneCount = countGenesBetter(crudeGenes, geneCount, cutOff); 00845 } 00846 else if (bestScore >= minScore) 00847 goodGeneCount = geneCount; 00848 else 00849 goodGeneCount = 0; 00850 } 00851 00852 if (tileSize == 8) 00853 { 00854 goodGeneCount = countPrettyGood(crudeGenes, geneCount, 4, 4); 00855 geneCount = goodGeneCount; 00856 for (i=0; i<geneCount; ++i) 00857 scoreNoninsertingExtensions(crudeGenes+i, probe, probeSize); 00858 qsort(crudeGenes, geneCount, sizeof(crudeGenes[0]), cmpGenesByScore); 00859 goodGeneCount = countPrettyGood(crudeGenes, geneCount, 4, minScore); 00860 } 00861 for (i=0; i<goodGeneCount; ++i) 00862 { 00863 AllocVar(ali); 00864 ali->chromIx = chromIx(crudeGenes[i].target, chrome, chromeCount); 00865 ali->start = crudeGenes[i].targetStart; 00866 ali->end = crudeGenes[i].targetEnd; 00867 ali->score = crudeGenes[i].score; 00868 ali->strand = (crudeGenes[i].isRc ? '-' : '+'); 00869 slAddHead(&aliList, ali); 00870 } 00871 slReverse(&aliList); 00872 freeFastProber(&fps[0]); 00873 freeFastProber(&fps[1]); 00874 freeMem(crudeGenes); 00875 00876 return aliList; 00877 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static int filterPoorGenes | ( | struct crudeGene * | genes, | |
| int | geneCount | |||
| ) | [static] |
Definition at line 631 of file crudeali.c.
References cmpGenesByScore(), countGenesBetter(), and warn().
Referenced by crudeAliFind().
00635 { 00636 int bestScore, worstScore; 00637 int newGeneCount; 00638 int halfGeneCount = geneCount/2; 00639 00640 qsort(genes, geneCount, sizeof(genes[0]), cmpGenesByScore); 00641 bestScore = genes[0].score; 00642 worstScore = genes[geneCount-1].score; 00643 00644 /* If scores are clustered with half or more at the top, 00645 * we have to be crude and just lop off bottom half of scores. */ 00646 if (bestScore == genes[halfGeneCount].score) 00647 { 00648 warn("Bunches of genes, all scoring the same. Program is " 00649 "throwing out half out of necessity.\n"); 00650 return geneCount/2; 00651 } 00652 00653 /* Drop bottom score until have gotten rid of at least half. */ 00654 newGeneCount = geneCount; 00655 while (newGeneCount > halfGeneCount) 00656 { 00657 worstScore = genes[newGeneCount-1].score; 00658 newGeneCount = countGenesBetter(genes, newGeneCount, worstScore); 00659 } 00660 return newGeneCount; 00661 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static int findCrudeGenes | ( | struct fastProber * | fp, | |
| struct nt4Seq * | target, | |||
| struct crudeGene * | genes, | |||
| int | maxGeneCount, | |||
| boolean | isRc | |||
| ) | [static] |
Definition at line 581 of file crudeali.c.
References caIsCdna, cmpExonsTargetFirst(), cmpHitsTargetFirst(), copyExonsIntoGenes(), freeMem(), crudeExon::hitCount, lumpExonsIntoGenes(), lumpHitsIntoExons(), makeHits(), maxHitsAtOnce, and needMem().
Referenced by crudeAliFind().
00584 { 00585 struct crudeHit *hits; 00586 struct crudeExon *exons; 00587 int hitCount, exonCount, geneCount = 0; 00588 00589 if (fp == NULL) 00590 return 0; 00591 assert(maxGeneCount >= maxHitsAtOnce); 00592 hits = needMem(maxHitsAtOnce*sizeof(*hits)); 00593 exons = needMem(maxHitsAtOnce*sizeof(*exons)); 00594 hitCount = makeHits(fp, target, hits, maxHitsAtOnce); 00595 00596 if (hitCount > 0) 00597 { 00598 qsort(hits, hitCount, sizeof(hits[0]), cmpHitsTargetFirst); 00599 exonCount = lumpHitsIntoExons(hits, hitCount, exons); 00600 qsort(exons, exonCount, sizeof(exons[0]), cmpExonsTargetFirst); 00601 if (caIsCdna) 00602 geneCount = lumpExonsIntoGenes(exons, exonCount, genes, target, isRc); 00603 else 00604 geneCount = copyExonsIntoGenes(exons, exonCount, genes, target, isRc); 00605 } 00606 freeMem(hits); 00607 freeMem(exons); 00608 return geneCount; 00609 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static void freeFastProber | ( | struct fastProber ** | pFp | ) | [static] |
Definition at line 337 of file crudeali.c.
References freeMem(), freez(), fastProber::hash, and fastProber::probes.
Referenced by crudeAliFind().
00338 { 00339 struct fastProber *fp = *pFp; 00340 if (fp != NULL) 00341 { 00342 freeMem(fp->probes); 00343 freeMem(fp->hash); 00344 freez(pFp); 00345 } 00346 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void initWormDnaMatchScores | ( | ) |
Definition at line 698 of file crudeali.c.
References FALSE, TRUE, and wormDnaMatchScores.
Referenced by crudeAliFind().
00700 { 00701 int i,j; 00702 static boolean initted = FALSE; 00703 00704 if (initted) return; 00705 initted = TRUE; 00706 for (i=0; i<256; ++i) 00707 for (j=0; j<256; ++j) 00708 wormDnaMatchScores[i][j] = -4; 00709 wormDnaMatchScores['a']['a'] = 2; 00710 wormDnaMatchScores['t']['t'] = 2; 00711 wormDnaMatchScores['A']['A'] = 2; 00712 wormDnaMatchScores['T']['T'] = 2; 00713 wormDnaMatchScores['c']['c'] = 3; 00714 wormDnaMatchScores['g']['g'] = 3; 00715 wormDnaMatchScores['C']['C'] = 3; 00716 wormDnaMatchScores['G']['G'] = 3; 00717 }
Here is the caller graph for this function:

| static int lumpExonsIntoGenes | ( | struct crudeExon * | exons, | |
| int | exonCount, | |||
| struct crudeGene * | genes, | |||
| struct nt4Seq * | target, | |||
| boolean | isRc | |||
| ) | [static] |
Definition at line 140 of file crudeali.c.
References caIsCdna, FALSE, crudeExon::hitCount, crudeExon::isLumped, crudeGene::isRc, crudeExon::probeEnd, crudeGene::probeEnd, crudeGene::probeStart, crudeExon::probeStart, crudeGene::score, crudeGene::target, crudeExon::targetEnd, crudeGene::targetEnd, crudeExon::targetStart, crudeGene::targetStart, and TRUE.
Referenced by findCrudeGenes().
00143 { 00144 int i; 00145 struct crudeGene *gene = genes; 00146 int geneCount = 0; 00147 int firstNonLumped = 0; 00148 int targetGapSpan = (caIsCdna ? 25000 : 200); 00149 int probeGapSpan = (caIsCdna ? 64 : 200); 00150 00151 for (i=0; i<exonCount; ++i) 00152 exons[i].isLumped = FALSE; 00153 00154 for (;;) 00155 { 00156 boolean startedGene = FALSE; 00157 int lastDiff = 0; 00158 int tOff, pOff; 00159 int thisDiff; 00160 int exonsInThis = 0; 00161 00162 for ( ; firstNonLumped < exonCount; ++firstNonLumped) 00163 { 00164 if (!exons[firstNonLumped].isLumped) 00165 break; 00166 } 00167 for (i=firstNonLumped; i<exonCount; ++i) 00168 { 00169 if (!exons[i].isLumped) 00170 { 00171 tOff = exons[i].targetStart; 00172 pOff = exons[i].probeStart; 00173 thisDiff = tOff - pOff; 00174 if (!startedGene) 00175 { 00176 startedGene = TRUE; 00177 exons[i].isLumped = TRUE; 00178 lastDiff = thisDiff; 00179 exonsInThis = 1; 00180 gene->target = target; 00181 gene->isRc = isRc; 00182 gene->probeStart = exons[i].probeStart; 00183 gene->probeEnd = exons[i].probeEnd; 00184 gene->targetStart = exons[i].targetStart; 00185 gene->targetEnd = exons[i].targetEnd; 00186 gene->score = exons[i].hitCount * exons[i].hitCount; 00187 } 00188 else if (gene->targetEnd - 10 <= tOff && tOff <= gene->targetEnd + targetGapSpan 00189 && gene->probeEnd - 10 <= pOff && pOff <= gene->probeEnd + probeGapSpan 00190 && lastDiff - 2 <= thisDiff) 00191 { 00192 exons[i].isLumped = TRUE; 00193 gene->targetEnd = exons[i].targetEnd; 00194 gene->probeEnd = exons[i].probeEnd; 00195 gene->score += exons[i].hitCount * exons[i].hitCount; 00196 exonsInThis += 1; 00197 lastDiff = thisDiff; 00198 } 00199 else if (tOff > gene->targetEnd + targetGapSpan) 00200 break; 00201 } 00202 } 00203 if (!startedGene) 00204 break; 00205 gene->score += exonsInThis; 00206 ++geneCount; 00207 ++gene; 00208 } 00209 return geneCount; 00210 }
Here is the caller graph for this function:

| static int lumpHitsIntoExons | ( | struct crudeHit * | hits, | |
| int | hitCount, | |||
| struct crudeExon * | exons | |||
| ) | [static] |
Definition at line 59 of file crudeali.c.
References caTileSize, FALSE, crudeExon::hitCount, crudeHit::isLumped, crudeExon::isLumped, crudeExon::probeEnd, crudeExon::probeStart, crudeExon::targetEnd, crudeHit::targetOffset, crudeExon::targetStart, and TRUE.
Referenced by findCrudeGenes().
00063 { 00064 int i; 00065 struct crudeExon *exon = exons; 00066 int exonCount = 0; 00067 int firstNonLumped = 0; 00068 00069 for (i=0; i<hitCount; ++i) 00070 hits[i].isLumped = FALSE; 00071 00072 for (;;) 00073 { 00074 boolean startedExon = FALSE; 00075 int diagDiff = 0; 00076 int tOff, pOff; 00077 int thisDiff; 00078 for (; firstNonLumped < hitCount; ++firstNonLumped) 00079 { 00080 if (!hits[firstNonLumped].isLumped) 00081 break; 00082 } 00083 for (i=firstNonLumped; i<hitCount; ++i) 00084 { 00085 if (hits[i].isLumped == FALSE) 00086 { 00087 pOff = hits[i].probeOffset; 00088 tOff = hits[i].targetOffset; 00089 thisDiff = pOff - tOff; 00090 if (!startedExon) /* First hit in exon. */ 00091 { 00092 startedExon = TRUE; 00093 hits[i].isLumped = TRUE; 00094 exon->probeStart = pOff; 00095 exon->probeEnd = pOff + caTileSize; 00096 exon->targetStart = tOff; 00097 exon->targetEnd = tOff + caTileSize; 00098 exon->hitCount = 1; 00099 diagDiff = thisDiff; 00100 } 00101 else if (diagDiff - 2 <= thisDiff && thisDiff <= diagDiff + 2 00102 && exon->probeEnd - 2 <= pOff && pOff <= exon->probeEnd + 48 00103 && exon->targetEnd - 2 <= tOff && tOff <= exon->targetEnd + 48) 00104 { 00105 hits[i].isLumped = TRUE; 00106 exon->probeEnd = pOff + caTileSize; 00107 exon->targetEnd = tOff + caTileSize; 00108 exon->hitCount += 1; 00109 } 00110 else if (tOff > exon->targetEnd + 48) 00111 break; 00112 } 00113 } 00114 if (!startedExon) /* Must be all lumped together. */ 00115 break; 00116 ++exonCount; 00117 ++exon; 00118 } 00119 00120 /* If small tile size then get rid of exons with only one hit. */ 00121 if (caTileSize == 8) 00122 { 00123 struct crudeExon *read = exons, *write = exons; 00124 int twoFerCount = 0; 00125 for (i=0; i<exonCount; ++i) 00126 { 00127 if (read->hitCount >= 2) 00128 { 00129 *write++ = *read++; 00130 ++twoFerCount; 00131 } 00132 else 00133 ++read; 00134 } 00135 exonCount = twoFerCount; 00136 } 00137 return exonCount; 00138 }
Here is the caller graph for this function:

| static struct fastProber* makeFastProber | ( | DNA * | probeDna, | |
| int | probeSize | |||
| ) | [static, read] |
Definition at line 304 of file crudeali.c.
References AllocVar, caTileSize, fastProber::hash, makeGoodTile(), makeTileHash(), needMem(), probeTile::next, probeTile::offset, fastProber::probes, fastProber::probeSize, probeTile::tile16, probeTile::tile8, tileHashFunc, and wobbleMask.
Referenced by crudeAliFind().
00305 { 00306 int maxProbeCount = probeSize - caTileSize + 1; 00307 int probeCount = 0; 00308 struct probeTile *probes; 00309 struct probeTile *probe; 00310 struct probeTile **hash; 00311 int i; 00312 struct fastProber *fp; 00313 00314 if (maxProbeCount <= 0) 00315 return NULL; 00316 AllocVar(fp); 00317 fp->hash = hash = makeTileHash(); 00318 fp->probeSize = probeSize; 00319 fp->probes = probes = needMem(maxProbeCount * sizeof(probes[0]) ); 00320 for (i=0; i<maxProbeCount; ++i) 00321 { 00322 probe = &probes[probeCount]; 00323 if (makeGoodTile(probeDna+i, &probe->tile16, &probe->tile8, caTileSize)) 00324 { 00325 int hashIx; 00326 probe->tile8 &= wobbleMask; 00327 hashIx = (caTileSize > 8 ? tileHashFunc(probe->tile16) : tileHashFunc(probe->tile8)); 00328 probe->offset = i; 00329 probe->next = hash[hashIx]; 00330 hash[hashIx] = probe; 00331 ++probeCount; 00332 } 00333 } 00334 return fp; 00335 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static boolean makeGoodTile | ( | DNA * | dna, | |
| bits32 * | retTile16, | |||
| bits16 * | retTile8, | |||
| int | tileSize | |||
| ) | [static] |
Definition at line 230 of file crudeali.c.
References FALSE, ntVal, packDna16(), packDna8(), and TRUE.
Referenced by makeFastProber().
00233 { 00234 int i; 00235 int repMod; 00236 int endRep; 00237 int maxRep = (tileSize / 2); 00238 00239 /* Make sure no N's in tile */ 00240 for (i=0; i<tileSize; ++i) 00241 { 00242 if (ntVal[(int)dna[i]] < 0) 00243 return FALSE; 00244 } 00245 00246 /* Make sure that tile isn't part of a pattern. */ 00247 for (repMod = 1; repMod <= maxRep; repMod += 1) 00248 { 00249 boolean allSame = TRUE; 00250 endRep = tileSize - repMod; 00251 for (i=0; i<endRep; ++i) 00252 { 00253 if (dna[i] != dna[i+repMod]) 00254 { 00255 allSame = FALSE; 00256 break; 00257 } 00258 } 00259 if (allSame) 00260 return FALSE; 00261 } 00262 00263 if (tileSize > 8) 00264 *retTile16 = packDna16(dna); 00265 else 00266 *retTile8 = packDna8(dna); 00267 00268 return TRUE; 00269 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static int makeHits | ( | struct fastProber * | fp, | |
| struct nt4Seq * | target, | |||
| struct crudeHit * | hits, | |||
| int | maxHitCount | |||
| ) | [static] |
Definition at line 566 of file crudeali.c.
References caTileSize, errAbort(), makeHits16(), and makeHits8().
Referenced by findCrudeGenes().
00569 { 00570 if (caTileSize == 8) 00571 return makeHits8(fp, target, hits, maxHitCount); 00572 else if (caTileSize == 16) 00573 return makeHits16(fp, target, hits, maxHitCount); 00574 else 00575 { 00576 errAbort("Can only do tile sizes of 8 or 16 in makeHits"); 00577 return 0; 00578 } 00579 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static int makeHits16 | ( | struct fastProber * | fp, | |
| struct nt4Seq * | target, | |||
| struct crudeHit * | hits, | |||
| int | maxHitCount | |||
| ) | [static] |
Definition at line 381 of file crudeali.c.
References nt4Seq::baseCount, nt4Seq::bases, bits32, caTileSize, fastProber::hash, makeIndividualHits16(), and tileHashFunc.
Referenced by makeHits().
00384 { 00385 bits32 *bases = target->bases; 00386 struct probeTile **hash = fp->hash; 00387 unsigned long acc; 00388 int hitCount = 0; 00389 int i; 00390 int baseWordCount = (target->baseCount/caTileSize); 00391 bits32 *endBases = bases + baseWordCount; 00392 int chunkSize = 8; 00393 int chunkCount = baseWordCount/chunkSize; 00394 int baseOffset = 0; 00395 00396 for (i=0; i<chunkCount; ++i) 00397 { 00398 acc = ((unsigned long)(hash[tileHashFunc(bases[0])]) 00399 | (unsigned long)(hash[tileHashFunc(bases[1])]) 00400 | (unsigned long)(hash[tileHashFunc(bases[2])]) 00401 | (unsigned long)(hash[tileHashFunc(bases[3])]) 00402 | (unsigned long)(hash[tileHashFunc(bases[4])]) 00403 | (unsigned long)(hash[tileHashFunc(bases[5])]) 00404 | (unsigned long)(hash[tileHashFunc(bases[6])]) 00405 | (unsigned long)(hash[tileHashFunc(bases[7])])); 00406 if (acc) 00407 { 00408 hitCount = makeIndividualHits16(hash, bases, baseOffset, chunkSize, hits, maxHitCount, hitCount); 00409 if (hitCount >= maxHitCount) 00410 break; 00411 } 00412 bases += chunkSize; 00413 baseOffset += chunkSize; 00414 } 00415 if (bases != endBases) 00416 hitCount = makeIndividualHits16(hash, bases, baseOffset, endBases-bases, hits, maxHitCount, hitCount); 00417 return hitCount; 00418 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static int makeHits8 | ( | struct fastProber * | fp, | |
| struct nt4Seq * | target, | |||
| struct crudeHit * | hits, | |||
| int | maxHitCount | |||
| ) | [static] |
Definition at line 462 of file crudeali.c.
References nt4Seq::baseCount, nt4Seq::bases, bits16, bits32, caTileSize, freeMem(), fastProber::hash, needLargeMem(), crudeHit::probeOffset, fastProber::probeSize, squeezeHits8(), crudeHit::targetOffset, tileHashBits, warn(), and wobbleMask.
Referenced by makeHits().
00465 { 00466 bits16 *bases = (bits16*)target->bases; 00467 struct probeTile **hash = fp->hash; 00468 struct probeTile *pbt; 00469 int hitCount = 0; 00470 int i; 00471 int baseWordCount = (target->baseCount/caTileSize); 00472 int tOffset = 0; 00473 00474 /* Handle big/little endian problem. */ 00475 bits32 endianTest = 0x12345678; 00476 bits16 *endianPt = (bits16*)(void*)(&endianTest); 00477 boolean needSwap = (*endianPt == 0x5678); 00478 int swapOffset[2]; 00479 int swapIx = 0; 00480 00481 /* Every so often we throw out singleton hits. The variables below 00482 * manage this. */ 00483 int compactWindowSizePower = 10; 00484 int compactWindowSize = (1<<compactWindowSizePower); 00485 int compactModMask = compactWindowSize-1; 00486 int compactOverlap = 50; 00487 int lastCompactedHit = 0; 00488 int compactBufIntSize = (compactWindowSize+compactOverlap+fp->probeSize+4); 00489 int compactBufSize = (compactBufIntSize*sizeof(short)); 00490 short *compactDiagBuf = needLargeMem(compactBufSize); 00491 00492 if (needSwap) 00493 { 00494 swapOffset[0] = 8; 00495 swapOffset[1] = -8; 00496 } 00497 else 00498 { 00499 swapOffset[0] = swapOffset[1] = 0; 00500 } 00501 00502 assert(tileHashBits == (8*sizeof(bits16))); 00503 for (i=0; i<baseWordCount; ++i) 00504 { 00505 if ((pbt = hash[*bases++ & wobbleMask]) != NULL) 00506 { 00507 if (hitCount < maxHitCount) 00508 { 00509 hits[hitCount].probeOffset = pbt->offset; 00510 hits[hitCount].targetOffset = tOffset + swapOffset[swapIx]; 00511 ++hitCount; 00512 } 00513 else 00514 { 00515 warn("Got too many hits, only keeping first %d\n", hitCount); 00516 break; 00517 } 00518 } 00519 tOffset += caTileSize; 00520 if ((tOffset&compactModMask) == 0) 00521 { 00522 int startToff = tOffset - compactWindowSize - compactOverlap; 00523 int lastCompactOff = startToff + compactWindowSize; 00524 int lastCompareOff = tOffset; 00525 00526 squeezeHits8(fp, compactDiagBuf, compactBufSize, 00527 startToff, lastCompactOff, lastCompareOff, 00528 hits, lastCompactedHit, hitCount, &lastCompactedHit, &hitCount); 00529 } 00530 swapIx = 1 - swapIx; /* Toggle between 0 and 1. */ 00531 } 00532 freeMem(compactDiagBuf); 00533 return hitCount; 00534 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static int makeIndividualHits16 | ( | struct probeTile ** | hash, | |
| bits32 * | bases, | |||
| int | baseOffset, | |||
| int | baseWordCount, | |||
| struct crudeHit * | hits, | |||
| int | maxHitCount, | |||
| int | hitCount | |||
| ) | [static] |
Definition at line 348 of file crudeali.c.
References caTileSize, probeTile::next, probeTile::offset, crudeHit::probeOffset, crudeHit::targetOffset, probeTile::tile16, tileHashFunc, and warn().
Referenced by makeHits16().
00351 { 00352 struct probeTile *probe; 00353 int i; 00354 for (i=0; i<baseWordCount; ++i) 00355 { 00356 if ((probe = hash[tileHashFunc(bases[i])]) != NULL) 00357 { 00358 do 00359 { 00360 if (bases[i] == probe->tile16) 00361 { 00362 if (hitCount >= maxHitCount) 00363 { 00364 warn("Too many hits, only taking first %d", maxHitCount); 00365 goto END_HIT_LOOP; 00366 } 00367 hits[hitCount].probeOffset = probe->offset; 00368 hits[hitCount].targetOffset = ((i+baseOffset) * caTileSize); 00369 ++hitCount; 00370 break; 00371 } 00372 probe = probe->next; 00373 } 00374 while (probe != NULL); 00375 } 00376 } 00377 END_HIT_LOOP: 00378 return hitCount; 00379 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static struct probeTile** makeTileHash | ( | ) | [static, read] |
Definition at line 285 of file crudeali.c.
References needLargeMem(), and tileHashSize.
Referenced by makeFastProber().
00286 { 00287 struct probeTile **table; 00288 int tableSize = tileHashSize * sizeof(table[0]); 00289 00290 table = needLargeMem(tableSize); 00291 memset(table, 0, tableSize); 00292 return table; 00293 }
Here is the call graph for this function:

Here is the caller graph for this function:

Definition at line 741 of file crudeali.c.
References nt4Seq::baseCount, freeMem(), crudeGene::isRc, nt4Unpack(), crudeGene::probeStart, reverseComplement(), crudeGene::score, crudeGene::target, crudeGene::targetStart, and wormDnaMatchScores.
Referenced by crudeAliFind().
00744 { 00745 int i, size; 00746 int score = 0, maxScore = 0; 00747 struct nt4Seq *targetChrom = crudeGene->target; 00748 DNA *target; 00749 DNA p,t; 00750 00751 /* Figure out what DNA to fetch - trying to get all of target that 00752 * corresponds to all of probe, but clipping if at ends of chromosome. */ 00753 int pTileStart = crudeGene->probeStart; 00754 int tTileStart = crudeGene->targetStart; 00755 int pStart = 0, pEnd = probeSize; 00756 int tStart = tTileStart - pTileStart; 00757 int tEnd = tStart + probeSize; 00758 if (tStart < 0) 00759 { 00760 pStart -= tStart; 00761 tStart = 0; 00762 } 00763 if (tEnd > targetChrom->baseCount) 00764 { 00765 int diff = tEnd - targetChrom->baseCount; 00766 tEnd -= diff; 00767 pEnd -= diff; 00768 } 00769 size = tEnd - tStart; 00770 if (crudeGene->isRc) 00771 reverseComplement(probe, probeSize); 00772 if (size > 0) 00773 { 00774 target = nt4Unpack(targetChrom, tStart, size); 00775 for (i = 0; i<size; ++i) 00776 { 00777 assert(i + pStart < probeSize); 00778 p = probe[i + pStart]; 00779 t = target[i]; 00780 score += wormDnaMatchScores[(int)p][(int)t]; 00781 if (score < 0) 00782 score = 0; 00783 if (score > maxScore) 00784 maxScore = score; 00785 } 00786 freeMem(target); 00787 } 00788 crudeGene->score = maxScore; 00789 if (crudeGene->isRc) 00790 reverseComplement(probe, probeSize); 00791 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void squeezeHits8 | ( | struct fastProber * | fp, | |
| short * | compactDiagBuf, | |||
| int | compactDiagBufByteSize, | |||
| int | startToff, | |||
| int | lastCompactOff, | |||
| int | lastCompareOff, | |||
| struct crudeHit * | hits, | |||
| int | lastCompactedHit, | |||
| int | hitCount, | |||
| int * | retLastCompactedHit, | |||
| int * | retHitCount | |||
| ) |
Definition at line 420 of file crudeali.c.
References crudeHit::probeOffset, fastProber::probeSize, and crudeHit::targetOffset.
Referenced by makeHits8().
00427 { 00428 int diagOffset = fp->probeSize+2; 00429 int diag; 00430 int i; 00431 struct crudeHit *hit, *writeHit; 00432 int compactDiagBufArraySize = compactDiagBufByteSize/sizeof(short); 00433 00434 memset(compactDiagBuf, 0, compactDiagBufByteSize); 00435 for (i=lastCompactedHit; i<hitCount; ++i) 00436 { 00437 hit = hits+i; 00438 diag = (hit->targetOffset - startToff) - hit->probeOffset + diagOffset; 00439 assert(diag >= 0 && diag < compactDiagBufArraySize); 00440 compactDiagBuf[diag] += 1; 00441 } 00442 writeHit = hits+lastCompactedHit; 00443 for (i=lastCompactedHit; i<hitCount; ++i) 00444 { 00445 hit = hits+i; 00446 if (hit->targetOffset >= lastCompactOff) 00447 break; 00448 diag = (hit->targetOffset - startToff) - hit->probeOffset + diagOffset; 00449 if (compactDiagBuf[diag] > 1) 00450 *writeHit++ = *hit; 00451 } 00452 *retLastCompactedHit = (writeHit - hits); 00453 for (;i<hitCount; ++i) 00454 { 00455 hit = hits+i; 00456 *writeHit++ = *hit; 00457 } 00458 *retHitCount = (writeHit - hits); 00459 }
Here is the caller graph for this function:

boolean caIsCdna = TRUE [static] |
Definition at line 24 of file crudeali.c.
Referenced by crudeAliFind(), findCrudeGenes(), and lumpExonsIntoGenes().
int caTileSize = 16 [static] |
Definition at line 23 of file crudeali.c.
Referenced by crudeAliFind(), lumpHitsIntoExons(), makeFastProber(), makeHits(), makeHits16(), makeHits8(), and makeIndividualHits16().
char const rcsid[] = "$Id: crudeali.c,v 1.5 2006/05/09 01:05:55 markd Exp $" [static] |
Definition at line 17 of file crudeali.c.
int wormDnaMatchScores[256][256] [static] |
Definition at line 696 of file crudeali.c.
Referenced by initWormDnaMatchScores(), and scoreNoninsertingExtensions().
1.5.2