lib/dnautil.c File Reference

#include "common.h"
#include "dnautil.h"

Include dependency graph for dnautil.c:

Go to the source code of this file.

Data Structures

struct  codonTable
struct  aminoAcidTable

Functions

static void initNtVal ()
AA lookupCodon (DNA *dna)
boolean isStopCodon (DNA *dna)
boolean isKozak (char *dna, int dnaSize, int pos)
boolean isReallyStopCodon (char *dna, boolean selenocysteine)
AA lookupMitoCodon (DNA *dna)
Codon codonVal (DNA *start)
DNA * valToCodon (int val)
void dnaTranslateSome (DNA *dna, char *out, int outSize)
static void initNtChars ()
static void initNtMixedCaseChars ()
static void initNtCompTable ()
void complement (DNA *dna, long length)
void reverseComplement (DNA *dna, long length)
long reverseOffset (long offset, long arraySize)
void reverseIntRange (int *pStart, int *pEnd, int size)
void reverseUnsignedRange (unsigned *pStart, unsigned *pEnd, int size)
void toRna (DNA *dna)
char * skipIgnoringDash (char *a, int size, bool skipTrailingDash)
int countNonDash (char *a, int size)
int nextPowerOfFour (long x)
long dnaOrAaFilteredSize (char *raw, char filter[256])
void dnaOrAaFilter (char *in, char *out, char filter[256])
long dnaFilteredSize (char *rawDna)
void dnaFilter (char *in, DNA *out)
void dnaFilterToN (char *in, DNA *out)
void dnaMixedCaseFilter (char *in, DNA *out)
long aaFilteredSize (char *raw)
void aaFilter (char *in, DNA *out)
void upperToN (char *s, int size)
void lowerToN (char *s, int size)
void dnaBaseHistogram (DNA *dna, int dnaSize, int histogram[4])
bits32 packDna16 (DNA *in)
bits16 packDna8 (DNA *in)
UBYTE packDna4 (DNA *in)
void unpackDna (bits32 *tiles, int tileCount, DNA *out)
void unpackDna4 (UBYTE *tiles, int byteCount, DNA *out)
static void checkSizeTypes ()
int intronOrientationMinSize (DNA *iStart, DNA *iEnd, int minIntronSize)
int intronOrientation (DNA *iStart, DNA *iEnd)
int dnaScore2 (DNA a, DNA b)
int dnaOrAaScoreMatch (char *a, char *b, int size, int matchScore, int mismatchScore, char ignore)
int dnaScoreMatch (DNA *a, DNA *b, int size)
int aaScore2 (AA a, AA b)
int aaScoreMatch (AA *a, AA *b, int size)
void writeSeqWithBreaks (FILE *f, char *letters, int letterCount, int maxPerLine)
static int findTailPolyAMaybeMask (DNA *dna, int size, boolean doMask, boolean loose)
int tailPolyASizeLoose (DNA *dna, int size)
int maskTailPolyA (DNA *dna, int size)
static int findHeadPolyTMaybeMask (DNA *dna, int size, boolean doMask, boolean loose)
int headPolyTSizeLoose (DNA *dna, int size)
int maskHeadPolyT (DNA *dna, int size)
boolean isDna (char *poly, int size)
boolean isAllDna (char *poly, int size)
static void initAaVal ()
void dnaUtilOpen ()

Variables

static char const rcsid [] = "$Id: dnautil.c,v 1.49 2007/03/14 04:54:55 kent Exp $"
codonTable codonTable []
int ntVal [256]
int ntValLower [256]
int ntValUpper [256]
int ntVal5 [256]
int ntValNoN [256]
DNA valToNt [(N_BASE_VAL|MASKED_BASE_BIT)+1]
int ntValMasked [256]
DNA valToNtMasked [256]
static boolean inittedNtVal = FALSE
char ntChars [256]
char ntMixedCaseChars [256]
DNA ntCompTable [256]
static boolean inittedCompTable = FALSE
int aaVal [256]
AA valToAa [20]
AA aaChars [256]
aminoAcidTable aminoAcidTable []


Function Documentation

void aaFilter ( char *  in,
DNA *  out 
)

Definition at line 599 of file dnautil.c.

References aaChars, and dnaOrAaFilter().

Referenced by startServer().

00601 {
00602 dnaOrAaFilter(in, out, aaChars);
00603 }

Here is the call graph for this function:

Here is the caller graph for this function:

long aaFilteredSize ( char *  raw  ) 

Definition at line 593 of file dnautil.c.

References aaChars, and dnaOrAaFilteredSize().

Referenced by startServer().

00595 {
00596 return dnaOrAaFilteredSize(raw, aaChars);
00597 }

Here is the call graph for this function:

Here is the caller graph for this function:

int aaScore2 ( AA  a,
AA  b 
)

Definition at line 801 of file dnautil.c.

Referenced by clumpToHspRange(), and findCrossover().

00803 {
00804 if (a == 'X' || b == 'X') return 0;
00805 if (a == b) return 2;
00806 else return -1;
00807 }

Here is the caller graph for this function:

int aaScoreMatch ( AA *  a,
AA *  b,
int  size 
)

Definition at line 809 of file dnautil.c.

References dnaOrAaScoreMatch().

Referenced by ffScoreSomething(), findCrossover(), and scoreAli().

00811 {
00812 return dnaOrAaScoreMatch(a, b, size, 2, -1, 'X');
00813 }

Here is the call graph for this function:

Here is the caller graph for this function:

static void checkSizeTypes (  )  [static]

Definition at line 729 of file dnautil.c.

References bits16, bits32, UBYTE, and WORD.

Referenced by dnaUtilOpen().

00731 {
00732 assert(sizeof(UBYTE) == 1);
00733 assert(sizeof(WORD) == 2);
00734 assert(sizeof(bits32) == 4);
00735 assert(sizeof(bits16) == 2);
00736 }

Here is the caller graph for this function:

Codon codonVal ( DNA *  start  ) 

Definition at line 289 of file dnautil.c.

References ntVal.

00292 {
00293 int v1,v2,v3;
00294 
00295 if ((v1 = ntVal[(int)start[0]]) < 0)
00296     return -1;
00297 if ((v2 = ntVal[(int)start[1]]) < 0)
00298     return -1;
00299 if ((v3 = ntVal[(int)start[2]]) < 0)
00300     return -1;
00301 return ((v1<<4) + (v2<<2) + v3);
00302 }

void complement ( DNA *  dna,
long  length 
)

Definition at line 431 of file dnautil.c.

References initNtCompTable(), inittedCompTable, and ntCompTable.

Referenced by reverseComplement().

00432 {
00433 int i;
00434 
00435 if (!inittedCompTable) initNtCompTable();
00436 for (i=0; i<length; ++i)
00437     {
00438     *dna = ntCompTable[(int)*dna];
00439     ++dna;
00440     }
00441 }

Here is the call graph for this function:

Here is the caller graph for this function:

int countNonDash ( char *  a,
int  size 
)

Definition at line 511 of file dnautil.c.

Referenced by axtCheck(), axtSubsetOnT(), axtTransPosToQ(), and mafSubsetE().

00513 {
00514 int count = 0;
00515 int i;
00516 for (i=0; i<size; ++i)
00517     if (a[i] != '-') 
00518         ++count;
00519 return count;
00520 }

Here is the caller graph for this function:

void dnaBaseHistogram ( DNA *  dna,
int  dnaSize,
int  histogram[4] 
)

Definition at line 632 of file dnautil.c.

References ntVal, and zeroBytes().

Referenced by dnaMark0(), and makeFreqTable().

00635 {
00636 int val;
00637 zeroBytes(histogram, 4*sizeof(int));
00638 while (--dnaSize >= 0)
00639     {
00640     if ((val = ntVal[(int)*dna++]) >= 0)
00641         ++histogram[val];
00642     }
00643 }

Here is the call graph for this function:

Here is the caller graph for this function:

void dnaFilter ( char *  in,
DNA *  out 
)

Definition at line 568 of file dnautil.c.

References dnaOrAaFilter(), and ntChars.

Referenced by gfPcrMakePrimer(), and startServer().

00570 {
00571 dnaOrAaFilter(in, out, ntChars);
00572 }

Here is the call graph for this function:

Here is the caller graph for this function:

long dnaFilteredSize ( char *  rawDna  ) 

Definition at line 562 of file dnautil.c.

References dnaOrAaFilteredSize(), and ntChars.

Referenced by gfPcrMakePrimer(), and startServer().

00564 {
00565 return dnaOrAaFilteredSize(rawDna, ntChars);
00566 }

Here is the call graph for this function:

Here is the caller graph for this function:

void dnaFilterToN ( char *  in,
DNA *  out 
)

Definition at line 574 of file dnautil.c.

References initNtChars(), and ntChars.

Referenced by doBlat().

00576 {
00577 DNA c;
00578 initNtChars();
00579 while ((c = *in++) != 0)
00580     {
00581     if ((c = ntChars[(int)c]) != 0) *out++ = c;
00582     else *out++ = 'n';
00583     }
00584 *out++ = 0;
00585 }

Here is the call graph for this function:

Here is the caller graph for this function:

void dnaMixedCaseFilter ( char *  in,
DNA *  out 
)

Definition at line 587 of file dnautil.c.

References dnaOrAaFilter(), and ntMixedCaseChars.

00589 {
00590 dnaOrAaFilter(in, out, ntMixedCaseChars);
00591 }

Here is the call graph for this function:

void dnaOrAaFilter ( char *  in,
char *  out,
char  filter[256] 
)

Definition at line 550 of file dnautil.c.

References dnaUtilOpen().

Referenced by aaFilter(), dnaFilter(), and dnaMixedCaseFilter().

00552 {
00553 char c;
00554 dnaUtilOpen();
00555 while ((c = *in++) != 0)
00556     {
00557     if ((c = filter[(int)c]) != 0) *out++ = c;
00558     }
00559 *out++ = 0;
00560 }

Here is the call graph for this function:

Here is the caller graph for this function:

long dnaOrAaFilteredSize ( char *  raw,
char  filter[256] 
)

Definition at line 537 of file dnautil.c.

References dnaUtilOpen().

Referenced by aaFilteredSize(), and dnaFilteredSize().

00539 {
00540 char c;
00541 long count = 0;
00542 dnaUtilOpen();
00543 while ((c = *raw++) != 0)
00544     {
00545     if (filter[(int)c]) ++count;
00546     }
00547 return count;
00548 }

Here is the call graph for this function:

Here is the caller graph for this function:

int dnaOrAaScoreMatch ( char *  a,
char *  b,
int  size,
int  matchScore,
int  mismatchScore,
char  ignore 
)

Definition at line 773 of file dnautil.c.

Referenced by aaScoreMatch(), bioScoreMatch(), and dnaScoreMatch().

00776 {
00777 int i;
00778 int score = 0;
00779 for (i=0; i<size; ++i)
00780     {
00781     char aa = a[i];
00782     char bb = b[i];
00783     if (aa == ignore || bb == ignore)
00784         continue;
00785     if (aa == bb)
00786         score += matchScore;
00787     else
00788         score += mismatchScore;
00789     }
00790 return score;
00791 }

Here is the caller graph for this function:

int dnaScore2 ( DNA  a,
DNA  b 
)

Definition at line 765 of file dnautil.c.

Referenced by clumpToHspRange(), and findCrossover().

00767 {
00768 if (a == 'n' || b == 'n') return 0;
00769 if (a == b) return 1;
00770 else return -1;
00771 }

Here is the caller graph for this function:

int dnaScoreMatch ( DNA *  a,
DNA *  b,
int  size 
)

Definition at line 793 of file dnautil.c.

References dnaOrAaScoreMatch().

Referenced by cdaAliFromFfAli(), expandLeft(), expandRight(), ffScoreSomeAlis(), ffScoreSomething(), findCrossover(), rangeScore(), reconsiderAlignedGaps(), and scoreAli().

00797 {
00798 return dnaOrAaScoreMatch(a, b, size, 1, -1, 'n');
00799 }

Here is the call graph for this function:

Here is the caller graph for this function:

void dnaTranslateSome ( DNA *  dna,
char *  out,
int  outSize 
)

Definition at line 311 of file dnautil.c.

References lookupCodon().

00314 {
00315 int i;
00316 int dnaSize;
00317 int protSize = 0;
00318 
00319 outSize -= 1;  /* Room for terminal zero */
00320 dnaSize = strlen(dna);
00321 for (i=0; i<dnaSize-2; i+=3)
00322     {
00323     if (protSize >= outSize)
00324         break;
00325     if ((out[protSize++] = lookupCodon(dna+i)) == 0)
00326         break;
00327     }
00328 out[protSize] = 0;
00329 }

Here is the call graph for this function:

void dnaUtilOpen (  ) 

Definition at line 1068 of file dnautil.c.

References checkSizeTypes(), FALSE, initAaVal(), initNtChars(), initNtCompTable(), initNtMixedCaseChars(), initNtVal(), and TRUE.

Referenced by axtScoreSym(), axtScoreSymFilterRepeats(), dnaOrAaFilter(), dnaOrAaFilteredSize(), faFastReadNext(), faMixedSpeedReadNext(), faReadMixedNext(), faToDna(), faToProtein(), ffFind(), gffOpen(), htmShell(), htmShellWithHead(), isAllDna(), isDna(), main(), nextSeqFromMem(), nibOpenVerify(), nibStreamOpen(), twoBitReadSeqFragExt(), and unalignedUnpackDna().

01070 {
01071 static boolean opened = FALSE;
01072 if (!opened)
01073     {
01074     checkSizeTypes();
01075     initNtVal();
01076     initAaVal();
01077     initNtChars();
01078     initNtMixedCaseChars();
01079     initNtCompTable();
01080     opened = TRUE;
01081     }
01082 }

Here is the call graph for this function:

Here is the caller graph for this function:

static int findHeadPolyTMaybeMask ( DNA *  dna,
int  size,
boolean  doMask,
boolean  loose 
) [static]

Definition at line 903 of file dnautil.c.

Referenced by headPolyTSizeLoose(), and maskHeadPolyT().

00908 {
00909 int i;
00910 int score = 10;
00911 int bestScore = 10;
00912 int bestPos = -1;
00913 int pastPoly = 0;
00914 int trimSize = 0;
00915 
00916 for (i=0; i<size; ++i)
00917     {
00918     DNA b = dna[i];
00919     if (b == 'n' || b == 'N')
00920         continue;
00921     if (score > 20) score = 20;
00922     if (b == 't' || b == 'T')
00923         {
00924         score += 1;
00925         if (score >= bestScore)
00926             {
00927             bestScore = score;
00928             bestPos = i;
00929             }
00930         else if (loose && score >= (bestScore - 8))
00931             {
00932             /* If loose, keep extending even if score isn't back up to best. */
00933             bestPos = i;
00934             }
00935         }
00936     else
00937         {
00938         score -= 10;
00939         }
00940     if (score < 0)
00941         {
00942         pastPoly = i;
00943         break;
00944         }
00945     }
00946 if (bestPos >= 0)
00947     {
00948     trimSize = bestPos+1 - 2;   // Leave two for aa in taa stop codon
00949     if (trimSize > 0)
00950         {
00951         if (doMask)
00952             memset(dna, 'n', trimSize);
00953         }
00954     else
00955         trimSize = 0;
00956     }
00957 return trimSize;
00958 }

Here is the caller graph for this function:

static int findTailPolyAMaybeMask ( DNA *  dna,
int  size,
boolean  doMask,
boolean  loose 
) [static]

Definition at line 832 of file dnautil.c.

Referenced by maskTailPolyA(), and tailPolyASizeLoose().

00837 {
00838 int i;
00839 int score = 10;
00840 int bestScore = 10;
00841 int bestPos = -1;
00842 int trimSize = 0;
00843 
00844 for (i=size-1; i>=0; --i)
00845     {
00846     DNA b = dna[i];
00847     if (b == 'n' || b == 'N')
00848         continue;
00849     if (score > 20) score = 20;
00850     if (b == 'a' || b == 'A')
00851         {
00852         score += 1;
00853         if (score >= bestScore)
00854             {
00855             bestScore = score;
00856             bestPos = i;
00857             }
00858         else if (loose && score >= (bestScore - 8))
00859             {
00860             /* If loose, keep extending even if score isn't back up to best. */
00861             bestPos = i;
00862             }
00863         }
00864     else
00865         {
00866         score -= 10;
00867         }
00868     if (score < 0)
00869         {
00870         break;
00871         }
00872     }
00873 if (bestPos >= 0)
00874     {
00875     trimSize = size - bestPos - 2;      // Leave two for aa in taa stop codon
00876     if (trimSize > 0)
00877         {
00878         if (doMask)
00879             for (i=size - trimSize; i<size; ++i)
00880                 dna[i] = 'n';
00881         }
00882     else
00883         trimSize = 0;
00884     }
00885 return trimSize;
00886 }

Here is the caller graph for this function:

int headPolyTSizeLoose ( DNA *  dna,
int  size 
)

Definition at line 960 of file dnautil.c.

References FALSE, findHeadPolyTMaybeMask(), and TRUE.

00965 {
00966 return findHeadPolyTMaybeMask(dna, size, FALSE, TRUE);
00967 }

Here is the call graph for this function:

static void initAaVal (  )  [static]

Definition at line 1049 of file dnautil.c.

References aaChars, aaVal, aminoAcidTable, ArraySize, aminoAcidTable::letter, and valToAa.

Referenced by dnaUtilOpen().

01051 {
01052 int i;
01053 char c, lowc;
01054 
01055 for (i=0; i<ArraySize(aaVal); ++i)
01056     aaVal[i] = -1;
01057 for (i=0; i<ArraySize(aminoAcidTable); ++i)
01058     {
01059     c = aminoAcidTable[i].letter;
01060     lowc = tolower(c);
01061     aaVal[(int)c] = aaVal[(int)lowc] = i;
01062     aaChars[(int)c] = aaChars[(int)lowc] = c;
01063     valToAa[i] = c;
01064     }
01065 aaChars['x'] = aaChars['X'] = 'X';
01066 }

Here is the caller graph for this function:

static void initNtChars (  )  [static]

Definition at line 335 of file dnautil.c.

References FALSE, ntChars, TRUE, and zeroBytes().

Referenced by dnaFilterToN(), and dnaUtilOpen().

00336 {
00337 static boolean initted = FALSE;
00338 
00339 if (!initted)
00340     {
00341     zeroBytes(ntChars, sizeof(ntChars));
00342     ntChars['a'] = ntChars['A'] = 'a';
00343     ntChars['c'] = ntChars['C'] = 'c';
00344     ntChars['g'] = ntChars['G'] = 'g';
00345     ntChars['t'] = ntChars['T'] = 't';
00346     ntChars['n'] = ntChars['N'] = 'n';
00347     ntChars['u'] = ntChars['U'] = 'u';
00348     ntChars['-'] = 'n';
00349     initted = TRUE;
00350     }
00351 }

Here is the call graph for this function:

Here is the caller graph for this function:

static void initNtCompTable (  )  [static]

Definition at line 383 of file dnautil.c.

References inittedCompTable, ntCompTable, TRUE, and zeroBytes().

Referenced by complement(), and dnaUtilOpen().

00384 {
00385 zeroBytes(ntCompTable, sizeof(ntCompTable));
00386 ntCompTable[' '] = ' ';
00387 ntCompTable['-'] = '-';
00388 ntCompTable['='] = '=';
00389 ntCompTable['a'] = 't';
00390 ntCompTable['c'] = 'g';
00391 ntCompTable['g'] = 'c';
00392 ntCompTable['t'] = 'a';
00393 ntCompTable['u'] = 'a';
00394 ntCompTable['n'] = 'n';
00395 ntCompTable['-'] = '-';
00396 ntCompTable['.'] = '.';
00397 ntCompTable['A'] = 'T';
00398 ntCompTable['C'] = 'G';
00399 ntCompTable['G'] = 'C';
00400 ntCompTable['T'] = 'A';
00401 ntCompTable['U'] = 'A';
00402 ntCompTable['N'] = 'N';
00403 ntCompTable['R'] = 'Y';
00404 ntCompTable['Y'] = 'R';
00405 ntCompTable['M'] = 'K';
00406 ntCompTable['K'] = 'M';
00407 ntCompTable['S'] = 'S';
00408 ntCompTable['W'] = 'W';
00409 ntCompTable['V'] = 'B';
00410 ntCompTable['H'] = 'D';
00411 ntCompTable['D'] = 'H';
00412 ntCompTable['B'] = 'V';
00413 ntCompTable['X'] = 'N';
00414 ntCompTable['r'] = 'y';
00415 ntCompTable['y'] = 'r';
00416 ntCompTable['s'] = 's';
00417 ntCompTable['w'] = 'w';
00418 ntCompTable['m'] = 'k';
00419 ntCompTable['k'] = 'm';
00420 ntCompTable['v'] = 'b';
00421 ntCompTable['h'] = 'd';
00422 ntCompTable['d'] = 'h';
00423 ntCompTable['b'] = 'v';
00424 ntCompTable['x'] = 'n';
00425 ntCompTable['('] = ')';
00426 ntCompTable[')'] = '(';
00427 inittedCompTable = TRUE;
00428 }

Here is the call graph for this function:

Here is the caller graph for this function:

static void initNtMixedCaseChars (  )  [static]

Definition at line 355 of file dnautil.c.

References FALSE, ntMixedCaseChars, TRUE, and zeroBytes().

Referenced by dnaUtilOpen().

00356 {
00357 static boolean initted = FALSE;
00358 
00359 if (!initted)
00360     {
00361     zeroBytes(ntMixedCaseChars, sizeof(ntMixedCaseChars));
00362     ntMixedCaseChars['a'] = 'a';
00363     ntMixedCaseChars['A'] = 'A';
00364     ntMixedCaseChars['c'] = 'c';
00365     ntMixedCaseChars['C'] = 'C';
00366     ntMixedCaseChars['g'] = 'g';
00367     ntMixedCaseChars['G'] = 'G';
00368     ntMixedCaseChars['t'] = 't';
00369     ntMixedCaseChars['T'] = 'T';
00370     ntMixedCaseChars['n'] = 'n';
00371     ntMixedCaseChars['N'] = 'N';
00372     ntMixedCaseChars['u'] = 'u';
00373     ntMixedCaseChars['U'] = 'U';
00374     ntMixedCaseChars['-'] = 'n';
00375     initted = TRUE;
00376     }
00377 }

Here is the call graph for this function:

Here is the caller graph for this function:

static void initNtVal (  )  [static]

Definition at line 134 of file dnautil.c.

References A_BASE_VAL, ArraySize, C_BASE_VAL, G_BASE_VAL, inittedNtVal, MASKED_BASE_BIT, N_BASE_VAL, ntVal, ntVal5, ntValLower, ntValMasked, ntValNoN, ntValUpper, T_BASE_VAL, TRUE, U_BASE_VAL, valToNt, and valToNtMasked.

Referenced by dnaUtilOpen(), lookupCodon(), and lookupMitoCodon().

00135 {
00136 if (!inittedNtVal)
00137     {
00138     int i;
00139     for (i=0; i<ArraySize(ntVal); i++)
00140         {
00141         ntValUpper[i] = ntValLower[i] = ntVal[i] = -1;
00142         ntValNoN[i] = T_BASE_VAL;
00143         if (isspace(i) || isdigit(i))
00144             ntVal5[i] = ntValMasked[i] = -1;
00145         else
00146             {
00147             ntVal5[i] = N_BASE_VAL;
00148             ntValMasked[i] = (islower(i) ? (N_BASE_VAL|MASKED_BASE_BIT) : N_BASE_VAL);
00149             }
00150         }
00151     ntVal5['t'] = ntVal5['T'] = ntValNoN['t'] = ntValNoN['T'] = ntVal['t'] = ntVal['T'] = 
00152         ntValLower['t'] = ntValUpper['T'] = T_BASE_VAL;
00153     ntVal5['u'] = ntVal5['U'] = ntValNoN['u'] = ntValNoN['U'] = ntVal['u'] = ntVal['U'] = 
00154         ntValLower['u'] = ntValUpper['U'] = U_BASE_VAL;
00155     ntVal5['c'] = ntVal5['C'] = ntValNoN['c'] = ntValNoN['C'] = ntVal['c'] = ntVal['C'] = 
00156         ntValLower['c'] = ntValUpper['C'] = C_BASE_VAL;
00157     ntVal5['a'] = ntVal5['A'] = ntValNoN['a'] = ntValNoN['A'] = ntVal['a'] = ntVal['A'] = 
00158         ntValLower['a'] = ntValUpper['A'] = A_BASE_VAL;
00159     ntVal5['g'] = ntVal5['G'] = ntValNoN['g'] = ntValNoN['G'] = ntVal['g'] = ntVal['G'] = 
00160         ntValLower['g'] = ntValUpper['G'] = G_BASE_VAL;
00161 
00162     valToNt[T_BASE_VAL] = valToNt[T_BASE_VAL|MASKED_BASE_BIT] = 't';
00163     valToNt[C_BASE_VAL] = valToNt[C_BASE_VAL|MASKED_BASE_BIT] = 'c';
00164     valToNt[A_BASE_VAL] = valToNt[A_BASE_VAL|MASKED_BASE_BIT] = 'a';
00165     valToNt[G_BASE_VAL] = valToNt[G_BASE_VAL|MASKED_BASE_BIT] = 'g';
00166     valToNt[N_BASE_VAL] = valToNt[N_BASE_VAL|MASKED_BASE_BIT] = 'n';
00167 
00168     /* masked values */
00169     ntValMasked['T'] = T_BASE_VAL;
00170     ntValMasked['U'] = U_BASE_VAL;
00171     ntValMasked['C'] = C_BASE_VAL;
00172     ntValMasked['A'] = A_BASE_VAL;
00173     ntValMasked['G'] = G_BASE_VAL;
00174 
00175     ntValMasked['t'] = T_BASE_VAL|MASKED_BASE_BIT;
00176     ntValMasked['u'] = U_BASE_VAL|MASKED_BASE_BIT;
00177     ntValMasked['c'] = C_BASE_VAL|MASKED_BASE_BIT;
00178     ntValMasked['a'] = A_BASE_VAL|MASKED_BASE_BIT;
00179     ntValMasked['g'] = G_BASE_VAL|MASKED_BASE_BIT;
00180 
00181     valToNtMasked[T_BASE_VAL] = 'T';
00182     valToNtMasked[C_BASE_VAL] = 'C';
00183     valToNtMasked[A_BASE_VAL] = 'A';
00184     valToNtMasked[G_BASE_VAL] = 'G';
00185     valToNtMasked[N_BASE_VAL] = 'N';
00186 
00187     valToNtMasked[T_BASE_VAL|MASKED_BASE_BIT] = 't';
00188     valToNtMasked[C_BASE_VAL|MASKED_BASE_BIT] = 'c';
00189     valToNtMasked[A_BASE_VAL|MASKED_BASE_BIT] = 'a';
00190     valToNtMasked[G_BASE_VAL|MASKED_BASE_BIT] = 'g';
00191     valToNtMasked[N_BASE_VAL|MASKED_BASE_BIT] = 'n';
00192 
00193     inittedNtVal = TRUE;
00194     }
00195 }

Here is the caller graph for this function:

int intronOrientation ( DNA *  iStart,
DNA *  iEnd 
)

Definition at line 757 of file dnautil.c.

References intronOrientationMinSize().

Referenced by ffIntronOrientation(), ffOneIntronOrientation(), pslHasIntron(), pslWeightedIntronOrientation(), and tradeMismatchToCloseSpliceGap().

00761 {
00762 return intronOrientationMinSize(iStart, iEnd, 32);
00763 }

Here is the call graph for this function:

Here is the caller graph for this function:

int intronOrientationMinSize ( DNA *  iStart,
DNA *  iEnd,
int  minIntronSize 
)

Definition at line 738 of file dnautil.c.

Referenced by intronOrientation().

00742 {
00743 if (iEnd - iStart < minIntronSize)
00744     return 0;
00745 if (iStart[0] == 'g' && iStart[1] == 't' && iEnd[-2] == 'a' && iEnd[-1] == 'g')
00746     {
00747     return 1;
00748     }
00749 else if (iStart[0] == 'c' && iStart[1] == 't' && iEnd[-2] == 'a' && iEnd[-1] == 'c')
00750     {
00751     return -1;
00752     }
00753 else
00754     return 0;
00755 }

Here is the caller graph for this function:

boolean isAllDna ( char *  poly,
int  size 
)

Definition at line 991 of file dnautil.c.

References dnaUtilOpen(), FALSE, ntChars, and TRUE.

00993 {
00994 int i;
00995 
00996 if (size <= 1)
00997     return FALSE;
00998 dnaUtilOpen();
00999 for (i=0; i<size-1; ++i)
01000     {
01001     if (ntChars[(int)poly[i]] == 0) 
01002         return FALSE;
01003     }
01004 return TRUE;
01005 }

Here is the call graph for this function:

boolean isDna ( char *  poly,
int  size 
)

Definition at line 976 of file dnautil.c.

References dnaUtilOpen(), ntChars, and round.

Referenced by faReadAllIntoHash(), and seqIsDna().

00978 {
00979 int i;
00980 int dnaCount = 0;
00981 
00982 dnaUtilOpen();
00983 for (i=0; i<size; ++i)
00984     {
00985     if (ntChars[(int)poly[i]]) 
00986         dnaCount += 1;
00987     }
00988 return (dnaCount >= round(0.9 * size));
00989 }

Here is the call graph for this function:

Here is the caller graph for this function:

boolean isKozak ( char *  dna,
int  dnaSize,
int  pos 
)

Definition at line 226 of file dnautil.c.

References A_BASE_VAL, FALSE, G_BASE_VAL, lookupCodon(), ntVal, and TRUE.

00228 {
00229 if (lookupCodon(dna+pos) != 'M')
00230    {
00231    return FALSE;
00232    }
00233 if (pos + 3 < dnaSize)
00234     {
00235     if (ntVal[(int)dna[pos+3]] == G_BASE_VAL)
00236         return TRUE;
00237     }
00238 if (pos >= 3)
00239     {
00240     int c = ntVal[(int)dna[pos-3]];
00241     if (c == A_BASE_VAL || c == G_BASE_VAL)
00242         return TRUE;
00243     }
00244 return FALSE;
00245 }

Here is the call graph for this function:

boolean isReallyStopCodon ( char *  dna,
boolean  selenocysteine 
)

Definition at line 248 of file dnautil.c.

References lookupCodon(), and lookupMitoCodon().

00251 {
00252 if (selenocysteine)
00253     {
00254     /* Luckily the mitochondria *also* replaces TGA with 
00255      * something else, even though it isn't selenocysteine */
00256     return lookupMitoCodon(dna) == 0;
00257     }
00258 else
00259     {
00260     return lookupCodon(dna) == 0;
00261     }
00262 }

Here is the call graph for this function:

boolean isStopCodon ( DNA *  dna  ) 

Definition at line 220 of file dnautil.c.

References lookupCodon().

00222 {
00223 return lookupCodon(dna) == 0;
00224 }

Here is the call graph for this function:

AA lookupCodon ( DNA *  dna  ) 

Definition at line 200 of file dnautil.c.

References codonTable, initNtVal(), inittedNtVal, ntVal, and codonTable::protCode.

Referenced by bafWriteLine(), dnaTranslateSome(), isKozak(), isReallyStopCodon(), isStopCodon(), pslShowAlignmentStranded(), and translateSeqN().

00201 {
00202 int ix;
00203 int i;
00204 char c;
00205 
00206 if (!inittedNtVal)
00207     initNtVal();
00208 ix = 0;
00209 for (i=0; i<3; ++i)
00210     {
00211     int bv = ntVal[(int)dna[i]];
00212     if (bv<0)
00213         return 'X';
00214     ix = (ix<<2) + bv;
00215     }
00216 c = codonTable[ix].protCode;
00217 return c;
00218 }

Here is the call graph for this function:

Here is the caller graph for this function:

AA lookupMitoCodon ( DNA *  dna  ) 

Definition at line 268 of file dnautil.c.

References codonTable, initNtVal(), inittedNtVal, codonTable::mitoCode, and ntVal.

Referenced by isReallyStopCodon().

00269 {
00270 int ix;
00271 int i;
00272 char c;
00273 
00274 if (!inittedNtVal)
00275     initNtVal();
00276 ix = 0;
00277 for (i=0; i<3; ++i)
00278     {
00279     int bv = ntVal[(int)dna[i]];
00280     if (bv<0)
00281         return 'X';
00282     ix = (ix<<2) + bv;
00283     }
00284 c = codonTable[ix].mitoCode;
00285 c = toupper(c);
00286 return c;
00287 }

Here is the call graph for this function:

Here is the caller graph for this function:

void lowerToN ( char *  s,
int  size 
)

Definition at line 618 of file dnautil.c.

00620 {
00621 char c;
00622 int i;
00623 for (i=0; i<size; ++i)
00624     {
00625     c = s[i];
00626     if (islower(c))
00627         s[i] = 'N';
00628     }
00629 }

int maskHeadPolyT ( DNA *  dna,
int  size 
)

Definition at line 969 of file dnautil.c.

References FALSE, findHeadPolyTMaybeMask(), and TRUE.

Referenced by trimSeq().

00972 {
00973 return findHeadPolyTMaybeMask(dna, size, TRUE, FALSE);
00974 }

Here is the call graph for this function:

Here is the caller graph for this function:

int maskTailPolyA ( DNA *  dna,
int  size 
)

Definition at line 896 of file dnautil.c.

References FALSE, findTailPolyAMaybeMask(), and TRUE.

Referenced by trimSeq().

00899 {
00900 return findTailPolyAMaybeMask(dna, size, TRUE, FALSE);
00901 }

Here is the call graph for this function:

Here is the caller graph for this function:

int nextPowerOfFour ( long  x  ) 

Definition at line 522 of file dnautil.c.

Referenced by findBestAli(), and rwFindTilesBetween().

00527 {
00528 int count = 1;
00529 while (x > 4)
00530     {
00531     count += 1;
00532     x >>= 2;
00533     }
00534 return count;
00535 }

Here is the caller graph for this function:

bits32 packDna16 ( DNA *  in  ) 

Definition at line 645 of file dnautil.c.

References bits32, and ntValNoN.

Referenced by makeGoodTile(), newNt4(), and saveNt4().

00647 {
00648 bits32 out = 0;
00649 int count = 16;
00650 int bVal;
00651 while (--count >= 0)
00652     {
00653     bVal = ntValNoN[(int)*in++];
00654     out <<= 2;
00655     out += bVal;
00656     }
00657 return out;
00658 }

Here is the caller graph for this function:

UBYTE packDna4 ( DNA *  in  ) 

Definition at line 675 of file dnautil.c.

References ntValNoN, and UBYTE.

Referenced by twoBitFromDnaSeq().

00677 {
00678 UBYTE out = 0;
00679 int count = 4;
00680 int bVal;
00681 while (--count >= 0)
00682     {
00683     bVal = ntValNoN[(int)*in++];
00684     out <<= 2;
00685     out += bVal;
00686     }
00687 return out;
00688 }

Here is the caller graph for this function:

bits16 packDna8 ( DNA *  in  ) 

Definition at line 660 of file dnautil.c.

References bits16, and ntValNoN.

Referenced by makeGoodTile().

00662 {
00663 bits16 out = 0;
00664 int count = 8;
00665 int bVal;
00666 while (--count >= 0)
00667     {
00668     bVal = ntValNoN[(int)*in++];
00669     out <<= 2;
00670     out += bVal;
00671     }
00672 return out;
00673 }

Here is the caller graph for this function:

void reverseComplement ( DNA *  dna,
long  length 
)

Definition at line 445 of file dnautil.c.

References complement(), and reverseBytes().

Referenced by axtSwap(), bigBlat(), cdaAliFromFfAli(), crudeAliFind(), dnaMotifBestStrand(), doBlat(), ffFindAndScore(), fixDirectionAndOffsets(), getWormGeneDna(), getWormGeneExonDna(), gfClient(), gfiExpandAndLoadCached(), gfMakeOoc(), gfPcrClumps(), mafFlipStrand(), outputBed(), outputFa(), outputPsl(), pcrLocalStrand(), pslShowAlignmentStranded(), pslShowAlignmentStranded2(), rcSeqs(), scoreNoninsertingExtensions(), searchOne(), transCountBothStrands(), transIndexBothStrands(), transTripleSearch(), xenAlignBig(), and xenAlignWorm().

00446 {
00447 reverseBytes(dna, length);
00448 complement(dna, length);
00449 }

Here is the call graph for this function:

Here is the caller graph for this function:

void reverseIntRange ( int *  pStart,
int *  pEnd,
int  size 
)

Definition at line 460 of file dnautil.c.

Referenced by axtSwap(), chainSwap(), gfiExpandAndLoadCached(), mafFlipStrand(), mafNeedSubset(), mafSubsetE(), nextBlock(), printAxtTargetBlastTab(), pslFromAlign(), pslFromFakeFfAli(), pslHasIntron(), pslRangeTreeOverlap(), pslRcBoth(), pslRecalcBounds(), pslTrimToQueryRange(), pslTrimToTargetRange(), setPslBounds(), and tabBlastOut().

00461 {
00462 int temp;
00463 temp = *pStart;
00464 *pStart = size - *pEnd;
00465 *pEnd = size - temp;
00466 }

Here is the caller graph for this function:

long reverseOffset ( long  offset,
long  arraySize 
)

Definition at line 453 of file dnautil.c.

Referenced by cdaRcOne(), fixDirectionAndOffsets(), gdfRcGene(), and rcQueryOffsetsAroundSeg().

00454 {
00455 return arraySize-1 - offset;
00456 }

Here is the caller graph for this function:

void reverseUnsignedRange ( unsigned *  pStart,
unsigned *  pEnd,
int  size 
)

Definition at line 470 of file dnautil.c.

00471 {
00472 unsigned temp;
00473 temp = *pStart;
00474 *pStart = size - *pEnd;
00475 *pEnd = size - temp;
00476 }

char* skipIgnoringDash ( char *  a,
int  size,
bool  skipTrailingDash 
)

Definition at line 496 of file dnautil.c.

Referenced by axtSubsetOnT(), axtTransPosToQ(), and mafSubsetE().

00499 {
00500 while (size > 0)
00501     {
00502     if (*a++ != '-')
00503         --size;
00504     }
00505 if (skipTrailingDash)
00506     while (*a == '-')
00507        ++a;
00508 return a;
00509 }

Here is the caller graph for this function:

int tailPolyASizeLoose ( DNA *  dna,
int  size 
)

Definition at line 888 of file dnautil.c.

References FALSE, findTailPolyAMaybeMask(), and TRUE.

00892 {
00893 return findTailPolyAMaybeMask(dna, size, FALSE, TRUE);
00894 }

Here is the call graph for this function:

void toRna ( DNA *  dna  ) 

Definition at line 480 of file dnautil.c.

00481 {
00482 DNA c;
00483 for (;;)
00484     {
00485     c = *dna;
00486     if (c == 't')
00487         *dna = 'u';
00488     else if (c == 'T')
00489         *dna = 'U';
00490     else if (c == 0)
00491         break;
00492     ++dna;
00493     }
00494 }

void unpackDna ( bits32 *  tiles,
int  tileCount,
DNA *  out 
)

Definition at line 690 of file dnautil.c.

References bits32, and valToNt.

Referenced by unalignedUnpackDna().

00692 {
00693 int i, j;
00694 bits32 tile;
00695 
00696 for (i=0; i<tileCount; ++i)
00697     {
00698     tile = tiles[i];
00699     for (j=15; j>=0; --j)
00700         {
00701         out[j] = valToNt[tile & 0x3];
00702         tile >>= 2;
00703         }
00704     out += 16;
00705     }
00706 }

Here is the caller graph for this function:

void unpackDna4 ( UBYTE *  tiles,
int  byteCount,
DNA *  out 
)

Definition at line 708 of file dnautil.c.

References UBYTE, and valToNt.

00710 {
00711 int i, j;
00712 UBYTE tile;
00713 
00714 for (i=0; i<byteCount; ++i)
00715     {
00716     tile = tiles[i];
00717     for (j=3; j>=0; --j)
00718         {
00719         out[j] = valToNt[tile & 0x3];
00720         tile >>= 2;
00721         }
00722     out += 4;
00723     }
00724 }

void upperToN ( char *  s,
int  size 
)

Definition at line 605 of file dnautil.c.

Referenced by bigBlat(), maskNucSeqList(), readMaskedNib(), and readMaskedTwoBit().

00607 {
00608 char c;
00609 int i;
00610 for (i=0; i<size; ++i)
00611     {
00612     c = s[i];
00613     if (isupper(c))
00614         s[i] = 'n';
00615     }
00616 }

Here is the caller graph for this function:

DNA* valToCodon ( int  val  ) 

Definition at line 304 of file dnautil.c.

References codonTable::codon, and codonTable.

00306 {
00307 assert(val >= 0 && val < 64);
00308 return codonTable[val].codon;
00309 }

void writeSeqWithBreaks ( FILE *  f,
char *  letters,
int  letterCount,
int  maxPerLine 
)

Definition at line 815 of file dnautil.c.

References mustWrite().

Referenced by faWriteNext().

00817 {
00818 int lettersLeft = letterCount;
00819 int lineSize;
00820 while (lettersLeft > 0)
00821     {
00822     lineSize = lettersLeft;
00823     if (lineSize > maxPerLine)
00824         lineSize = maxPerLine;
00825     mustWrite(f, letters, lineSize);
00826     fputc('\n', f);
00827     letters += lineSize;
00828     lettersLeft -= lineSize;
00829     }
00830 }

Here is the call graph for this function:

Here is the caller graph for this function:


Variable Documentation

AA aaChars[256]

Definition at line 1014 of file dnautil.c.

Referenced by aaFilter(), aaFilteredSize(), faToProtein(), initAaVal(), and nextSeqFromMem().

int aaVal[256]

Definition at line 1011 of file dnautil.c.

Referenced by gfPepTile(), gfSegmentedFindNearHits(), gfStraightFindNearHits(), and initAaVal().

struct aminoAcidTable aminoAcidTable[]

Initial value:

 
{
    {0, 'A', "ala", "alanine"},
    {1, 'C', "cys", "cysteine"},
    {2, 'D', "asp",  "aspartic acid"},
    {3, 'E', "glu",  "glutamic acid"},
    {4, 'F', "phe",  "phenylalanine"},
    {5, 'G', "gly",  "glycine"},
    {6, 'H', "his",  "histidine"},
    {7, 'I', "ile",  "isoleucine"},
    {8, 'K', "lys",  "lysine"},
    {9, 'L', "leu",  "leucine"},
    {10, 'M',  "met", "methionine"},
    {11, 'N',  "asn", "asparagine"},
    {12, 'P',  "pro", "proline"},
    {13, 'Q',  "gln", "glutamine"},
    {14, 'R',  "arg", "arginine"},
    {15, 'S',  "ser", "serine"},
    {16, 'T',  "thr", "threonine"},
    {17, 'V',  "val", "valine"},
    {18, 'W',  "try", "tryptophan"},
    {19, 'Y',  "tyr", "tyrosine"},
}

Definition at line 1025 of file dnautil.c.

Referenced by initAaVal().

struct codonTable codonTable[]

Definition at line 28 of file dnautil.c.

Referenced by lookupCodon(), lookupMitoCodon(), and valToCodon().

boolean inittedCompTable = FALSE [static]

Definition at line 381 of file dnautil.c.

Referenced by complement(), and initNtCompTable().

boolean inittedNtVal = FALSE [static]

Definition at line 132 of file dnautil.c.

Referenced by initNtVal(), lookupCodon(), and lookupMitoCodon().

char ntChars[256]

Definition at line 333 of file dnautil.c.

Referenced by dnaFilter(), dnaFilteredSize(), dnaFilterToN(), faFastReadNext(), faReadMixedNext(), faToDna(), gffReadDna(), initNtChars(), isAllDna(), isDna(), and nextSeqFromMem().

DNA ntCompTable[256]

Definition at line 380 of file dnautil.c.

Referenced by complement(), and initNtCompTable().

char ntMixedCaseChars[256]

Definition at line 353 of file dnautil.c.

Referenced by dnaMixedCaseFilter(), and initNtMixedCaseChars().

int ntVal[256]

Definition at line 121 of file dnautil.c.

Referenced by badPcrPrimerSeq(), calcGcRatio(), codonFindFrame(), codonVal(), dnaBaseHistogram(), dnaMark1(), dnaMarkTriple(), dnaMatchEntropy(), ffHashFuncN(), fillInMatchEtc(), findImprobableOligo(), gfSegmentedFindNearHits(), gfStraightFindNearHits(), initNtVal(), isKozak(), lookupCodon(), lookupMitoCodon(), makeGoodTile(), oligoProb(), and unN().

int ntVal5[256]

Definition at line 124 of file dnautil.c.

Referenced by initNtVal(), nibOutput(), and nibStreamOne().

int ntValLower[256]

Definition at line 122 of file dnautil.c.

Referenced by initNtVal().

int ntValMasked[256]

Definition at line 129 of file dnautil.c.

Referenced by initNtVal(), and nibOutput().

int ntValNoN[256]

Definition at line 125 of file dnautil.c.

Referenced by addToPatSpace(), countPatSpace(), gfFastFindDnaHits(), initNtVal(), packDna16(), packDna4(), packDna8(), and patSpaceFindOne().

int ntValUpper[256]

Definition at line 123 of file dnautil.c.

Referenced by initNtVal().

char const rcsid[] = "$Id: dnautil.c,v 1.49 2007/03/14 04:54:55 kent Exp $" [static]

Definition at line 18 of file dnautil.c.

AA valToAa[20]

Definition at line 1012 of file dnautil.c.

Referenced by initAaVal().

DNA valToNt[(N_BASE_VAL|MASKED_BASE_BIT)+1]

Definition at line 126 of file dnautil.c.

Referenced by initNtVal(), makeMatchTable(), nibInput(), twoBitReadSeqFragExt(), unpackDna(), unpackDna4(), unpackLeftSide(), unpackMidWord(), and unpackRightSide().

DNA valToNtMasked[256]

Definition at line 130 of file dnautil.c.

Referenced by initNtVal(), and nibInput().


Generated on Tue Dec 25 19:46:43 2007 for blat by  doxygen 1.5.2