inc/pairHmm.h

Go to the documentation of this file.
00001 /* pairHmm - stuff to help implement pairwise hidden markov models,
00002  * which are useful ways of aligning two sequences. 
00003  *
00004  * This file is copyright 2000-2004 Jim Kent, but license is hereby
00005  * granted for all use - public, private or commercial. */
00006 
00007 #ifndef PAIRHMM_H
00008 #define PAIRHMM_H
00009 
00010 /* Mommy coding scheme - this is how one cell in the dynamic programming table
00011  * points to its parent (mommy) cell.  Since these tables are really big,
00012  * rather than use a simple pointer costing four bytes, we use an encoding
00013  * scheme that requires only one byte. 
00014  *
00015  * Bits 0-4  the "hidden" state of the mommy.  Lets us have 32 hidden states.
00016  *           currently only using 7.
00017  * Bit  5    whether or not mommy is in previous cell in query
00018  * Bit  6    whether or not mommy is in previous cell in target
00019  * Bit  7    set during trace back for cells along optimal path
00020  *
00021  * Since the query and target advancing bits (5 and 6) are never both zero,
00022  * it is safe to use the value of all-bits-zero as an indicator of
00023  * no mommy. */
00024 
00025 /* Compress state index, query offset, and target offset into one mommy byte.  Each offset is negated before shifting, so an offset of -1 sets bit 5 (query) or bit 6 (target) and an offset of 0 leaves that bit clear. */
00026 #define phmmPackMommy(stateIx, qOff, tOff) ((UBYTE)((stateIx) + ((-(qOff))<<5) + ((-(tOff))<<6)))
00027 
00028 /* Mask for bit 7 of a mommy byte.  Traceback sets this, really just for debugging. */
00029 #define phmmMommyTraceBit (1<<7)
00030 
00031 struct phmmMommy
00032 /* This contains the parent info for a single state of the matrix, held in one byte. */
00033     {
00034     UBYTE mommy; /* Packed parent code (see phmmPackMommy).  Unlike a parent, you can only have one mommy! */
00035     };
00036 
00037 extern UBYTE phmmNullMommy; /* Mommy value for orphans (cells with no mommy).  Only declared here; the definition lives outside this header. */
00038 
00039 struct phmmState
00040 /* This corresponds to a hidden Markov state.  Each one of
00041  * these has a two dimensional array[targetSize+1][querySize+1]
00042  * of cells, plus score pointers for the current and previous rows. */
00043     {
00044     struct phmmMommy *cells;    /* The 2-D array containing traceback info, one phmmMommy per cell. */
00045     int *scores;                /* Scores for the current row. */
00046     int *lastScores;            /* Scores for the previous row. */
00047     int stateIx;                /* Numerical handle on state. */
00048     char *name;                 /* Name of state. */
00049     char emitLetter;            /* Single letter representing state. */
00050     };
00051 
00052 struct phmmMatrix
00053 /* The alignment matrix - holds the two sequences, the matrix dimensions, and an array of states. */
00054     {
00055     char *query;        /* One sequence to align - all lower case. */
00056     char *target;       /* Other sequence to align. */
00057     int querySize;      /* Size of query. */
00058     int targetSize;     /* Size of target. */
00059     int qDim;           /* One plus size of query - dimension of matrix. */
00060     int tDim;           /* One plus size of target - dimension of matrix. */
00061     int stateCount;     /* Number of hidden states in HMM. */
00062     int stateSize;      /* Number of cells in each state's matrix. */
00063     int stateByteSize;  /* Number of bytes used by each state's matrix. */
00064     struct phmmState *states;  /* Array of states. */
00065     struct phmmMommy *allCells; /* Memory for all states' cell matrices. */
00066     int *allScores;           /* Memory for two rows of scores. */
00067     };
00068 
00069 struct phmmMatrix *phmmMatrixNew(int stateCount,
00070     char *query, int querySize, char *target, int targetSize);
00071 /* Allocate all memory required for a phmmMatrix.  Set up dimensions.  Returns the new matrix; release it with phmmMatrixFree. */
00072 
00073 void phmmMatrixFree(struct phmmMatrix **pAm);
00074 /* Free up memory required for a phmmMatrix and make sure
00075  * nobody reuses it.  Takes the address of the caller's matrix pointer. */
00076 
00077 struct phmmState *phmmNameState(struct phmmMatrix *am, int stateIx, 
00078         char *name, char emitLetter);
00079 /* Give a name to the state of am selected by stateIx and return a pointer to it. */
00080 
00081 struct phmmAliPair *phmmTraceBack(struct phmmMatrix *am, struct phmmMommy *end);
00082 /* Create list of alignment pairs by tracing back through matrix from end
00083  * state back to a start. */
00084 
00085 void phmmPrintTrace(struct phmmMatrix *am, struct phmmAliPair *pairList, 
00086         boolean showStates, FILE *f, boolean extraAtEnds);
00087 /* Print out trace (the alignment pair list) to file f. */
00088 
00089 struct axt *phhmTraceToAxt(struct phmmMatrix *am, struct phmmAliPair *pairList, 
00090         int score, char *qName, char *tName);
00091 /* Convert alignment from traceback format to axt.  NOTE(review): the name is spelled "phhm", unlike the phmm* functions in this header; left unchanged so existing callers keep working. */
00092 
00093 #endif /* PAIRHMM_H */
00094 

Generated on Tue Dec 25 18:39:29 2007 for blat by  doxygen 1.5.2