00001 /* pairHmm - stuff to help implement pairwise hidden markov models, 00002 * which are useful ways of aligning two sequences. 00003 * 00004 * This file is copyright 2000-2004 Jim Kent, but license is hereby 00005 * granted for all use - public, private or commercial. */ 00006 00007 #ifndef PAIRHMM_H 00008 #define PAIRHMM_H 00009 00010 /* Mommy coding scheme - this is how one cell in the dynamic programming table 00011 * points to it's parent (mommy) cell. Since these tables are really big, 00012 * rather than use a simple pointer costing four bytes, we use a encoding 00013 * scheme that requires only one byte. 00014 * 00015 * Bits 0-4 the "hidden" state of the mommy. Lets us have 32 hidden states. 00016 * currently only using 7. 00017 * Bit 5 whether or not mommy is in previous cell in query 00018 * Bit 6 whether or not mommy is in previous cell in target 00019 * Bit 7 set during trace back for cells along optimal path 00020 * 00021 * Since the query and target advancing bits (5 and 6) are never both zero, 00022 * it is safe to use the value of all-bits-zero as an indicator of 00023 * no mommy. */ 00024 00025 /* Compress state, query, and target offset into one byte. */ 00026 #define phmmPackMommy(stateIx, qOff, tOff) ((UBYTE)((stateIx) + ((-(qOff))<<5) + ((-(tOff))<<6))) 00027 00028 /* Traceback sets this, really just for debugging. */ 00029 #define phmmMommyTraceBit (1<<7) 00030 00031 struct phmmMommy 00032 /* This contains the parent info for a single state of the matrix. */ 00033 { 00034 UBYTE mommy; /* Unlike a parent, you can only have one mommy! */ 00035 }; 00036 00037 extern UBYTE phmmNullMommy; /* mommy value for orphans.... */ 00038 00039 struct phmmState 00040 /* This corresponds to a hidden Markov state. Each one of 00041 * these has a two dimensional array[targetSize+1][querySize+1] 00042 * of cells. */ 00043 { 00044 struct phmmMommy *cells; /* The 2-D array containing traceback info. */ 00045 int *scores; /* Scores for the current row. */ 00046 int *lastScores; /* Scores for the previous row. */ 00047 int stateIx; /* Numerical handle on state. */ 00048 char *name; /* Name of state. */ 00049 char emitLetter; /* Single letter representing state. */ 00050 }; 00051 00052 struct phmmMatrix 00053 /* The alignment matrix - has an array of states. */ 00054 { 00055 char *query; /* One sequence to align- all lower case. */ 00056 char *target; /* Other sequence to align. */ 00057 int querySize; /* Size of query. */ 00058 int targetSize; /* Size of target. */ 00059 int qDim; /* One plus size of query - dimension of matrix. */ 00060 int tDim; /* One plus size of target - dimension of matrix. */ 00061 int stateCount; /* Number of hidden states in HMM. */ 00062 int stateSize; /* Number of cells in each state's matrix. */ 00063 int stateByteSize; /* Number of bytes used by each state's matrix. */ 00064 struct phmmState *states; /* Array of states. */ 00065 struct phmmMommy *allCells; /* Memory for all matrices. */ 00066 int *allScores; /* Memory for two rows of scores. */ 00067 }; 00068 00069 struct phmmMatrix *phmmMatrixNew(int stateCount, 00070 char *query, int querySize, char *target, int targetSize); 00071 /* Allocate all memory required for an phmmMatrix. Set up dimensions. */ 00072 00073 void phmmMatrixFree(struct phmmMatrix **pAm); 00074 /* Free up memory required for an phmmMatrix and make sure 00075 * nobody reuses it. */ 00076 00077 struct phmmState *phmmNameState(struct phmmMatrix *am, int stateIx, 00078 char *name, char emitLetter); 00079 /* Give a name to a state and return a pointer to it. */ 00080 00081 struct phmmAliPair *phmmTraceBack(struct phmmMatrix *am, struct phmmMommy *end); 00082 /* Create list of alignment pair by tracing back through matrix from end 00083 * state back to a start.*/ 00084 00085 void phmmPrintTrace(struct phmmMatrix *am, struct phmmAliPair *pairList, 00086 boolean showStates, FILE *f, boolean extraAtEnds); 00087 /* Print out trace to file. */ 00088 00089 struct axt *phhmTraceToAxt(struct phmmMatrix *am, struct phmmAliPair *pairList, 00090 int score, char *qName, char *tName); 00091 /* Convert alignment from traceback format to axt. */ 00092 00093 #endif /* PAIRHMM_H */ 00094
1.5.2