inc/chain.h

Go to the documentation of this file.
00001 /* chain - pairwise alignments that can include gaps in both
00002  * sequences at once.  This is similar in many ways to psl,
00003  * but more suitable to cross species genomic comparisons. */
00004 
00005 #ifndef CHAIN_H
00006 #define CHAIN_H
00007 
00008 #ifndef LINEFILE_H
00009 #include "linefile.h"
00010 #endif
00011 
00012 
00013 #ifndef BITS_H
00014 #include "bits.h"
00015 #endif
00016 
00017 struct cBlock
00018 /* A gapless part of a chain, pairing a target range with a query range. */
00019     {
00020     struct cBlock *next;        /* Next block in list. */
00021     int tStart,tEnd;            /* Start and end of range covered in target. */
00022     int qStart,qEnd;            /* Start and end of range covered in query. */
00023     int score;                  /* Score of this block.  Not filled in by chainRead. */
00024     void *data;                 /* Untyped pointer to some associated data; meaning not defined here. */
00025     };
00026 
00027 int cBlockCmpTarget(const void *va, const void *vb);
00028 /* Compare to sort based on target start. */
00029 
00030 int cBlockCmpBoth(const void *va, const void *vb);
00031 /* Compare to sort based on query, then target. */
00032 
00033 int cBlockCmpDiagQuery(const void *va, const void *vb);
00034 /* Compare to sort based on diagonal, then query. */
00035 
00036 void cBlocksAddOffset(struct cBlock *blockList, int qOff, int tOff);
00037 /* Add offsets to block list. */
00038 
00039 struct chain
00040 /* A chain of blocks.  Used for output of chainBlocks. */
00041     {
00042     struct chain *next;           /* Next chain in list. */
00043     struct cBlock *blockList;      /* List of gapless blocks making up this chain. */
00044     double score;                 /* Total score for chain. */
00045     char *tName;                  /* Target name, allocated here. */
00046     int tSize;                    /* Overall size of target. */
00047     /* There is no tStrand field: target strand is always +. */
00048     int tStart,tEnd;              /* Start and end of range covered in target. */
00049     char *qName;                  /* Query name, allocated here. */
00050     int qSize;                    /* Overall size of query. */
00051     char qStrand;                 /* Query strand. */
00052     int qStart,qEnd;              /* Start and end of range covered in query. */
00053     int id;                       /* ID of chain in file. */
00054     };
00055 
00056 void chainFree(struct chain **pChain);
00057 /* Free up a chain. */
00058 
00059 void chainFreeList(struct chain **pList);
00060 /* Free a list of dynamically allocated chain's */
00061 
00062 int chainCmpScore(const void *va, const void *vb);
00063 /* Compare to sort based on score. */
00064 
00065 int chainCmpScoreDesc(const void *va, const void *vb);
00066 /* Compare to sort based on score descending. */
00067 
00068 int chainCmpTarget(const void *va, const void *vb);
00069 /* Compare to sort based on target position. */
00070 
00071 int chainCmpQuery(const void *va, const void *vb);
00072 /* Compare to sort based on query chrom and start osition. */
00073 
00074 void chainWrite(struct chain *chain, FILE *f);
00075 /* Write out chain to file in dense format. */
00076 
00077 void chainWriteAll(struct chain *chainList, FILE *f);
00078 /* Write all chains to file. */
00079 
00080 void chainWriteLong(struct chain *chain, FILE *f);
00081 /* Write out chain to file in more verbose format. */
00082 
00083 void chainWriteHead(struct chain *chain, FILE *f);
00084 /* Write chain before block/insert list. */
00085 
00086 struct chain *chainRead(struct lineFile *lf);
00087 /* Read next chain from file.  Return NULL at EOF. 
00088  * Note that chain block scores are not filled in by
00089  * this. */
00090 
00091 struct chain *chainReadChainLine(struct lineFile *lf);
00092 /* Read line that starts with chain.  Allocate memory
00093  * and fill in values.  However don't read link lines. */
00094 
00095 void chainReadBlocks(struct lineFile *lf, struct chain *chain);
00096 /* Read in chain blocks from file. */
00097 
00098 void chainIdReset(void);
00099 /* Reset chain id.  Takes no arguments. */
00100 
00101 void chainIdNext(struct chain *chain);
00102 /* Add id to chain. */
00103 
00104 void chainSwap(struct chain *chain);
00105 /* Swap target and query side of chain. */
00106 
00107 struct hash *chainReadUsedSwap(char *fileName, boolean swapQ, Bits *bits);
00108 /* Read chains that are marked as used in the 
00109  * bits array (which may be NULL) into a hash keyed by id. */
00110 
00111 struct hash *chainReadAllSwap(char *fileName, boolean swapQ);
00112 /* Read chains into a hash keyed by id. 
00113  * Set swapQ to True to read chain by query. */
00114     
00115 struct hash *chainReadAll(char *fileName);
00116 /* Read chains into a hash keyed by id. */
00117     
00118 struct hash *chainReadAllWithMeta(char *fileName, FILE *f);
00119 /* Read chains into a hash keyed by id and outputs meta data */
00120 
00121 struct chain *chainLookup(struct hash *hash, int id);
00122 /* Find chain in hash. */
00123 
00124 void chainSubsetOnT(struct chain *chain, int subStart, int subEnd, 
00125     struct chain **retSubChain,  struct chain **retChainToFree);
00126 /* Get subchain of chain bounded by subStart-subEnd on 
00127  * target side.  Return result in *retSubChain.  In some
00128  * cases this may be the original chain, in which case
00129  * *retChainToFree is NULL.  When done call chainFree on
00130  * *retChainToFree.  The score and id fields are not really
00131  * properly filled in. */
00132 
00133 void chainFastSubsetOnT(struct chain *chain, struct cBlock *firstBlock,
00134         int subStart, int subEnd, struct chain **retSubChain,  struct chain **retChainToFree);
00135 /* Get subchain as in chainSubsetOnT. Pass in initial block that may
00136  * be known from some index to speed things up. */
00137 
00138 void chainSubsetOnQ(struct chain *chain, int subStart, int subEnd, 
00139     struct chain **retSubChain,  struct chain **retChainToFree);
00140 /* Get subchain of chain bounded by subStart-subEnd on 
00141  * query side.  Return result in *retSubChain.  In some
00142  * cases this may be the original chain, in which case
00143  * *retChainToFree is NULL.  When done call chainFree on
00144  * *retChainToFree.  The score and id fields are not really
00145  * properly filled in. */
00146 
00147 void chainRangeQPlusStrand(struct chain *chain, int *retQs, int *retQe);
00148 /* Return range of bases covered by chain on q side on the plus
00149  * strand. */
00150 
00151 #endif /* CHAIN_H */

Generated on Tue Dec 25 18:39:29 2007 for blat by  doxygen 1.5.2