inc/hgap.h

Go to the documentation of this file.
00001 /*****************************************************************************
00002  * Copyright (C) 2000 Jim Kent.  This source code may be freely used         *
00003  * for personal, academic, and non-profit purposes.  Commercial use          *
00004  * permitted only by explicit agreement with Jim Kent (jim_kent@pacbell.net) *
00005  *****************************************************************************/
00006 /* HGAP - Human Genome Annotation Project database. */
00007 #ifndef HGAP_H
00008 #define HGAP_H
00009 
00010 #ifndef DNASEQ_H
00011 #include "dnaseq.h"
00012 #endif 
00013 
00014 #ifndef UNFIN_H
00015 #include "unfin.h"
00016 #endif
00017 
00018 #ifndef JKSQL_H
00019 #include "jksql.h"
00020 #endif
00021 
typedef unsigned int HGID;      /* A database ID. */

void hgSetDb(char *dbName);
/* Set the database name to dbName. */

char *hgGetDb(void);
/* Return the current database name. */

struct sqlConnection *hgAllocConn(void);
/* Get a free connection if possible.  If there is none, allocate a
 * new one. */

struct sqlConnection *hgFreeConn(struct sqlConnection **pConn);
/* Put back connection for reuse.
 * NOTE(review): the meaning of the returned pointer is not stated in
 * this header - confirm against the implementation before using it. */


HGID hgIdQuery(struct sqlConnection *conn, char *query);
/* Return first field of first table as HGID.  A result of 0 is ok. */

HGID hgRealIdQuery(struct sqlConnection *conn, char *query);
/* Return first field of first table as HGID; abort if it is 0. */


struct sqlConnection *hgStartUpdate(void);
/* Get a connection for an update.  (Starts allocating ids.) */

void hgEndUpdate(struct sqlConnection **pConn, char *comment, ...);
/* Finish up connection with a printf format comment: comment is the
 * format string and the remaining arguments supply its values. */

HGID hgNextId(void);
/* Get next unique id.  (Should only be called after hgStartUpdate.) */

FILE *hgCreateTabFile(char *tableName);
/* Open a tab file with name corresponding to tableName.  This
 * may just be fclosed when done.  (Currently just makes
 * tableName.tab in the current directory.) */

void hgLoadTabFile(struct sqlConnection *conn, char *tableName);
/* Load tab delimited file corresponding to tableName.
 * Should only be used after hgCreateTabFile, and only after
 * the file has been closed. */
00062 
00063 
enum
/* Various constants used. */
    {
    hgContigPad = 800     /* Number of N's between contigs. */
    };
00069 
00070 struct hgBac
00071 /* This represents a sequenced clone (BAC/PAC/cosmid) */
00072     {
00073     struct hgBac *next;       /* Next in list. */
00074     struct hgNest *nest;      /* Coordinate space. */
00075     HGID id;                  /* HGAP ID. */
00076     char name[16];            /* GenBank accession. */
00077     int contigCount;          /* Number of contigs. */
00078     struct hgContig *contigs; /* Contig list. */
00079     };
00080 
00081 struct hgContig
00082 /* This represents a contig within a BAC. */
00083     {
00084     struct hgContig *next;    /* Next in list. */
00085     struct hgNest *nest;      /* Coordinate space. */
00086     HGID id;                  /* HGAP ID. */
00087     char name[20];            /* Name like AC000007.24 */
00088     struct hgBac *bac;        /* Bac this is in. */
00089     int ix;                   /* Contig index. */
00090     int submitOffset;         /* Position in genBank submission. */
00091     int size;                 /* Size in bases. */
00092     };
00093 
00094 struct hgNest
00095 /* This structure describes the contig tree
00096  * chromosomes->chromosome contigs->bacs->
00097  * bac contigs.  */
00098     {
00099     struct hgNest *next;        /* Pointer to next sibling. */
00100     struct hgNest *children;    /* Children. */
00101     struct hgNest *parent;      /* Parent if any. */
00102     HGID id;                    /* HGAP ID. */
00103     int orientation;            /* +1 or -1 relative to parent. */
00104     int offset;                 /* Offset relative to parent. */
00105     int size;                   /* Size in bases. */
00106     struct hgContig *contig;    /* Associated contig if any. */
00107     };
00108 
struct hgBac *hgGetBac(char *acc);
/* Load BAC with given accession into memory.  Don't free the result;
 * it's managed by the system. */

struct hgContig *hgGetContig(char *acc, int contigIx);
/* Get contig.  contigIx is the position in the submission, not the
 * position in the ordering. */

struct dnaSeq *hgContigSeq(struct hgContig *contig);
/* Return DNA associated with contig. */

struct dnaSeq *hgRnaSeq(char *acc);
/* Return sequence for RNA. */
00122 
00123 void hgRnaSeqAndId(char *acc, struct dnaSeq **retSeq, HGID *retId);
00124 /* Return sequence for RNA and it's database ID. */
00125 
struct dnaSeq *hgBacOrderedSeq(struct hgBac *bac);
/* Return DNA associated with BAC, including NNN's between
 * contigs, in ordered coordinates. */

struct dnaSeq *hgBacSubmittedSeq(char *acc);
/* Return DNA associated with BAC in submitted ordering
 * and coordinates. */

struct dnaSeq *hgBacContigSeq(char *acc);
/* Return list of sequences, one for each contig in BAC. */

int hgOffset(struct hgNest *source, int offset, struct hgNest *dest);
/* Translate offset from source to destination coordinate space.
 * Destination has to be an ancestor of (or the same as) source. */
00140 
/* The following is a series of nested structures for
 * describing a range of DNA.  Each later structure begins with
 * the same fields, in the same order, as the earlier ones, so
 * routines that work on the earlier structures will also work on
 * the later ones.  This is a crude but effective form of single
 * inheritance. */
00147 
struct hgRange
/* Just start/end locations somewhere... */
    {
    struct hgRange *next; /* Next in list. */
    int tStart, tEnd;     /* Position in target or only sequence: tStart <= x < tEnd */
    };

int hgCmpTStart(const void *va, const void *vb);
/* Compare function to sort by tStart, then by tEnd.  Takes two
 * untyped pointers, as qsort-style sort callbacks do.
 * NOTE(review): whether va/vb point at structures or at pointers to
 * structures is not visible in this header - check the implementation. */
00157 
struct hgHit
/* A simple hit - an interesting range of a sequence. */
    {
    struct hgHit *next;   /* Next in list. */
    int tStart, tEnd;     /* Position in target or only sequence: tStart <= x < tEnd */
    int tOrientation;     /* +1 or -1 orientation. */
    char *target;         /* Name of target seq. (Not allocated here.) */
    };

int hgCmpTarget(const void *va, const void *vb);
/* Compare function to sort by target, orientation, tStart, then tEnd. */
00169 
struct hgScoredHit
/* A hit with a log odds score. */
    {
    struct hgScoredHit *next; /* Next in list. */
    int tStart, tEnd;     /* Position in target or only sequence: tStart <= x < tEnd */
    int tOrientation;     /* +1 or -1 orientation. */
    char *target;         /* Name of target seq. (Not allocated here.) */
    int logOdds;          /* Log odds style score - scaled x 1000. */
    };

int hgCmpScore(const void *va, const void *vb);
/* Compare function to sort by logOdds score. */
00182 
struct hgAliHit
/* A hit representing an alignment between two sequences without inserts. */
    {
    struct hgAliHit *next; /* Next in list. */
    int tStart, tEnd;     /* Position in target or only sequence: tStart <= x < tEnd */
    int tOrientation;     /* +1 or -1 orientation. */
    char *target;         /* Name of target seq. (Not allocated here.) */
    int logOdds;          /* Log odds style score - scaled x 1000. */
    int qStart, qEnd;     /* Position in query sequence. */
    int qOrientation;     /* +1 or -1 query orientation. */
    char *query;          /* Name of query seq. (Not allocated here.) */
    };

int hgCmpQStart(const void *va, const void *vb);
/* Compare function to sort by qStart, then by qEnd. */

int hgCmpQuery(const void *va, const void *vb);
/* Compare function to sort by query, orientation, qStart, then qEnd. */
00201 
struct hgBoundedHit
/* An alignment hit that can have soft or hard edges.
 * The bool type is not defined in this header; it is expected to come
 * from one of the headers included above. */
    {
    struct hgBoundedHit *next; /* Next in list. */
    int tStart, tEnd;     /* Position in target or only sequence: tStart <= x < tEnd */
    int tOrientation;     /* +1 or -1 orientation. */
    char *target;         /* Name of target seq. (Not allocated here.) */
    int logOdds;          /* Log odds style score - scaled x 1000. */
    int qStart, qEnd;     /* Position in query sequence. */
    int qOrientation;     /* +1 or -1 query orientation. */
    char *query;          /* Name of query seq. (Not allocated here.) */
    bool hardStart;       /* Start position known. */
    bool hardEnd;         /* End position known. */
    };
00216 
struct hgAlignment
/* An alignment with gaps.
 * NOTE(review): unlike hgAliHit and hgBoundedHit in this header, this
 * structure has no 'char *query' field after qOrientation, so the
 * fields from hardStart on do not line up with hgBoundedHit.  Routines
 * that read 'query' or the hard edge flags through one of those
 * structure types should not be handed an hgAlignment until this is
 * confirmed to be intentional. */
    {
    struct hgAlignment *next; /* Next in list. */
    int tStart, tEnd;     /* Position in target or only sequence: tStart <= x < tEnd */
    int tOrientation;     /* +1 or -1 orientation. */
    char *target;         /* Name of target seq. (Not allocated here.) */
    int logOdds;          /* Log odds style score - scaled x 1000. */
    int qStart, qEnd;     /* Position in query sequence. */
    int qOrientation;     /* +1 or -1 query orientation. */
    bool hardStart;       /* Start position known. */
    bool hardEnd;         /* End position known. */
    struct hgBoundedHit *hitList;  /* Subalignments. */
    };
00231 
00232 #endif /* HGAP_H */
00233 

Generated on Tue Dec 25 18:39:29 2007 for blat by  doxygen 1.5.2