00001 /***************************************************************************** 00002 * Copyright (C) 2000 Jim Kent. This source code may be freely used * 00003 * for personal, academic, and non-profit purposes. Commercial use * 00004 * permitted only by explicit agreement with Jim Kent (jim_kent@pacbell.net) * 00005 *****************************************************************************/ 00006 /* HGAP - Human Genome Annotation Project database. */ 00007 #ifndef HGAP_H 00008 #define HGAP_H 00009 00010 #ifndef DNASEQ_H 00011 #include "dnaseq.h" 00012 #endif 00013 00014 #ifndef UNFIN_H 00015 #include "unfin.h" 00016 #endif 00017 00018 #ifndef JKSQL_H 00019 #include "jksql.h" 00020 #endif 00021 00022 typedef unsigned int HGID; /* A database ID. */ 00023 00024 void hgSetDb(char *dbName); 00025 /* Set the database name. */ 00026 00027 char *hgGetDb(); 00028 /* Return the current database name. */ 00029 00030 struct sqlConnection *hgAllocConn(); 00031 /* Get free connection if possible. If not allocate a new one. */ 00032 00033 struct sqlConnection *hgFreeConn(struct sqlConnection **pConn); 00034 /* Put back connection for reuse. */ 00035 00036 00037 HGID hgIdQuery(struct sqlConnection *conn, char *query); 00038 /* Return first field of first table as HGID. 0 return ok. */ 00039 00040 HGID hgRealIdQuery(struct sqlConnection *conn, char *query); 00041 /* Return first field of first table as HGID- abort if 0. */ 00042 00043 00044 struct sqlConnection *hgStartUpdate(); 00045 /* Get a connection for an update. (Starts allocating id's) */ 00046 00047 void hgEndUpdate(struct sqlConnection **pConn, char *comment, ...); 00048 /* Finish up connection with a printf format comment. */ 00049 00050 HGID hgNextId(); 00051 /* Get next unique id. (Should only be called after hgStartUpdate). */ 00052 00053 FILE *hgCreateTabFile(char *tableName); 00054 /* Open a tab file with name corresponding to tableName. This 00055 * may just be fclosed when done. 
(Currently just makes 00056 * tableName.tab in the current directory.) */ 00057 00058 void hgLoadTabFile(struct sqlConnection *conn, char *tableName); 00059 /* Load tab delimited file corresponding to tableName. 00060 * Should only be used after hgCreatTabFile, and only after 00061 * file closed. */ 00062 00063 00064 enum 00065 /* Various constants used. */ 00066 { 00067 hgContigPad = 800, /* Number of N's between contigs. */ 00068 }; 00069 00070 struct hgBac 00071 /* This represents a sequenced clone (BAC/PAC/cosmid) */ 00072 { 00073 struct hgBac *next; /* Next in list. */ 00074 struct hgNest *nest; /* Coordinate space. */ 00075 HGID id; /* HGAP ID. */ 00076 char name[16]; /* GenBank accession. */ 00077 int contigCount; /* Number of contigs. */ 00078 struct hgContig *contigs; /* Contig list. */ 00079 }; 00080 00081 struct hgContig 00082 /* This represents a contig within a BAC. */ 00083 { 00084 struct hgContig *next; /* Next in list. */ 00085 struct hgNest *nest; /* Coordinate space. */ 00086 HGID id; /* HGAP ID. */ 00087 char name[20]; /* Name like AC000007.24 */ 00088 struct hgBac *bac; /* Bac this is in. */ 00089 int ix; /* Contig index. */ 00090 int submitOffset; /* Position in genBank submission. */ 00091 int size; /* Size in bases. */ 00092 }; 00093 00094 struct hgNest 00095 /* This structure describes the contig tree 00096 * chromosomes->chromosome contigs->bacs-> 00097 * bac contigs. */ 00098 { 00099 struct hgNest *next; /* Pointer to next sibling. */ 00100 struct hgNest *children; /* Children. */ 00101 struct hgNest *parent; /* Parent if any. */ 00102 HGID id; /* HGAP ID. */ 00103 int orientation; /* +1 or -1 relative to parent. */ 00104 int offset; /* Offset relative to parent. */ 00105 int size; /* Size in bases. */ 00106 struct hgContig *contig; /* Associated contig if any. */ 00107 }; 00108 00109 struct hgBac *hgGetBac(char *acc); 00110 /* Load BAC with given accession into memory. Don't free this, it's 00111 * managed by system. 
*/ 00112 00113 struct hgContig *hgGetContig(char *acc, int contigIx); 00114 /* Get contig. contigIx is position in submission, not position in 00115 * ordering. */ 00116 00117 struct dnaSeq *hgContigSeq(struct hgContig *contig); 00118 /* Return DNA associated with contig. */ 00119 00120 struct dnaSeq *hgRnaSeq(char *acc); 00121 /* Return sequence for RNA. */ 00122 00123 void hgRnaSeqAndId(char *acc, struct dnaSeq **retSeq, HGID *retId); 00124 /* Return sequence for RNA and it's database ID. */ 00125 00126 struct dnaSeq *hgBacOrderedSeq(struct hgBac *bac); 00127 /* Return DNA associated with BAC including NNN's between 00128 * contigs in ordered coordinates. */ 00129 00130 struct dnaSeq *hgBacSubmittedSeq(char *acc); 00131 /* Returns DNA associated with BAC in submitted ordering 00132 * and coordinates. */ 00133 00134 struct dnaSeq *hgBacContigSeq(char *acc); 00135 /* Returns list of sequences, one for each contig in BAC. */ 00136 00137 int hgOffset(struct hgNest *source, int offset, struct hgNest *dest); 00138 /* Translate offset from source to destination coordinate space. 00139 * Destination has to be an ancestor (or the same) as source. */ 00140 00141 /* The following is a series of nested structures for 00142 * describing a range of DNA. The later structures include 00143 * the first fields of the earlier ones. Routines that 00144 * work on the earlier structures will also work on 00145 * the later. This is a crude but effective form of single 00146 * inheritance. */ 00147 00148 struct hgRange 00149 /* Just start/end locations somewhere... */ 00150 { 00151 struct hgRange *next; /* Next in list. */ 00152 int tStart, tEnd; /* Position in target or only sequence tStart <= x < tEnd */ 00153 }; 00154 00155 int hgCmpTStart(const void *va, const void *vb); 00156 /* Compare function to sort by tStart, then by tEnd. */ 00157 00158 struct hgHit 00159 /* A simple hit - an interesting range of a sequence. */ 00160 { 00161 struct hgHit *next; /* Next in list. 
*/ 00162 int tStart, tEnd; /* Position in target or only sequence tStart <= x < tEnd */ 00163 int tOrientation; /* +1 or -1 orientation. */ 00164 char *target; /* Name of target seq. (Not allocated here.) */ 00165 }; 00166 00167 int hgCmpTarget(const void *va, const void *vb); 00168 /* Compare function to sort by target, orientaation, tStart, then tEnd. */ 00169 00170 struct hgScoredHit 00171 /* A hit with a log odds score. */ 00172 { 00173 struct hgScoredHit *next; 00174 int tStart, tEnd; /* Position in target or only sequence tStart <= x < tEnd */ 00175 int tOrientation; /* +1 or -1 orientation. */ 00176 char *target; /* Name of target seq. (Not allocated here.) */ 00177 int logOdds; /* Log odds style score - scaled x 1000. */ 00178 }; 00179 00180 int hgCmpScore(const void *va, const void *vb); 00181 /* Compare function to sort logOdds score. */ 00182 00183 struct hgAliHit 00184 /* A hit representing an alignment between two sequences without inserts. */ 00185 { 00186 struct hgAliHit *next; 00187 int tStart, tEnd; /* Position in target or only sequence tStart <= x < tEnd */ 00188 int tOrientation; /* +1 or -1 orientation. */ 00189 char *target; /* Name of target seq. (Not allocated here.) */ 00190 int logOdds; /* Log odds style score - scaled x 1000. */ 00191 int qStart, qEnd; /* Position in query sequence. */ 00192 int qOrientation; /* +1 or -1 query orientation. */ 00193 char *query; /* Name of query seq. (Not allocated here.) */ 00194 }; 00195 00196 int hgCmpQStart(const void *va, const void *vb); 00197 /* Compare function to sort by qStart, then by qEnd. */ 00198 00199 int hgCmpQuery(const void *va, const void *vb); 00200 /* Compare function to sort by query, orientation, qStart, then qEnd. */ 00201 00202 struct hgBoundedHit 00203 /* An alignment hit that can have soft or hard edges. */ 00204 { 00205 struct hgBoundedHit *next; 00206 int tStart, tEnd; /* Position in target or only sequence tStart <= x < tEnd */ 00207 int tOrientation; /* +1 or -1 orientation. 
*/ 00208 char *target; /* Name of target seq. (Not allocated here.) */ 00209 int logOdds; /* Log odds style score - scaled x 1000. */ 00210 int qStart, qEnd; /* Position in query sequence. */ 00211 int qOrientation; /* +1 or -1 query orientation. */ 00212 char *query; /* Name of query seq. (Not allocated here.) */ 00213 bool hardStart; /* Start position known */ 00214 bool hardEnd; /* End position known */ 00215 }; 00216 00217 struct hgAlignment 00218 /* An alignment with gaps. */ 00219 { 00220 struct hgAlignment *next; 00221 int tStart, tEnd; /* Position in target or only sequence tStart <= x < tEnd */ 00222 int tOrientation; /* +1 or -1 orientation. */ 00223 char *target; /* Name of target seq. (Not allocated here.) */ 00224 int logOdds; /* Log odds style score - scaled x 1000. */ 00225 int qStart, qEnd; /* Position in query sequence. */ 00226 int qOrientation; /* +1 or -1 orientation. */ 00227 bool hardStart; /* Start position known */ 00228 bool hardEnd; /* End position known */ 00229 struct hgBoundedHit *hitList; /* Subalignments. */ 00230 }; 00231 00232 #endif /* HGAP_H */ 00233
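/* 1.5.2 - stray trailing text, apparently a version string left by the tool that generated this listing; not part of the header proper. */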