00001
00002
00003 #include "common.h"
00004 #include "dnaseq.h"
00005 #include "fa.h"
00006 #include "twoBit.h"
00007 #include "nib.h"
00008 #include "dnaLoad.h"
00009
/* Revision-control identification string embedded in the object file. */
static const char rcsid[] = "$Id: dnaLoad.c,v 1.8 2005/03/28 17:21:28 hiram Exp $";
00011
00012 struct dnaLoadStack
00013
00014 {
00015 struct dnaLoadStack *next;
00016 struct twoBitFile *twoBit;
00017 struct twoBitIndex *tbi;
00018 struct lineFile *textFile;
00019 boolean textIsFa;
00020 };
00021
00022 struct dnaLoad
00023
00024
00025 {
00026 struct dnaLoad *next;
00027 char *topFileName;
00028 boolean finished;
00029 struct dnaLoadStack *stack;
00030 int curStart;
00031 int curEnd;
00032 int curSize;
00033 };
00034
00035 struct dnaLoadStack *dnaLoadStackNew(char *fileName)
00036
00037 {
00038 struct dnaLoadStack *dls;
00039 AllocVar(dls);
00040 if (twoBitIsFile(fileName))
00041 {
00042 dls->twoBit = twoBitOpen(fileName);
00043 dls->tbi = dls->twoBit->indexList;
00044 }
00045 else
00046 {
00047 char *line;
00048 dls->textFile = lineFileOpen(fileName, TRUE);
00049 if (lineFileNextReal(dls->textFile, &line))
00050 {
00051 line = trimSpaces(line);
00052 if (line[0] == '>')
00053 dls->textIsFa = TRUE;
00054 lineFileReuse(dls->textFile);
00055 }
00056 }
00057 return dls;
00058 }
00059
00060 void dnaLoadStackFree(struct dnaLoadStack **pDls)
00061
00062 {
00063 struct dnaLoadStack *dls = *pDls;
00064 if (dls != NULL)
00065 {
00066 lineFileClose(&dls->textFile);
00067 twoBitClose(&dls->twoBit);
00068 freez(pDls);
00069 }
00070 }
00071
00072 void dnaLoadStackFreeList(struct dnaLoadStack **pList)
00073
00074 {
00075 struct dnaLoadStack *el, *next;
00076
00077 for (el = *pList; el != NULL; el = next)
00078 {
00079 next = el->next;
00080 dnaLoadStackFree(&el);
00081 }
00082 *pList = NULL;
00083 }
00084
00085 void dnaLoadClose(struct dnaLoad **pDl)
00086
00087 {
00088 struct dnaLoad *dl = *pDl;
00089 if (dl != NULL)
00090 {
00091 dnaLoadStackFreeList(&dl->stack);
00092 freeMem(dl->topFileName);
00093 freez(pDl);
00094 }
00095 }
00096
00097 struct dnaLoad *dnaLoadOpen(char *fileName)
00098
00099
00100 {
00101 struct dnaLoad *dl;
00102 AllocVar(dl);
00103 dl->topFileName = cloneString(fileName);
00104 return dl;
00105 }
00106
00107 struct dnaSeq *dnaLoadSingle(char *fileName, int *retStart, int *retEnd, int *retParentSize)
00108
00109 {
00110 struct dnaSeq *seq = NULL;
00111 unsigned start = 0, end = 0;
00112 int parentSize = 0;
00113 if (nibIsFile(fileName))
00114 {
00115
00116 char filePath[PATH_LEN];
00117 char name[PATH_LEN];
00118 nibParseName(0, fileName, filePath, name, &start, &end);
00119
00120 if (end != 0)
00121 {
00122 FILE *f;
00123 int size;
00124 nibOpenVerify(filePath, &f, &size);
00125 parentSize = size;
00126 }
00127 seq = nibLoadAllMasked(NIB_MASK_MIXED, fileName);
00128 if (end == 0)
00129 parentSize = end = seq->size;
00130 freez(&seq->name);
00131 seq->name = cloneString(name);
00132 }
00133 else if (twoBitIsRange(fileName))
00134 {
00135
00136 char *rangeSpec = cloneString(fileName);
00137 int start, end;
00138 char *file, *seqName;
00139 twoBitParseRange(rangeSpec, &file, &seqName, &start, &end);
00140
00141
00142 {
00143 struct twoBitFile *tbf = twoBitOpen(file);
00144 parentSize = twoBitSeqSize(tbf, seqName);
00145 seq = twoBitReadSeqFrag(tbf, seqName, start, end);
00146 twoBitClose(&tbf);
00147 }
00148 if (end == 0)
00149 end = seq->size;
00150 freez(&rangeSpec);
00151 }
00152 if (retStart != NULL)
00153 *retStart = start;
00154 if (retEnd != NULL)
00155 *retEnd = end;
00156 if (retParentSize != NULL)
00157 *retParentSize = parentSize;
00158 return seq;
00159 }
00160
00161 static struct dnaSeq *dnaLoadNextFromStack(struct dnaLoad *dl)
00162
00163
00164 {
00165 struct dnaLoadStack *dls;
00166 struct dnaSeq *seq = NULL;
00167 while ((dls = dl->stack) != NULL)
00168 {
00169 if (dls->twoBit)
00170 {
00171 if (dls->tbi != NULL)
00172 {
00173 seq = twoBitReadSeqFrag(dls->twoBit, dls->tbi->name, 0, 0);
00174 dls->tbi = dls->tbi->next;
00175 return seq;
00176 }
00177 else
00178 {
00179 dl->stack = dls->next;
00180 dnaLoadStackFree(&dls);
00181 }
00182 }
00183 else if (dls->textIsFa)
00184 {
00185 DNA *dna;
00186 char *name;
00187 int size;
00188 if (faMixedSpeedReadNext(dls->textFile, &dna, &size, &name))
00189 {
00190 AllocVar(seq);
00191 seq->dna = needLargeMem(size+1);
00192 memcpy((void *)seq->dna, (void *)dna, size);
00193 seq->dna[size] = 0;
00194 seq->size = size;
00195 seq->name = cloneString(name);
00196 dl->curStart = 0;
00197 dl->curEnd = size;
00198 dl->curSize = size;
00199 return seq;
00200 }
00201 else
00202 {
00203 dl->stack = dls->next;
00204 dnaLoadStackFree(&dls);
00205 }
00206 }
00207 else
00208 {
00209 char *line;
00210 if (lineFileNextReal(dls->textFile, &line))
00211 {
00212 line = trimSpaces(line);
00213 if ((seq = dnaLoadSingle(line, &dl->curStart, &dl->curEnd, &dl->curSize)) != NULL)
00214 return seq;
00215 else
00216 {
00217 struct dnaLoadStack *newDls;
00218 newDls = dnaLoadStackNew(line);
00219 slAddHead(&dl->stack, newDls);
00220 }
00221 }
00222 else
00223 {
00224 dl->stack = dls->next;
00225 dnaLoadStackFree(&dls);
00226 }
00227 }
00228 }
00229 dl->finished = TRUE;
00230 return NULL;
00231 }
00232
00233 static struct dnaSeq *dnaLoadStackOrSingle(struct dnaLoad *dl)
00234
00235 {
00236 struct dnaSeq *seq = NULL;
00237 if (dl->finished)
00238 return NULL;
00239 if (dl->stack == NULL)
00240 {
00241 if ((seq = dnaLoadSingle(dl->topFileName, &dl->curStart, &dl->curEnd, &dl->curSize)) != NULL)
00242 {
00243 dl->finished = TRUE;
00244 return seq;
00245 }
00246 dl->stack = dnaLoadStackNew(dl->topFileName);
00247 }
00248 return dnaLoadNextFromStack(dl);
00249 }
00250
00251 struct dnaSeq *dnaLoadNext(struct dnaLoad *dl)
00252
00253 {
00254 struct dnaSeq *seq;
00255 dl->curSize = dl->curStart = dl->curEnd = 0;
00256 seq = dnaLoadStackOrSingle(dl);
00257 return seq;
00258 }
00259
00260 struct dnaSeq *dnaLoadAll(char *fileName)
00261
00262
00263
00264
00265 {
00266 struct dnaLoad *dl = dnaLoadOpen(fileName);
00267 struct dnaSeq *seqList = NULL, *seq;
00268 while ((seq = dnaLoadNext(dl)) != NULL)
00269 {
00270 slAddHead(&seqList, seq);
00271 }
00272 dnaLoadClose(&dl);
00273 slReverse(seqList);
00274 return seqList;
00275 }
00276
00277 int dnaLoadCurStart(struct dnaLoad *dl)
00278
00279
00280
00281
00282 {
00283 return dl->curStart;
00284 }
00285
00286 int dnaLoadCurEnd(struct dnaLoad *dl)
00287
00288
00289
00290
00291 {
00292 return dl->curEnd;
00293 }
00294
00295 int dnaLoadCurSize(struct dnaLoad *dl)
00296
00297
00298 {
00299 return dl->curSize;
00300 }
00301