00001
00002
00003
00004 #include "common.h"
00005 #include "linefile.h"
00006 #include "dnaseq.h"
00007 #include "genoFind.h"
00008 #include "gfInternal.h"
00009 #include "errabort.h"
00010 #include "nib.h"
00011 #include "twoBit.h"
00012
00013 static char const rcsid[] = "$Id: gfInternal.c,v 1.3 2006/06/22 16:24:44 kent Exp $";
00014
00015
static int extendRespect(int oldX, int newX)
/* Return newX adjusted so that its remainder modulo 3 (its reading
 * frame) matches that of oldX. */
{
int oldFrame = oldX % 3;
int newFrame = newX % 3;

/* Strip newX's own frame offset, then apply oldX's offset instead. */
return newX - newFrame + oldFrame;
}
00023
00024 void gfiExpandRange(struct gfRange *range, int qSize, int tSize,
00025 boolean respectFrame, boolean isRc, int expansion)
00026
00027 {
00028 int x;
00029
00030 x = range->qStart - expansion;
00031 if (x < 0) x = 0;
00032 range->qStart = x;
00033
00034 x = range->qEnd + expansion;
00035 if (x > qSize) x = qSize;
00036 range->qEnd = x;
00037
00038 x = range->tStart - expansion;
00039 if (x < 0) x = 0;
00040 if (respectFrame && !isRc)
00041 {
00042 x = extendRespect(range->tStart, x);
00043 }
00044 range->tStart = x;
00045
00046 x = range->tEnd + expansion;
00047 if (x > tSize) x = tSize;
00048 if (respectFrame && isRc)
00049 {
00050 x = extendRespect(range->tEnd, x);
00051 if (x > tSize)
00052 x -= 3;
00053 }
00054 range->tEnd = x;
00055 }
00056
00057 struct dnaSeq *gfiExpandAndLoadCached(struct gfRange *range,
00058 struct hash *tFileCache, char *tSeqDir, int querySize,
00059 int *retTotalSeqSize, boolean respectFrame, boolean isRc, int expansion)
00060
00061
00062
00063 {
00064 struct dnaSeq *target = NULL;
00065 char fileName[PATH_LEN+256];
00066
00067 safef(fileName, sizeof(fileName), "%s/%s", tSeqDir, range->tName);
00068 if (nibIsFile(fileName))
00069 {
00070 struct nibInfo *nib = hashFindVal(tFileCache, fileName);
00071 if (nib == NULL)
00072 {
00073 nib = nibInfoNew(fileName);
00074 hashAdd(tFileCache, fileName, nib);
00075 }
00076 if (isRc)
00077 reverseIntRange(&range->tStart, &range->tEnd, nib->size);
00078 gfiExpandRange(range, querySize, nib->size, respectFrame, isRc, expansion);
00079 target = nibLdPart(fileName, nib->f, nib->size,
00080 range->tStart, range->tEnd - range->tStart);
00081 if (isRc)
00082 {
00083 reverseComplement(target->dna, target->size);
00084 reverseIntRange(&range->tStart, &range->tEnd, nib->size);
00085 }
00086 *retTotalSeqSize = nib->size;
00087 }
00088 else
00089 {
00090 struct twoBitFile *tbf = NULL;
00091 char *tSeqName = strchr(fileName, ':');
00092 int tSeqSize = 0;
00093 if (tSeqName == NULL)
00094 errAbort("No colon in .2bit response from gfServer");
00095 *tSeqName++ = 0;
00096 tbf = hashFindVal(tFileCache, fileName);
00097 if (tbf == NULL)
00098 {
00099 tbf = twoBitOpen(fileName);
00100 hashAdd(tFileCache, fileName, tbf);
00101 }
00102 tSeqSize = twoBitSeqSize(tbf, tSeqName);
00103 if (isRc)
00104 reverseIntRange(&range->tStart, &range->tEnd, tSeqSize);
00105 gfiExpandRange(range, querySize, tSeqSize, respectFrame, isRc, expansion);
00106 target = twoBitReadSeqFragLower(tbf, tSeqName, range->tStart, range->tEnd);
00107 if (isRc)
00108 {
00109 reverseComplement(target->dna, target->size);
00110 reverseIntRange(&range->tStart, &range->tEnd, tSeqSize);
00111 }
00112 *retTotalSeqSize = tSeqSize;
00113 }
00114 return target;
00115 }
00116
void gfiGetSeqName(char *spec, char *name, char *file)
/* Break a sequence spec into a sequence name and, optionally, a file
 * name.  If nibIsFile(spec) is true, name is filled in by splitPath()
 * and file receives a copy of the whole spec.  Otherwise spec must be
 * of the form "file:name" (aborts if there is no colon); name gets the
 * text after the first colon and file the text before it.  file may be
 * NULL if the caller does not want it.  No bounds checking is done on
 * name or file, so the caller must supply large enough buffers. */
{
char *colon;
int fileLen;

if (nibIsFile(spec))
    {
    splitPath(spec, NULL, name, NULL);
    if (file != NULL)
        strcpy(file, spec);
    return;
    }

colon = strchr(spec, ':');
if (colon == NULL)
    errAbort("Expecting colon in %s", spec);
strcpy(name, colon+1);
if (file == NULL)
    return;

/* Copy everything before the colon and terminate it by hand. */
fileLen = colon - spec;
memcpy(file, spec, fileLen);
file[fileLen] = 0;
}
00142