00001
00002
00003
00004
00005
00006 #include "common.h"
00007 #include "dnautil.h"
00008 #include "shaRes.h"
00009 #include "dnaseq.h"
00010 #include "nt4.h"
00011 #include "sig.h"
00012
00013 static char const rcsid[] = "$Id: nt4.c,v 1.5 2005/04/10 14:41:24 markd Exp $";
00014
00015 static size_t bits32PaddedSize(size_t size)
00016 {
00017 return (((size+15)>>4)<<2);
00018 }
00019
00020 static struct nt4Seq *allocNt4(size_t baseCount, char *name)
00021
00022 {
00023 size_t memSize = bits32PaddedSize(baseCount);
00024 struct nt4Seq *seq = needMem(sizeof(*seq));
00025 seq->baseCount = baseCount;
00026 seq->bases = needLargeMem(memSize);
00027 seq->name = cloneString(name);
00028 return seq;
00029 }
00030
00031
00032 struct nt4Seq *newNt4(DNA *dna, int size, char *name)
00033
00034 {
00035 bits32 *packed;
00036 DNA *unpacked;
00037 char last[16];
00038 struct nt4Seq *seq = allocNt4(size, name);
00039 packed = seq->bases;
00040 unpacked = dna;
00041 while (size > 16)
00042 {
00043 *packed++ = packDna16(dna);
00044 dna += 16;
00045 size -= 16;
00046 }
00047 if (size > 0)
00048 {
00049 memcpy(last, dna, size);
00050 *packed++ = packDna16(dna);
00051 }
00052 return seq;
00053 }
00054
00055 void freeNt4(struct nt4Seq **pSeq)
00056
00057 {
00058 struct nt4Seq *seq = *pSeq;
00059 if (seq == NULL)
00060 return;
00061 freeMem(seq->bases);
00062 freeMem(seq->name);
00063 freez(pSeq);
00064 }
00065
00066 static FILE *nt4OpenVerify(char *fileName)
00067
00068
00069 {
00070 FILE *f = mustOpen(fileName, "rb");
00071 bits32 signature;
00072 mustReadOne(f, signature);
00073 if (signature != nt4Signature)
00074 errAbort("%s is not a good Nt4 file", fileName);
00075 return f;
00076 }
00077
00078 struct nt4Seq *loadNt4(char *fileName, char *seqName)
00079
00080 {
00081 bits32 size;
00082 struct nt4Seq *seq;
00083 FILE *f = nt4OpenVerify(fileName);
00084
00085 mustReadOne(f, size);
00086 if (seqName == NULL)
00087 seqName = fileName;
00088 seq = allocNt4(size, seqName);
00089 mustRead(f, seq->bases, bits32PaddedSize(size));
00090 carefulClose(&f);
00091 return seq;
00092 }
00093
00094 void saveNt4(char *fileName, DNA *dna, bits32 dnaSize)
00095
00096 {
00097 FILE *f = mustOpen(fileName, "wb");
00098 bits32 signature = nt4Signature;
00099 bits32 bases;
00100 char last[16];
00101
00102 writeOne(f, signature);
00103 writeOne(f, dnaSize);
00104 while (dnaSize >= 16)
00105 {
00106 bases = packDna16(dna);
00107 writeOne(f, bases);
00108 dna += 16;
00109 dnaSize -= 16;
00110 }
00111 if (dnaSize > 0)
00112 {
00113 zeroBytes(last, sizeof(last));
00114 memcpy(last, dna, dnaSize);
00115 bases = packDna16(last);
00116 writeOne(f, bases);
00117 }
00118 fclose(f);
00119 }
00120
00121 int nt4BaseCount(char *fileName)
00122
00123 {
00124 bits32 size;
00125 FILE *f = nt4OpenVerify(fileName);
00126
00127 mustReadOne(f, size);
00128 fclose(f);
00129 return (int)size;
00130 }
00131
00132 static void unpackRightSide(bits32 tile, int baseCount, DNA *out)
00133
00134 {
00135 int j;
00136
00137 for (j=baseCount; --j>=0; )
00138 {
00139 out[j] = valToNt[tile & 0x3];
00140 tile >>= 2;
00141 }
00142 }
00143
00144 static void unpackLeftSide(bits32 tile, int baseCount, DNA *out)
00145
00146 {
00147 int bitsToDump = ((16-baseCount)<<1);
00148 tile >>= bitsToDump;
00149 out += baseCount;
00150 while (--baseCount >= 0)
00151 {
00152 *--out = valToNt[tile&0x3];
00153 tile >>= 2;
00154 }
00155 }
00156
00157 static void unpackMidWord(bits32 tile, int startBase, int baseCount, DNA *out)
00158
00159 {
00160 tile >>= ((16-startBase-baseCount)<<1);
00161 out += baseCount;
00162 while (--baseCount >= 0)
00163 {
00164 *--out = valToNt[tile&0x3];
00165 tile >>= 2;
00166 }
00167 }
00168
00169 void unalignedUnpackDna(bits32 *tiles, int start, int size, DNA *unpacked)
00170
00171
00172 {
00173 DNA *pt = unpacked;
00174 int sizeLeft = size;
00175 int firstBases, middleBases;
00176 bits32 *startTile, *endTile;
00177
00178 dnaUtilOpen();
00179
00180
00181 startTile = tiles + (start>>4);
00182 endTile = tiles + ((start + size - 1)>>4);
00183 if (startTile == endTile)
00184 {
00185 unpackMidWord(*startTile, start&0xf, size, unpacked);
00186 return;
00187 }
00188
00189
00190 tiles = startTile;
00191
00192
00193
00194 firstBases = (16 - (start&0xf));
00195 unpackRightSide(*tiles, firstBases, pt);
00196 pt += firstBases;
00197 sizeLeft -= firstBases;
00198 tiles += 1;
00199
00200
00201 middleBases = (sizeLeft&0x7ffffff0);
00202 unpackDna(tiles, middleBases>>4, pt);
00203 pt += middleBases;
00204 sizeLeft -= middleBases;
00205 tiles += (middleBases>>4);
00206
00207
00208 if (sizeLeft > 0)
00209 unpackLeftSide(*tiles, sizeLeft, pt);
00210 pt += sizeLeft;
00211
00212
00213 assert(pt == unpacked+size);
00214 *pt = 0;
00215 }
00216
00217 DNA *nt4Unpack(struct nt4Seq *n, int start, int size)
00218
00219 {
00220 DNA *unpacked = needLargeMem(size+1);
00221 unalignedUnpackDna(n->bases, start, size, unpacked);
00222 return unpacked;
00223 }
00224
00225
00226 DNA *nt4LoadPart(char *fileName, int start, int size)
00227
00228 {
00229 bits32 basesInFile;
00230 int tStart, tEnd, tSize;
00231 int end;
00232 FILE *f;
00233 DNA *unpacked;
00234 bits32 *tiles;
00235
00236
00237 f = nt4OpenVerify(fileName);
00238 mustReadOne(f, basesInFile);
00239
00240 assert(start >= 0);
00241 end = start + size;
00242 assert(end <= (int)basesInFile);
00243
00244
00245 tStart = (start>>4);
00246 tEnd = ((end+15)>>4);
00247 tSize = tEnd - tStart;
00248
00249
00250 tiles = needLargeMem(tSize * sizeof(*tiles));
00251 fseek(f, tStart * sizeof(*tiles), SEEK_CUR);
00252 mustRead(f, tiles, tSize * sizeof(*tiles) );
00253
00254
00255 unpacked = needLargeMem(size+1);
00256 unalignedUnpackDna(tiles, start - (tStart<<4), size, unpacked);
00257
00258 freeMem(tiles);
00259 fclose(f);
00260 return unpacked;
00261 }