00001
00002
00003
00004
00005
00006 #include "common.h"
00007 #include "dnaseq.h"
00008 #include "bits.h"
00009 #include "hash.h"
00010 #include "obscure.h"
00011
00012 static char const rcsid[] = "$Id: dnaseq.c,v 1.19 2005/11/10 03:40:34 kent Exp $";
00013
00014
00015 struct dnaSeq *newDnaSeq(DNA *dna, int size, char *name)
00016
00017 {
00018 struct dnaSeq *seq;
00019
00020 seq = needMem(sizeof(*seq));
00021 if (name != NULL)
00022 seq->name = cloneString(name);
00023 seq->dna = dna;
00024 seq->size = size;
00025 seq->mask = NULL;
00026 return seq;
00027 }
00028
00029 struct dnaSeq *cloneDnaSeq(struct dnaSeq *orig)
00030
00031 {
00032 struct dnaSeq *seq = CloneVar(orig);
00033 seq->name = cloneString(seq->name);
00034 seq->dna = needHugeMem(seq->size+1);
00035 memcpy(seq->dna, orig->dna, seq->size+1);
00036 seq->mask = NULL;
00037 if (orig->mask != NULL)
00038 {
00039 seq->mask = bitClone(orig->mask, seq->size);
00040 }
00041 return seq;
00042 }
00043
00044 void freeDnaSeq(struct dnaSeq **pSeq)
00045
00046 {
00047 struct dnaSeq *seq = *pSeq;
00048 if (seq == NULL)
00049 return;
00050 freeMem(seq->name);
00051 freeMem(seq->dna);
00052 bitFree(&seq->mask);
00053 freez(pSeq);
00054 }
00055
00056 void freeDnaSeqList(struct dnaSeq **pSeqList)
00057
00058 {
00059 struct dnaSeq *seq, *next;
00060
00061 for (seq = *pSeqList; seq != NULL; seq = next)
00062 {
00063 next = seq->next;
00064 freeDnaSeq(&seq);
00065 }
00066 *pSeqList = NULL;
00067 }
00068
00069 boolean seqIsLower(bioSeq *seq)
00070
00071 {
00072 int size = seq->size, i;
00073 char *poly = seq->dna;
00074 for (i=0; i<size; ++i)
00075 if (!islower(poly[i]))
00076 return FALSE;
00077 return TRUE;
00078 }
00079
00080 boolean seqIsDna(bioSeq *seq)
00081
00082 {
00083 return isDna(seq->dna, seq->size);
00084 }
00085
00086
00087 aaSeq *translateSeqN(struct dnaSeq *inSeq, unsigned offset, unsigned inSize, boolean stop)
00088
00089
00090 {
00091 aaSeq *seq;
00092 DNA *dna = inSeq->dna;
00093 AA *pep, aa;
00094 int i, lastCodon;
00095 int actualSize = 0;
00096
00097 assert(offset <= inSeq->size);
00098 if ((inSize == 0) || (inSize > (inSeq->size - offset)))
00099 inSize = inSeq->size - offset;
00100 lastCodon = offset + inSize - 3;
00101
00102 AllocVar(seq);
00103 seq->dna = pep = needLargeMem(inSize/3+1);
00104 for (i=offset; i <= lastCodon; i += 3)
00105 {
00106 aa = lookupCodon(dna+i);
00107 if (aa == 0)
00108 {
00109 if (stop)
00110 break;
00111 else
00112 aa = 'Z';
00113 }
00114 *pep++ = aa;
00115 ++actualSize;
00116 }
00117 *pep = 0;
00118 assert(actualSize <= inSize/3+1);
00119 seq->size = actualSize;
00120 seq->name = cloneString(inSeq->name);
00121 return seq;
00122 }
00123
00124 aaSeq *translateSeq(struct dnaSeq *inSeq, unsigned offset, boolean stop)
00125
00126
00127
00128 {
00129 return translateSeqN(inSeq, offset, 0, stop);
00130 }
00131
00132 bioSeq *whichSeqIn(bioSeq **seqs, int seqCount, char *letters)
00133
00134 {
00135 aaSeq *seq;
00136 int i;
00137
00138 for (i=0; i<seqCount; ++i)
00139 {
00140 seq = seqs[i];
00141 if (seq->dna <= letters && letters < seq->dna + seq->size)
00142 return seq;
00143 }
00144 internalErr();
00145 return NULL;
00146 }
00147
00148 Bits *maskFromUpperCaseSeq(bioSeq *seq)
00149
00150
00151 {
00152 int size = seq->size, i;
00153 char *poly = seq->dna;
00154 Bits *b = bitAlloc(size);
00155 for (i=0; i<size; ++i)
00156 {
00157 if (isupper(poly[i]))
00158 bitSetOne(b, i);
00159 }
00160 return b;
00161 }
00162
00163 struct hash *dnaSeqHash(struct dnaSeq *seqList)
00164
00165 {
00166 int size = slCount(seqList)+1;
00167 int sizeLog2 = digitsBaseTwo(size);
00168 struct hash *hash = hashNew(sizeLog2);
00169 struct dnaSeq *seq;
00170 for (seq = seqList; seq != NULL; seq = seq->next)
00171 hashAddUnique(hash, seq->name, seq);
00172 return hash;
00173 }
00174