#include "common.h"
#include "hash.h"
#include "linefile.h"
#include "obscure.h"
#include "dnaseq.h"
#include "fa.h"
#include "nib.h"
#include "twoBit.h"
#include "repMask.h"
#include "gfClientLib.h"

Include dependency graph for gfClientLib.c:

Go to the source code of this file.
Functions | |
| void | gfClientFileArray (char *fileName, char ***retFiles, int *retFileCount) |
| void | gfClientUnmask (struct dnaSeq *seqList) |
| static void | maskFromOut (struct dnaSeq *seqList, char *outFile, float minRepDivergence) |
| static void | maskNucSeqList (struct dnaSeq *seqList, char *seqFileName, char *maskType, boolean hardMask, float minRepDivergence) |
| bioSeq * | gfClientSeqList (int fileCount, char *files[], boolean isProt, boolean isTransDna, char *maskType, float minRepDivergence, boolean showStatus) |
| void gfClientFileArray | ( | char * | fileName, | |
| char *** | retFiles, | |||
| int * | retFileCount | |||
| ) |
Definition at line 16 of file gfClientLib.c.
References AllocArray, cloneString(), FALSE, mustOpen(), nibIsFile(), readAllWords(), sameString, TRUE, and twoBitIsSpec().
Referenced by blat().
00020 { 00021 boolean gotSingle = FALSE; 00022 char *buf; /* This will leak memory but won't matter. */ 00023 00024 if (nibIsFile(fileName) || twoBitIsSpec(fileName) 00025 || sameString(fileName, "stdin")) 00026 gotSingle = TRUE; 00027 /* Detect .fa files (where suffix is not standardized) 00028 * by first character being a '>'. */ 00029 else 00030 { 00031 FILE *f = mustOpen(fileName, "r"); 00032 char c = fgetc(f); 00033 fclose(f); 00034 if (c == '>') 00035 gotSingle = TRUE; 00036 } 00037 if (gotSingle) 00038 { 00039 char **files; 00040 *retFiles = AllocArray(files, 1); 00041 files[0] = cloneString(fileName); 00042 *retFileCount = 1; 00043 return; 00044 } 00045 else 00046 { 00047 readAllWords(fileName, retFiles, retFileCount, &buf); 00048 } 00049 }
Here is the call graph for this function:

Here is the caller graph for this function:

| bioSeq* gfClientSeqList | ( | int | fileCount, | |
| char * | files[], | |||
| boolean | isProt, | |||
| boolean | isTransDna, | |||
| char * | maskType, | |||
| float | minRepDivergence, | |||
| boolean | showStatus | |||
| ) |
Definition at line 157 of file gfClientLib.c.
References doMask, faReadAllMixed(), faReadAllPep(), faToDna(), faToProtein(), maskNucSeqList(), dnaSeq::next, NIB_BASE_NAME, NIB_MASK_MIXED, nibIsFile(), nibLoadAllMasked(), slCat(), twoBitIsSpec(), twoBitLoadAll(), and ZeroVar.
Referenced by blat().
00167 { 00168 int i; 00169 char *fileName; 00170 bioSeq *seqList = NULL, *seq; 00171 boolean doMask = (maskType != NULL); 00172 00173 for (i=0; i<fileCount; ++i) 00174 { 00175 struct dnaSeq *list = NULL, sseq; 00176 ZeroVar(&sseq); 00177 fileName = files[i]; 00178 if (nibIsFile(fileName)) 00179 list = nibLoadAllMasked(NIB_MASK_MIXED|NIB_BASE_NAME, fileName); 00180 else if (twoBitIsSpec(fileName)) 00181 list = twoBitLoadAll(fileName); 00182 else if (isProt) 00183 list = faReadAllPep(fileName); 00184 else 00185 list = faReadAllMixed(fileName); 00186 00187 /* If necessary mask sequence from file. */ 00188 if (doMask) 00189 { 00190 maskNucSeqList(list, fileName, maskType, isTransDna, minRepDivergence); 00191 } 00192 else 00193 { 00194 /* If not masking send everything to proper case here. */ 00195 for (seq = list; seq != NULL; seq = seq->next) 00196 { 00197 if (isProt) 00198 faToProtein(seq->dna, seq->size); 00199 else 00200 faToDna(seq->dna, seq->size); 00201 } 00202 } 00203 /* Move local list to end of bigger list. */ 00204 seqList = slCat(seqList, list); 00205 } 00206 if (showStatus) 00207 { 00208 /* Total up size and sequence count and report. */ 00209 int count = 0; 00210 unsigned long totalSize = 0; 00211 for (seq = seqList; seq != NULL; seq = seq->next) 00212 { 00213 totalSize += seq->size; 00214 count += 1; 00215 } 00216 printf("Loaded %lu letters in %d sequences\n", totalSize, count); 00217 } 00218 return seqList; 00219 }
Here is the call graph for this function:

Here is the caller graph for this function:

| void gfClientUnmask | ( | struct dnaSeq * | seqList | ) |
Definition at line 52 of file gfClientLib.c.
References dnaSeq::dna, faToDna(), dnaSeq::next, and dnaSeq::size.
Referenced by blat(), and maskNucSeqList().
00054 { 00055 struct dnaSeq *seq; 00056 for (seq = seqList; seq != NULL; seq = seq->next) 00057 faToDna(seq->dna, seq->size); 00058 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static void maskFromOut | ( | struct dnaSeq * | seqList, | |
| char * | outFile, | |||
| float | minRepDivergence | |||
| ) | [static] |
Definition at line 60 of file gfClientLib.c.
References chopLine, dnaSeq::dna, errAbort(), lineFile::fileName, freeHash(), hashAdd(), hashFindVal(), lineFileClose(), lineFileNext(), lineFileOpen(), dnaSeq::name, newHash(), dnaSeq::next, repeatMaskOut::percDel, repeatMaskOut::percDiv, repeatMaskOut::percInc, repeatMaskOut::qEnd, repeatMaskOut::qName, repeatMaskOut::qStart, repeatMaskOutStaticLoad(), dnaSeq::size, startsWith(), toUpperN(), TRUE, and warn().
Referenced by maskNucSeqList().
00063 { 00064 struct lineFile *lf = lineFileOpen(outFile, TRUE); 00065 struct hash *hash = newHash(0); 00066 struct dnaSeq *seq; 00067 char *line; 00068 00069 for (seq = seqList; seq != NULL; seq = seq->next) 00070 hashAdd(hash, seq->name, seq); 00071 if (!lineFileNext(lf, &line, NULL)) 00072 errAbort("Empty mask file %s\n", lf->fileName); 00073 if (!startsWith("There were no", line)) /* No repeats is ok. Not much work. */ 00074 { 00075 if (!startsWith(" SW", line)) 00076 errAbort("%s isn't a RepeatMasker .out file.", lf->fileName); 00077 if (!lineFileNext(lf, &line, NULL) || !startsWith("score", line)) 00078 errAbort("%s isn't a RepeatMasker .out file.", lf->fileName); 00079 lineFileNext(lf, &line, NULL); /* Blank line. */ 00080 while (lineFileNext(lf, &line, NULL)) 00081 { 00082 char *words[32]; 00083 struct repeatMaskOut rmo; 00084 int wordCount; 00085 int seqSize; 00086 int repSize; 00087 wordCount = chopLine(line, words); 00088 if (wordCount < 14) 00089 errAbort("%s line %d - error in repeat mask .out file\n", lf->fileName, lf->lineIx); 00090 repeatMaskOutStaticLoad(words, &rmo); 00091 /* If repeat is more than 15% divergent don't worry about it. 
*/ 00092 if (rmo.percDiv + rmo.percDel + rmo.percInc <= minRepDivergence) 00093 { 00094 if((seq = hashFindVal(hash, rmo.qName)) == NULL) 00095 errAbort("%s is in %s but not corresponding sequence file, files out of sync?\n", 00096 rmo.qName, lf->fileName); 00097 seqSize = seq->size; 00098 if (rmo.qStart <= 0 || rmo.qStart > seqSize || rmo.qEnd <= 0 00099 || rmo.qEnd > seqSize || rmo.qStart > rmo.qEnd) 00100 { 00101 warn("Repeat mask sequence out of range (%d-%d of %d in %s)\n", 00102 rmo.qStart, rmo.qEnd, seqSize, rmo.qName); 00103 if (rmo.qStart <= 0) 00104 rmo.qStart = 1; 00105 if (rmo.qEnd > seqSize) 00106 rmo.qEnd = seqSize; 00107 } 00108 repSize = rmo.qEnd - rmo.qStart + 1; 00109 if (repSize > 0) 00110 toUpperN(seq->dna + rmo.qStart - 1, repSize); 00111 } 00112 } 00113 } 00114 freeHash(&hash); 00115 lineFileClose(&lf); 00116 }
Here is the call graph for this function:

Here is the caller graph for this function:

| static void maskNucSeqList | ( | struct dnaSeq * | seqList, | |
| char * | seqFileName, | |||
| char * | maskType, | |||
| boolean | hardMask, | |||
| float | minRepDivergence | |||
| ) | [static] |
Definition at line 118 of file gfClientLib.c.
References dnaSeq::dna, gfClientUnmask(), maskFromOut(), dnaSeq::next, sameWord, dnaSeq::size, toggleCase(), and upperToN().
Referenced by gfClientSeqList().
00122 { 00123 struct dnaSeq *seq; 00124 char *outFile = NULL, outNameBuf[512]; 00125 00126 if (sameWord(maskType, "upper")) 00127 { 00128 /* Already has dna to be masked in upper case. */ 00129 } 00130 else if (sameWord(maskType, "lower")) 00131 { 00132 for (seq = seqList; seq != NULL; seq = seq->next) 00133 toggleCase(seq->dna, seq->size); 00134 } 00135 else 00136 { 00137 /* Masking from a RepeatMasker .out file. */ 00138 if (sameWord(maskType, "out")) 00139 { 00140 sprintf(outNameBuf, "%s.out", seqFileName); 00141 outFile = outNameBuf; 00142 } 00143 else 00144 { 00145 outFile = maskType; 00146 } 00147 gfClientUnmask(seqList); 00148 maskFromOut(seqList, outFile, minRepDivergence); 00149 } 00150 if (hardMask) 00151 { 00152 for (seq = seqList; seq != NULL; seq = seq->next) 00153 upperToN(seq->dna, seq->size); 00154 } 00155 }
Here is the call graph for this function:

Here is the caller graph for this function:

1.5.2