gfClient/gfClient.c

Go to the documentation of this file.
00001 /* gfClient - A client for the genomic finding program that produces a .psl file. */
00002 /* Copyright 2001-2003 Jim Kent.  All rights reserved. */
00003 #include "common.h"
00004 #include "linefile.h"
00005 #include "aliType.h"
00006 #include "fa.h"
00007 #include "genoFind.h"
00008 #include "psl.h"
00009 #include "options.h"
00010 #include "fuzzyFind.h"
00011 
00012 static char const rcsid[] = "$Id: gfClient.c,v 1.34 2006/11/15 15:20:59 angie Exp $";
00013 
00014 static struct optionSpec optionSpecs[] = {  /* Command line options; handed to optionInit() in main(). */
00015     {"prot", OPTION_BOOLEAN},       /* main() sets both query and target type to "prot" */
00016     {"q", OPTION_STRING},           /* query type name, read in main() */
00017     {"t", OPTION_STRING},           /* target (database) type name, read in main() */
00018     {"minIdentity", OPTION_FLOAT},  /* minimum sequence identity, in percent per usage() */
00019     {"minScore", OPTION_INT},       /* minimum score passed to the gfAlign* calls */
00020     {"dots", OPTION_INT},           /* print a dot every N query sequences */
00021     {"out", OPTION_STRING},         /* output format name handed to gfOutputAny() */
00022     {"maxIntron", OPTION_INT},      /* value passed to setFfIntronMax() in main() */
00023     {"nohead", OPTION_BOOLEAN},     /* checked with optionExists() when gfClient() builds the output handler */
00024     {NULL, 0}                       /* presumably the end-of-table sentinel for optionInit() -- confirm in options.h */
00025 };
00026 
00027 /* Variables that can be overridden by command line. */
00028 int dots = 0;                /* -dots=N: gfClient() prints a '.' every N query sequences; 0 disables */
00029 int minScore = 30;           /* -minScore=N: passed to every gfAlign* call in gfClient() */
00030 double minIdentity = 90;     /* -minIdentity=N; main() changes the default to 25 for prot/dnax/rnax targets */
00031 char *outputFormat = "psl";  /* -out=type: format name handed to gfOutputAny() */
00032 char *qType = "dna";         /* -q=type: query type name, converted with gfTypeFromName() in gfClient() */
00033 char *tType = "dna";         /* -t=type: target type name, converted with gfTypeFromName() in gfClient() */
00034 
00035 void usage()
00036 /* Explain usage and exit. */
00037 {
00038 printf(
00039   "gfClient v. %s - A client for the genomic finding program that produces a .psl file\n"
00040   "usage:\n"
00041   "   gfClient host port seqDir in.fa out.psl\n"
00042   "where\n"
00043   "   host is the name of the machine running the gfServer\n"
00044   "   port is the same as you started the gfServer with\n"
00045   "   seqDir is the path of the .nib or .2bit files relative to the current dir\n"
00046   "       (note these are needed by the client as well as the server)\n"
00047   "   in.fa is a fasta format file.  May contain multiple records\n"
00048   "   out.psl where to put the output\n"
00049   "options:\n"
00050   "   -t=type     Database type.  Type is one of:\n"
00051   "                 dna - DNA sequence\n"
00052   "                 prot - protein sequence\n"
00053   "                 dnax - DNA sequence translated in six frames to protein\n"
00054   "               The default is dna\n"
00055   "   -q=type     Query type.  Type is one of:\n"
00056   "                 dna - DNA sequence\n"
00057   "                 rna - RNA sequence\n"
00058   "                 prot - protein sequence\n"
00059   "                 dnax - DNA sequence translated in six frames to protein\n"
00060   "                 rnax - DNA sequence translated in three frames to protein\n"
00061   "   -prot       Synonymous with -d=prot -q=prot\n"
00062   "   -dots=N   Output a dot every N query sequences\n"
00063   "   -nohead   Suppresses psl five line header\n"
00064   "   -minScore=N sets minimum score.  This is twice the matches minus the \n"
00065   "               mismatches minus some sort of gap penalty.  Default is 30\n"
00066   "   -minIdentity=N Sets minimum sequence identity (in percent).  Default is\n"
00067   "               90 for nucleotide searches, 25 for protein or translated\n"
00068   "               protein searches.\n"
00069   "   -out=type   Controls output file format.  Type is one of:\n"
00070   "                   psl - Default.  Tab separated format without actual sequence\n"
00071   "                   pslx - Tab separated format with sequence\n"
00072   "                   axt - blastz-associated axt format\n"
00073   "                   maf - multiz-associated maf format\n"
00074   "                   sim4 - similar to sim4 format\n"
00075   "                   wublast - similar to wublast format\n"
00076   "                   blast - similar to NCBI blast format\n"
00077   "                   blast8- NCBI blast tabular format\n"
00078   "                   blast9 - NCBI blast tabular format with comments\n"
00079   "   -maxIntron=N  Sets maximum intron size. Default is %d\n",
00080                         gfVersion, ffIntronMaxDefault);
00081 exit(-1);
00082 }
00083 
00084 
00085 struct gfOutput *gvo;  /* Output handler: set from gfOutputAny() in gfClient(), then passed to the gfAlign* and gfOutputQuery() calls. */
00086 
00087 void gfClient(char *hostName, char *portName, char *tSeqDir, char *inName, 
00088         char *outName, char *tTypeName, char *qTypeName)
00089 /* gfClient - A client for the genomic finding program that produces a .psl file. */
00090 {
00091 struct lineFile *lf = lineFileOpen(inName, TRUE);
00092 static bioSeq seq;
00093 FILE *out = mustOpen(outName, "w");
00094 enum gfType qType = gfTypeFromName(qTypeName);
00095 enum gfType tType = gfTypeFromName(tTypeName);
00096 int dotMod = 0;
00097 char databaseName[256];
00098 struct hash *tFileCache = gfFileCacheNew();
00099 
00100 snprintf(databaseName, sizeof(databaseName), "%s:%s", hostName, portName);
00101 
00102 gvo = gfOutputAny(outputFormat,  round(minIdentity*10), qType == gftProt, tType == gftProt,
00103         optionExists("nohead"), databaseName, 23, 3.0e9, minIdentity, out);
00104 gfOutputHead(gvo, out);
00105 while (faSomeSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name, qType != gftProt))
00106     {
00107     int conn = gfConnect(hostName, portName);
00108     if (dots != 0)
00109         {
00110         if (++dotMod >= dots)
00111             {
00112             dotMod = 0;
00113             fputc('.', stdout);
00114             fflush(stdout);
00115             }
00116         }
00117     if (qType == gftProt && (tType == gftDnaX || tType == gftRnaX))
00118         {
00119         gvo->reportTargetStrand = TRUE;
00120         gfAlignTrans(&conn, tSeqDir, &seq, minScore, tFileCache, gvo);
00121         }
00122     else if ((qType == gftRnaX || qType == gftDnaX) && (tType == gftDnaX || tType == gftRnaX))
00123         {
00124         gvo->reportTargetStrand = TRUE;
00125         gfAlignTransTrans(&conn, tSeqDir, &seq, FALSE, minScore, tFileCache, 
00126                 gvo, qType == gftRnaX);
00127         if (qType == gftDnaX)
00128             {
00129             reverseComplement(seq.dna, seq.size);
00130             close(conn);
00131             conn = gfConnect(hostName, portName);
00132             gfAlignTransTrans(&conn, tSeqDir, &seq, TRUE, minScore, tFileCache,
00133                 gvo, FALSE);
00134             }
00135         }
00136     else if ((tType == gftDna || tType == gftRna) && (qType == gftDna || qType == gftRna))
00137         {
00138         gfAlignStrand(&conn, tSeqDir, &seq, FALSE, minScore, tFileCache, gvo);
00139         conn = gfConnect(hostName, portName);
00140         reverseComplement(seq.dna, seq.size);
00141         gfAlignStrand(&conn, tSeqDir, &seq, TRUE,  minScore, tFileCache, gvo);
00142         }
00143     else
00144         {
00145         errAbort("Comparisons between %s queries and %s databases not yet supported",
00146                 qTypeName, tTypeName);
00147         }
00148     gfOutputQuery(gvo, out);
00149     }
00150 if (out != stdout)
00151     printf("Output is in %s\n", outName);
00152 gfFileCacheFree(&tFileCache);
00153 }
00154 
00155 int main(int argc, char *argv[])
00156 /* Process command line. */
00157 {
00158 optionInit(&argc, argv, optionSpecs);
00159 if (argc != 6)
00160     usage();
00161 if (optionExists("prot"))
00162     qType = tType = "prot";
00163 qType = optionVal("q", qType);
00164 tType = optionVal("t", tType);
00165 if (sameWord(tType, "prot") || sameWord(tType, "dnax") || sameWord(tType, "rnax"))
00166     minIdentity = 25;
00167 minIdentity = optionFloat("minIdentity", minIdentity);
00168 minScore = optionInt("minScore", minScore);
00169 dots = optionInt("dots", 0);
00170 outputFormat = optionVal("out", outputFormat);
00171 /* set global for fuzzy find functions */
00172 setFfIntronMax(optionInt("maxIntron", ffIntronMaxDefault));
00173 gfClient(argv[1], argv[2], argv[3], argv[4], argv[5], tType, qType);
00174 return 0;
00175 }

Generated on Tue Dec 25 18:39:29 2007 for blat by  doxygen 1.5.2