EnsemblServer/SSAHAClient.cpp File Reference

#include "ClientServerUtils.h"
#include "SSAHAClient.h"
#include "SequenceReaderFasta.h"
#include "SequenceReaderString.h"
#include "MatchStore.h"
#include "MatchAligner.h"
#include <string.h>
#include <iomanip>
#include <strstream>

Include dependency graph for SSAHAClient.cpp:

Go to the source code of this file.

Functions

void sendQuery (FILE *fp, int sockfd, SequenceReader &seqReader)
int main (int numArgs, char *args[])

Variables

static QueryHeader qinfo
static Handshake hello


Function Documentation

int main ( int  numArgs,
char *  args[] 
)

Definition at line 220 of file SSAHAClient.cpp.

References Connect(), eSortAndReturnSequence, eSortByMatchLength, eSortByPercentMatch, gBaseBits, gResidueBits, qinfo, SA, sendQuery(), Socket(), and SSAHAException::what().

00220                                      {
00221 // Client entry point: fills the global qinfo from the command line, buffers the query text from stdin, connects to the server and calls sendQuery(). Exits with 0 on success, 1 after reporting a caught exception.
00222 try {
00223     int sockfd;
00224     struct sockaddr_in  servaddr;
00225
00226 
00227     // only needed if we need to make our own fasta file
00228     ostrstream buf;
00229     istream istr(buf.rdbuf());
00230 
00231     if(( numArgs<11)||( numArgs>13))
00232     {
00233         cerr 
00234         <<"syntax: " << args[0] 
00235         <<" serverMachine serverPort minMatchSize maxGap maxInsert numRepeats\n"
00236         <<"queryType clipThreshold maxNumMatches sortMode "
00237         << " [substituteThreshold] [bandExtension]\n"
00238         <<"serverMachine: name of machine on which server is running\n"
00239         <<"serverPort   : port number on that machine\n"
00240         <<"minMatchSize : matches must contain at least this many matching symbols\n"
00241         <<"maxGap       : matches may contain gaps of up to this many symbols\n"
00242         <<"maxInsert    : max number of indels between successive hits in a match\n"
00243         <<"numRepeats   : screen out repeating motifs of up to this many symbols\n"
00244         <<"queryType    : DNA or protein\n"
00245         <<"clipThreshold: ignore words occurring this many more times than expected\n"
00246         <<"maxNumMatches: at most this many matches for each query sequence\n"
00247         <<"               (set to 0 to obtain all matches unsorted)\n"
00248         <<"sortMode     : none, size, percent or align\n"
00249         <<"               (ignored if maxNumMatches set to 0)\n"
00250         <<"substituteThreshold : allow 1 base/amino mismatch in words that occur\n"
00251         <<"               up to this many more times than expected (default 0)\n"
00252         <<"bandExtension: band size for banded dynamic programming extends this\n"
00253         <<"               far from the diagonal (ignored unless sortMode set to\n"
00254         <<"               align, defaults to 0)\n"; 
00255                                          
00256         throw SSAHAException("Invalid command line input to client");
00257     }
00258     memset((void*)&qinfo, 0, sizeof(qinfo));
00259     
00260     sockfd = Socket(AF_INET, SOCK_STREAM, 0);
00261 
00262     int portNumber(atoi(args[2]));
00263 
00264     cerr << "Queries will be sent via port number " << portNumber << ".\n";
00265 
00266     qinfo.minPrint=atoi(args[3]);
00267 
00268     cerr << "Only matches greater than " << qinfo.minPrint << " bases will be reported.\n";
00269 
00270     qinfo.maxGap=atoi(args[4]);
00271     qinfo.maxInsert=atoi(args[5]);
00272     qinfo.numRepeats=atoi(args[6]);
00273     qinfo.clipThreshold=atoi(args[8]);
00274     qinfo.maxMatches=atoi(args[9]);
00275     // args[7] (queryType) is a string and is handled separately below
00276     string queryType(args[7]);
00277     SequenceEncoder* pEncoder;
00278 
00279     if (queryType=="DNA")  {
00280       qinfo.bitsPerSymbol = gBaseBits;
00281       pEncoder = new SequenceEncoderDNA(12);
00282       cerr << "Query sequence is DNA.\n";
00283     } else if (queryType=="protein") { 
00284       qinfo.bitsPerSymbol = gResidueBits;
00285       pEncoder = new SequenceEncoderProtein(5);
00286       cerr << "Query sequence is protein.\n";
00287     } else {
00288       cerr  << "Unknown value for query type (must be \"DNA\" or \"protein\")\n";
00289       throw SSAHAException("Invalid value for queryType");
00290     }
00291 
00292     string sortMode(args[10]);
00293     if (sortMode=="none")  {
00294       qinfo.maxMatches=0; // overrides args[9]; the usage text describes 0 as "all matches unsorted"
00295     }
00296     else if (sortMode=="size")  {
00297       qinfo.sortMode = eSortByMatchLength;
00298     } else if (sortMode=="percent") {
00299       qinfo.sortMode = eSortByPercentMatch;
00300     } else if (sortMode=="align") {
00301       qinfo.sortMode = eSortAndReturnSequence;
00302     } else {
00303       cerr  << "Unknown value for sort mode " << "(must be \"none\", \"size\", \"percent\" or \"align\")\n";
00304       throw SSAHAException("Invalid value for sortMode");
00305     }
00306 
00307     qinfo.substituteThreshold = ((numArgs>11)?atoi(args[11]):0);
00308     qinfo.bandExtension = ((numArgs>12)?atoi(args[12]):0);
00309 
00310     while (cin.peek()==' ') cin.ignore(); // zap any leading spaces
00311 
00312     SequenceReader* pReader;
00313     // both branches read the buffered text through the same reader type; plain input only gets a ">unnamedQuery" header line prepended
00314 
00315     if (cin.peek()=='>')  {
00316       cerr  << "First nonspace character is a \">\", assuming input text " << "is in fasta format.\n";
00317       buf << cin.rdbuf();
00318       pReader = new SequenceReaderFile(istr, '>', '>', pEncoder, cerr );
00319     }  else {
00320       cerr << "Assuming input text is a plain string of " << queryType << " data.\n";
00321       buf << ">unnamedQuery\n" << cin.rdbuf();
00322       pReader = new SequenceReaderFile(istr, '>', '>', pEncoder, cerr );
00323     }
00324     // fill in the server address: family, port, then the first address returned for the host name
00325     memset((void*)&servaddr, 0, sizeof(servaddr));
00326     servaddr.sin_family = AF_INET;
00327     servaddr.sin_port = htons(portNumber);
00328 
00329     cerr << "Server is assumed to be running on machine " << args[1] << ".\n";
00330 
00331     struct hostent *hp;
00332     hp =  gethostbyname(args[1]);
00333     if (hp==NULL) throw NetworkException("Invalid host name");
00334 
00335       memcpy(&(servaddr.sin_addr.s_addr), *(hp->h_addr_list), sizeof(struct
00336       in_addr));
00337 
00338     Connect(sockfd, (SA *) &servaddr, sizeof(servaddr));
00339     
00340     sendQuery(stdin, sockfd, *pReader);
00341 
00342     exit(0);
00343 
00344 } 
00345 catch (const NetworkException& err ) {
00346     cerr << "Caught NetworkException: " << err.what() << "\n";
00347     if (((string)err.what()).substr(0,16)=="getAtLeast error") {
00348       cout << "ERROR: lost connection, please resubmit your query"  << endl;
00349     }  else {
00350       cout << "ERROR: " << err.what() << endl;
00351     } 
00352     exit(1);
00353   }
00354   catch (const SSAHAException& err ) {
00355     cerr << "Caught SSAHA exception: " << err.what() << "\n";
00356     cout << "ERROR: " << err.what() << endl;
00357     exit(1);
00358   }
00359   catch (const std::exception& err ) {
00360     cerr << "Caught exception: " << err.what() << "\n";
00361     cout << "ERROR: " << err.what() << endl;
00362     exit(1);
00363   }
00364 
00365 }

Here is the call graph for this function:

void sendQuery ( FILE *  fp,
int  sockfd,
SequenceReader &  seqReader 
)

Definition at line 72 of file SSAHAClient.cpp.

References QueryHeader::bandExtension, QueryHeader::bitsPerSymbol, SequenceReader::changeMode(), SocketInterface::checkSocketEmpty(), MatchRemote::data_, e2bitDNA, e5bitProtein, eSortAndReturnSequence, gBaseBits, SequenceReader::getLastSequenceName(), SequenceReader::getNextSequence(), gMaxBasesPerWord, gResidueBits, hello, MatchStoreRemote::match_, Handshake::maxBufferSize, QueryHeader::maxGap, QueryHeader::maxInsert, MAXLINE, MatchRemote::names_, MatchHeader::numMatches, QueryHeader::numQuerySeqs, QueryHeader::numQueryWords, QueryHeader::numRepeats, MatchHeader::numSubjectNames, qinfo, SocketInterface::receiveString(), SocketInterface::receiveStruct(), SocketInterface::sendSequence(), SocketInterface::sendStruct(), SERV_VERSION, SocketInterface::setTimeOut(), QueryHeader::sortMode, SourceReaderDummy::source_, Handshake::ssahaversion, Handshake::tableType, MatchHeader::wasSuccessful, and Handshake::wordLength.

00072                                                                 {
00073   // Receives the server handshake on sockfd, adjusts the global qinfo to fit it, sends the queries read from seqReader, then receives the matches and hands each one to the aligner. Throws SSAHAException or NetworkException on failure. fp is not used.
00074   char  recvline[MAXLINE]; // NOTE(review): not referenced anywhere in this function
00075   WordSequence seq; // NOTE(review): not referenced anywhere in this function
00076 
00077   // get handshake from server
00078   // initial time out set to 12 seconds
00079   SocketInterface socket( sockfd, 12 );
00080   
00081   socket.receiveStruct(&hello);
00082 
00083   socket.checkSocketEmpty();
00084 
00085   cerr << "Server is using hash table with " 
00086        << ((hello.tableType==e2bitDNA)?gBaseBits:gResidueBits)
00087        << " bits per symbol, " << hello.wordLength 
00088        << " symbols per word.\n";
00089 
00090   if ((hello.tableType==e2bitDNA)&&(qinfo.bitsPerSymbol==gResidueBits)) {
00091     cerr << "Error: can't run a protein query against a DNA database.\n";
00092     throw SSAHAException("Can't run protein query against DNA database");
00093   }
00094   
00095   if (hello.ssahaversion != SERV_VERSION) {
00096     cerr << "Error: SSAHA client/Server version mismatch.\n";
00097     throw SSAHAException("Can't continue: client/server version mismatch");
00098   }
00099   
00100   cerr << "Server will reject queries of more than "  << hello.maxBufferSize << " words in total.\n";
00101   // a DNA query against a non-DNA table overrides the local copy of the word length, which is used below when reading the query
00102   if ((hello.tableType!=e2bitDNA) && (qinfo.bitsPerSymbol==gBaseBits)) {
00103     cerr << "Sending DNA query to protein/translated database, changing word length to " << gMaxBasesPerWord << ".\n";
00104     hello.wordLength=gMaxBasesPerWord;
00105   } 
00106   // clamp the request parameters: numRepeats to wordLength, maxInsert to wordLength-1, bandExtension to 15
00107   if (qinfo.numRepeats>hello.wordLength) {
00108     cerr << "Warning: only repeats of " << hello.wordLength << " bases or less can be masked for this data, proceeding " << "using this value.\n";
00109     qinfo.numRepeats = hello.wordLength;
00110   }
00111 
00112   cerr << "Server will attempt to screen for tandem repeats of "  << qinfo.numRepeats << " bases or less.\n";
00113 
00114   if (qinfo.maxInsert>=hello.wordLength) {
00115     cerr << "Warning: indels of up to " << hello.wordLength-1  << " bases only can be handled for this data\n";
00116     qinfo.maxInsert = hello.wordLength-1;
00117   }
00118 
00119   if ((qinfo.sortMode == eSortAndReturnSequence) && (qinfo.bandExtension > 15)) {
00120     cerr << "Warning: restricting band size for banded dynamic programming to 15\n";
00121     qinfo.bandExtension=15;
00122   }
00123 
00124   cerr << "Matches can contain up to "  << qinfo.maxInsert << " indels between successive hits.\n";
00125 
00126   cerr << "Matches can contain gaps of up to "  << qinfo.maxGap << " bases.\n";
00127 
00128   // read queries to local memory
00129   const QueryInfo dummy;
00130   vector<QueryInfo> query; 
00131   query.push_back(dummy);
00132   qinfo.numQueryWords = 0;
00133   // the reader is switched to a replace mode constructed with 'A' for DNA queries and 'X' otherwise
00134   SequenceReaderModeReplace mode((qinfo.bitsPerSymbol==gBaseBits)?'A':'X');
00135   seqReader.changeMode( &mode );
00136   // a spare entry is kept at the back to read into; a return of -1 ends the loop and the unused spare is popped
00137   while ( seqReader.getNextSequence( query.back().first, hello.wordLength ) != -1 ) {
00138     seqReader.getLastSequenceName( query.back().second );
00139     qinfo.numQueryWords += query.back().first.size();
00140     query.push_back(dummy);
00141   }
00142   query.pop_back();
00143   qinfo.numQuerySeqs = query.size();
00144   // header first, then the word sequence of every query
00145   socket.sendStruct(&qinfo);
00146 
00147   for( vector<QueryInfo>::iterator i(query.begin()) ; i != query.end() ; ++i )  {
00148     socket.sendSequence(i->first);
00149     // cerr << "|" << printWord(i->first,12) << "|\n";
00150     // WordSequence w;
00151     // w = i->first;
00152   }
00153 
00154 
00155   MatchHeader response;
00156   // wait for 600 seconds for initial response
00157   socket.setTimeOut(600);
00158   socket.receiveStruct(&response);
00159 
00160   if (response.wasSuccessful==false)  throw NetworkException("Query request failed!!");
00161 
00162   cerr << "Expecting to receive " << response.numMatches << " matches among " << response.numSubjectNames << " subject sequences.\n";
00163 
00164 
00165   MatchStoreRemote results(query);
00166   SourceReaderDummy subjectReader;
00167 
00168   int numColumns(80);
00169   // the aligner type is chosen from the query alphabet and the server's table type
00170   MatchAligner* pAligner;
00171 
00172   if (qinfo.bitsPerSymbol == gBaseBits) {
00173     if ( hello.tableType == e2bitDNA ) {
00174         pAligner = new MatchAlignerDNA
00175           ( numColumns, qinfo.bandExtension ); 
00176       } else if ( hello.tableType == e5bitProtein ) {
00177         pAligner = new MatchAlignerTranslatedProtein ( false, numColumns, qinfo.bandExtension );   
00178       } else {
00179         // ( hello.TableType == e5bitTranslatedDNA)
00180         pAligner = new MatchAlignerTranslatedDNA
00181           ( numColumns, qinfo.bandExtension );   
00182       }
00183   }  else { // query is protein
00184       if (hello.tableType == e5bitProtein) {
00185         pAligner = new MatchAlignerProtein ( numColumns, qinfo.bandExtension ); 
00186       } else {
00187        // table is translated DNA
00188         pAligner = new MatchAlignerTranslatedProtein ( true, numColumns, qinfo.bandExtension );   
00189       } 
00190   }
00191 
00192   assert( pAligner!=NULL);
00193 
00194   // ownership of *pAligner passed to aligner
00195   MatchTaskAlign aligner ( seqReader, subjectReader, pAligner, false, qinfo.sortMode==eSortAndReturnSequence );
00196   // receive one (sequence number, name) pair per subject sequence into results.match_.names_
00197   pair<SequenceNumber,std::string> p;
00198   for ( int i(0) ;  i < response.numSubjectNames ; i++ ) {
00199        socket.receiveStruct(&p.first);
00200        socket.receiveString(p.second);
00201        results.match_.names_.insert(p);
00202   } 
00203   
00204   cout << setprecision(2) << setiosflags(ios::fixed);
00205 
00206   cout << "OK: " << response.numMatches << " " << response.numSubjectNames << endl;
00207   // each match is received into results.match_.data_ and passed to the aligner; in eSortAndReturnSequence mode a string is first received into subjectReader.source_
00208   for ( int i(0); i < response.numMatches ; i++ ) {
00209     socket.receiveStruct(&results.match_.data_);
00210     if (qinfo.sortMode == eSortAndReturnSequence ) {
00211       socket.receiveString( subjectReader.source_ );
00212     }
00213     aligner( results );
00214   } 
00215   socket.checkSocketEmpty();
00216 } 

Here is the call graph for this function:


Variable Documentation

Handshake hello [static]

Definition at line 45 of file SSAHAClient.cpp.

Referenced by generateHashTable(), MatchRemote::getQuerySize(), main(), processQuery(), and sendQuery().

QueryHeader qinfo [static]

Definition at line 44 of file SSAHAClient.cpp.


Generated on Fri Dec 21 13:12:47 2007 for ssaha by  doxygen 1.5.2