00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031 #include "ClientServerUtils.h"
00032 #include "SSAHAClient.h"
00033 #include "SequenceReaderFasta.h"
00034 #include "SequenceReaderString.h"
00035 #include "MatchStore.h"
00036 #include "MatchAligner.h"
00037 #include <string.h>
00038 #include <iomanip>
00039 #include <strstream>
00040
00041
00042
00043
00044 static QueryHeader qinfo;
00045 static Handshake hello;
00046
00047 MatchRemote::MatchRemote( vector<QueryInfo>& query ) :
00048 query_(query) {}
00049
00050 MatchRemote::~MatchRemote() {}
00051
00052
00053 string MatchRemote::getQueryName( void ) const {
00054 return query_[ data_.queryNum-1 ].second;
00055 }
00056
00057 int MatchRemote::getQuerySize( void ) const {
00058 WordSequence& thisSeq(query_[ data_.queryNum-1 ].first);
00059 return (thisSeq.size()==0)
00060 ? 0
00061 : (thisSeq.size()-1)*hello.wordLength + thisSeq.getNumBasesInLast();
00062 }
00063
00064
00065 void SourceReaderDummy::extractSource ( char** pSource,
00066 SequenceNumber seqNum,
00067 SequenceOffset seqStart,
00068 SequenceOffset seqEnd ) {
00069 *pSource = (char*) source_.c_str();
00070 }
00071
00072 void sendQuery(FILE *fp, int sockfd, SequenceReader& seqReader) {
00073
00074 char recvline[MAXLINE];
00075 WordSequence seq;
00076
00077
00078
00079 SocketInterface socket( sockfd, 12 );
00080
00081 socket.receiveStruct(&hello);
00082
00083 socket.checkSocketEmpty();
00084
00085 cerr << "Server is using hash table with "
00086 << ((hello.tableType==e2bitDNA)?gBaseBits:gResidueBits)
00087 << " bits per symbol, " << hello.wordLength
00088 << " symbols per word.\n";
00089
00090 if ((hello.tableType==e2bitDNA)&&(qinfo.bitsPerSymbol==gResidueBits)) {
00091 cerr << "Error: can't run a protein query against a DNA database.\n";
00092 throw SSAHAException("Can't run protein query against DNA database");
00093 }
00094
00095 if (hello.ssahaversion != SERV_VERSION) {
00096 cerr << "Error: SSAHA client/Server version mismatch.\n";
00097 throw SSAHAException("Can't continue: client/server version mismatch");
00098 }
00099
00100 cerr << "Server will reject queries of more than " << hello.maxBufferSize << " words in total.\n";
00101
00102 if ((hello.tableType!=e2bitDNA) && (qinfo.bitsPerSymbol==gBaseBits)) {
00103 cerr << "Sending DNA query to protein/translated database, changing word length to " << gMaxBasesPerWord << ".\n";
00104 hello.wordLength=gMaxBasesPerWord;
00105 }
00106
00107 if (qinfo.numRepeats>hello.wordLength) {
00108 cerr << "Warning: only repeats of " << hello.wordLength << " bases or less can be masked for this data, proceeding" << "using this value.\n";
00109 qinfo.numRepeats = hello.wordLength;
00110 }
00111
00112 cerr << "Server will attempt to screen for tandem repeats of " << qinfo.numRepeats << " bases or less.\n";
00113
00114 if (qinfo.maxInsert>=hello.wordLength) {
00115 cerr << "Warning: indels of up to " << hello.wordLength-1 << " bases only can be handled for this data\n";
00116 qinfo.maxInsert = hello.wordLength-1;
00117 }
00118
00119 if ((qinfo.sortMode == eSortAndReturnSequence) && (qinfo.bandExtension > 15)) {
00120 cerr << "Warning: restricting band size for banded dynamic programming to 15\n";
00121 qinfo.bandExtension=15;
00122 }
00123
00124 cerr << "Matches can contain up to " << qinfo.maxInsert << " indels between successive hits.\n";
00125
00126 cerr << "Matches can contain gaps of up to " << qinfo.maxGap << " bases.\n";
00127
00128
00129 const QueryInfo dummy;
00130 vector<QueryInfo> query;
00131 query.push_back(dummy);
00132 qinfo.numQueryWords = 0;
00133
00134 SequenceReaderModeReplace mode((qinfo.bitsPerSymbol==gBaseBits)?'A':'X');
00135 seqReader.changeMode( &mode );
00136
00137 while ( seqReader.getNextSequence( query.back().first, hello.wordLength ) != -1 ) {
00138 seqReader.getLastSequenceName( query.back().second );
00139 qinfo.numQueryWords += query.back().first.size();
00140 query.push_back(dummy);
00141 }
00142 query.pop_back();
00143 qinfo.numQuerySeqs = query.size();
00144
00145 socket.sendStruct(&qinfo);
00146
00147 for( vector<QueryInfo>::iterator i(query.begin()) ; i != query.end() ; ++i ) {
00148 socket.sendSequence(i->first);
00149
00150
00151
00152 }
00153
00154
00155 MatchHeader response;
00156
00157 socket.setTimeOut(600);
00158 socket.receiveStruct(&response);
00159
00160 if (response.wasSuccessful==false) throw NetworkException("Query request failed!!");
00161
00162 cerr << "Expecting to receive " << response.numMatches << " matches among " << response.numSubjectNames << " subject sequences.\n";
00163
00164
00165 MatchStoreRemote results(query);
00166 SourceReaderDummy subjectReader;
00167
00168 int numColumns(80);
00169
00170 MatchAligner* pAligner;
00171
00172 if (qinfo.bitsPerSymbol == gBaseBits) {
00173 if ( hello.tableType == e2bitDNA ) {
00174 pAligner = new MatchAlignerDNA
00175 ( numColumns, qinfo.bandExtension );
00176 } else if ( hello.tableType == e5bitProtein ) {
00177 pAligner = new MatchAlignerTranslatedProtein ( false, numColumns, qinfo.bandExtension );
00178 } else {
00179
00180 pAligner = new MatchAlignerTranslatedDNA
00181 ( numColumns, qinfo.bandExtension );
00182 }
00183 } else {
00184 if (hello.tableType == e5bitProtein) {
00185 pAligner = new MatchAlignerProtein ( numColumns, qinfo.bandExtension );
00186 } else {
00187
00188 pAligner = new MatchAlignerTranslatedProtein ( true, numColumns, qinfo.bandExtension );
00189 }
00190 }
00191
00192 assert( pAligner!=false);
00193
00194
00195 MatchTaskAlign aligner ( seqReader, subjectReader, pAligner, false, qinfo.sortMode==eSortAndReturnSequence );
00196
00197 pair<SequenceNumber,std::string> p;
00198 for ( int i(0) ; i < response.numSubjectNames ; i++ ) {
00199 socket.receiveStruct(&p.first);
00200 socket.receiveString(p.second);
00201 results.match_.names_.insert(p);
00202 }
00203
00204 cout << setprecision(2) << setiosflags(ios::fixed);
00205
00206 cout << "OK: " << response.numMatches << " " << response.numSubjectNames << endl;
00207
00208 for ( int i(0); i < response.numMatches ; i++ ) {
00209 socket.receiveStruct(&results.match_.data_);
00210 if (qinfo.sortMode == eSortAndReturnSequence ) {
00211 socket.receiveString( subjectReader.source_ );
00212 }
00213 aligner( results );
00214 }
00215 socket.checkSocketEmpty();
00216 }
00217
00218
00219
00220 int main(int numArgs, char* args[] ) {
00221
00222 try {
00223 int sockfd;
00224 struct sockaddr_in servaddr;
00225 int fred;
00226
00227
00228 ostrstream buf;
00229 istream istr(buf.rdbuf());
00230
00231 if(( numArgs<11)||( numArgs>13))
00232 {
00233 cerr
00234 <<"syntax: " << args[0]
00235 <<" serverMachine serverPort minMatchSize maxGap maxInsert numRepeats\n"
00236 <<"queryType clipThreshold maxMatches sortMode "
00237 << " [substituteThreshold] [bandExtension]\n"
00238 <<"serverMachine: name of machine on which server is running\n"
00239 <<"serverPort : port number on that machine\n"
00240 <<"minMatchSize : matches must contain at least this many matching symbols\n"
00241 <<"maxGap : matches may contain gaps of up to this many symbols\n"
00242 <<"maxInsert : max number of indels between successive hits in a match\n"
00243 <<"numRepeats : screen out repeating motifs of up to this many symbols\n"
00244 <<"queryType : DNA or protein\n"
00245 <<"clipThreshold: ignore words occurring this many more times than expected\n"
00246 <<"maxNumMatches: at most this many matches for each query sequence\n"
00247 <<" (set to 0 to obtain all matches unsorted)\n"
00248 <<"sortMode : none, size, percent or align\n"
00249 <<" (ignored if maxNumMatches set to 0)\n"
00250 <<"substituteThreshold : allow 1 base/amino mismatch in words that occur\n"
00251 <<" up to this many more times than expected (default 0)\n"
00252 <<"bandExtension: band size for banded dynamic programming extends this\n"
00253 <<" far from the diagonal (ignored unless sortMode set to\n"
00254 <<" align, defaults to 0)\n";
00255
00256 throw SSAHAException("Invalid command line input to client");
00257 }
00258 memset((void*)&qinfo, 0, sizeof(qinfo));
00259
00260 sockfd = Socket(AF_INET, SOCK_STREAM, 0);
00261
00262 int portNumber(atoi(args[2]));
00263
00264 cerr << "Queries will be sent via port number " << portNumber << ".\n";
00265
00266 qinfo.minPrint=atoi(args[3]);
00267
00268 cerr << "Only matches greater than " << qinfo.minPrint << " bases will be reported.\n";
00269
00270 qinfo.maxGap=atoi(args[4]);
00271 qinfo.maxInsert=atoi(args[5]);
00272 qinfo.numRepeats=atoi(args[6]);
00273 qinfo.clipThreshold=atoi(args[8]);
00274 qinfo.maxMatches=atoi(args[9]);
00275
00276 string queryType(args[7]);
00277 SequenceEncoder* pEncoder;
00278
00279 if (queryType=="DNA") {
00280 qinfo.bitsPerSymbol = gBaseBits;
00281 pEncoder = new SequenceEncoderDNA(12);
00282 cerr << "Query sequence is DNA.\n";
00283 } else if (queryType=="protein") {
00284 qinfo.bitsPerSymbol = gResidueBits;
00285 pEncoder = new SequenceEncoderProtein(5);
00286 cerr << "Query sequence is protein.\n";
00287 } else {
00288 cerr << "Unknown value for query type (must be \"DNA\" or \"protein\")\n";
00289 throw SSAHAException("Invalid value for queryType");
00290 }
00291
00292 string sortMode(args[10]);
00293 if (sortMode=="none") {
00294 qinfo.maxMatches=0;
00295 }
00296 else if (sortMode=="size") {
00297 qinfo.sortMode = eSortByMatchLength;
00298 } else if (sortMode=="percent") {
00299 qinfo.sortMode = eSortByPercentMatch;
00300 } else if (sortMode=="align") {
00301 qinfo.sortMode = eSortAndReturnSequence;
00302 } else {
00303 cerr << "Unknown value for sort mode " << "(must be \"none\", \"size\", \"percent\" or \"align\")\n";
00304 throw SSAHAException("Invalid value for sortMode");
00305 }
00306
00307 qinfo.substituteThreshold = ((numArgs>11)?atoi(args[11]):0);
00308 qinfo.bandExtension = ((numArgs>12)?atoi(args[12]):0);
00309
00310 while (cin.peek()==' ') cin.ignore();
00311
00312 SequenceReader* pReader;
00313 string source;
00314
00315 if (cin.peek()=='>') {
00316 cerr << "First nonspace character is a \">\", assuming input text " << "is in fasta format.\n";
00317 buf << cin.rdbuf();
00318 pReader = new SequenceReaderFile(istr, '>', '>', pEncoder, cerr );
00319 } else {
00320 cerr << "Assuming input text is a plain string of " << queryType << " data.\n";
00321 buf << ">unnamedQuery\n" << cin.rdbuf();
00322 pReader = new SequenceReaderFile(istr, '>', '>', pEncoder, cerr );
00323 }
00324
00325 memset((void*)&servaddr, 0, sizeof(servaddr));
00326 servaddr.sin_family = AF_INET;
00327 servaddr.sin_port = htons(portNumber);
00328
00329 cerr << "Server is assumed to be running on machine " << args[1] << ".\n";
00330
00331 struct hostent *hp;
00332 hp = gethostbyname(args[1]);
00333 if (hp==NULL) throw NetworkException("Invalid host name");
00334
00335 memcpy(&(servaddr.sin_addr.s_addr), *(hp->h_addr_list), sizeof(struct
00336 in_addr));
00337
00338 Connect(sockfd, (SA *) &servaddr, sizeof(servaddr));
00339
00340 sendQuery(stdin, sockfd, *pReader);
00341
00342 exit(0);
00343
00344 }
00345 catch (const NetworkException& err ) {
00346 cerr << "Caught NetworkException: " << err.what() << "\n";
00347 if (((string)err.what()).substr(0,16)=="getAtLeast error") {
00348 cout << "ERROR: lost connection, please resubmit your query" << endl;
00349 } else {
00350 cout << "ERROR: " << err.what() << endl;
00351 }
00352 exit(1);
00353 }
00354 catch (const SSAHAException& err ) {
00355 cerr << "Caught SSAHA exception: " << err.what() << "\n";
00356 cout << "ERROR: " << err.what() << endl;
00357 exit(1);
00358 }
00359 catch (const std::exception& err ) {
00360 cerr << "Caught exception: " << err.what() << "\n";
00361 cout << "ERROR: " << err.what() << endl;
00362 exit(1);
00363 }
00364
00365 }
00366
00367
00368