EnsemblServer/OrderOrient.cpp File Reference

#include "ClientServerUtils.h"
#include "SequenceReaderFasta.h"
#include "SequenceReaderString.h"
#include <string.h>
#include <iomanip>
#include <strstream>
#include <string>
#include <map>

Include dependency graph for OrderOrient.cpp:

Go to the source code of this file.

Classes

struct  ReadInfo
struct  ReadHitOnContig
struct  ContigHitOnRead
struct  ContigJoin

Typedefs

typedef map< SequenceNumber,
ReadInfo
ReadInfoType
typedef multimap< SequenceNumber,
SequenceNumber
DodgyMatchesType
typedef map< SequenceNumber,
int > 
InsertSizesType
typedef map< SequenceNumber,
std::string > 
NamesType
typedef map< SequenceNumber,
ReadHitOnContig
ContigHitsType
typedef multimap< SequenceNumber,
ContigHitOnRead
ReadHitsType
typedef vector< ContigJoinContigJoinType

Functions

string truncateReadName (const string &name)
void checkContig (ContigHitsType &contigHits, SequenceNumber lastQueryNum)
void sendQuery (FILE *fp, int sockfd, SequenceReader &seqReader, ifstream &pairFile)
int main (int numArgs, char *args[])

Variables

static QueryHeader qinfo
ReadInfoType readInfo
map< SequenceNumber, int > clipStarts
map< SequenceNumber, int > clipEnds
vector< int > contigSizes
DodgyMatchesType dodgyMatches
InsertSizesType insertSizes
NamesType names
ReadHitsType readHits
ContigJoinType contigJoins
int maxIn = 50
int maxInsertSize = 8000
int maxInsertDiff = 50


Typedef Documentation

typedef map<SequenceNumber,ReadHitOnContig> ContigHitsType

Definition at line 113 of file OrderOrient.cpp.

typedef vector<ContigJoin> ContigJoinType

Definition at line 194 of file OrderOrient.cpp.

typedef multimap<SequenceNumber,SequenceNumber> DodgyMatchesType

Definition at line 86 of file OrderOrient.cpp.

typedef map<SequenceNumber,int> InsertSizesType

Definition at line 92 of file OrderOrient.cpp.

typedef map<SequenceNumber,std::string> NamesType

Definition at line 99 of file OrderOrient.cpp.

typedef multimap<SequenceNumber, ContigHitOnRead> ReadHitsType

Definition at line 127 of file OrderOrient.cpp.

typedef map<SequenceNumber,ReadInfo> ReadInfoType

Definition at line 75 of file OrderOrient.cpp.


Function Documentation

void checkContig ( ContigHitsType contigHits,
SequenceNumber  lastQueryNum 
)

Definition at line 228 of file OrderOrient.cpp.

References contigSizes, dodgyMatches, insertSizes, names, readHits, readInfo, and truncateReadName().

Referenced by sendQuery().

00229 {
00230             cout << "\n[ All hits received for contig, checking them...\n";
00231 
00232             for( ContigHitsType::iterator i(contigHits.begin());
00233                  i!=contigHits.end();
00234                  ++i )
00235             {
00236               cout << i->first << " " << truncateReadName(names[i->first])
00237                    << " " << i->second.position
00238                    << " " << (i->second.isForward?'F':'R');
00239 
00240               ContigHitsType::iterator myPair 
00241                 = contigHits.find(readInfo[i->first].pairNum);
00242 
00243               if (myPair==contigHits.end())
00244               {
00245                 int minInsert = (i->second.isForward) 
00246                 ? contigSizes[lastQueryNum] - i->second.position
00247                 : i->second.position;
00248                 // lower bound on insert size
00249                 cout << " no pair, lower bd on insert=" << minInsert;
00250                 if (minInsert>maxInsertSize)
00251                 {
00252                     cout << " REJECTED! - lower bound > max insert size\n";
00253                     dodgyMatches.insert
00254                       (make_pair(i->first,lastQueryNum));
00255                     continue; // next entry, don't add to readHits
00256                 }
00257               }
00258               else
00259               {
00260                 cout << " pair found"; 
00261                 if (i->first<myPair->first)
00262                   // only process for the half of the pair that has
00263                   // lowest subject num 
00264                 {
00265                   int insertSize
00266                     = abs(i->second.position-myPair->second.position);
00267                   cout << ", insert size=" << insertSize;
00268                   if (insertSize>maxInsertSize)
00269                   {
00270                     cout << " REJECTED! - insert size too large\n";
00271                     dodgyMatches.insert
00272                       (make_pair(i->first,lastQueryNum));
00273                     dodgyMatches.insert
00274                       (make_pair(myPair->first,lastQueryNum));
00275                     continue; // next entry, don't add to readHits
00276 
00277                   }
00278                   else if (i->second.isForward==myPair->second.isForward)
00279                   {
00280                     cout << " REJECTED! - both hits in same direction\n";
00281                     dodgyMatches.insert
00282                       (make_pair(i->first,lastQueryNum));
00283                     dodgyMatches.insert
00284                       (make_pair(myPair->first,lastQueryNum));
00285                     continue; // next entry, don't add to readHits
00286                   }
00287                   else
00288                   {
00289                     // check insert size does not contradict a previous one
00290                     if (insertSizes.find(i->first)!=insertSizes.end())
00291                     {
00292                       if (   abs(insertSize-insertSizes[i->first])
00293                            > maxInsertDiff)
00294                       {
00295                         cout << " REJECTED! - inconsistent insert sizes ("
00296                              << insertSize << " and " 
00297                              << insertSizes[i->first]
00298                              << ")\n";
00299                         dodgyMatches.insert
00300                           (make_pair(i->first,lastQueryNum));
00301                         dodgyMatches.insert
00302                           (make_pair(myPair->first,lastQueryNum));
00303                         continue; // next entry, don't add to readHits
00304                       }
00305                       cout << " confirmed existing insert size value\n";
00306                     }
00307                     else insertSizes[i->first]=insertSize;
00308 
00309 
00310                   } // ~else
00311                 } // ~if (i->first...
00312 
00313               } // ~else
00314 
00315               // if the read passes all this the hit gets added to readHits.
00316               // It can still be discarded if the read fails a check elsewhere
00317               // and gets added to dodgyMatches
00318               readHits.insert
00319               ( make_pair
00320                 ( i->first,ContigHitOnRead
00321                   ( lastQueryNum, i->second.position, i->second.isForward )));
00322 
00323               cout << endl;
00324             } // ~for
00325             cout << "...finished checking hits for contig ]" << endl;
00326 
00327             contigHits.clear();
00328 
00329 
00330 } // ~checkContig

Here is the call graph for this function:

Here is the caller graph for this function:

int main ( int  numArgs,
char *  args[] 
)

Definition at line 767 of file OrderOrient.cpp.

References Connect(), eSortByMatchLength, eSortByPercentMatch, gBaseBits, gResidueBits, qinfo, SA, sendQuery(), Socket(), and SSAHAException::what().

00768 {
00769 
00770   try 
00771   {
00772     int sockfd;
00773     struct sockaddr_in  servaddr;
00774     int fred;
00775 
00776     // only needed if we need to make our own fasta file
00777     ostrstream buf;
00778     //    buf << ">unnamedQuery\n" << cin.rdbuf();
00779     //  cout << a.rdbuf();
00780     istream istr(buf.rdbuf());
00781 
00782 
00783 
00784     if( numArgs!=4)
00785     {
00786       cerr 
00787 << "syntax: " << args[0] 
00788 << " serverMachine serverPort pairFilePrefixName\n"
00789 <<"serverMachine: name of machine on which server is running\n"
00790 <<"serverPort   : port number on that machine\n"
00791 <<"pairFileName : names of all valid read pairs\n";
00792 
00793       throw SSAHAException("Invalid command line input to client");
00794     }
00795     memset((void*)&qinfo, 0, sizeof(qinfo));
00796     
00797     sockfd = Socket(AF_INET, SOCK_STREAM, 0);
00798 
00799     int portNumber(atoi(args[2]));
00800 
00801     cerr << "Queries will be sent via port number " << portNumber << ".\n";
00802 
00803     qinfo.minPrint=200;//atoi(args[3]);
00804 
00805     cerr << "Only matches greater than " << qinfo.minPrint 
00806          << " bases will be reported.\n";
00807 
00808 
00809     // read in ASCII file containing pairing and clip information
00810 
00811 
00812     string pairFileName((string)args[3]+(string)".pair");
00813     ifstream pairFile(pairFileName.c_str());
00814 
00815     if (pairFile.fail())
00816     {
00817       cout << "Could not open pairs file " << args[3] << ".pair\n";
00818       throw SSAHAException("Could not open pair file");
00819     }  
00820   
00821     cout << "Reading from " << args[3] << ".pair ..." << endl;
00822 
00823     string buff;
00824     
00825     // print out any comments, leave the rest til later
00826     while (pairFile.peek()=='#')
00827     {
00828       getline(pairFile,buff,'\n');
00829       cout << buff << endl;
00830     }
00831 
00832 //      int estimatedNumReads(21000000);
00833 
00834 //      pairs.reserve(estimatedNumReads);
00835 //      clipStarts.reserve(estimatedNumReads);
00836 //      clipEnds.reserve(estimatedNumReads);
00837 
00838 //      pairs.push_back();
00839 //      clipStarts.push_back();
00840 //      clipEnds.push_back();
00841 
00842 //      while (pairFile.peek()!=EOF)
00843 //      {
00844 //        pairs.push_back();
00845 //        pairFile >> pairs.back();
00846 //        clipStarts.push_back();
00847 //        pairFile >> clipStarts.back();
00848 //        clipEnds.push_back();
00849 //        pairFile >> clipEnds.back();
00850 //      }
00851    
00852 //      pairs.pop_back();
00853 //      clipStarts.pop_back();
00854 //      clipEnds.pop_back();
00855 
00856 //      assert(pairs.size()==clipStarts.size());
00857 //      assert(pairs.size()==clipEnds.size());
00858 
00859 //      cout << " ... done\n";
00860 
00861 //      cout << "According to the pairs file there are "
00862 //       << pairs.size()-1 << " reads in the server hash table" << endl;
00863 
00864     // set up rest of query parameters (same as for SSAHAClient)
00865 
00866     qinfo.maxGap=14; //atoi(args[4]);
00867     qinfo.maxInsert=1; //atoi(args[5]);
00868     qinfo.numRepeats=1; //atoi(args[6]);
00869     qinfo.clipThreshold=10000; //atoi(args[8]);
00870     qinfo.maxMatches=0; //atoi(args[9]);
00871 
00872     string queryType("DNA");
00873     SequenceEncoder* pEncoder;
00874 
00875     if (queryType=="DNA") 
00876     {
00877       qinfo.bitsPerSymbol = gBaseBits;
00878       pEncoder = new SequenceEncoderDNA(12);
00879       cerr << "Query sequence is DNA.\n";
00880     }
00881     else if (queryType=="protein") 
00882     {
00883       qinfo.bitsPerSymbol = gResidueBits;
00884       pEncoder = new SequenceEncoderProtein(5);
00885       cerr << "Query sequence is protein.\n";
00886     }
00887     else
00888     {
00889       cerr 
00890         << "Unknown value for query type (must be \"DNA\" or \"protein\")\n";
00891       throw SSAHAException("Invalid value for queryType");
00892     } // ~if
00893 
00894     string sortMode("none");
00895     if (sortMode=="none")
00896     {
00897       qinfo.maxMatches=0;
00898     }
00899     else if (sortMode=="size")
00900     {
00901       qinfo.sortMode = eSortByMatchLength;
00902     }
00903     else if (sortMode=="percent")
00904     {
00905       qinfo.sortMode = eSortByPercentMatch;
00906     }
00907     else
00908     {
00909       cerr 
00910         << "Unknown value for sort mode "
00911         << "(must be \"none\", \"size\" or \"percent\")\n";
00912       throw SSAHAException("Invalid value for sortMode");
00913     } // ~if
00914 
00915     while (cin.peek()==' ') cin.ignore(); // zap any leading spaces
00916 
00917     SequenceReader* pReader;
00918     string source;
00919 
00920     if (cin.peek()=='>') 
00921     {
00922       cerr 
00923         << "First nonspace character is a \">\", assuming input text "
00924         << "is in fasta format.\n";
00925       pReader = new SequenceReaderFile(cin, '>', '>', pEncoder, cerr );
00926     } // ~if   
00927     else
00928     {
00929       cerr << "Assuming input text is a plain string of " 
00930            << queryType << " data.\n";
00931 
00932       //      ostrstream buf;
00933       buf << ">unnamedQuery\n" << cin.rdbuf();
00934       //  cout << a.rdbuf();
00935       istream istr(buf.rdbuf());
00936       pReader = new SequenceReaderFile(istr, '>', '>', pEncoder, cerr );
00937 
00938       //      cin >> source;
00939       //if(queryType=="DNA")pReader = new SequenceReaderStringDNA(source,cerr);
00940       //     else pReader = new SequenceReaderStringProtein(source,cerr);
00941                                                                     
00942     } // ~else
00943 
00944     //  bzero(&servaddr, sizeof(servaddr));
00945     memset((void*)&servaddr, 0, sizeof(servaddr));
00946     servaddr.sin_family = AF_INET;
00947     servaddr.sin_port = htons(portNumber);
00948 
00949     cerr << "Server is assumed to be running on machine " << args[1] << ".\n";
00950 
00951     struct hostent *hp;
00952     hp =  gethostbyname(args[1]);
00953     if (hp==NULL) throw NetworkException("Invalid host name");
00954 
00955       memcpy(&(servaddr.sin_addr.s_addr), *(hp->h_addr_list), sizeof(struct
00956       in_addr));
00957 
00958 
00959     Connect(sockfd, (SA *) &servaddr, sizeof(servaddr));
00960     
00961     sendQuery(stdin, sockfd, *pReader, pairFile);
00962 
00963     exit(0);
00964 
00965   }    
00966   catch (const NetworkException& err )
00967   {
00968     cerr << "Caught NetworkException: " << err.what() << "\n";
00969     cout << "ERROR: " << err.what() << endl;
00970     exit(1);
00971   }
00972   catch (const SSAHAException& err )
00973   {
00974     cerr << "Caught SSAHA exception: " << err.what() << "\n";
00975     cout << "ERROR: " << err.what() << endl;
00976     exit(1);
00977   }
00978   catch (const std::exception& err )
00979   {
00980     cerr << "Caught exception: " << err.what() << "\n";
00981     cout << "ERROR: " << err.what() << endl;
00982     exit(1);
00983   }
00984 
00985 
00986 
00987 }

Here is the call graph for this function:

void sendQuery ( FILE *  fp,
int  sockfd,
SequenceReader seqReader,
ifstream &  pairFile 
)

Definition at line 336 of file OrderOrient.cpp.

References QueryHeader::bitsPerSymbol, checkContig(), SocketInterface::checkSocketEmpty(), clipEnds, clipStarts, contigSizes, dodgyMatches, gBaseBits, SequenceReader::getLastSequenceName(), SequenceReader::getNextSequence(), gMaxBasesPerWord, gResidueBits, hello, Handshake::maxBufferSize, QueryHeader::maxGap, QueryHeader::maxInsert, MAXLINE, names, MatchInfo::numBases, MatchHeader::numMatches, QueryHeader::numQuerySeqs, QueryHeader::numQueryWords, QueryHeader::numRepeats, MatchHeader::numSubjectNames, qinfo, MatchInfo::queryEnd, MatchInfo::queryNum, MatchInfo::queryStart, readHits, readInfo, SocketInterface::receiveString(), SocketInterface::receiveStruct(), SocketInterface::sendSequence(), SocketInterface::sendStruct(), MatchInfo::subjectEnd, MatchInfo::subjectNum, MatchInfo::subjectStart, truncateReadName(), MatchHeader::wasSuccessful, and Handshake::wordLength.

Referenced by main().

00337 {
00338 
00339   typedef pair< WordSequence, std::string> QueryInfo;
00340 
00341   char  recvline[MAXLINE];
00342 
00343   WordSequence seq;
00344 
00345   // get handshake from server
00346 
00347   SocketInterface socket( sockfd);
00348   
00349   Handshake hello;
00350 
00351   socket.receiveStruct(&hello);
00352 
00353   socket.checkSocketEmpty();
00354 
00355   cerr << "Server is using hash table with " << hello.bitsPerSymbol 
00356        << " bits per symbol, " << hello.wordLength 
00357        << " symbols per word.\n";
00358 
00359   if ((hello.bitsPerSymbol==gBaseBits)&&(qinfo.bitsPerSymbol==gResidueBits))
00360   {
00361     cerr << "Error: can't run a protein query against a DNA database.\n";
00362     throw SSAHAException("Can't run protein query against DNA database");
00363   } // ~if
00364   cerr << "Server will reject queries of more than " 
00365        << hello.maxBufferSize << " words in total.\n";
00366 
00367   if (   (hello.bitsPerSymbol==gResidueBits)
00368        &&(qinfo.bitsPerSymbol==gBaseBits) )
00369   {
00370     cerr 
00371       << "Sending DNA query to protein database, changing word length to "
00372       << gMaxBasesPerWord << ".\n";
00373     hello.wordLength=gMaxBasesPerWord;
00374   } // ~if
00375 
00376   if (qinfo.numRepeats>hello.wordLength)
00377   {
00378     cerr << "Warning: only repeats of " << hello.wordLength 
00379          << " bases or less can be masked for this data, proceeding"
00380          << "using this value.\n";
00381     qinfo.numRepeats = hello.wordLength;
00382   }
00383 
00384   cerr << "Server will attempt to screen for tandem repeats of " 
00385        << qinfo.numRepeats << " bases or less.\n";
00386 
00387   if (qinfo.maxInsert>=hello.wordLength)
00388   {
00389     cerr << "Warning: indels of up to " << hello.wordLength-1 
00390          << " bases only can be handled for this data\n";
00391     qinfo.maxInsert = hello.wordLength-1;
00392   }
00393 
00394   cerr << "Matches can contain up to " 
00395        << qinfo.maxInsert << " indels between successive hits.\n";
00396 
00397   cerr << "Matches can contain gaps of up to " 
00398        << qinfo.maxGap << " bases.\n";
00399 
00400   // read queries to local memory
00401 
00402   //  int totalNumWords(0);
00403   const QueryInfo dummy;
00404   vector<QueryInfo> query; 
00405   query.push_back(dummy);
00406   qinfo.numQueryWords = 0;
00407   
00408   contigSizes.push_back();
00409   
00410   while ( seqReader.getNextSequence
00411           ( query.back().first, hello.wordLength ) != -1 )
00412   {
00413     seqReader.getLastSequenceName( query.back().second );
00414     qinfo.numQueryWords += query.back().first.size();
00415 
00416     contigSizes.push_back
00417     ( 
00418         ( query.back().first.size()-1) * hello.wordLength 
00419         + query.back().first.getNumBasesInLast()
00420     );
00421     query.push_back(dummy);
00422   }
00423   query.pop_back();
00424 
00425   //  QueryHeader qinfo;
00426 
00427   qinfo.numQuerySeqs = query.size();
00428 
00429   socket.sendStruct(&qinfo);
00430 
00431   for( vector<QueryInfo>::iterator i(query.begin()) ; 
00432        i != query.end() ; ++i )
00433   {
00434     socket.sendSequence(i->first);
00435   }
00436 
00437 
00438   MatchHeader response;
00439   socket.receiveStruct(&response);
00440 
00441   if (response.wasSuccessful==false) 
00442     throw NetworkException("Query request failed!!");
00443 
00444   cout << "Expecting to receive " << response.numMatches 
00445        << " matches among " << response.numSubjectNames 
00446        << " subject sequences.\n";
00447 
00448   pair<SequenceNumber,std::string> p;
00449 
00450   for ( int i(0) ;  i < response.numSubjectNames ; i++ )
00451   {
00452        socket.receiveStruct(&p.first);
00453        socket.receiveString(p.second);
00454        names.insert(p);
00455        //       cout << i << ": " << p.first << " " << p.second << endl;
00456   } // ~for
00457 
00458   // Here we parse the pair file to get pair info
00459   // and parse the read names to get clipInfo
00460 
00461   int  pairBufSize(200);
00462   char pairBuf[pairBufSize];
00463   int numPairs(0);
00464 
00465   // lastWanted is the largest sequence number which appears in the match,
00466   // ie for which we are interested in pairing information
00467 
00468   SequenceNumber p1(0), p2(0), firstWanted(0), lastWanted(0);
00469 
00470   if (!names.empty()) 
00471   {
00472     NamesType::iterator i(names.begin());
00473     firstWanted=i->first;
00474     i=names.end();
00475     lastWanted=(--i)->first;
00476     cout << "Range of interest: " << firstWanted 
00477          << " to " << lastWanted << endl;
00478   }
00479 
00480   int insertMean, insertSD;
00481 
00482   cout << "Reading the rest of the pair file ..." << endl;
00483 
00484   while (pairFile.getline(pairBuf, pairBufSize, '\n'))
00485   {
00486     sscanf(pairBuf,"%d %d",&p1, &p2);
00487     // p1 always less than p2, so scan until p2>=firstWanted
00488     if (p2 >= firstWanted) break;
00489   }
00490 
00491   do
00492   {
00493     sscanf(pairBuf,"%d %d %d %d",&p1, &p2, &insertMean, &insertSD);
00494     // pairs are in order of p1, so finish once p1 gets past lastWanted
00495     if (p1 > lastWanted) break;
00496     if (names.find(p1)!=names.end()) 
00497     { // pairs[p1]=p2; 
00498       readInfo.insert(make_pair(p1,ReadInfo(p2,insertMean,insertSD)));
00499       numPairs++; 
00500     }
00501     if (names.find(p2)!=names.end()) 
00502     { // pairs[p2]=p1; 
00503       readInfo.insert(make_pair(p2,ReadInfo(p1,insertMean,insertSD)));
00504       numPairs++; 
00505     }
00506   }
00507   while (pairFile.getline(pairBuf, pairBufSize, '\n'));
00508 
00509   pairFile.close();
00510 
00511   cout << "Found pairing information for " << numPairs << " reads\n";
00512 
00513   string::size_type tabPos;
00514 
00515   // Now grab the clip info from the names
00516 
00517   for ( NamesType::iterator i(names.begin());
00518         i!=names.end(); ++i )
00519   {
00520     tabPos=i->second.find('\t');
00521     ++tabPos=i->second.find('\t',tabPos);
00522     sscanf
00523       (&(i->second.c_str()[tabPos]),
00524        "\tcl|%d\tcr|%d",
00525        &clipStarts[i->first],
00526        &clipEnds[i->first] );
00527 
00528   } // ~FOR
00529 
00530 
00531   // Now read in the match info from the socket.
00532 
00533   MatchInfo thisMatch;
00534 
00535   //  cout << setprecision(2) << setiosflags(ios::fixed);
00536 
00537   //  cout << "OK: " << response.numMatches 
00538   //     << " " << response.numSubjectNames << endl;
00539 
00540 
00541   int numAccepted(0);
00542   SequenceNumber lastQueryNum(0);
00543 
00544   ContigHitsType contigHits;
00545 
00546   for ( int i(0); i < response.numMatches ; i++ )
00547   {
00548 
00549     socket.receiveStruct(&thisMatch);
00550     cout 
00551       << query[thisMatch.queryNum-1].second << "\t"
00552       //query[thisMatch.queryNum-1].second << "\t"
00553       << thisMatch.queryStart << "\t"
00554       << thisMatch.queryEnd << "\t"
00555       << thisMatch.subjectNum << "\t"
00556       << truncateReadName(names[thisMatch.subjectNum]) << "\t"
00557       << thisMatch.subjectStart << "\t"
00558       << thisMatch.subjectEnd << "\t"
00559       << ((thisMatch.isForward)?"F":"R") << "\t"
00560       << thisMatch.numBases << " "; 
00561     //     << "\t"
00562     //     << 100.0*thisMatch.  numBases/
00563     //       (thisMatch.queryEnd-thisMatch.queryStart+1);
00564 
00565     // Only use hits on reads for which there is a read pair.
00566     // Hits must extend to within maxIn pairs of clip points.
00567 
00568     //    if (    (pairs[thisMatch.subjectNum]!=0) // half of a pair
00569     if (    (readInfo.find(thisMatch.subjectNum)!=readInfo.end()) 
00570          && ( thisMatch.subjectStart 
00571               <   clipStarts[ thisMatch.subjectNum ] + maxIn )  
00572          && ( thisMatch.subjectEnd   
00573               >   clipEnds[   thisMatch.subjectNum ] - maxIn )  )
00574     {
00575 
00576       if ((thisMatch.queryNum!=lastQueryNum)&&(lastQueryNum!=0)) 
00577       {
00578         // Then we have moved onto the hits for the next contig.
00579         // Check the hits for the previous one
00580         checkContig(contigHits, lastQueryNum);
00581       } // ~if (thisMatch ..
00582       lastQueryNum=thisMatch.queryNum;
00583 
00584       // place details of this hit into contigHits
00585       if (!contigHits.insert
00586           ( make_pair
00587             ( thisMatch.subjectNum,ReadHitOnContig
00588               ( thisMatch.isForward 
00589                 ?   thisMatch.queryStart
00590                 - thisMatch.subjectStart
00591                 + 1
00592                 :   contigSizes[thisMatch.queryNum]
00593                 - thisMatch.queryStart
00594                 + thisMatch.subjectStart, 
00595                 thisMatch.isForward
00596               )
00597               )
00598             ).second) 
00599       {
00600         // If this insert fails then the same read has a full length hit
00601         // twice in the same contig. It is therefore dodgy.
00602         cout << "REJECTED! - same read hits twice in same contig! "
00603              << query[thisMatch.queryNum-1].second << "0"
00604              << thisMatch.subjectNum << " "
00605              << truncateReadName(names[thisMatch.subjectNum]) << "\t";
00606         dodgyMatches.insert
00607           (make_pair(thisMatch.subjectNum,thisMatch.queryNum));
00608 
00609 
00610       } // ~if (!contigHits ....
00611       else
00612       {
00613         // Accepted means `accepted for further checking.' Hit can
00614         // still be discarded at the checkContigs stage
00615 
00616         cout << "accepted for checking, pair=" 
00617              << readInfo[thisMatch.subjectNum].pairNum;
00618         numAccepted++;
00619       }
00620 
00621     } // ~if
00622 
00623     cout << endl;
00624     
00625 
00626   } // ~for
00627 
00628   // Check the hits for the last contig
00629   if (contigHits.size()!=0) checkContig(contigHits, lastQueryNum);
00630 
00631   cout << numAccepted << " reads passed the initial checking stage" << endl;
00632 
00633   socket.checkSocketEmpty();
00634 
00635   if (dodgyMatches.size()!=0)
00636   {
00637     cout << "Found " << dodgyMatches.size() << " potential inconsistencies:\n";
00638     for (DodgyMatchesType::iterator i(dodgyMatches.begin());
00639          i!=dodgyMatches.end();++i)
00640     {
00641       cout << "read: " << i->first << " " << truncateReadName(names[i->first])
00642            << " contig:" << i->second << " " << query[i->second-1].second 
00643            << endl;
00644       // don't attempt to deduce any O&O from these
00645       readHits.erase(i->first);
00646     }
00647     cout 
00648     << "These reads will not be used for the order/orientation calculation\n";
00649   }
00650 
00651   cout << "Found " << readHits.size() << " potential join points:\n";
00652 
00653   ReadHitsType::iterator ub,i(readHits.begin());
00654   //  SequenceNumber pairNum;
00655   ReadInfo* pReadInfo;
00656 
00657   
00658   //  for (ReadHitsType::iterator i(readHits.begin());i!=readHits.end();++i)
00659   while(i!=readHits.end())
00660   {
00661     cout << " --- \n";
00662     ub = readHits.upper_bound(i->first);
00663     pReadInfo = &(readInfo[i->first]);
00664     //    pairNum = pairs[i->first];
00665     //    InsertSizesType::iterator myInsert;
00666 
00667     if ((pReadInfo->pairNum!=0)&&(i->first<pReadInfo->pairNum))
00668     {
00669       cout << "contigs hit by forward read: " <<  i->first << " " 
00670            << truncateReadName(names[i->first]) << endl; 
00671 
00672       //      myInsert = insertSizes.find(i->first);
00673       //      if (myInsert!=insertSizes.end())
00674       //      {
00675       //        cout << "Got an insert size of " << myInsert->second 
00676       //             << " for this read pair" << endl;
00677       //      }
00678       //      else cout << "No insert size for this read pair" << endl;
00679 
00680       for (ReadHitsType::iterator j(i);j!=ub;j++)
00681       {      
00682         cout << j->second.contigNum << " " 
00683              << query[j->second.contigNum-1].second << " "
00684              << j->second.position << " " 
00685              << (j->second.isForward?'F':'R') << endl;
00686         for (ReadHitsType::iterator 
00687                k(readHits.lower_bound(pReadInfo->pairNum));
00688              k!=readHits.upper_bound(pReadInfo->pairNum);k++)
00689         {      
00690           if (j->second.contigNum!=k->second.contigNum)
00691           {
00692             contigJoins.push_back(ContigJoin(&j->second,&k->second,pReadInfo));
00693             //      if (myInsert==insertSizes.end())
00694             //        contigJoins.push_back(ContigJoin(j->second,k->second));
00695             //      else
00696             //     contigJoins.push_back(ContigJoin(j->second,k->second, 
00697             //    myInsert->second));
00698 
00699           }
00700         } // ~for k
00701 
00702       } // ~for j
00703       cout << "contigs hit by reverse read: " 
00704            << pReadInfo->pairNum << " "
00705            << truncateReadName(names[pReadInfo->pairNum]) << endl; 
00706       for (ReadHitsType::iterator 
00707              j(readHits.lower_bound(pReadInfo->pairNum));
00708            j!=readHits.upper_bound(pReadInfo->pairNum);j++)
00709       {      
00710         cout << j->second.contigNum << " " 
00711              << query[j->second.contigNum-1].second << " "
00712              << j->second.position << " " 
00713              << (j->second.isForward?'F':'R') << endl;
00714 
00715       } // ~for j
00716     } // ~if
00717 
00718     i=readHits.upper_bound(i->first);
00719 
00720 
00721 
00722   } // ~while
00723 
00724 
00725     sort(contigJoins.begin(),contigJoins.end());
00726 
00727     cout << "Found " << contigJoins.size() 
00728          << " potential joins between contigs\n";
00729 
00730     for (ContigJoinType::iterator i(contigJoins.begin());
00731          i!=contigJoins.end();++i)
00732     { 
00733       cout << "Contig " << i->a->contigNum << " " 
00734            << query[i->a->contigNum-1].second 
00735            << i->a->position << "/"
00736            << contigSizes[i->a->contigNum] << " "
00737            << (i->a->isForward?'F':'R')
00738            << " joins contig "
00739            << i->b->contigNum << " " << query[i->b->contigNum-1].second 
00740            << i->b->position << "/" 
00741            << contigSizes[i->b->contigNum] << " "
00742            << (i->b->isForward?'F':'R') << " " 
00743            << i->gapSize << " "
00744            << i->gapSD << endl;
00745 
00746       //      if (i->distance!=ContigJoin::notDetermined)
00747       //      {
00748       //        cout << " distance=" << i->distance;
00749       //`      } // ~if
00750       cout << endl;
00751 
00752     } // ~for
00753 
00754 
00755 
00756 
00757 
00758 } // ~sendQuery

Here is the call graph for this function:

Here is the caller graph for this function:

string truncateReadName ( const string &  name  ) 

Definition at line 215 of file OrderOrient.cpp.

Referenced by checkContig(), and sendQuery().

00216 {
00217   string::size_type nameEnd = name.find_first_of('\t');
00218   return (( nameEnd == string::npos ) ? name : name.substr(0,nameEnd));
00219 
00220 }

Here is the caller graph for this function:


Variable Documentation

map<SequenceNumber, int> clipEnds

Definition at line 80 of file OrderOrient.cpp.

Referenced by sendQuery().

map<SequenceNumber, int> clipStarts

Definition at line 79 of file OrderOrient.cpp.

Referenced by sendQuery().

ContigJoinType contigJoins

Definition at line 200 of file OrderOrient.cpp.

vector<int> contigSizes

Definition at line 81 of file OrderOrient.cpp.

Referenced by checkContig(), ContigJoin::ContigJoin(), and sendQuery().

DodgyMatchesType dodgyMatches

Definition at line 87 of file OrderOrient.cpp.

Referenced by checkContig(), and sendQuery().

InsertSizesType insertSizes

Definition at line 93 of file OrderOrient.cpp.

Referenced by checkContig().

int maxIn = 50

Definition at line 204 of file OrderOrient.cpp.

int maxInsertDiff = 50

Definition at line 212 of file OrderOrient.cpp.

int maxInsertSize = 8000

Definition at line 208 of file OrderOrient.cpp.

NamesType names

Definition at line 100 of file OrderOrient.cpp.

Referenced by checkContig(), main(), and sendQuery().

QueryHeader qinfo [static]

Definition at line 59 of file OrderOrient.cpp.

Referenced by main(), processQuery(), and sendQuery().

ReadHitsType readHits

Definition at line 128 of file OrderOrient.cpp.

Referenced by checkContig(), and sendQuery().

ReadInfoType readInfo

Definition at line 76 of file OrderOrient.cpp.

Referenced by checkContig(), and sendQuery().


Generated on Fri Dec 21 13:12:44 2007 for ssaha by  doxygen 1.5.2