EnsemblServer/SSAHAWrapper.cpp

Go to the documentation of this file.
00001 
00002 // #######################################################################
00003 
00004 // SSAHA : Sequence Search and Alignment by Hashing Algorithm
00005 // Version 3.2, released 1st March 2004
00006 // Copyright (c) Genome Research 2002
00007 
00008 // SSAHA is free software; you can redistribute it and/or modify 
00009 // it under the terms of version 2 of the GNU General Public Licence
00010 // as published by the Free Software Foundation.
00011  
00012 // This program is distributed in the hope that it will be useful,
00013 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00014 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015 // GNU General Public Licence for more details.
00016  
00017 // You should have received a copy of the GNU General Public Licence
00018 // along with this program; if not, write to the Free Software
00019 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
00020 // or see the on-line version at http://www.gnu.org/copyleft/gpl.txt
00021 
00022 // #######################################################################
00023 
00024 // Module Name  : SSAHAWrapper
00025 // File Name    : SSAHAWrapper.cpp
00026 // Language     : C++
00027 // Module Author: Anthony J. Cox (ac2@sanger.ac.uk)
00028 
00029 // Description:
00030 // Implements C interface for SSAHA
00031 
00032 
00033 // Includes:
00034 #include "SSAHAWrapper.h"
00035 #include "SequenceReaderFasta.h"
00036 #include "HashTableTranslated.h"
00037 #include "SequenceEncoder.h"
00038 #include <vector>
00039 #include <string>
00040 
00041 // ### Function Definitions ###
00042 
00043 // Name:
00044 // Arguments:
00045 // TYPE  NAME  IN/OUT COMMENT
00046 // Returns: TYPE COMMENT
00047 
00048 struct CustomHitStore : public vector<SSAHAHit>, public HitList
00049 {
00050   virtual void addHit
00051    ( const PositionInDatabase& hitPos, 
00052       const SequenceOffset& queryPos )
00053   {
00054     push_back();
00055     back().seqNum = hitPos.sequence;
00056     back().seqPos = hitPos.offset;
00057   } // ~addHit
00058 
00059 }; // ~class HitStore
00060 
00061 
00062 class HashTablePackedCustom;
00063 static HashTableGeneric* pHashTable(NULL);
00064 static const int wordLength=5;
00065 static SequenceEncoderProtein encoder(wordLength, cerr);
00066 static CustomHitStore hits;
00067 //vector<SSAHAHit> hits;
00068 static PackedHitStore hitBuffer;
00069 static vector<int> hitStarts; 
00070 static bool isPacked=false;
00071 
00072 class HashTablePackedCustom : public HashTablePackedProtein
00073 {
00074 
00075  public:
00076   HashTablePackedCustom( ostream& monitoringStream=cerr, string name = ""):
00077     HashTablePackedProtein( monitoringStream, name ) 
00078     {}
00079 
00080   void convertHits( void );
00081 
00082 };
00083 
00084 
00085 // Name: HashTablePackedCustom::convertHits
00086 // Based on HashTablePacked::convertHits
00087 // Takes contents of hitBuffer, unpacks them, puts in hit store
00088 void HashTablePackedCustom::convertHits( void )
00089  //( PackedHitStore& packedHits, HitList& hitListFwd )
00090 {
00091 
00092   //  sort(hitBuffer.begin(),hitBuffer.end());
00093 
00094   vector<HitPacked>::iterator i(hitBuffer.begin());
00095   vector<SeqStartPos>::iterator j(seqStarts_.begin());
00096   //  hitListFwd.reserve(hitListFwd.size()+hitBuffer.size());
00097   hits.reserve( hits.size() + hitBuffer.size());
00098 
00099   // NB seqStarts_ must not be empty else call to back() segfaults
00100 
00101   while ( (j!=&seqStarts_.back()) && (i!=hitBuffer.end()) )
00102   {
00103     vector<SeqStartPos>::iterator ub(upper_bound(j,seqStarts_.end(),i->first));
00104     j=ub; j--;
00105 
00106     while ((i!=hitBuffer.end())&&(i->first<*ub))
00107     {
00108       //      cout << i->first << " ... " 
00109       //   << ub-seqStarts_.begin() << " " 
00110       //   << stepLength_*(i->first - *j) 
00111       //   << endl;
00112       //      hitListFwd.addHit( ub-seqStarts_.begin(), 
00113       //                    stepLength_*(i->first - *j),
00114       //                    i->second );
00115       hits.push_back();
00116       hits.back().seqNum= ub-seqStarts_.begin();
00117       hits.back().seqPos= stepLength_*(i->first - *j);
00118       //      hitListFwd.addHit( 
00119       //                    
00120       //                    i->second );
00121       
00122       i++;
00123       
00124     } // ~while
00125     j=ub;  
00126   } // ~while
00127 
00128 } // ~HashTablePackedCustom::convertHits( void )
00129 
00130 
00131 
00132 
00133 /* return name of the seqNum-th sequence in the table */
00134 const char* getSubjectName( unsigned int seqNum )
00135 {
00136   return pHashTable->getSequenceName( seqNum );
00137 } // ~const char* getSubjectName( unsigned int seqNum )
00138 
00139 
00140 
00141 int loadTable( const char* const tableName )
00142 {
00143   clearSearch();
00144   clearTable();
00145 
00146   if (isPacked)
00147   {
00148     pHashTable = new HashTablePackedCustom( cerr, (string)tableName);
00149   }
00150   else
00151   {
00152     pHashTable = new HashTableFred( cerr, (string)tableName);
00153   }
00154 
00155   pHashTable->loadHashTable(); 
00156   // will throw exception if fails, eg if trying to load wrong table
00157   assert(pHashTable->getWordLength()==wordLength);
00158   pHashTable->setMaxNumHits( 100000000 );
00159 }
00160 
00161 /* perform a SSAHA search */
00162 int doSearch
00163 ( 
00164  const char* const pWords,
00165  const int numWords,
00166  SSAHAHit** const pHits,
00167  int ** const pStarts
00168 )
00169 {
00170   clearSearch();
00171   //  SequenceEncoderProtein encoder(wordLength, cerr);
00172   WordSequence w;
00173 
00174   encoder.linkSeq(w);
00175   encoder.encode(pWords, numWords*wordLength);
00176   encoder.unlinkSeq();
00177 
00178   if(w.empty())
00179   {
00180     hitStarts.push_back(-1);
00181     *pHits=NULL;
00182     *pStarts=&hitStarts[0];
00183     return -1;
00184   }
00185 
00186 
00187   if (isPacked)
00188   {
00189     HashTablePackedCustom* pTable
00190       = dynamic_cast<HashTablePackedCustom*>(pHashTable);
00191     assert(pTable!=NULL);
00192     pTable->setQueryProtein();
00193     for (WordSequence::iterator i(w.begin());i!=&w.back();i++)
00194     {
00195       //    cout << "got: " << printResidue(*i, wordLength) << endl;
00196       hitBuffer.clear();
00197       pTable->matchWord( *i, hitBuffer );
00198       //    pHashTable->convertHits( hitBuffer, hits );
00199       pTable->convertHits();
00200       hitStarts.push_back(hits.size());
00201     }
00202   } // ~for
00203   else
00204   {
00205     HashTableFred* pTable
00206       = dynamic_cast<HashTableFred*>(pHashTable);
00207     assert(pTable!=NULL);
00208     //    int n(hits.size());
00209     for (WordSequence::iterator i(w.begin());i!=&w.back();i++)
00210     {
00211       pTable->matchWord( *i, hits );
00212     }
00213     hitStarts.push_back(hits.size());
00214   }
00215 
00216 
00217   //  hits.push_back();
00218   //  hits.back().seqNum=1103; hits.back().seqPos=-1103;
00219   //  hitStarts.push_back(1);
00220   hitStarts.push_back(-1);
00221   *pHits=&hits[0];
00222   *pStarts=&hitStarts[0];
00223   return 0;
00224 }
00225 
00226 /* Clear search results from RAM (automatically done by loadTable) */
00227 int clearSearch( void )
00228 {
00229   hits.clear();
00230   hitStarts.clear();
00231 }
00232 
00233 /* Clear SSAHA tables from RAM (automatically done by loadTable) */
00234 int clearTable( void )
00235 {
00236   delete pHashTable;
00237   pHashTable = NULL;
00238 }
00239 
00240 
00241 
00242 // End of file SSAHAWrapper.cpp
00243 

Generated on Fri Dec 21 13:12:15 2007 for ssaha by  doxygen 1.5.2