Global/GlobalDefinitions.cpp

Go to the documentation of this file.
00001 
00002 // #######################################################################
00003 
00004 // SSAHA : Sequence Search and Alignment by Hashing Algorithm
00005 // Version 3.2, released 1st March 2004
00006 // Copyright (c) Genome Research 2002
00007 
00008 // SSAHA is free software; you can redistribute it and/or modify 
00009 // it under the terms of version 2 of the GNU General Public Licence
00010 // as published by the Free Software Foundation.
00011  
00012 // This program is distributed in the hope that it will be useful,
00013 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00014 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015 // GNU General Public Licence for more details.
00016  
00017 // You should have received a copy of the GNU General Public Licence
00018 // along with this program; if not, write to the Free Software
00019 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
00020 // or see the on-line version at http://www.gnu.org/copyleft/gpl.txt
00021 
00022 // #######################################################################
00023 
00024 // Module Name  : GlobalDefinitions
00025 // File Name    : GlobalDefinitions.cpp
00026 // Language     : C++
00027 // Module Author: Anthony J. Cox (ac2@sanger.ac.uk)
00028 
00029 // Description:
00030 
00031 // Includes:
00032 
00033 
00034 #include "GlobalDefinitions.h"
00035 #include "HashTable.h"
00036 #include <iostream>
00037 #include <ctime>
00038 
00039 // ### Function Definitions ###
00040 
00041 char* getTimeNow(void)
00042 {
00043   time_t now = time(NULL);
00044   tm* ptime = localtime(&now);
00045   return asctime(ptime);
00046 }
00047 
00048 PrintFromWord::PrintFromWord
00049 ( const Word word, const HashTableGeneric& hashTable, 
00050   int bitsPerSymbol, const char* tt )
00051   : pWordSeq_(0), word_( word ), length_( hashTable.getWordLength() ),
00052     bitsPerSymbol_( bitsPerSymbol ), tt_( tt ),
00053     mask_( (1 << bitsPerSymbol) - 1 ) 
00054 {}
00055 
00056 
00057 PrintFromWord::PrintFromWord
00058 ( const WordSequence& wordSeq, const HashTableGeneric& hashTable, 
00059   int bitsPerSymbol, const char* tt ) 
00060   : pWordSeq_( &wordSeq ), length_( hashTable.getWordLength() ), 
00061     bitsPerSymbol_( bitsPerSymbol ), tt_( tt ),
00062     mask_( (1 << bitsPerSymbol) - 1 ) 
00063 {}
00064 
00065 
00066 
00067 
00068 void PrintFromWord::doPrint( ostream& os )
00069   {
00070     if ( pWordSeq_ )
00071     {       
00072       for 
00073       ( 
00074         WordSequence::const_iterator
00075         i (pWordSeq_->begin() );
00076         i != static_cast<WordSequence::const_iterator>(&pWordSeq_->back());
00077         ++i 
00078       ) 
00079       {
00080         doWord( os, *i ); 
00081       } // ~for
00082       if (pWordSeq_->size() != 0)
00083       {
00084         if (pWordSeq_->getNumBasesInLast()==0) 
00085         {
00086           doWord(os, pWordSeq_->back());
00087         }
00088         else
00089         {
00090           doWord( os, 
00091                   (pWordSeq_->back() >> 
00092                    (bitsPerSymbol_*(   length_
00093                                      - pWordSeq_->getNumBasesInLast())) ), 
00094                   pWordSeq_->getNumBasesInLast() );
00095         }
00096       }
00097     } // ~if
00098     else 
00099     {
00100       doWord( os, word_ );
00101     } // ~else
00102   } // ~PrintFromWord::doPrint
00103 
00104   void PrintFromWord::doWord( ostream& os, const Word word, int length )
00105   { 
00106     for ( int i( length - 1 ) ; i >= 0 ; i-- )
00107     {
00108 
00109       //      os << &tt_ << " " <<
00110       //    ( ( word & ( mask_ << (bitsPerSymbol_*i) ) ) 
00111       //                       >> (bitsPerSymbol_*i) ) << endl;
00112 
00113       os << tt_
00114     [ ( word & ( mask_ << (bitsPerSymbol_*i) ) ) 
00115     >> (bitsPerSymbol_*i) ];
00116     } 
00117   } // ~doWord
00118 
00119 MakeIntoWord::MakeIntoWord( int bitsPerSymbol, const char* tt ):
00120 bitsPerSymbol_( bitsPerSymbol ), tt_( tt ) 
00121 {
00122   for ( unsigned int i(0) ; i < (1<<bitsPerSymbol_) ; ++i )
00123   {
00124     map_.insert( make_pair( (static_cast<char>(tolower(tt[i]))),i ) );
00125     map_.insert( make_pair( (static_cast<char>(toupper(tt[i]))),i ) );
00126   };
00127 } // ~MakeIntoWord::MakeIntoWord( int bitsPerSymbol, const char* tt )
00128 
00129 /*Word makeWord(const string& s)
00130 {
00131   Word w(0);
00132   if ( s.size() > (4*sizeof(Word)) ) return w;
00133   for ( int i(0) ; i < s.size() ; i++ )
00134   {
00135     w <<= 2;
00136     if      ((s[i] == 'C') || (s[i] == 'c'))   w |= 0x1; 
00137     else if ((s[i] == 'G') || (s[i] == 'g'))   w |= 0x2; 
00138     else if ((s[i] == 'T') || (s[i] == 't'))   w |= 0x3; 
00139     else if ((s[i] != 'A') && (s[i] != 'a')) { w = 0; break; }
00140   } // ~for
00141   return w;
00142 } // ~makeWord(const string& s)
00143 */
00144 Word MakeIntoWord::operator()( const string& s )
00145 {
00146   Word w(0);
00147   if ( s.size()*bitsPerSymbol_ > (8*sizeof(Word)) ) return w;
00148   for ( unsigned int i(0) ; i < s.size() ; i++ )
00149   {
00150     w <<= bitsPerSymbol_;
00151     if (map_.find(s[i]) == map_.end() ) { w=0; break; }
00152     else w |= map_[s[i]];
00153   } // ~for
00154   return w;
00155   
00156 } // ~Word MakeIntoWord::operator()( const string& s )
00157 
00158   // Function Name: reverseComplement
00159   // Arguments: Word
00160   // Returns:   Word
00161   // Tried several ways of computing the reverse complement (including
00162   // use of look up tables etc.). This was the fastest. 
00163   Word reverseComplement( Word word, int wordLength ) 
00164   {
00165     Word revComp( 0 ), mask( 0x3 );
00166     const int lim ( ( wordLength - 1 )*2 );
00167     for ( int i( 0 ) ; i <= lim ; i+=2 )
00168     {  
00169       revComp |= (((word & mask)^mask) >> i) << lim - i;
00170       mask <<= 2;
00171     } // ~for
00172     //    revComp |= gCursedWord * ((word&gCursedWord)!=(Word)0);
00173     return revComp;
00174   } // ~reverseComplement( Word ) const
00175 
00176   // Function Name: reverseComplement
00177   // Arguments: const WordSequence& (in), WordSequence& (out), int (in)
00178   // Returns:   void
00179   // This computes the reverse complement of seq and places it in revComp
00180 void reverseComplement
00181 ( const WordSequence& seq, WordSequence& revComp, 
00182   int wordLength )
00183 {
00184   //  assert(numBasesInLast==seq.getNumBasesInLast());
00185   for ( WordSequence::const_reverse_iterator thisWord( seq.rbegin() ); 
00186         thisWord != seq.rend();
00187         thisWord ++ )
00188   {
00189     revComp.push_back( reverseComplement( *thisWord, wordLength ) );
00190   } // ~for
00191   shiftSequence( revComp, gBaseBits, wordLength, 
00192                  wordLength - seq.getNumBasesInLast() );
00193 
00194   revComp.setNumBasesInLast(seq.getNumBasesInLast());
00195 } // ~reverseComplement( WordSequence& ...
00196 
00197 //  void reverseComplement
00198 //  ( const WordSequence& seq, 
00199 //    WordSequence& revComp,
00200 //    int wordLength )
00201 //    { 
00202 //      return reverseComplement(seq, revComp, wordLength, wordLength); 
00203 //    } // ~reverseComplement( WordSequence& ... 
00204 
00205 
00206   // Function Name: shiftSequence
00207   // Arguments: WordSequence& (in/out), int (in)
00208   // Returns:   void 
00209   // This shifts a WordSequence 'i places to the left', i.e. base i+1 of
00210   // the sequence becomes base 1. The first i bases are lost. 
00211   void shiftSequence
00212   ( WordSequence& sequence, int bitsPerSymbol, int wordLength, int i )
00213   {
00214     if ( sequence.size() < 1 ) return;
00215 
00216     register Word oldCarry(0), thisWord; 
00217     register int shiftNum( bitsPerSymbol * (wordLength - i) );
00218     register Word carryMask
00219     (    ( ( (unsigned long long)1 << (bitsPerSymbol*i) ) - 1 )  << shiftNum    );
00220 
00221     register Word andMask( ( (unsigned long long)1 << ( bitsPerSymbol * wordLength ) ) - 1 );
00222 
00223     for (int j(sequence.size()-1); j>= 0; j--)
00224     {
00225       thisWord = sequence[j];
00226       sequence[j] 
00227         = ( ( ( thisWord << (bitsPerSymbol*i) ) & andMask ) | oldCarry );
00228          oldCarry = (thisWord & carryMask) >> shiftNum ;
00229       //      oldCarry = ((thisWord & carryMask) >> shiftNum )
00230       //        | (gCursedWord * ( (thisWord & gCursedWord) != (Word)0 ));
00231     } // ~for
00232 
00233   } // ~shiftSequence( WordSequence& sequence ) 
00234 
00235 void loadFromFile
00236 ( const string& fileName, const char* buffer, const unsigned long numBytes,
00237 ostream& monitoringStream_ )
00238 {
00239 
00240 
00241     ifstreamSSAHA inFile( fileName.c_str() );
00242     
00243     if ( inFile.fail() )
00244     {
00245       monitoringStream_ << "Error: failed to open " 
00246                         << fileName << ", aborting load." << endl;
00247       throw SSAHAException((string)"Could not open file " + fileName);
00248     } // ~if
00249 
00250     inFile.read( buffer, numBytes );  
00251 
00252     if (inFile.gcount() != numBytes)
00253     {
00254       monitoringStream_ << "Error: expecting " << numBytes  
00255                         << " bytes, but only " << inFile.gcount()
00256                         << "were read.\n";
00257       throw SSAHAException("Insufficient data in file.");
00258     }
00259 
00260     // check for EOF
00261     if (inFile.peek()!=EOF)
00262     {
00263       monitoringStream_ << "Error: expecting " << numBytes  
00264                         << " bytes, but more were found in file.\n";
00265       throw SSAHAException("Too much data in file.");
00266     }
00267 
00268 
00269     monitoringStream_ << "Loaded file " << fileName << " (" 
00270                       << numBytes
00271                       << " bytes).\n";
00272 
00273     monitoringStream_ << "Closing file " << fileName << "\n";
00274     inFile.close();
00275 
00276 } // ~loadFromFile
00277 
00278 void saveToFile
00279 ( const string& fileName, const char* buffer, const unsigned long numBytes,
00280 ostream& monitoringStream_ )
00281 {
00282 
00283     ofstreamSSAHA outFile( fileName.c_str() );
00284     
00285     outFile.write( buffer, numBytes );
00286 
00287     if ( outFile.fail() )
00288     {
00289       monitoringStream_ << "Error: failed to write " 
00290                         << fileName << ", aborting save." << endl;
00291       throw SSAHAException((string)"Problem saving file " + fileName);
00292     } // ~if
00293 
00294     monitoringStream_ << "Saved file " << fileName << "." << endl;
00295 
00296     outFile.close();
00297 
00298 } // ~saveToFile
00299 
00300 
00301 #ifdef USING_POSIX_MEMORY_MAPPING
00302 
00303 const MemoryMapper::Mode MemoryMapper::createMap = 
00304 MemoryMapper::Mode
00305  ( O_CREAT|O_EXCL|O_RDWR,PROT_READ|PROT_WRITE,MAP_SHARED, true, true );
00306 
00307 const MemoryMapper::Mode MemoryMapper::readMap =
00308 MemoryMapper::Mode
00309 // ( O_CREAT|O_EXCL|O_RDWR,PROT_READ|PROT_WRITE,MAP_SHARED, false, false );
00310  ( O_RDONLY,PROT_READ,MAP_PRIVATE, false, false );
00311 
00312 
00313 
00314 void* MemoryMapper::linkToMap
00315 ( const Mode& mode, const string& name, unsigned long numBytes )
00316 {
00317 
00318   //    caddr_t pg_addr;
00319  
00320     static const int shareMode =  S_IRWXO|S_IRWXG|S_IRWXU;
00321  
00322     // Create a file 
00323  
00324     fileDesc_ = shm_open(name.c_str(), mode.shared, shareMode);
00325     if(fileDesc_ < 0)
00326     {
00327       throw SSAHAException("Failed to open shared memory");
00328     }
00329  
00330     // Set the size 
00331  
00332     if (mode.truncateFile==true)
00333     {
00334       if((ftruncate(fileDesc_, numBytes)) == -1)
00335       {
00336         throw SSAHAException("Failed to truncate shared memory file");
00337       }
00338     } 
00339 
00340     // Map the file into the address space of the process 
00341     //    pg_addr = (caddr_t) 
00342     //   mmap(0, size, mode.access, mode.flags, fileDesc_, 0);
00343  
00344       //    if(pg_addr == (caddr_t) -1)
00345       //  {
00346       //    perror("mmap failure");
00347       //     exit(1);
00348       //   }
00349 
00350     //    (*ptr_)=(char*) pg_addr; 
00351  
00352     return  mmap(0, numBytes, mode.access, mode.flags, fileDesc_, 0);
00353     //    size_=size; 
00354     //  isAllocated_=true;
00355 } // AllocatorMapped::allocate
00356 
00357 #endif
00358 
00359 
00360 
00361 
00362 // Name:
00363 // Arguments:
00364 // TYPE  NAME  IN/OUT COMMENT
00365 // Returns: TYPE COMMENT
00366 
00367 // End of file GlobalDefinitions.cpp
00368 
00369 

Generated on Fri Dec 21 13:12:15 2007 for ssaha by  doxygen 1.5.2