Global/GlobalDefinitions.h File Reference

#include <unistd.h>
#include <vector>
#include <string>
#include <iostream>
#include <fstream>
#include <map>
#include <utility>
#include <sys/types.h>
#include <stdio.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <errno.h>

Include dependency graph for GlobalDefinitions.h:

This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Namespaces

namespace  std

Classes

class  WordSequence
struct  PositionInDatabase
struct  HitInfo
class  HitListVector
class  MachineInfo
class  SSAHAException
class  ofstreamSSAHA
class  ifstreamSSAHA
class  NullBuffer
class  PrintFromWord
class  printBase
class  printResidue
class  MakeIntoWord
class  Allocator< T >
class  AllocatorLocal< T >

Defines

#define DEBUG_L3(X)
#define DEBUG_L2(X)
#define DEBUG_L1(X)

Typedefs

typedef unsigned int Word
typedef unsigned char uchar
typedef unsigned short ushort
typedef WordSequence::iterator WordSequenceIterator
typedef WordSequence::reverse_iterator WordSequenceReverseIterator
typedef unsigned int SequenceNumber
typedef int SequenceOffset
typedef HitListVector HitList
typedef printBase printWord

Enumerations

enum  {
  gBaseBits = 2, gResidueBits = 5, gBasesPerCodon = 3, gCodonBits = gBaseBits*gBasesPerCodon,
  gBitsPerWord = 8*sizeof(Word), gMaxBasesPerWord = gBitsPerWord / gBaseBits, gNumReadingFrames = 3, gNumDirections = 2,
  gNumCodonEncodings = 22
}
enum  SourceDataType { gDNAData = 0, gProteinData = 1, gUnknownData = 2 }
enum  HitListFormatType {
  gStandard = 0, g32BitPacked = 1, g32BitPackedProtein = 2, gTranslated = 3,
  gHybrid = 4, gNotSpecified = 5
}

Functions

char * getTimeNow (void)
Word reverseComplement (Word word, int wordLength)
void reverseComplement (const WordSequence &seq, WordSequence &revComp, int wordLength)
void shiftSequence (WordSequence &sequence, int bitsPerSymbol, int wordLength, int i=1)
void shiftSequenceDNA (WordSequence &sequence, int wordLength, int i=1)
void shiftSequenceProtein (WordSequence &sequence, int wordLength, int i=1)
void loadFromFile (const string &fileName, const char *buffer, const unsigned long numBytes, ostream &monitoringStream_=cerr)
void saveToFile (const string &fileName, const char *buffer, const unsigned long numBytes, ostream &monitoringStream_=cerr)

Variables

static const char gBaseNames [] = "ACGT"
static const char gResidueNames [] = "*ACDEFGHIKLMNPQRSTVWXY??????????"
static const char gCodonNames [] = "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSS*CWCLFLF"
static const Word gCursedWord = 1<<31
static MakeIntoWord makeBase (gBaseBits, gBaseNames)
static MakeIntoWord makeWord (gBaseBits, gBaseNames)
static MakeIntoWord makeResidue (gResidueBits, gResidueNames)


Define Documentation

#define DEBUG_L1(X)

Definition at line 51 of file GlobalDefinitions.h.

#define DEBUG_L2(X)

Definition at line 50 of file GlobalDefinitions.h.

Referenced by MatchStoreImp::addMatch(), SequenceReaderFile::computeNumSequencesInFile(), MatchAlgorithmUngapped::generateMatches(), SequenceReaderFile::getLastSequenceName(), SequenceReaderMulti::getNextSequence(), SequenceReaderFastq::getNextSequence(), SequenceReaderFile::getNextSequence(), SequenceReaderMulti::getSequence(), SequenceReaderFile::getSequence(), SequenceReaderFile::printSideInfo(), and SequenceReaderFile::printSource().

#define DEBUG_L3(X)

Definition at line 49 of file GlobalDefinitions.h.

Referenced by SequenceReaderMulti::findReader(), SequenceReaderModeFlagReplace::mismatch(), SequenceReaderModeReportReplace::mismatch(), SequenceReaderModeReplace::mismatch(), SequenceReaderModeReport::mismatch(), SequenceReaderModeIgnore::mismatch(), and SequenceReaderFile::printName().


Typedef Documentation

typedef HitListVector HitList

Definition at line 386 of file GlobalDefinitions.h.

typedef printBase printWord

Definition at line 630 of file GlobalDefinitions.h.

typedef unsigned int SequenceNumber

Definition at line 250 of file GlobalDefinitions.h.

typedef int SequenceOffset

Definition at line 251 of file GlobalDefinitions.h.

typedef unsigned char uchar

Definition at line 146 of file GlobalDefinitions.h.

typedef unsigned short ushort

Definition at line 147 of file GlobalDefinitions.h.

typedef unsigned int Word

Definition at line 128 of file GlobalDefinitions.h.

typedef WordSequence::iterator WordSequenceIterator

Definition at line 244 of file GlobalDefinitions.h.

typedef WordSequence::reverse_iterator WordSequenceReverseIterator

Definition at line 245 of file GlobalDefinitions.h.


Enumeration Type Documentation

anonymous enum

Enumerator:
gBaseBits 
gResidueBits 
gBasesPerCodon 
gCodonBits 
gBitsPerWord 
gMaxBasesPerWord 
gNumReadingFrames 
gNumDirections 
gNumCodonEncodings 

Definition at line 150 of file GlobalDefinitions.h.

00151 {
00152   gBaseBits = 2,
00153   gResidueBits = 5,
00154   gBasesPerCodon = 3,
00155   gCodonBits = gBaseBits*gBasesPerCodon,
00156   gBitsPerWord = 8*sizeof(Word),
00157   gMaxBasesPerWord = gBitsPerWord / gBaseBits,
00158   gNumReadingFrames = 3, 
00159   gNumDirections = 2,
00160   gNumCodonEncodings = 22 
00161   // = 20 amino acids + 1 stop codon + selenocysteine = 22
00162   // now 20 amino acids + stop codon + X - TC 27.3.2
00163 };

enum HitListFormatType

Enumerator:
gStandard 
g32BitPacked 
g32BitPackedProtein 
gTranslated 
gHybrid 
gNotSpecified 

Definition at line 172 of file GlobalDefinitions.h.

00173 {
00174   gStandard = 0,
00175   g32BitPacked = 1,
00176   g32BitPackedProtein =2,
00177   gTranslated = 3,
00178   gHybrid = 4,
00179   gNotSpecified = 5
00180 };

enum SourceDataType

Enumerator:
gDNAData 
gProteinData 
gUnknownData 

Definition at line 165 of file GlobalDefinitions.h.

00166 {
00167   gDNAData = 0,
00168   gProteinData = 1,
00169   gUnknownData = 2
00170 };


Function Documentation

char* getTimeNow ( void   ) 

Definition at line 41 of file GlobalDefinitions.cpp.

Referenced by main(), and processQuery().

00042 {
00043   time_t now = time(NULL);
00044   tm* ptime = localtime(&now);
00045   return asctime(ptime);
00046 }

Here is the caller graph for this function:

void loadFromFile ( const string &  fileName,
const char *  buffer,
const unsigned long  numBytes,
ostream &  monitoringStream_ = cerr 
)

Definition at line 236 of file GlobalDefinitions.cpp.

Referenced by AllocatorLocal< PositionInDatabase >::load(), HashTableGeneric::loadHashTable(), HashTableTranslated::loadHashTable(), and HashTablePacked::loadHitList().

00238 {
00239 
00240 
00241     ifstreamSSAHA inFile( fileName.c_str() );
00242     
00243     if ( inFile.fail() )
00244     {
00245       monitoringStream_ << "Error: failed to open " 
00246                         << fileName << ", aborting load." << endl;
00247       throw SSAHAException((string)"Could not open file " + fileName);
00248     } // ~if
00249 
00250     inFile.read( buffer, numBytes );  
00251 
00252     if (inFile.gcount() != numBytes)
00253     {
00254       monitoringStream_ << "Error: expecting " << numBytes  
00255                         << " bytes, but only " << inFile.gcount()
00256                         << "were read.\n";
00257       throw SSAHAException("Insufficient data in file.");
00258     }
00259 
00260     // check for EOF
00261     if (inFile.peek()!=EOF)
00262     {
00263       monitoringStream_ << "Error: expecting " << numBytes  
00264                         << " bytes, but more were found in file.\n";
00265       throw SSAHAException("Too much data in file.");
00266     }
00267 
00268 
00269     monitoringStream_ << "Loaded file " << fileName << " (" 
00270                       << numBytes
00271                       << " bytes).\n";
00272 
00273     monitoringStream_ << "Closing file " << fileName << "\n";
00274     inFile.close();
00275 
00276 } // ~loadFromFile

Here is the caller graph for this function:

void reverseComplement ( const WordSequence &  seq,
WordSequence &  revComp,
int  wordLength 
)

Definition at line 181 of file GlobalDefinitions.cpp.

References gBaseBits, WordSequence::getNumBasesInLast(), reverseComplement(), WordSequence::setNumBasesInLast(), and shiftSequence().

00183 {
00184   //  assert(numBasesInLast==seq.getNumBasesInLast());
00185   for ( WordSequence::const_reverse_iterator thisWord( seq.rbegin() ); 
00186         thisWord != seq.rend();
00187         thisWord ++ )
00188   {
00189     revComp.push_back( reverseComplement( *thisWord, wordLength ) );
00190   } // ~for
00191   shiftSequence( revComp, gBaseBits, wordLength, 
00192                  wordLength - seq.getNumBasesInLast() );
00193 
00194   revComp.setNumBasesInLast(seq.getNumBasesInLast());
00195 } // ~reverseComplement( WordSequence& ...

Here is the call graph for this function:

Word reverseComplement ( Word  word,
int  wordLength 
)

Definition at line 163 of file GlobalDefinitions.cpp.

00164   {
00165     Word revComp( 0 ), mask( 0x3 );
00166     const int lim ( ( wordLength - 1 )*2 );
00167     for ( int i( 0 ) ; i <= lim ; i+=2 )
00168     {  
00169       revComp |= (((word & mask)^mask) >> i) << lim - i;
00170       mask <<= 2;
00171     } // ~for
00172     //    revComp |= gCursedWord * ((word&gCursedWord)!=(Word)0);
00173     return revComp;
00174   } // ~reverseComplement( Word ) const

void saveToFile ( const string &  fileName,
const char *  buffer,
const unsigned long  numBytes,
ostream &  monitoringStream_ = cerr 
)

Definition at line 279 of file GlobalDefinitions.cpp.

Referenced by AllocatorLocal< PositionInDatabase >::save(), HashTableGeneric::saveHashTable(), and HashTablePacked::saveHitList().

00281 {
00282 
00283     ofstreamSSAHA outFile( fileName.c_str() );
00284     
00285     outFile.write( buffer, numBytes );
00286 
00287     if ( outFile.fail() )
00288     {
00289       monitoringStream_ << "Error: failed to write " 
00290                         << fileName << ", aborting save." << endl;
00291       throw SSAHAException((string)"Problem saving file " + fileName);
00292     } // ~if
00293 
00294     monitoringStream_ << "Saved file " << fileName << "." << endl;
00295 
00296     outFile.close();
00297 
00298 } // ~saveToFile

Here is the caller graph for this function:

void shiftSequence ( WordSequence &  sequence,
int  bitsPerSymbol,
int  wordLength,
int  i = 1 
)

Definition at line 212 of file GlobalDefinitions.cpp.

Referenced by main(), HashTable::matchSequenceStandard(), HashTablePacked::matchSequenceStandard(), reverseComplement(), shiftSequenceDNA(), shiftSequenceProtein(), and WordSequenceShifted::WordSequenceShifted().

00213   {
00214     if ( sequence.size() < 1 ) return;
00215 
00216     register Word oldCarry(0), thisWord; 
00217     register int shiftNum( bitsPerSymbol * (wordLength - i) );
00218     register Word carryMask
00219     (    ( ( (unsigned long long)1 << (bitsPerSymbol*i) ) - 1 )  << shiftNum    );
00220 
00221     register Word andMask( ( (unsigned long long)1 << ( bitsPerSymbol * wordLength ) ) - 1 );
00222 
00223     for (int j(sequence.size()-1); j>= 0; j--)
00224     {
00225       thisWord = sequence[j];
00226       sequence[j] 
00227         = ( ( ( thisWord << (bitsPerSymbol*i) ) & andMask ) | oldCarry );
00228          oldCarry = (thisWord & carryMask) >> shiftNum ;
00229       //      oldCarry = ((thisWord & carryMask) >> shiftNum )
00230       //        | (gCursedWord * ( (thisWord & gCursedWord) != (Word)0 ));
00231     } // ~for
00232 
00233   } // ~shiftSequence( WordSequence& sequence ) 

Here is the caller graph for this function:

void shiftSequenceDNA ( WordSequence &  sequence,
int  wordLength,
int  i = 1 
) [inline]

Definition at line 697 of file GlobalDefinitions.h.

References gBaseBits, and shiftSequence().

00698 { 
00699   shiftSequence( sequence, gBaseBits, wordLength, i ); 
00700 }

Here is the call graph for this function:

void shiftSequenceProtein ( WordSequence &  sequence,
int  wordLength,
int  i = 1 
) [inline]

Definition at line 706 of file GlobalDefinitions.h.

References gResidueBits, and shiftSequence().

00707 { 
00708   shiftSequence( sequence, gResidueBits, wordLength, i ); 
00709 }

Here is the call graph for this function:


Variable Documentation

const char gBaseNames[] = "ACGT" [static]

Definition at line 92 of file GlobalDefinitions.h.

Referenced by main(), and HashTableGeneric::printHashStats().

const char gCodonNames[] = "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSS*CWCLFLF" [static]

Definition at line 126 of file GlobalDefinitions.h.

Referenced by main(), and operator<<().

const Word gCursedWord = 1<<31 [static]

Definition at line 144 of file GlobalDefinitions.h.

Referenced by codonizeAndFlag(), codonizeAndFlagReverse(), HashTablePacked::countWords(), HashTable::countWords(), getCodonFromWord(), HashTablePacked::hashWords(), HashTable::hashWords(), main(), HashTableView< PositionInDatabase, HashTable >::matchWord(), HashTablePacked::matchWordSubstitute(), SequenceReaderModeFlagReplace::mismatch(), SequenceAdapterWithOverlap::operator[](), and SequenceEncoder::unlinkSeq().

const char gResidueNames[] = "*ACDEFGHIKLMNPQRSTVWXY??????????" [static]

Definition at line 96 of file GlobalDefinitions.h.

Referenced by MatchAlignerTranslated::codonize(), MatchAlignerTranslated::getCodon(), main(), and HashTableGeneric::printHashStats().

MakeIntoWord makeBase(gBaseBits, gBaseNames) [static]

Referenced by main().

MakeIntoWord makeResidue(gResidueBits, gResidueNames) [static]

Referenced by main().

MakeIntoWord makeWord(gBaseBits, gBaseNames) [static]

Referenced by main().


Generated on Fri Dec 21 13:13:32 2007 for ssaha by  doxygen 1.5.2