Global/SSAHAMain.h

Go to the documentation of this file.
00001 /*  Last edited: May 30 13:49 2002 (ac2) */
00002 
00003 // #######################################################################
00004 
00005 // SSAHA : Sequence Search and Alignment by Hashing Algorithm
00006 // Version 3.2, released 1st March 2004
00007 // Copyright (c) Genome Research 2002
00008 
00009 // SSAHA is free software; you can redistribute it and/or modify 
00010 // it under the terms of version 2 of the GNU General Public Licence
00011 // as published by the Free Software Foundation.
00012  
00013 // This program is distributed in the hope that it will be useful,
00014 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00015 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016 // GNU General Public Licence for more details.
00017  
00018 // You should have received a copy of the GNU General Public Licence
00019 // along with this program; if not, write to the Free Software
00020 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
00021 // or see the on-line version at http://www.gnu.org/copyleft/gpl.txt
00022 
00023 // #######################################################################
00024 
00025 // Module Name  : SSAHAMain
00026 // File Name    : SSAHAMain.h
00027 // Language     : C++
00028 // Module Author: Anthony J. Cox (ac2@sanger.ac.uk)
00029 
00030 // Include guard:
00031 #ifndef INCLUDED_SSAHAMain
00032 #define INCLUDED_SSAHAMain
00033 
00034 using namespace std;
00035 // Description:
00036 
00037 // Includes:
00038 
00039 // NB it is good practise for #include statements in header files to be
00040 // replaced by forward declarations if at all possible
00041 #include <string>
00042 #include <iostream>
00043 #include <memory>
00044 class SequenceReaderMulti;
00045 class SequenceReader;
00046 class HashTable;
00047 
00048 // ### Class Declarations ###
00049 
00050 struct QueryParameterStruct
00051 {
00052   string queryName;
00053   string queryType;
00054   string queryFormat;
00055   string subjectName;
00056   string subjectType;
00057   string subjectFormat;
00058   int    queryStart;
00059   int    queryEnd;
00060   int    wordLength;
00061   int    stepLength;
00062   int    maxStore; 
00063   int    minPrint;
00064   int    maxGap;
00065   int    maxInsert;
00066   int    numRepeats;
00067   int    sortMatches;
00068   int    doAlignment;
00069   int    substituteThreshold;
00070   int    bandExtension;
00071   string queryReplace;
00072   string subjectReplace;
00073   //  string reportMode;
00074   string logMode;
00075   string saveName;
00076   bool   parserFriendly;
00077   bool   packHits;
00078   bool   reverseQuery;
00079   bool   runQuery;
00080   bool   printHashStats;
00081   friend ostream& operator<<(ostream& os, const QueryParameterStruct& qps )
00082   {
00083     return os << "queryName:\t" << qps.queryName   
00084        << "\nqueryType:\t" << qps.queryType   
00085        << "\nqueryFormat:\t" << qps.queryFormat   
00086        << "\nsubjectName:\t" << qps.subjectName 
00087        << "\nsubjectType:\t" << qps.subjectType  
00088        << "\nsubjectFormat:\t" << qps.subjectFormat  
00089        << "\nqueryStart:\t" << qps.queryStart  
00090        << "\nqueryEnd:\t" << qps.queryEnd  
00091        << "\nwordLength:\t" << qps.wordLength  
00092        << "\nstepLength:\t" << qps.stepLength   
00093        << "\nmaxStore:\t" << qps.maxStore    
00094        << "\nminPrint:\t" << qps.minPrint     
00095        << "\nmaxGap:  \t" << qps.maxGap     
00096        << "\nmaxInsert:\t" << qps.maxInsert     
00097        << "\nnumRepeats:\t" << qps.numRepeats
00098        << "\nsortMatches:\t" << qps.sortMatches
00099        << "\ndoAlignment:\t" << qps.doAlignment
00100        << "\nsubstituteThreshold:\t" << qps.substituteThreshold
00101        << "\nbandExtension:\t" << qps.bandExtension
00102       //       << "\nreportMode:\t" << qps.reportMode  
00103        << "\nqueryReplace:\t" << qps.queryReplace  
00104        << "\nsubjectReplace:\t" << qps.subjectReplace  
00105        << "\nlogMode:\t" << qps.logMode  
00106        << "\nsaveName:\t" << qps.saveName    
00107        << "\nparserFriendly:\t" 
00108        << (( qps.parserFriendly ) ? (string)"true" : (string)"false") 
00109        << "\npackHits:\t" 
00110        << (( qps.packHits ) ? (string)"true" : (string)"false") 
00111        << "\nreverseQuery:\t" 
00112        << (( qps.reverseQuery ) ? (string)"true" : (string)"false") 
00113        << "\nrunQuery:\t" 
00114        << (( qps.runQuery ) ? (string)"true" : (string)"false") 
00115        << "\nprintHashStats:\t" 
00116        << (( qps.printHashStats ) ? (string)"true" : (string)"false") 
00117        << "\n";
00118   } // ~operator<<
00119 
00120 };
00121 
00122 QueryParameterStruct defaultParams = 
00123 {
00124   "",        // string queryName;
00125   "DNA",     // string queryType;
00126   "",        // string queryFormat;
00127   "",        // string subjectName;
00128   "DNA",     // string subjectType;
00129   "",        // string subjectFormat;
00130   1,         // int    queryStart;
00131   -1,        // int    queryEnd; 
00132   -1,        // int    wordLength;
00133   -1,        // int    stepLength;
00134   100000,    // int    maxToStore; 
00135   1,         // int    minToPrint;
00136   -1,        // int    maxGap;
00137   0,         // int    maxInsert;
00138   0,         // int    numRepeats;
00139   0,         // int    sortMatches;
00140   60,         // int    doAlignment;
00141   0,         // int    substituteThreshold;
00142   0,         // int    bandExtension;
00143   //  "tag",  // string reportMode;
00144   "default",  // string queryReplace;
00145   "tag",  // string subjectReplace;
00146   "cerr",    // string logMode;
00147   "",        // string saveName;
00148   false,     // bool   parserFriendly;
00149   false,     // bool   packHits;
00150   true,     // bool  reverseQuery; // made true the default TC 11.12.2001 
00151   true,      // bool   runQuery;
00152   false      // bool   printHashStats;
00153 };  
00154 
00155 // Class Name : CommandLineArg
00156 // Description: This class and its subclasses interpret command line arguments.
00157 // The interpretation is done by the static function parseCommandLine.
00158 // It creates an instance of CommandLineArg for each valid command line option.
00159 // Each instance recognizes a long and short option name and links to
00160 // an int, string or bool. 
00161 class CommandLineArg
00162 {
00163   public:
00164 
00165   // Name:      parseCommandLine
00166   // Arguments: command line arguments (in), QueryParameterStruct& (out)
00167   // Go through the command line arguments: if an argument is a recognized
00168   // keyword, take the following argument and place it in the appropriate
00169   // part of queryParams. Does not make any attempt to check if parameter 
00170   // values are sensible.
00171   static void parseCommandLine
00172   ( int numArgs, char* args[], QueryParameterStruct& queryParams );
00173 
00174 
00175   CommandLineArg( const string& nameLong, const string& nameShort ) :
00176     nameLong_( nameLong ), nameShort_( nameShort ) {}
00177   // Is the current argument equal to 'my' argument name?
00178   virtual bool isThisMe( const string& argName )
00179   { 
00180     return ( ( argName == nameLong_ ) || ( argName == nameShort_ ) );
00181   }
00182   virtual void addValue( const string& value ) = 0;
00183   protected:
00184   string nameLong_;
00185   string nameShort_;
00186 };
00187 
00188 // Class Name : CommandLineArgString
00189 // Description: Recognize the name of an option that takes a string value, 
00190 // and copy that value to the destination string
00191 class CommandLineArgString : public CommandLineArg
00192 {
00193 public:
00194   CommandLineArgString
00195   ( const string& nameLong, const string& nameShort, string& destination ) :
00196     CommandLineArg( nameLong, nameShort ), destination_( destination ) {}
00197   void addValue( const string& value )
00198   {
00199     destination_ = value;
00200   }
00201 protected:
00202   string& destination_;
00203 };
00204 
00205 // Class Name : CommandLineArgInt
00206 // Description: Recognize the name of an option that takes an integer value, 
00207 // and copy that value to the destination int
00208 class CommandLineArgInt : public CommandLineArg
00209 {
00210 public:
00211   CommandLineArgInt
00212   ( const string& nameLong, const string& nameShort, int& destination ) :
00213     CommandLineArg( nameLong, nameShort ), destination_( destination ) {}
00214   void addValue( const string& value )
00215   {
00216     destination_ = atoi(value.c_str());
00217   }
00218 protected:
00219   int& destination_;
00220 };
00221 
00222 // Class Name : CommandLineArgBool
00223 // Description: Recognize the name of a boolean option. If recognized, set
00224 // the destination bool to 'true'
00225 class CommandLineArgBool : public CommandLineArg
00226 {
00227 public:
00228   CommandLineArgBool
00229   ( const string& nameLong, const string& nameShort, bool& destination ) :
00230     CommandLineArg( nameLong, nameShort ), destination_( destination ) {}
00231 
00232   virtual bool isThisMe( const string& argName )
00233   { 
00234     if ( ( argName == nameLong_ ) || ( argName == nameShort_ ) )
00235     {
00236       destination_ = true;
00237       return true;
00238     } // ~if
00239     return false;
00240   } // ~grabNextArg
00241 
00242   void addValue( const string& value )
00243   {
00244     destination_ = atoi(value.c_str());
00245   }
00246 protected:
00247   bool& destination_;
00248 };
00249 
00250 // ### Function Declarations ###
00251 
00252 // Name:
00253 // Arguments:
00254 // TYPE  NAME  IN/OUT COMMENT
00255 // Returns: TYPE COMMENT
00256 
00257 class SequenceEncoder;
00258 
00259 SequenceReader* findSequenceReader
00260 ( const string& fileName, const string& fileType, SequenceEncoder* encoder );
00261 
00262 
00263 
00264 // Name:      fillSequenceReaderMulti
00265 // Arguments: SequenceReaderMulti& (out), string& (in), string& (in)
00266 // This uses the filename suffix to deduce what file type to create
00267 // if fileName = *.fasta it creates an instance of SequenceReaderFasta and
00268 // adds it to m.
00269 // (Other file formats are TBD)
00270 // If filename suffix does not exist or is not recognized, it assumes
00271 // the filename must be a directory. It then calls itself for each filename
00272 // found in the directory. By calling itself recursively in this way, all
00273 // files in a directory tree are incorporated into m.
00274 void fillSequenceReaderMulti( SequenceReaderMulti& m, 
00275                               SequenceEncoder* encoder,
00276                               const string& fileName,
00277                               const string& dirName = "" );
00278 
00279 // Name:      processQuery
00280 // Arguments: QueryParameterStruct& (in)
00281 // 1. Takes queryParams produced by parseCommandLine
00282 // 2. Check the values therein are sensible.
00283 // 3. Set up a SequenceReader for the query data
00284 // 4. Either load in a hash table or create one from the subject data
00285 // 5. Pass the hash table and query sequence reader to the QueryManager
00286 // 6. Run the query!
00287 void processQuery( QueryParameterStruct& queryParams );
00288 
00289 static const char* helpText[] ={
00290 "NAME\n",
00291 " ssaha - performs rapid searching of DNA and protein databases\n",
00292 "SYNOPSIS\n",
00293 " ssaha [-h] [-help]\n", 
00294 " - print this help message.\n",
00295 " ssaha queryFile subjectFile [ -optionName_1 [optionValue_1] ] ...",
00296 "   [ -optionName_n [optionValue_n] ] \n",
00297 " - create a hash table from the sequences in subjectFile and", 
00298 "   use it to search subjectFile for the sequences in queryFile.\n", 
00299 " ssaha subjectFile [ -optionName_1 [optionValue_1] ] ...",
00300 "   [ -optionName_n [optionValue_n] ] ...\n",
00301 " - just create a hash table from the sequences in subjectFile.",
00302 "   The -saveName option must be set (see OPTIONS).\n",
00303 "DESCRIPTION\n",
00304 " ssaha is a tool for rapidly finding near exact matches in DNA or protein",
00305 " databases. The name is an acronym standing for Sequence Search and Alignment",
00306 " by Hashing Algorithm. It works by converting a sequence database into a",
00307 " hash table. This is then rapidly quizzed for hits, which are concatenated",
00308 " into matches.\n",
00309 "OPTIONS\n",
00310 " Options may be specified by either their full names or short names and may",
00311 " appear on the command line in any order.\n",
00312 " Full Name      Short      Description\n",
00313 "-queryFormat   -qf        Acceptable values:",
00314 "                          fasta - fasta file",
00315 "                          fastq - fastq file\n",
00316 "                          Default value:",                        
00317 "                          If not specified, attempts to deduce file type",
00318 "                          based on the filename suffix as follows:\n",
00319 "                          File suffix    Deduced file type",
00320 "                          .fasta, .fa    fasta file",
00321 "                          .fastq         fastq file",
00322 "                          else assumed to be a directory of files, each of",
00323 "                          whose names indicates the file type as specified",
00324 "                          by the above rules.\n",
00325 "-subjectFormat -sf        Acceptable values:",
00326 "                          fasta - fasta file",
00327 "                          fastq - fastq file",
00328 "                          hash  - precomputed hash table\n",
00329 "                          Default value:",                       
00330 "                          If not specified, attempts to deduce file type",
00331 "                          based on the filename suffix as follows:\n",
00332 "                          File suffix    Deduced file type",
00333 "                          .fasta, .fa    fasta file",
00334 "                          .fastq         fastq file",
00335 "                          else assumed to be a directory of files, each of",
00336 "                          whose names indicates the file type as specified",
00337 "                          by the above rules.\n",
00338 "                          Note:",
00339 "                          If -sf is set to hash, the -wl, -sl, and -ph",
00340 "                          options will, if present, be ignored.\n", 
00341 "-queryType     -qt        Acceptable values:",
00342 "                          DNA",
00343 "                          protein\n",
00344 "                          Default value:",
00345 "                          DNA\n",
00346 "-subjectType   -st        Acceptable values:",
00347 "                          DNA (in which case queryType must also be DNA)",
00348 "                          protein",
00349 "                          codon (i.e. do 6 way DNA to protein translation)\n",
00350 "                          Default value:",
00351 "                          DNA\n",
00352 "-hashStats     -hs        Show information about the hash table currently",
00353 "                          in use.\n",
00354 "-parserFriendly -pf       Show one match per line as a set of tab delimited",
00355 " (a.k.a. perlFriendly)    fields:\n",
00356 "                          match direction: F forward, R reverse",
00357 "                          query name",
00358 "                          query start",
00359 "                          query end",
00360 "                          subject name",
00361 "                          subject start",
00362 "                          subject end",
00363 "                          number of matching bases",
00364 "                          percentage identity\n",
00365 "-logMode       -lm        Controls the output of log information",
00366 "                          Acceptable values:",
00367 "                          cerr - send to standard error",
00368 "                          cout - send to standard output",
00369 "                          null - suppress log output",
00370 "                          any other value sends log information to a file",
00371 "                          of the same name\n",
00372 "                          Default value:",
00373 "                          cerr\n",
00374 "-packHits      -ph        Store position of each word in a \"packed\"", 
00375 "                          format comprising 32 bits per word. This halves",
00376 "                          the size of the .body file at the expense of a",
00377 "                          slight decrease in search speed.\n",
00378 "-wordLength    -wl        Size in base pairs of the words used to form",
00379 "                          the hash table. May vary from 1 to (assuming",
00380 "                          sufficient RAM is available) 16.",
00381 "                          Default value is 10.\n",
00382 "-maxGap        -mg        Maximum gap allowed between successive hits for",
00383 "                          them to count as part of the same match.",
00384 "                          Default value is 0.\n",
00385 "-maxInsert     -mi        Maximum number of insertions/deletions allowed",
00386 "                          between successive hits for them to count as part",
00387 "                          of the same match.",
00388 "                          Default value is 0.\n",
00389 "-maxStore      -ms        Largest number of times that a word may occur in",
00390 "                          the hash table for it to be used for matching",
00391 "                          expressed as a multiple of the number of",
00392 "                          occurrences per word that would be expected",
00393 "                          for a random database of the same size as the",
00394 "                          subject database.",
00395 "                          Default value is 10000.\n",
00396 "-numRepeats    -nr        Maximum size of tandem repeating motif that can be",
00397 "                          detected in the query sequence. This option may",
00398 "                          produce faster and better matches when dealing",
00399 "                          with data containing tandem repeats.",
00400 "                          Defaults to 0, and must be less than or equal to",
00401 "                          the word length.",
00402 "                          Notes:",
00403 "                          1. This option does nothing if -ph is also set.",
00404 "                          2. To get the best results with this option, set",
00405 "                          -mg to be at least equal to the word length.",
00406 "                          Setting the -mi option may also help.\n",
00407 "-minPrint      -mp        The minimum number of matching bases or residues",
00408 "                          that must be found in the query and subject",
00409 "                          sequences before they are considered as a match",
00410 "                          and thus printed.",
00411 "                          Default value is 1.\n",
00412 "-queryStart    -qs        Specifies the number of the first query sequence to",
00413 "                          be matched with the subject sequences (numbering of",
00414 "                          both the query and subject sequences starts at 1).",
00415 "                          Default value is 1.\n",
00416 "-queryEnd      -qe        Specifies the number of the last query sequence to",
00417 "                          be matched with the subject sequences. If not",
00418 "                          specified, continues until the end of the query",
00419 "                          sequence data is reached.\n",
00420 "-reportMode    -rm        Specifies behaviour upon encountering unexpected",
00421 "                          alphanumeric characters in query or subject ",
00422 "                          sequences:\n",
00423 "                          ignore - do nothing",
00424 "                          report - report to standard error",
00425 "                          replaceA   - silently replace character with 'A'",
00426 "                          replaceG, replaceC, replaceT - as for replaceA",
00427 "                          rrepA   - replace character with 'A' and report",
00428 "                          rrepG, rrepC, rrepT - as for rrepA",
00429 "                          Default value is `ignore.'\n",
00430 "                          NB FOR VERSION 3.0, THE -reportMode OPTION HAS BEEN",
00431 "                          SUPERCEDED BY THE -queryReplace AND -subjectReplace", 
00432 "                          OPTIONS - SEE THE APPROPRIATE HELP ENTRIES\n",
00433 "-reverseQuery  -rq        When matching the reverse strand of a query,",
00434 "                          convert the positions of any matches found",
00435 "                          into the coordinate frame of the forward strand.",
00436 "                          Has no effect if queryType is set to protein.\n",
00437 "-saveName      -sn        Specifies that the hash table must be saved before",
00438 "                          the program exits. This option must be followed by",
00439 "                          a string fileNameRoot. The hash table data is",
00440 "                          saved into the files",
00441 "                            fileNameRoot.head",
00442 "                            fileNameRoot.body",
00443 "                            fileNameRoot.name",
00444 "                            fileNameRoot.size\n",
00445 "                          Notes:",
00446 "                          1. If no query file is specified (usage (iii)",
00447 "                          above) it is an error not to set this option.",
00448 "                          2. It is an error to set this option if", 
00449 "                          subjectType is set to `hash.'",
00450 "                          3. If the -ph option is also set, the -sn option",
00451 "                          also produces a fileNameRoot.start file.\n",
00452 "-sortMatches   -sm        Output only the top n matches for each query,",
00453 "                          sorted by number of matching bases, then by",
00454 "                          subject name, then by start position in the",
00455 "                          query sequence.",
00456 "                          Default value is zero, which outputs all matches",
00457 "                          for each query and does no sorting.\n",
00458 "-stepLength    -sl        Number of base pairs gap between words used to ",
00459 "                          produce hash table. Ignored if a precomputed ",
00460 "                          hash table is being used. Default value is ",
00461 "                          equal to wordLength.\n",
00462 "-queryReplace  -qr        Specifies behaviour upon encountering unexpected",
00463 "                          alphanumeric characters in query sequences:\n",
00464 "                          ignore - do nothing",
00465 "                          report - report to standard error",
00466 "                          A,G, etc. - replace with that character:", 
00467 "                          must be A, G, C, T for DNA, or a valid IUPAC",
00468 "                          amino acid code for protein.",
00469 "                          Default: replace with 'A' for DNA, 'X' for protein\n",
00470 "-subjectReplace -sr       Specifies behaviour upon encountering unexpected",
00471 "                          alphanumeric characters in subject sequences:\n",
00472 "                          ignore - do nothing",
00473 "                          report - report to standard error",
00474 "                          A,G, etc. - replace with that character", 
00475 "                          Must be A, G, C, T for DNA or a valid IUPAC",
00476 "                          amino acid code for protein",
00477 "                          tag - `tag' the word so that it is not put", 
00478 "                          into the hash table.",
00479 "                          Default: tag\n",
00480 "-substituteWords -sw      Look for single base/amino mismatches in words",
00481 "                          that occur less than this many times more often",
00482 "                          than would be expected for a random database of",
00483 "                          the same size as the subject database.\n", 
00484 "                          Only looks for:",
00485 "                          purine (G-A)/pyrimidine (T-C) mismatches for DNA",
00486 "                          mismatches with positive BLOSUM score for protein",
00487 "                          Set to zero to switch this feature off.",
00488 "                          Default value: 0 (switched off)\n",
00489 "-doAlignment   -da        Produce a graphical alignment of the matching region",
00490 "                          using banded dynamic programming. The alignment",
00491 "                          will be formatted to the specified number of columns.",
00492 "                          Set to zero to suppress alignments, otherwise",
00493 "                          must be at least 20.",
00494 "                          Default value: 80\n",
00495 "-bandExtension -be        Specify size of the band to use for banded dynamic",
00496 "                          programming, when producing a graphical alignment.",
00497 "                          0 - diagonal only",
00498 "                          n - n cells each side of diagonal",
00499 "                          Only has an effect when -be is nonzero",
00500 "                          Default value: 0 (diagonal only)",
00501 "OUTPUT FORMAT\n",
00502 "When full alignments are requested (-da set to nonzero) the software produces",
00503 "a line of information then a graphical alignment for each match found:\n",
00504 "i) DNA against DNA (untranslated)\n",
00505 "RF      p1_1a788a06.q1c 515     538     p1_1a788f11.p1c 493     516     24",
00506 "100.00",
00507 "Alignment score: 13",
00508 "Q:000000515 tttt-tgagacggagtctcgctct",
00509 "            ||||x||||||xx|||||||||||",
00510 "S:000000493 ttttttgagacaaagtctcgctct\n",
00511 "ii) Protein against protein\n",
00512 "FF      SW:PPSA_AERPE   625     642     SW:PPSA_METTH   577     592     8  ",
00513 "44.44",
00514 "Alignment score: 31",
00515 "Q:000000625 KGGEKYETLDERNPMIGW",
00516 "            x|||x |xx |x|||x||",
00517 "S:000000577 EGGEN-EPY-EHNPMLGW\n",
00518 "iii) Protein query against translated DNA subject ",
00519 "(format for translated DNA query against protein subject is similar)\n",
00520 "FR      SW:PPS2_HUMAN   467     473     p1_1a788c10.q1c 507     529     7",
00521 "100.00",
00522 "Alignment score: 22",
00523 "Q:000000467 E..E..G..--V..L..D..P..",
00524 "            |||||||||  ||||||Nxx|||",
00525 "S:000000507 gaggagggcatgtattaaaccca\n",
00526 "iv) DNA against DNA (translated)\n",
00527 "FF      p1_1a788a01.p1c 52      76      p1_1a788b11.p1c 314     336     24",    
00528 "96.00",
00529 "Alignment score: 3",
00530 "Q:000000052 atggtatgtctttcttttact-agat-",
00531 "            ||xV..C..L..S..F..    R..  ",
00532 "S:000000314 attgtttgtctctccttc---taga-a\n",
00533 " From left to right the fields in the match information line are as follows:\n",
00534 " i)    First character: query direction (F forward, R reverse)",
00535 "      Second character: subject direction (F forward, R reverse)",
00536 " ii)   query name",
00537 " iii)  query start",
00538 " iv)   query end",
00539 " v)    subject name",
00540 " vi)   subject start",
00541 " vii)  subject end",
00542 " viii) estimated number of matching bases",
00543 " ix)   estimated percentage identity\n",
00544 " The last two quantities are not exact values, they are approximations used",
00545 " to order the matches (if requested) before the full alignment is done.\n",
00546 " With the alignments switched off (-da 0), an entry like the one below is", 
00547 " produced for each sequence in the query.\n",
00548 " Matches For Query 6 (653 bases): p1_1a788a03.q1c\n", 
00549 " F 6 : p1_1a788a03.q1c  Bases: 650   Q: 1 to 650    S: 1 to 650     100.00%",
00550 " R 5 : p1_1a788a03.p1c  Bases: 100   Q: 22 to 121   S: 501 to 600   100.00%\n",
00551 " The top line shows the query number, name and size of the query sequence.",
00552 " Below that is one line for each match found in the subject database. From",
00553 " left to right, the entries on these lines are as follows:\n",
00554 " match direction: F forward, R reverse",
00555 " subject number",
00556 " subject name",
00557 " number of matching bases",
00558 " query start",
00559 " query end",
00560 " subject start",
00561 " subject end",
00562 " percentage identity\n",
00563 "Notes:\n",
00564 " 1. The output format is different if the program is run with the", 
00565 " -parserFriendly option set. See the description of that option for details.\n",
00566 " 2. Because SSAHA works by looking for whole-word matches, the `number of",
00567 " matching bases' and `percentage identity' fields must be considered as lower",
00568 " bounds on the true values of these quantities.\n", 
00569 "FURTHER INFORMATION\n",
00570 " The SSAHA home page is at http://www.sanger.ac.uk/Software/analysis/SSAHA/\n",
00571 " Zemin Ning, Anthony. J. Cox and James C. Mullikin. SSAHA: A Fast Search",
00572 " Method for Large DNA Databases. Submitted to Genome Research.\n",
00573 "@"
00574 };
00575 
00576 
00577 
00578 
00579 
00580 // End of include guard:
00581 #endif
00582 
00583 // End of file SSAHAMain.h
00584 
00585 
00586 
00587 
00588 
00589 

Generated on Fri Dec 21 13:12:15 2007 for ssaha by  doxygen 1.5.2