SequenceReader/GenerateTestFastaFiles.cpp

Go to the documentation of this file.
00001 
00002 // #######################################################################
00003 
00004 // SSAHA : Sequence Search and Alignment by Hashing Algorithm
00005 // Version 3.2, released 1st March 2004
00006 // Copyright (c) Genome Research 2002
00007 
00008 // SSAHA is free software; you can redistribute it and/or modify 
00009 // it under the terms of version 2 of the GNU General Public Licence
00010 // as published by the Free Software Foundation.
00011  
00012 // This program is distributed in the hope that it will be useful,
00013 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00014 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015 // GNU General Public Licence for more details.
00016  
00017 // You should have received a copy of the GNU General Public Licence
00018 // along with this program; if not, write to the Free Software
00019 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
00020 // or see the on-line version at http://www.gnu.org/copyleft/gpl.txt
00021 
00022 // #######################################################################
00023 
00024 // Module Name  : GenerateTestFastaFiles
00025 // File Name    : GenerateTestFastaFiles.cpp
00026 // Language     : C++
00027 // Module Author: Anthony J. Cox (ac2@sanger.ac.uk)
00028 
00029 // Description:
00030 
00031 // Includes:
00032 
00033 #include "GenerateTestFastaFiles.h"
00034 #include <iostream>
00035 #include <fstream>
00036 #include <stdlib.h>
00037 
00038 // ### Function Definitions ###
00039 
00040 // Name:
00041 // Arguments:
00042 // TYPE  NAME  IN/OUT COMMENT
00043 // Returns: TYPE COMMENT
00044 
00045 BaseGenerator::BaseGenerator( int numBases, int seed ) 
00046 : numBases_( numBases )
00047 {
00048   const char baseNames[] = "AGCT";
00049 
00050   if ( seed != -9999 ) srand(seed);
00051   bases_.reserve( numBases_ );
00052   int base;
00053 
00054   for ( int i(0) ; i < numBases_; i++ )
00055   {
00056     base = rand() & 0x3;
00057     bases_ += baseNames[base];
00058   } // ~for i
00059 } // constructor
00060 
00061 
00062 
00063 // Name:
00064 // Arguments:
00065 // TYPE  NAME  IN/OUT COMMENT
00066 // Returns: TYPE COMMENT
00067 
00068 void BaseGenerator::generateSubjectFile
00069 (  int seqSize, int basesOverlap, const char* fileName )
00070 {
00071   ofstream subjectFile(fileName,ios::out);
00072 
00073   int numSeqs = 1 + ( numBases_ - seqSize ) / ( seqSize - basesOverlap );
00074   int seqSpacing = seqSize - basesOverlap;
00075 
00076   cout << "Num seqs:" << numSeqs << endl;
00077 
00078   for ( int i(0) ; i < numSeqs  ; i ++ )
00079   {
00080 
00081     subjectFile << ">SubjectSequence_" << i+1 << " - bases "
00082                 << ( i * seqSpacing ) << " to "
00083                 << ( i * seqSpacing ) + seqSize - 1
00084                 << endl;
00085 
00086     subjectFile << bases_.substr( (i*seqSpacing), seqSize ) << endl;
00087 
00088     subjectFile << endl;
00089 
00090   } // ~for i
00091     
00092 } // ~BaseGenerator::generateSubjectFile
00093 
00094 // Name:
00095 // Arguments:
00096 // TYPE  NAME  IN/OUT COMMENT
00097 // Returns: TYPE COMMENT
00098 
00099 void BaseGenerator::generateSubjectFileFastq
00100 (  int seqSize, int basesOverlap, const char* fileName )
00101 {
00102   ofstream subjectFile(fileName,ios::out);
00103 
00104   int numSeqs = 1 + ( numBases_ - seqSize ) / ( seqSize - basesOverlap );
00105   int seqSpacing = seqSize - basesOverlap;
00106 
00107   cout << "Num seqs:" << numSeqs << endl;
00108 
00109   for ( int i(0) ; i < numSeqs  ; i ++ )
00110   {
00111 
00112     subjectFile << "@SubjectSequence_" << i+1 << " - bases "
00113                 << ( i * seqSpacing ) << " to "
00114                 << ( i * seqSpacing ) + seqSize - 1
00115                 << endl;
00116 
00117     subjectFile << bases_.substr( (i*seqSpacing), seqSize ) << endl;
00118 
00119     subjectFile << "+SubjectSequence_" << i+1 << " - bases "
00120                 << ( i * seqSpacing ) << " to "
00121                 << ( i * seqSpacing ) + seqSize - 1
00122                 << endl;
00123 
00124     for ( int j(0) ; j < seqSize ; j++ ) subjectFile << "99 ";
00125     subjectFile << endl;
00126     
00127 
00128   } // ~for i
00129     
00130 } // ~BaseGenerator::generateSubjectFileFastq
00131 
00132 // Name:
00133 // Arguments:
00134 // TYPE  NAME  IN/OUT COMMENT
00135 // Returns: TYPE COMMENT
00136 
00137 void BaseGenerator::generateQueryFile
00138 ( int baseStart, int seqSize, const char* fileName )
00139 {
00140 
00141   if ( baseStart + seqSize > numBases_ )
00142   {
00143     cerr << "Error: requested query sequence exceeds total data size" << endl;
00144     return;
00145   } //~if
00146 
00147   ofstream queryFile(fileName,ios::out);
00148   queryFile << ">QuerySequence "  
00149               << baseStart << " to " 
00150               << baseStart + seqSize << endl;
00151 
00152   queryFile << bases_.substr( baseStart, seqSize );
00153  
00154   queryFile << endl;
00155 
00156 
00157 
00158 } // ~BaseGenerator::generateQueryFile
00159 
00160 void BaseGenerator::getBases
00161 ( int baseStart, int seqSize, string& outputString )
00162 {
00163   if ( baseStart + seqSize > numBases_ )
00164   {
00165     cerr << "Error: requested query sequence exceeds total data size" << endl;
00166     return;
00167   } //~if
00168 
00169   outputString =  bases_.substr( baseStart, seqSize );
00170   
00171 }
00172 
00173 
00174 
00175 
00176 
00177 // End of file GenerateTestFastaFiles.cpp
00178 

Generated on Fri Dec 21 13:12:16 2007 for ssaha by  doxygen 1.5.2