00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033 #include "GenerateTestFastaFiles.h"
00034 #include <iostream>
00035 #include <fstream>
00036 #include <stdlib.h>
00037
00038
00039
00040
00041
00042
00043
00044
00045 BaseGenerator::BaseGenerator( int numBases, int seed )
00046 : numBases_( numBases )
00047 {
00048 const char baseNames[] = "AGCT";
00049
00050 if ( seed != -9999 ) srand(seed);
00051 bases_.reserve( numBases_ );
00052 int base;
00053
00054 for ( int i(0) ; i < numBases_; i++ )
00055 {
00056 base = rand() & 0x3;
00057 bases_ += baseNames[base];
00058 }
00059 }
00060
00061
00062
00063
00064
00065
00066
00067
00068 void BaseGenerator::generateSubjectFile
00069 ( int seqSize, int basesOverlap, const char* fileName )
00070 {
00071 ofstream subjectFile(fileName,ios::out);
00072
00073 int numSeqs = 1 + ( numBases_ - seqSize ) / ( seqSize - basesOverlap );
00074 int seqSpacing = seqSize - basesOverlap;
00075
00076 cout << "Num seqs:" << numSeqs << endl;
00077
00078 for ( int i(0) ; i < numSeqs ; i ++ )
00079 {
00080
00081 subjectFile << ">SubjectSequence_" << i+1 << " - bases "
00082 << ( i * seqSpacing ) << " to "
00083 << ( i * seqSpacing ) + seqSize - 1
00084 << endl;
00085
00086 subjectFile << bases_.substr( (i*seqSpacing), seqSize ) << endl;
00087
00088 subjectFile << endl;
00089
00090 }
00091
00092 }
00093
00094
00095
00096
00097
00098
00099 void BaseGenerator::generateSubjectFileFastq
00100 ( int seqSize, int basesOverlap, const char* fileName )
00101 {
00102 ofstream subjectFile(fileName,ios::out);
00103
00104 int numSeqs = 1 + ( numBases_ - seqSize ) / ( seqSize - basesOverlap );
00105 int seqSpacing = seqSize - basesOverlap;
00106
00107 cout << "Num seqs:" << numSeqs << endl;
00108
00109 for ( int i(0) ; i < numSeqs ; i ++ )
00110 {
00111
00112 subjectFile << "@SubjectSequence_" << i+1 << " - bases "
00113 << ( i * seqSpacing ) << " to "
00114 << ( i * seqSpacing ) + seqSize - 1
00115 << endl;
00116
00117 subjectFile << bases_.substr( (i*seqSpacing), seqSize ) << endl;
00118
00119 subjectFile << "+SubjectSequence_" << i+1 << " - bases "
00120 << ( i * seqSpacing ) << " to "
00121 << ( i * seqSpacing ) + seqSize - 1
00122 << endl;
00123
00124 for ( int j(0) ; j < seqSize ; j++ ) subjectFile << "99 ";
00125 subjectFile << endl;
00126
00127
00128 }
00129
00130 }
00131
00132
00133
00134
00135
00136
00137 void BaseGenerator::generateQueryFile
00138 ( int baseStart, int seqSize, const char* fileName )
00139 {
00140
00141 if ( baseStart + seqSize > numBases_ )
00142 {
00143 cerr << "Error: requested query sequence exceeds total data size" << endl;
00144 return;
00145 }
00146
00147 ofstream queryFile(fileName,ios::out);
00148 queryFile << ">QuerySequence "
00149 << baseStart << " to "
00150 << baseStart + seqSize << endl;
00151
00152 queryFile << bases_.substr( baseStart, seqSize );
00153
00154 queryFile << endl;
00155
00156
00157
00158 }
00159
00160 void BaseGenerator::getBases
00161 ( int baseStart, int seqSize, string& outputString )
00162 {
00163 if ( baseStart + seqSize > numBases_ )
00164 {
00165 cerr << "Error: requested query sequence exceeds total data size" << endl;
00166 return;
00167 }
00168
00169 outputString = bases_.substr( baseStart, seqSize );
00170
00171 }
00172
00173
00174
00175
00176
00177
00178