include/statistics.h File Reference

This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  statisticalParameters

Functions

void statistics_initialize (struct PSSMatrix PSSMatrix, uint8 databaseSize, int4 numberOfSequences)
int4 statistics_ungappedNormalized2nominal (float normalizedScore)
float statistics_gappedNominal2normalized (int4 nominalScore)
double statistics_gappedCalculateEvalue (float normalizedScore)
int4 statistics_gappedEvalue2nominal (double evalue)
int4 statistics_ungappedNucleotideTrigger (struct PSSMatrix PSSMatrix)

Variables

float statistics_log2
float statistics_gappedLogK
int4 statistics_querySize
int4 statistics_effectiveQuerySize
uint8 statistics_databaseSize
uint8 statistics_effectiveDatabaseSize
uint8 statistics_searchSpaceSize
int4 statistics_lengthAdjust
float statistics_ungappedLambda
float statistics_ungappedH
float statistics_ungappedK
float statistics_ungappedLogK
struct statisticalParameters statistics_gappedParams
int4 statistics_ungappedNominalDropoff
int4 statistics_gappedNominalDropoff
int4 statistics_gappedFinalNominalDropoff


Function Documentation

double statistics_gappedCalculateEvalue ( float  normalizedScore  ) 

Definition at line 197 of file statistics.c.

References statistics_searchSpaceSize.

Referenced by fasterGappedExtension_score(), gappedExtension_score(), and print_gappedAlignmentsBrief().

00198 {
00199         return statistics_searchSpaceSize / pow(2, normalizedScore);
00200 }

Here is the caller graph for this function:

int4 statistics_gappedEvalue2nominal ( double  evalue  ) 

Definition at line 203 of file statistics.c.

References statisticalParameters::lambda, statistics_gappedLogK, statistics_gappedParams, statistics_log2, and statistics_searchSpaceSize.

Referenced by blast_search().

00204 {
00205         double normalizedScore;
00206 
00207         normalizedScore = log(statistics_searchSpaceSize / evalue) / (double)statistics_log2;
00208 
00209 //    printf("[%f,%f,%f,%f, %f]\n", normalizedScore, statistics_log2 * normalizedScore, statistics_log2 * normalizedScore + statistics_gappedLogK,(statistics_log2 * normalizedScore + statistics_gappedLogK)
00210 //             / (double)statistics_gappedParams.lambda, statistics_gappedParams.lambda);
00211 
00212         return ceil((statistics_log2 * normalizedScore + statistics_gappedLogK)
00213                / statistics_gappedParams.lambda);
00214 }

Here is the caller graph for this function:

float statistics_gappedNominal2normalized ( int4  nominalScore  ) 

Definition at line 179 of file statistics.c.

References statisticalParameters::lambda, statistics_gappedLogK, statistics_gappedParams, and statistics_log2.

Referenced by fasterGappedExtension_score(), gappedExtension_score(), and print_gappedAlignmentsBrief().

00180 {
00181         float normalizedScore;
00182 
00183         normalizedScore = (((float)nominalScore * statistics_gappedParams.lambda)
00184                     - statistics_gappedLogK) / statistics_log2;
00185 
00186         return normalizedScore;
00187 }

Here is the caller graph for this function:

void statistics_initialize ( struct PSSMatrix  PSSMatrix,
uint8  databaseSize,
int4  numberOfSequences 
)

Definition at line 45 of file statistics.c.

References statisticalParameters::alpha, statisticalParameters::beta, encoding_alphabetType, encoding_protein, statisticalParameters::H, statisticalParameters::K, statisticalParameters::lambda, PSSMatrix::length, parameters_databaseSize, parameters_extendGap, parameters_gappedFinalNormalizedDropoff, parameters_gappedNormalizedDropoff, parameters_numberOfSequences, parameters_scoringMatrix, parameters_startGap, parameters_ungappedNormalizedDropoff, statistics_calcLengthAdjust(), statistics_calcLengthAdjustNew(), statistics_calculateUngappedKarlinParameters(), statistics_databaseSize, statistics_effectiveDatabaseSize, statistics_effectiveQuerySize, statistics_gappedFinalNominalDropoff, statistics_gappedLogK, statistics_gappedNominalDropoff, statistics_gappedParams, statistics_lengthAdjust, statistics_log2, statistics_lookupPrecomputedParams(), statistics_numberOfSequences, statistics_querySize, statistics_searchSpaceSize, statistics_ungappedH, statistics_ungappedK, statistics_ungappedLambda, statistics_ungappedLogK, statistics_ungappedNominalDropoff, and PSSMatrix::strandLength.

Referenced by blast_search().

00046 {
00047         // Record the size (number of letters) of query and subject database
00048         if (PSSMatrix.strandLength == 0)
00049         statistics_querySize = PSSMatrix.length;
00050         else
00051         statistics_querySize = PSSMatrix.strandLength;
00052 
00053     // Use command-line parameter for database size if given
00054     if (parameters_databaseSize == 0)
00055                 statistics_databaseSize = databaseSize;
00056     else
00057         statistics_databaseSize = parameters_databaseSize;
00058 
00059     statistics_numberOfSequences = numberOfSequences;
00060 
00061     // Use command-line parameter for number of sequences if given
00062     if (parameters_numberOfSequences != 0)
00063         numberOfSequences = parameters_numberOfSequences;
00064 
00065     // Calculate ungapped Karlin-Altschul parameters lambda, K and H for given
00066         // query matrix, and average residue compositions for the subject database
00067         statistics_calculateUngappedKarlinParameters(PSSMatrix);
00068 
00069     if (encoding_alphabetType == encoding_protein)
00070     {
00071         // Lookup precomputed gapped lambda, K, H, alpha and beta values based on
00072         // scoring matrix and gap penalties
00073         statistics_gappedParams = statistics_lookupPrecomputedParams(
00074             parameters_scoringMatrix, parameters_startGap, parameters_extendGap);
00075         }
00076     else
00077     {
00078         // For nucleotide, use ungapped parameters for gapped alignment
00079         statistics_gappedParams.lambda = statistics_ungappedLambda;
00080         statistics_gappedParams.K = statistics_ungappedK;
00081         statistics_gappedParams.H = statistics_ungappedH;
00082         statistics_gappedParams.alpha = 0;
00083         statistics_gappedParams.beta = 0;
00084     }
00085 
00086     // Log of K (for gapped and ungapped extension) and log of 2
00087         statistics_ungappedLogK = log(statistics_ungappedK);
00088         statistics_gappedLogK = log(statistics_gappedParams.K);
00089         statistics_log2 = log(2.0);
00090 
00091         // Calculate nominal dropoff for ungapped extension using normalized dropoff
00092         statistics_ungappedNominalDropoff = ceil(parameters_ungappedNormalizedDropoff *
00093                                             statistics_log2 / statistics_ungappedLambda);
00094 
00095         // Same for initial and final gapped extension dropoffs
00096         statistics_gappedNominalDropoff = floor(parameters_gappedNormalizedDropoff *
00097                                           statistics_log2 / statistics_gappedParams.lambda);
00098         statistics_gappedFinalNominalDropoff = floor(parameters_gappedFinalNormalizedDropoff *
00099                                                statistics_log2 / statistics_gappedParams.lambda);
00100 
00101         // Calculate length adjust for query and subject sequences
00102     if (encoding_alphabetType == encoding_protein)
00103     {
00104         // Round lengthAdjust value down
00105         statistics_lengthAdjust = floor(statistics_calcLengthAdjustNew(numberOfSequences));
00106         }
00107     else
00108     {
00109             statistics_lengthAdjust = floor(statistics_calcLengthAdjust(numberOfSequences));
00110     }
00111 
00112         // Using length adjustment, calculate effective query, database length and search space
00113     statistics_effectiveQuerySize
00114                 = statistics_querySize - statistics_lengthAdjust;
00115         statistics_effectiveDatabaseSize
00116                 = statistics_databaseSize - numberOfSequences * statistics_lengthAdjust;
00117         statistics_searchSpaceSize
00118                 = statistics_effectiveQuerySize * statistics_effectiveDatabaseSize;
00119 }

Here is the call graph for this function:

Here is the caller graph for this function:

int4 statistics_ungappedNormalized2nominal ( float  normalizedScore  ) 

Definition at line 142 of file statistics.c.

References statistics_log2, statistics_ungappedLambda, and statistics_ungappedLogK.

Referenced by blast_search().

00143 {
00144         float nominalScore;
00145 
00146         nominalScore = (normalizedScore * statistics_log2 + statistics_ungappedLogK) / statistics_ungappedLambda;
00147 
00148         return floor(nominalScore);
00149 }

Here is the caller graph for this function:

int4 statistics_ungappedNucleotideTrigger ( struct PSSMatrix  PSSMatrix  ) 

Definition at line 217 of file statistics.c.

References int4, PSSMatrix::length, statistics_databaseSize, statistics_log2, statistics_numberOfSequences, statistics_ungappedLambda, and statistics_ungappedLogK.

Referenced by blast_search().

00218 {
00219         double evalue, normalizedScore;
00220         int4 averageSubjectLength;
00221 
00222     evalue = 0.025;
00223     averageSubjectLength = statistics_databaseSize / statistics_numberOfSequences;
00224 
00225         if (averageSubjectLength > PSSMatrix.length)
00226         {
00227                 normalizedScore = log(PSSMatrix.length * averageSubjectLength / evalue)
00228                                 / (double)statistics_log2;
00229         }
00230         else
00231         {
00232                 normalizedScore = log(averageSubjectLength * averageSubjectLength / evalue)
00233                                 / (double)statistics_log2;
00234         }
00235 
00236         return ceil((statistics_log2 * normalizedScore + statistics_ungappedLogK)
00237                / statistics_ungappedLambda);
00238 }

Here is the caller graph for this function:


Variable Documentation

uint8 statistics_databaseSize

Definition at line 18 of file statistics.c.

Referenced by blast_search(), print_XMLfooter(), statistics_calcLengthAdjust(), statistics_calcLengthAdjustNew(), statistics_initialize(), and statistics_ungappedNucleotideTrigger().

uint8 statistics_effectiveDatabaseSize

Definition at line 19 of file statistics.c.

Referenced by blast_search(), and statistics_initialize().

int4 statistics_effectiveQuerySize

Definition at line 17 of file statistics.c.

Referenced by blast_search(), and statistics_initialize().

int4 statistics_gappedFinalNominalDropoff

Definition at line 36 of file statistics.c.

Referenced by alignments_checkForJoin(), alignments_getTracebacks(), alignments_regularGappedAlignment(), blast_search(), gappedExtension_dpBeforeSeed(), statistics_initialize(), and unpack_getRegions().

float statistics_gappedLogK

Definition at line 13 of file statistics.c.

Referenced by statistics_calcLengthAdjustNew(), statistics_gappedEvalue2nominal(), statistics_gappedNominal2normalized(), statistics_gappedNormalized2nominal(), and statistics_initialize().

int4 statistics_gappedNominalDropoff

Definition at line 35 of file statistics.c.

Referenced by alignments_findGoodAlignments(), alignments_regularGappedAlignment(), blast_search(), and statistics_initialize().

struct statisticalParameters statistics_gappedParams

Definition at line 31 of file statistics.c.

Referenced by blast_search(), print_XMLfooter(), statistics_calcLengthAdjustNew(), statistics_gappedEvalue2nominal(), statistics_gappedNominal2normalized(), statistics_gappedNormalized2nominal(), and statistics_initialize().

int4 statistics_lengthAdjust

Definition at line 21 of file statistics.c.

Referenced by blast_search(), print_XMLfooter(), and statistics_initialize().

float statistics_log2

Definition at line 12 of file statistics.c.

Referenced by statistics_gappedEvalue2nominal(), statistics_gappedNominal2normalized(), statistics_gappedNormalized2nominal(), statistics_initialize(), statistics_ungappedEvalue2nominal(), statistics_ungappedNominal2normalized(), statistics_ungappedNormalized2nominal(), and statistics_ungappedNucleotideTrigger().

int4 statistics_querySize

Definition at line 16 of file statistics.c.

Referenced by blast_search(), statistics_calcLengthAdjust(), statistics_calcLengthAdjustNew(), statistics_calculateUngappedKarlinParameters(), and statistics_initialize().

uint8 statistics_searchSpaceSize

Definition at line 20 of file statistics.c.

Referenced by blast_search(), print_XMLfooter(), statistics_gappedCalculateEvalue(), statistics_gappedEvalue2nominal(), statistics_initialize(), statistics_ungappedCalculateEvalue(), and statistics_ungappedEvalue2nominal().

float statistics_ungappedH

Definition at line 26 of file statistics.c.

Referenced by blast_search(), statistics_calcLengthAdjust(), statistics_calculateUngappedKarlinParameters(), and statistics_initialize().

float statistics_ungappedK

Definition at line 27 of file statistics.c.

Referenced by blast_search(), statistics_calcLengthAdjust(), statistics_calculateUngappedKarlinParameters(), and statistics_initialize().

float statistics_ungappedLambda

Definition at line 25 of file statistics.c.

Referenced by blast_search(), statistics_calculateUngappedKarlinParameters(), statistics_initialize(), statistics_ungappedEvalue2nominal(), statistics_ungappedNominal2normalized(), statistics_ungappedNormalized2nominal(), and statistics_ungappedNucleotideTrigger().

float statistics_ungappedLogK

Definition at line 28 of file statistics.c.

Referenced by statistics_calcLengthAdjust(), statistics_initialize(), statistics_ungappedEvalue2nominal(), statistics_ungappedNominal2normalized(), statistics_ungappedNormalized2nominal(), and statistics_ungappedNucleotideTrigger().

int4 statistics_ungappedNominalDropoff

Definition at line 34 of file statistics.c.

Referenced by blast_search(), statistics_initialize(), ungappedExtension_extend(), ungappedExtension_nucleotideExtend(), and ungappedExtension_oneHitExtend().


Generated on Wed Dec 19 20:52:17 2007 for fsa-blast by  doxygen 1.5.2