Slice Tools
  • Home
  • SourceForge Page


  • libSlice
  • Home
  • Modules
  • Files
  • Examples
  • Additional Information


  • SourceForge.net Logo
     

    getConsQC.c

    Go to the documentation of this file.
    00001 // $Id: getConsQC.c,v 1.1 2005/07/29 02:55:17 mschatz Exp $
    00002 
    00013 #include <ctype.h>
    00014 #include <stdio.h>
    00015 #include <math.h>
    00016 #include <stdlib.h>
    00017 
    00018 #include "Slice.h"
    00019 
    00020 
    00021 // Bit Flags
    00022 // Quality in both directions
    00023 #define QUALITY_OTHER_STRAND     0x1  
    00024 
    00025 // No conflicts with existing consensus
    00026 #define NO_CONFLICT              0x2  
    00027 
    00028 // Quality in at least one direction
    00029 #define QUALITY_STRAND           0x4  
    00030 
    00031 // Low quality read in other direction
    00032 #define NON_QUALITY_OTHER_STRAND 0x8  
    00033 
    00034 // More than one in quality direction
    00035 #define QUALITY_SAME_STRAND      0x10 
    00036 
    00037 // Non quality read in high or 2 low reads
    00038 #define NON_QUALITY_SAME_STRAND  0x20 
    00039 
    00040 // Conflict with low quality read
    00041 #define NON_QUALITY_CONFLICT     0x40 
    00042 
    00043 // Only one read
    00044 #define SINGLE_COVERAGE          0x80 
    00045 
    00046 // Low quality read in same direction
    00047 #define NON_QUALITY_STRAND       0x100
    00048 
    00049 // High quality conflict
    00050 #define QUALITY_CONFLICT         0x200
    00051 
    00052 // Ambigious existing consensus
    00053 #define AMBIGUITY                0x400
    00054 
    00055 // for (NON_QUALITY_SAME_STRAND || QUALITY_SAME_STRAND)
    00056 #define ANY_SAME_STRAND          0x800
    00057 
    00058 // Consensus is a gap
    00059 #define GAP_CONSENSUS            0x1000
    00060 
    00061 // Depth of Coverage == 0
    00062 #define DEPTHZERO                0x2000
    00063 
    00064 #define FORWARD                  0
    00065 #define REVERSE                  1
    00066 
    00067 // Quality classes
    00068 #define QUALITY_CLASS_0     0  
    00069 #define QUALITY_CLASS_1     1  
    00070 #define QUALITY_CLASS_2     2 
    00071 #define QUALITY_CLASS_3     3
    00072 #define QUALITY_CLASS_4     4
    00073 #define QUALITY_CLASS_5     5
    00074 #define QUALITY_CLASS_6     6
    00075 #define QUALITY_CLASS_7     7
    00076 #define QUALITY_CLASS_8     8
    00077 #define QUALITY_CLASS_9     9
    00078 #define QUALITY_CLASS_10    10
    00079 #define QUALITY_CLASS_11    11
    00080 #define QUALITY_CLASS_12    12
    00081 #define QUALITY_CLASS_13    13
    00082 #define QUALITY_CLASS_14    14
    00083 #define QUALITY_CLASS_15    15
    00084 #define QUALITY_CLASS_16    16
    00085 #define QUALITY_CLASS_17    17
    00086 #define QUALITY_CLASS_18    18
    00087 #define QUALITY_CLASS_19    19
    00088 #define QUALITY_CLASS_20    20
    00089 #define QUALITY_CLASS_21    21
    00090 #define QUALITY_CLASS_22    22
    00091 #define QUALITY_CLASS_23    23
    00092 
    00093 #define QUALITY_CLASS_ERROR 0
    00094 
    00095 
    00099 char QUALITY_CLASS(int x)
    00100 {
    00101   // Set the AND_SAME_STRAND bit if either of the same values are true
    00102   if ((x & NON_QUALITY_SAME_STRAND) || (x & QUALITY_SAME_STRAND))
    00103   {
    00104     x += ANY_SAME_STRAND;
    00105   }
    00106 
    00107   if (x & DEPTHZERO)
    00108   {
    00109     return QUALITY_CLASS_0;
    00110   }
    00111   else if ((x & GAP_CONSENSUS) &&  
    00112            (x & NO_CONFLICT))
    00113   {
    00114     return QUALITY_CLASS_21;
    00115   }
    00116   else if ((x & NO_CONFLICT) && 
    00117            (x & QUALITY_OTHER_STRAND))
    00118   {
    00119     return QUALITY_CLASS_1;
    00120   }
    00121   else if ((x & NO_CONFLICT) && 
    00122            (x & QUALITY_STRAND) && 
    00123            (x & NON_QUALITY_OTHER_STRAND))
    00124   {
    00125     return QUALITY_CLASS_2;
    00126   }
    00127   else if ((x & NO_CONFLICT) &&
    00128            (x & QUALITY_STRAND) &&
    00129            (x & ANY_SAME_STRAND))
    00130   {
    00131     return QUALITY_CLASS_3;
    00132   }
    00133   else if ((x & NO_CONFLICT) &&
    00134            (x & NON_QUALITY_OTHER_STRAND))
    00135   {
    00136     return QUALITY_CLASS_4;
    00137   }
    00138   else if ((x & NO_CONFLICT) &&
    00139            (x & NON_QUALITY_SAME_STRAND))
    00140   {
    00141     return QUALITY_CLASS_5;
    00142   }
    00143   else if ((x & NON_QUALITY_CONFLICT) &&
    00144            (x & QUALITY_OTHER_STRAND))
    00145   {
    00146     return QUALITY_CLASS_6;
    00147   }
    00148   else if ((x & NON_QUALITY_CONFLICT) &&
    00149            (x & QUALITY_STRAND) &&
    00150            (x & NON_QUALITY_OTHER_STRAND))
    00151   {
    00152     return QUALITY_CLASS_7;
    00153   }
    00154   else if ((x & NON_QUALITY_CONFLICT) &&
    00155            (x & QUALITY_STRAND) &&
    00156            (x & ANY_SAME_STRAND))
    00157   {
    00158     return QUALITY_CLASS_8;
    00159   }
    00160   else if ((x & NO_CONFLICT) &&
    00161            (x & QUALITY_STRAND) &&
    00162            (x & SINGLE_COVERAGE))
    00163   {
    00164     return QUALITY_CLASS_9;
    00165   }
    00166   else if ((x & NON_QUALITY_CONFLICT) &&
    00167            (x & NON_QUALITY_OTHER_STRAND))
    00168   {
    00169     return QUALITY_CLASS_10;
    00170   }
    00171   else if ((x & NO_CONFLICT) &&
    00172            (x & NON_QUALITY_STRAND) &&
    00173            (x & SINGLE_COVERAGE))
    00174   {
    00175     return QUALITY_CLASS_11;
    00176   }
    00177   else if ((x & NON_QUALITY_CONFLICT) &&
    00178            (x & NON_QUALITY_SAME_STRAND))
    00179   {
    00180     return QUALITY_CLASS_12;
    00181   }
    00182   else if ((x & NON_QUALITY_CONFLICT) &&
    00183            (x & QUALITY_STRAND) &&
    00184            (x & SINGLE_COVERAGE))
    00185   {
    00186     return QUALITY_CLASS_13;
    00187   }
    00188   else if ((x & NON_QUALITY_CONFLICT) &&
    00189            (x & NON_QUALITY_STRAND) &&
    00190            (x & SINGLE_COVERAGE))
    00191   {
    00192     return QUALITY_CLASS_14;
    00193   }
    00194   else if ((x & QUALITY_CONFLICT) &&
    00195            (x & QUALITY_OTHER_STRAND))
    00196   {
    00197     return QUALITY_CLASS_15;
    00198   }
    00199   else if ((x & QUALITY_CONFLICT) &&
    00200            (x & QUALITY_STRAND) &&
    00201            (x & NON_QUALITY_OTHER_STRAND))
    00202   {
    00203     return QUALITY_CLASS_16;
    00204   }
    00205   else if ((x & QUALITY_CONFLICT) &&
    00206            (x & QUALITY_STRAND) &&
    00207            (x & ANY_SAME_STRAND))
    00208   {
    00209     return QUALITY_CLASS_17;
    00210   }
    00211   else if ((x & QUALITY_CONFLICT) &&
    00212            (x & NON_QUALITY_OTHER_STRAND))
    00213   {
    00214     return QUALITY_CLASS_18;
    00215   }
    00216   else if ((x & QUALITY_CONFLICT) &&
    00217            (x & NON_QUALITY_SAME_STRAND))
    00218   {
    00219     return QUALITY_CLASS_19;
    00220   }
    00221   else if ((x & QUALITY_CONFLICT) &&
    00222            (x & QUALITY_STRAND) &&
    00223            (x & SINGLE_COVERAGE))
    00224   {
    00225     return QUALITY_CLASS_20;
    00226   }
    00227   else if ((x & AMBIGUITY) &&
    00228            (x & SINGLE_COVERAGE))
    00229   {
    00230     return QUALITY_CLASS_21;
    00231   }
    00232   else if (x & AMBIGUITY)
    00233   {
    00234     return QUALITY_CLASS_22;
    00235   }
    00236   else if ((x & QUALITY_CONFLICT) &&
    00237            (x & NON_QUALITY_STRAND) &&
    00238            (x & SINGLE_COVERAGE))
    00239   {
    00240     return QUALITY_CLASS_23;
    00241   }
    00242   else if ((x & (QUALITY_STRAND | NON_QUALITY_STRAND)) == 0)
    00243   {
    00244     // No reads in agreement with the consensus
    00245     return QUALITY_CLASS_23;
    00246   }
    00247 
    00248   return QUALITY_CLASS_ERROR;
    00249 }
    00250 
    00251 
    00253 
    00261 char libSlice_getConsQC(const libSlice_Slice *s, int highQualityThreshold)
    00262 {
    00263   // Initialize Counters index 0 = Forward strand, 1 = Reverse strand
    00264   int HQStrand[2]              = {0,0};  
    00265   int LQStrand[2]              = {0,0};
    00266   int HQStrandDisagree[2]      = {0,0};
    00267   int LQStrandDisagree[2]      = {0,0};
    00268 
    00269   // Counters
    00270   int nonHomogCount = 0; // number of bases in conflict with consensus
    00271   int homogCount    = 0; // number of bases in agreement
    00272 
    00273   char existing_consensus  = s->c;
    00274   int class = 0;
    00275 
    00276   // Initialize Flags
    00277   int gap_consensus            = 0;
    00278   int single_coverage          = 0;
    00279   int no_conflict              = 1; 
    00280 
    00281   int quality_strand           = 0; 
    00282   int quality_same_strand      = 0; 
    00283   int quality_other_strand     = 0; 
    00284   int quality_conflict         = 0; 
    00285   int non_quality_strand       = 0; 
    00286   int non_quality_same_strand  = 0; 
    00287   int non_quality_other_strand = 0; 
    00288   int non_quality_conflict     = 0; 
    00289   int ambiguity                = 0;
    00290   int depthzero                = 0;
    00291   int i;
    00292 
    00293   if (highQualityThreshold == 0)
    00294   {
    00295     highQualityThreshold = HIGH_QUALITY_THRESHOLD;
    00296   }
    00297 
    00298   depthzero = (s->dcov == 0);
    00299   
    00300   // Loop through each base call to compute counts of high/low quality
    00301   for(i = 0; i < s->dcov; i++)
    00302   {
    00303     int rc        = s->rc[i];
    00304     int qual      = s->qv[i];          // base call quality value
    00305     char basecall = toupper(s->bc[i]); // base call value
    00306 
    00307     if (basecall == existing_consensus)
    00308     {
    00309       homogCount++;
    00310       if (qual >= highQualityThreshold)
    00311       { 
    00312         HQStrand[rc]++;
    00313       }
    00314       else
    00315       {
    00316         LQStrand[rc]++;
    00317       }
    00318     }
    00319     else
    00320     {
    00321       nonHomogCount++;
    00322       no_conflict = 0;
    00323 
    00324       if (qual >= highQualityThreshold)
    00325       { 
    00326         HQStrandDisagree[rc]++;
    00327       }
    00328       else
    00329       {
    00330         LQStrandDisagree[rc]++;
    00331       }
    00332     }
    00333   }
    00334 
    00335   if (homogCount == 1)
    00336   {
    00337     single_coverage = 1;
    00338   }
    00339 
    00340   // Compute additional properties for quality classes.
    00341   if (existing_consensus == 'A' || existing_consensus == 'T' || 
    00342       existing_consensus == 'G' || existing_consensus == 'C' ||
    00343       existing_consensus == '-')
    00344   {
    00345     gap_consensus = (existing_consensus == '-');
    00346 
    00347     quality_strand       = HQStrand[FORWARD] || HQStrand[REVERSE];
    00348     quality_other_strand = HQStrand[FORWARD] && HQStrand[REVERSE];
    00349 
    00350     quality_same_strand  = HQStrand[FORWARD] > 1 || HQStrand[REVERSE] > 1;
    00351 
    00352     quality_conflict     = HQStrandDisagree[FORWARD]||HQStrandDisagree[REVERSE];
    00353 
    00354     non_quality_conflict = LQStrandDisagree[FORWARD]||LQStrandDisagree[REVERSE];
    00355 
    00356     if (quality_strand)
    00357     {
    00358       int strand               = HQStrand[FORWARD] < HQStrand[REVERSE];
    00359       int other_strand         = !strand;
    00360 
    00361       non_quality_strand       = LQStrand[strand] > 0;
    00362       non_quality_same_strand  = LQStrand[strand] > 0;
    00363       non_quality_other_strand = LQStrand[other_strand] > 0;
    00364     }
    00365     else
    00366     {
    00367       non_quality_strand       = LQStrand[FORWARD] || LQStrand[REVERSE];
    00368       non_quality_other_strand = LQStrand[FORWARD] && LQStrand[REVERSE];
    00369 
    00370       non_quality_same_strand  = LQStrand[FORWARD] > 1 || LQStrand[REVERSE] > 1;
    00371     }
    00372   }
    00373   else
    00374   {
    00375     ambiguity = 1;
    00376   }
    00377 
    00378   if (quality_conflict)
    00379   {
    00380     // This ensures that the two are mutually exclusive, with preference
    00381     // towards quality_conflict.
    00382     non_quality_conflict = 0;
    00383   }
    00384 
    00385 
    00386   // Compute the quality class
    00387   class += (no_conflict              * NO_CONFLICT);
    00388   class += (quality_strand           * QUALITY_STRAND);
    00389   class += (quality_same_strand      * QUALITY_SAME_STRAND);
    00390   class += (quality_other_strand     * QUALITY_OTHER_STRAND);
    00391   class += (quality_conflict         * QUALITY_CONFLICT);
    00392   class += (non_quality_strand       * NON_QUALITY_STRAND);
    00393   class += (non_quality_same_strand  * NON_QUALITY_SAME_STRAND);
    00394   class += (non_quality_other_strand * NON_QUALITY_OTHER_STRAND);
    00395   class += (non_quality_conflict     * NON_QUALITY_CONFLICT);
    00396   class += (single_coverage          * SINGLE_COVERAGE);
    00397   class += (ambiguity                * AMBIGUITY);
    00398   class += (gap_consensus            * GAP_CONSENSUS);
    00399   class += (depthzero                * DEPTHZERO);
    00400   
    00401   return QUALITY_CLASS(class);
    00402 }
    00403 
    00404 
    00406 
    00415 char * libSlice_getConsQCRange(const libSlice_Slice s[], 
    00416                                int len, 
    00417                                int highQualityThreshold)
    00418 {
    00419   int i;
    00420   char *cqc = libSlice_newmem(len+1,sizeof(char));
    00421   cqc[len] = '\0';
    00422 
    00423   for(i = 0; i < len; i++)
    00424   {
    00425     cqc[i] = libSlice_getConsQC(&s[i], highQualityThreshold);
    00426   }
    00427 
    00428   return cqc;
    00429 }