Slice Tools libSlice |
getConsQC.cGo to the documentation of this file.00001 // $Id: getConsQC.c,v 1.1 2005/07/29 02:55:17 mschatz Exp $ 00002 00013 #include <ctype.h> 00014 #include <stdio.h> 00015 #include <math.h> 00016 #include <stdlib.h> 00017 00018 #include "Slice.h" 00019 00020 00021 // Bit Flags 00022 // Quality in both directions 00023 #define QUALITY_OTHER_STRAND 0x1 00024 00025 // No conflicts with existing consensus 00026 #define NO_CONFLICT 0x2 00027 00028 // Quality in at least one direction 00029 #define QUALITY_STRAND 0x4 00030 00031 // Low quality read in other direction 00032 #define NON_QUALITY_OTHER_STRAND 0x8 00033 00034 // More than one in quality direction 00035 #define QUALITY_SAME_STRAND 0x10 00036 00037 // Non quality read in high or 2 low reads 00038 #define NON_QUALITY_SAME_STRAND 0x20 00039 00040 // Conflict with low quality read 00041 #define NON_QUALITY_CONFLICT 0x40 00042 00043 // Only one read 00044 #define SINGLE_COVERAGE 0x80 00045 00046 // Low quality read in same direction 00047 #define NON_QUALITY_STRAND 0x100 00048 00049 // High quality conflict 00050 #define QUALITY_CONFLICT 0x200 00051 00052 // Ambigious existing consensus 00053 #define AMBIGUITY 0x400 00054 00055 // for (NON_QUALITY_SAME_STRAND || QUALITY_SAME_STRAND) 00056 #define ANY_SAME_STRAND 0x800 00057 00058 // Consensus is a gap 00059 #define GAP_CONSENSUS 0x1000 00060 00061 // Depth of Coverage == 0 00062 #define DEPTHZERO 0x2000 00063 00064 #define FORWARD 0 00065 #define REVERSE 1 00066 00067 // Quality classes 00068 #define QUALITY_CLASS_0 0 00069 #define QUALITY_CLASS_1 1 00070 #define QUALITY_CLASS_2 2 00071 #define QUALITY_CLASS_3 3 00072 #define QUALITY_CLASS_4 4 00073 #define QUALITY_CLASS_5 5 00074 #define QUALITY_CLASS_6 6 00075 #define QUALITY_CLASS_7 7 00076 #define QUALITY_CLASS_8 8 00077 #define QUALITY_CLASS_9 9 00078 #define QUALITY_CLASS_10 10 00079 #define QUALITY_CLASS_11 11 00080 #define QUALITY_CLASS_12 12 00081 #define QUALITY_CLASS_13 13 00082 #define QUALITY_CLASS_14 14 00083 #define QUALITY_CLASS_15 15 00084 #define QUALITY_CLASS_16 16 00085 #define QUALITY_CLASS_17 17 00086 #define QUALITY_CLASS_18 18 00087 #define QUALITY_CLASS_19 19 00088 #define QUALITY_CLASS_20 20 00089 #define QUALITY_CLASS_21 21 00090 #define QUALITY_CLASS_22 22 00091 #define QUALITY_CLASS_23 23 00092 00093 #define QUALITY_CLASS_ERROR 0 00094 00095 00099 char QUALITY_CLASS(int x) 00100 { 00101 // Set the AND_SAME_STRAND bit if either of the same values are true 00102 if ((x & NON_QUALITY_SAME_STRAND) || (x & QUALITY_SAME_STRAND)) 00103 { 00104 x += ANY_SAME_STRAND; 00105 } 00106 00107 if (x & DEPTHZERO) 00108 { 00109 return QUALITY_CLASS_0; 00110 } 00111 else if ((x & GAP_CONSENSUS) && 00112 (x & NO_CONFLICT)) 00113 { 00114 return QUALITY_CLASS_21; 00115 } 00116 else if ((x & NO_CONFLICT) && 00117 (x & QUALITY_OTHER_STRAND)) 00118 { 00119 return QUALITY_CLASS_1; 00120 } 00121 else if ((x & NO_CONFLICT) && 00122 (x & QUALITY_STRAND) && 00123 (x & NON_QUALITY_OTHER_STRAND)) 00124 { 00125 return QUALITY_CLASS_2; 00126 } 00127 else if ((x & NO_CONFLICT) && 00128 (x & QUALITY_STRAND) && 00129 (x & ANY_SAME_STRAND)) 00130 { 00131 return QUALITY_CLASS_3; 00132 } 00133 else if ((x & NO_CONFLICT) && 00134 (x & NON_QUALITY_OTHER_STRAND)) 00135 { 00136 return QUALITY_CLASS_4; 00137 } 00138 else if ((x & NO_CONFLICT) && 00139 (x & NON_QUALITY_SAME_STRAND)) 00140 { 00141 return QUALITY_CLASS_5; 00142 } 00143 else if ((x & NON_QUALITY_CONFLICT) && 00144 (x & QUALITY_OTHER_STRAND)) 00145 { 00146 return QUALITY_CLASS_6; 00147 } 00148 else if ((x & NON_QUALITY_CONFLICT) && 00149 (x & QUALITY_STRAND) && 00150 (x & NON_QUALITY_OTHER_STRAND)) 00151 { 00152 return QUALITY_CLASS_7; 00153 } 00154 else if ((x & NON_QUALITY_CONFLICT) && 00155 (x & QUALITY_STRAND) && 00156 (x & ANY_SAME_STRAND)) 00157 { 00158 return QUALITY_CLASS_8; 00159 } 00160 else if ((x & NO_CONFLICT) && 00161 (x & QUALITY_STRAND) && 00162 (x & SINGLE_COVERAGE)) 00163 { 00164 return QUALITY_CLASS_9; 00165 } 00166 else if ((x & NON_QUALITY_CONFLICT) && 00167 (x & NON_QUALITY_OTHER_STRAND)) 00168 { 00169 return QUALITY_CLASS_10; 00170 } 00171 else if ((x & NO_CONFLICT) && 00172 (x & NON_QUALITY_STRAND) && 00173 (x & SINGLE_COVERAGE)) 00174 { 00175 return QUALITY_CLASS_11; 00176 } 00177 else if ((x & NON_QUALITY_CONFLICT) && 00178 (x & NON_QUALITY_SAME_STRAND)) 00179 { 00180 return QUALITY_CLASS_12; 00181 } 00182 else if ((x & NON_QUALITY_CONFLICT) && 00183 (x & QUALITY_STRAND) && 00184 (x & SINGLE_COVERAGE)) 00185 { 00186 return QUALITY_CLASS_13; 00187 } 00188 else if ((x & NON_QUALITY_CONFLICT) && 00189 (x & NON_QUALITY_STRAND) && 00190 (x & SINGLE_COVERAGE)) 00191 { 00192 return QUALITY_CLASS_14; 00193 } 00194 else if ((x & QUALITY_CONFLICT) && 00195 (x & QUALITY_OTHER_STRAND)) 00196 { 00197 return QUALITY_CLASS_15; 00198 } 00199 else if ((x & QUALITY_CONFLICT) && 00200 (x & QUALITY_STRAND) && 00201 (x & NON_QUALITY_OTHER_STRAND)) 00202 { 00203 return QUALITY_CLASS_16; 00204 } 00205 else if ((x & QUALITY_CONFLICT) && 00206 (x & QUALITY_STRAND) && 00207 (x & ANY_SAME_STRAND)) 00208 { 00209 return QUALITY_CLASS_17; 00210 } 00211 else if ((x & QUALITY_CONFLICT) && 00212 (x & NON_QUALITY_OTHER_STRAND)) 00213 { 00214 return QUALITY_CLASS_18; 00215 } 00216 else if ((x & QUALITY_CONFLICT) && 00217 (x & NON_QUALITY_SAME_STRAND)) 00218 { 00219 return QUALITY_CLASS_19; 00220 } 00221 else if ((x & QUALITY_CONFLICT) && 00222 (x & QUALITY_STRAND) && 00223 (x & SINGLE_COVERAGE)) 00224 { 00225 return QUALITY_CLASS_20; 00226 } 00227 else if ((x & AMBIGUITY) && 00228 (x & SINGLE_COVERAGE)) 00229 { 00230 return QUALITY_CLASS_21; 00231 } 00232 else if (x & AMBIGUITY) 00233 { 00234 return QUALITY_CLASS_22; 00235 } 00236 else if ((x & QUALITY_CONFLICT) && 00237 (x & NON_QUALITY_STRAND) && 00238 (x & SINGLE_COVERAGE)) 00239 { 00240 return QUALITY_CLASS_23; 00241 } 00242 else if ((x & (QUALITY_STRAND | NON_QUALITY_STRAND)) == 0) 00243 { 00244 // No reads in agreement with the consensus 00245 return QUALITY_CLASS_23; 00246 } 00247 00248 return QUALITY_CLASS_ERROR; 00249 } 00250 00251 00253 00261 char libSlice_getConsQC(const libSlice_Slice *s, int highQualityThreshold) 00262 { 00263 // Initialize Counters index 0 = Forward strand, 1 = Reverse strand 00264 int HQStrand[2] = {0,0}; 00265 int LQStrand[2] = {0,0}; 00266 int HQStrandDisagree[2] = {0,0}; 00267 int LQStrandDisagree[2] = {0,0}; 00268 00269 // Counters 00270 int nonHomogCount = 0; // number of bases in conflict with consensus 00271 int homogCount = 0; // number of bases in agreement 00272 00273 char existing_consensus = s->c; 00274 int class = 0; 00275 00276 // Initialize Flags 00277 int gap_consensus = 0; 00278 int single_coverage = 0; 00279 int no_conflict = 1; 00280 00281 int quality_strand = 0; 00282 int quality_same_strand = 0; 00283 int quality_other_strand = 0; 00284 int quality_conflict = 0; 00285 int non_quality_strand = 0; 00286 int non_quality_same_strand = 0; 00287 int non_quality_other_strand = 0; 00288 int non_quality_conflict = 0; 00289 int ambiguity = 0; 00290 int depthzero = 0; 00291 int i; 00292 00293 if (highQualityThreshold == 0) 00294 { 00295 highQualityThreshold = HIGH_QUALITY_THRESHOLD; 00296 } 00297 00298 depthzero = (s->dcov == 0); 00299 00300 // Loop through each base call to compute counts of high/low quality 00301 for(i = 0; i < s->dcov; i++) 00302 { 00303 int rc = s->rc[i]; 00304 int qual = s->qv[i]; // base call quality value 00305 char basecall = toupper(s->bc[i]); // base call value 00306 00307 if (basecall == existing_consensus) 00308 { 00309 homogCount++; 00310 if (qual >= highQualityThreshold) 00311 { 00312 HQStrand[rc]++; 00313 } 00314 else 00315 { 00316 LQStrand[rc]++; 00317 } 00318 } 00319 else 00320 { 00321 nonHomogCount++; 00322 no_conflict = 0; 00323 00324 if (qual >= highQualityThreshold) 00325 { 00326 HQStrandDisagree[rc]++; 00327 } 00328 else 00329 { 00330 LQStrandDisagree[rc]++; 00331 } 00332 } 00333 } 00334 00335 if (homogCount == 1) 00336 { 00337 single_coverage = 1; 00338 } 00339 00340 // Compute additional properties for quality classes. 00341 if (existing_consensus == 'A' || existing_consensus == 'T' || 00342 existing_consensus == 'G' || existing_consensus == 'C' || 00343 existing_consensus == '-') 00344 { 00345 gap_consensus = (existing_consensus == '-'); 00346 00347 quality_strand = HQStrand[FORWARD] || HQStrand[REVERSE]; 00348 quality_other_strand = HQStrand[FORWARD] && HQStrand[REVERSE]; 00349 00350 quality_same_strand = HQStrand[FORWARD] > 1 || HQStrand[REVERSE] > 1; 00351 00352 quality_conflict = HQStrandDisagree[FORWARD]||HQStrandDisagree[REVERSE]; 00353 00354 non_quality_conflict = LQStrandDisagree[FORWARD]||LQStrandDisagree[REVERSE]; 00355 00356 if (quality_strand) 00357 { 00358 int strand = HQStrand[FORWARD] < HQStrand[REVERSE]; 00359 int other_strand = !strand; 00360 00361 non_quality_strand = LQStrand[strand] > 0; 00362 non_quality_same_strand = LQStrand[strand] > 0; 00363 non_quality_other_strand = LQStrand[other_strand] > 0; 00364 } 00365 else 00366 { 00367 non_quality_strand = LQStrand[FORWARD] || LQStrand[REVERSE]; 00368 non_quality_other_strand = LQStrand[FORWARD] && LQStrand[REVERSE]; 00369 00370 non_quality_same_strand = LQStrand[FORWARD] > 1 || LQStrand[REVERSE] > 1; 00371 } 00372 } 00373 else 00374 { 00375 ambiguity = 1; 00376 } 00377 00378 if (quality_conflict) 00379 { 00380 // This ensures that the two are mutually exclusive, with preference 00381 // towards quality_conflict. 00382 non_quality_conflict = 0; 00383 } 00384 00385 00386 // Compute the quality class 00387 class += (no_conflict * NO_CONFLICT); 00388 class += (quality_strand * QUALITY_STRAND); 00389 class += (quality_same_strand * QUALITY_SAME_STRAND); 00390 class += (quality_other_strand * QUALITY_OTHER_STRAND); 00391 class += (quality_conflict * QUALITY_CONFLICT); 00392 class += (non_quality_strand * NON_QUALITY_STRAND); 00393 class += (non_quality_same_strand * NON_QUALITY_SAME_STRAND); 00394 class += (non_quality_other_strand * NON_QUALITY_OTHER_STRAND); 00395 class += (non_quality_conflict * NON_QUALITY_CONFLICT); 00396 class += (single_coverage * SINGLE_COVERAGE); 00397 class += (ambiguity * AMBIGUITY); 00398 class += (gap_consensus * GAP_CONSENSUS); 00399 class += (depthzero * DEPTHZERO); 00400 00401 return QUALITY_CLASS(class); 00402 } 00403 00404 00406 00415 char * libSlice_getConsQCRange(const libSlice_Slice s[], 00416 int len, 00417 int highQualityThreshold) 00418 { 00419 int i; 00420 char *cqc = libSlice_newmem(len+1,sizeof(char)); 00421 cqc[len] = '\0'; 00422 00423 for(i = 0; i < len; i++) 00424 { 00425 cqc[i] = libSlice_getConsQC(&s[i], highQualityThreshold); 00426 } 00427 00428 return cqc; 00429 } |