Slice Tools libSlice |
SliceData.ccGo to the documentation of this file.00001 #include "SliceData.hh" 00002 #include <stdlib.h> // for atol 00003 00008 namespace libSlice 00009 { 00010 using std::string; 00011 00013 00023 SliceData::SliceData(const ReadManager & readManager, 00024 const char * index, 00025 const char * gindex, 00026 const char * existing, 00027 long sliceRangeIndex, 00028 long sliceRangeGindex) 00029 : m_readManager(readManager) 00030 { 00031 m_slice = NULL; 00032 m_highQualityThreshold = 0; 00033 00034 m_nucData = ""; 00035 m_readIdData = ""; 00036 m_qualValData = ""; 00037 00038 m_errorData = ""; 00039 m_errorCode = SLICE_NOERROR; 00040 m_errorOperation = STROP_NONE; 00041 00042 m_doAmbiguity = 0; 00043 m_scoringModel = 0; 00044 00045 m_index = 0; 00046 m_gindex = 0; 00047 m_indexstr = "Unknown"; 00048 m_existing = (existing) ? existing : "-"; 00049 00050 if (index) 00051 { 00052 m_index = atol(index); 00053 m_index += sliceRangeIndex; 00054 00055 m_indexstr = index; 00056 } 00057 00058 if (gindex) 00059 { 00060 m_gindex = atol(gindex); 00061 m_gindex += sliceRangeGindex; 00062 } 00063 00064 00065 double normal = 0.2; 00066 setDistributionTable(normal, normal, normal, normal, normal); 00067 } 00068 00069 00071 00074 SliceData::~SliceData() 00075 { 00076 if (m_slice) 00077 { 00078 if (m_slice->rc) delete [] m_slice->rc; 00079 if (m_slice->qv) delete [] m_slice->qv; 00080 if (m_slice->bc) delete [] m_slice->bc; 00081 00082 delete m_slice; 00083 m_slice = NULL; 00084 } 00085 } 00086 00088 00090 void SliceData::setDistributionTable(double freqA, double freqC, 00091 double freqG, double freqT, 00092 double freqGap) 00093 { 00094 m_distribution.freqA = freqA; 00095 m_distribution.freqC = freqC; 00096 m_distribution.freqG = freqG; 00097 m_distribution.freqT = freqT; 00098 m_distribution.freqGap = freqGap; 00099 } 00100 00102 void SliceData::setHighQualityThreshold(int highQualityThreshold) 00103 { 00104 m_highQualityThreshold = highQualityThreshold; 00105 } 00106 00108 00111 void SliceData::addNuc(const char * nucData) 00112 { 00113 if (nucData) m_nucData.append(nucData); 00114 } 00115 00117 00120 void SliceData::addQualVal(const char * qualVal) 00121 { 00122 if (qualVal) m_qualValData.append(qualVal); 00123 } 00124 00126 00129 void SliceData::addReadId(const char * readId) 00130 { 00131 if (readId) m_readIdData.append(readId); 00132 } 00133 00135 void SliceData::setConsensus(char consensus) 00136 { 00137 m_existing = consensus; 00138 if (m_slice) 00139 { 00140 m_slice->c = consensus; 00141 00142 } 00143 } 00144 00146 00157 // TODO: Investigate strtok 00158 char * SliceData::foreach(const string & data, int numresults, 00159 StringOperation operation, 00160 const char * underflowError, 00161 const char * overflowError) 00162 { 00163 const char * databuf = data.c_str(); 00164 char * retval = new char [numresults+1]; 00165 retval[numresults] = '\0'; 00166 00167 string::size_type startpos = 0; 00168 string::size_type endpos = 0; 00169 00170 for (int i=0; i < numresults; i++) 00171 { 00172 endpos = data.find(' ', startpos); 00173 00174 if (endpos == data.npos) 00175 { 00176 if (i != (numresults-1)) 00177 { 00178 m_errorOperation = operation; 00179 m_errorCode = SLICE_ERR_UNDERFLOW; 00180 00181 throw Exception(underflowError, "SliceData::foreach"); 00182 } 00183 00184 endpos = data.length() + 1; 00185 } 00186 00187 char elementResult = '\0'; 00188 00189 if (operation == STROP_GET_QUAL_VAL) 00190 { 00191 elementResult = (char) (atoi(databuf + startpos) & 0xff); 00192 } 00193 else if (operation == STROP_IS_REVERSE_COMPLIMENT) 00194 { 00195 const string & element = data.substr(startpos, (endpos - startpos)); 00196 const ReadData * rd = m_readManager.find(element.c_str()); 00197 00198 if (!rd) 00199 { 00200 m_errorOperation = operation; 00201 m_errorCode = SLICE_ERR_NO_READID; 00202 m_errorData = element; 00203 00204 throw Exception("ReadID " + element + " not found", 00205 "SliceData::foreach"); 00206 } 00207 00208 elementResult = rd->isReverseCompliment(); 00209 } 00210 00211 retval[i] = elementResult; 00212 startpos = endpos + 1; 00213 } 00214 00215 if (numresults && (endpos != data.length()+1)) 00216 { 00217 m_errorOperation = operation; 00218 m_errorCode = SLICE_ERR_OVERFLOW; 00219 00220 throw Exception(overflowError, "SliceData::foreach"); 00221 } 00222 00223 return retval; 00224 } 00225 00227 00234 char * SliceData::getQualVals(int len) 00235 { 00236 // There better only be number data in m_qualValData 00237 if (m_qualValData.find_first_not_of("0123456789 ") != m_qualValData.npos) 00238 { 00239 m_errorCode = SLICE_ERR_QV_NONNUMERIC; 00240 m_errorOperation = STROP_GET_QUAL_VAL; 00241 00242 throw Exception("Invalid Quality Value in slice " + m_indexstr, 00243 "SliceData::getQualVals"); 00244 } 00245 00246 return foreach(m_qualValData, len, STROP_GET_QUAL_VAL, 00247 "Number of Bases > Number of QualVals", 00248 "Number of QualVals > Number of Bases"); 00249 } 00250 00252 00259 char * SliceData::getReverseCompliments(int len) 00260 { 00261 return foreach(m_readIdData, len, STROP_IS_REVERSE_COMPLIMENT, 00262 "Number of Bases > Number of ReadIds", 00263 "Number of ReadIds > Number of Bases"); 00264 } 00265 00267 00271 char SliceData::getExisting() const 00272 { 00273 char retval = '-'; 00274 00275 if (m_existing.length() == 1) 00276 { 00277 retval = m_existing[0]; 00278 } 00279 else 00280 { 00281 //TODO: Decide what validation should be done (if any) 00282 } 00283 00284 return retval; 00285 } 00286 00288 unsigned short SliceData::getDepth() const 00289 { 00290 return m_nucData.length(); 00291 } 00292 00294 char * SliceData::getBases(int len) const 00295 { 00296 char * retval = new char [len + 1]; 00297 strcpy(retval, m_nucData.c_str()); 00298 return retval; 00299 } 00300 00302 00310 void SliceData::flattenSlice() 00311 { 00312 if (!m_slice) 00313 { 00314 m_slice = new libSlice_Slice; 00315 memset (m_slice, 0, sizeof(libSlice_Slice)); 00316 00317 int depth = getDepth(); 00318 00319 m_slice->dcov = depth; 00320 m_slice->c = getExisting(); 00321 m_slice->bc = getBases(depth); 00322 m_slice->qv = getQualVals(depth); 00323 m_slice->rc = getReverseCompliments(depth); 00324 } 00325 } 00326 00328 char SliceData::getBase(unsigned short index) 00329 { 00330 flattenSlice(); 00331 00332 if (index > getDepth()) 00333 { 00334 throw(Exception("Index > Depth", "getBase")); 00335 } 00336 00337 return m_slice->bc[index]; 00338 } 00339 00341 char SliceData::getQualVal(unsigned short index) 00342 { 00343 flattenSlice(); 00344 00345 if (index > getDepth()) 00346 { 00347 throw(Exception("Index > Depth", "getQualVal")); 00348 } 00349 00350 return m_slice->qv[index]; 00351 } 00352 00354 //TODO: Refactor to utilize foreach 00355 ReadData * SliceData::getRead(unsigned short index) const 00356 { 00357 if (index > getDepth()) 00358 { 00359 throw(Exception("Index > Depth", "getRead")); 00360 } 00361 00362 string::size_type startpos = 0; 00363 string::size_type endpos = 0; 00364 00365 while (index) 00366 { 00367 startpos = m_readIdData.find(' ', startpos); 00368 startpos++; 00369 00370 if (startpos == m_readIdData.npos) 00371 { 00372 throw Exception("Index > Number of ReadIds", "getReadId"); 00373 } 00374 00375 index--; 00376 } 00377 00378 endpos = m_readIdData.find(' ', startpos); 00379 if (endpos == m_readIdData.npos) 00380 { 00381 endpos = m_readIdData.length(); 00382 } 00383 00384 const string & element = m_readIdData.substr(startpos, 00385 (endpos - startpos)); 00386 ReadData * rd = m_readManager.find(element.c_str()); 00387 00388 if (!rd) 00389 { 00390 throw Exception("ReadID " + element + " not found", 00391 "SliceData::getRead"); 00392 } 00393 00394 return rd; 00395 } 00396 00397 /* 00398 * This turned out to be slower than getRead 00399 * 00400 const ReadData * SliceData::getRead2(unsigned short index) 00401 { 00402 int depth = getDepth(); 00403 00404 if (index > depth) 00405 { 00406 throw(Exception("Index > Depth", "getRead2")); 00407 } 00408 00409 if (m_readVector.empty()) 00410 { 00411 if (depth > 1) 00412 { 00413 int len = m_readIdData.length(); 00414 char buffer[len+1]; 00415 strcpy(buffer, m_readIdData.c_str()); 00416 00417 char * start = NULL; 00418 00419 for (int i = 0; i < len; i++) 00420 { 00421 if (buffer[i] != ' ' && start == NULL) 00422 { 00423 start = &buffer[i]; 00424 } 00425 else if (buffer[i] == ' ') 00426 { 00427 buffer[i] = '\0'; 00428 00429 if (start) 00430 { 00431 // Start points to the beginning of the null terminated string to 00432 // extract 00433 00434 m_readVector.push_back(m_readManager.find(start)); 00435 start = NULL; 00436 } 00437 } 00438 } 00439 } 00440 else 00441 { 00442 m_readVector.push_back(m_readManager.find(m_readIdData.c_str())); 00443 } 00444 00445 if (m_readVector.size() != depth) 00446 { 00447 throw Exception("Number of reads doesn't match number of bases", 00448 "SliceData::getRead2"); 00449 00450 } 00451 } 00452 00453 return m_readVector[index]; 00454 } 00455 00456 */ 00457 00459 00461 long SliceData::getIndex() const 00462 { 00463 return m_index; 00464 } 00465 00467 00469 long SliceData::getGindex() const 00470 { 00471 return m_gindex; 00472 } 00473 00475 char SliceData::getConsensus() 00476 { 00477 flattenSlice(); 00478 00479 return m_slice->c; 00480 } 00481 00482 00484 00492 ConsensusData SliceData::doCalculations(bool recallEmpty) 00493 { 00494 flattenSlice(); 00495 int errCode; 00496 00497 libSlice_Consensus resultConsensus; 00498 00499 // This should probably never be set 00500 if (recallEmpty) { libSlice_setRecallEmpty(recallEmpty); } 00501 00502 00503 // Don't do ambiguity here, get that separately 00504 if ((errCode = libSlice_getConsensusParam(m_slice, 00505 &resultConsensus, 00506 &m_distribution, 00507 m_highQualityThreshold, 00508 0))) 00509 { 00510 m_errorCode = SLICE_ERR_GETCONSENSUS; 00511 00512 throw Exception("Error Calculating consensus", 00513 "_getConsensus"); 00514 } 00515 00516 if (m_doAmbiguity == 2) 00517 { 00518 char origFlags = resultConsensus.ambiguityFlags; 00519 libSlice_updateAmbiguity(m_slice, &resultConsensus, m_highQualityThreshold); 00520 00521 if (0 && origFlags != resultConsensus.ambiguityFlags) 00522 { 00523 cerr << "Annotation: Updating Ambiguity " 00524 << libSlice_convertAmbiguityFlags(origFlags) 00525 << " to " 00526 << libSlice_convertAmbiguityFlags(resultConsensus.ambiguityFlags) 00527 << " at gindex " << m_gindex << endl; 00528 } 00529 } 00530 else if (m_doAmbiguity == 3) 00531 { 00532 char origFlags = resultConsensus.ambiguityFlags; 00533 libSlice_updateAmbiguityConic(m_slice, 00534 &resultConsensus, 00535 m_highQualityThreshold); 00536 00537 if (0 && origFlags != resultConsensus.ambiguityFlags) 00538 { 00539 cerr << "Conic: Updating Ambiguity " 00540 << libSlice_convertAmbiguityFlags(origFlags) 00541 << " to " 00542 << libSlice_convertAmbiguityFlags(resultConsensus.ambiguityFlags) 00543 << " at gindex " << m_gindex << endl; 00544 } 00545 } 00546 00547 char ambiguityCode; 00548 00549 if (m_slice->dcov == 0 && !recallEmpty) 00550 { 00551 // Don't update the slice 00552 ambiguityCode = m_slice->c; 00553 } 00554 else 00555 { 00556 ambiguityCode = libSlice_convertAmbiguityFlags(resultConsensus.ambiguityFlags); 00557 if (m_doAmbiguity) 00558 { 00559 m_slice->c = ambiguityCode; 00560 } 00561 else 00562 { 00563 m_slice->c = resultConsensus.consensus; 00564 } 00565 } 00566 00567 m_existing = (string) "" + m_slice->c; 00568 00569 char qualityClass = libSlice_getConsQC(m_slice, m_highQualityThreshold); 00570 00571 return ConsensusData(&resultConsensus, 00572 ambiguityCode, 00573 qualityClass, 00574 m_doAmbiguity); 00575 } 00576 00578 00587 void SliceData::print(ostream & out, const string & prefix) 00588 { 00589 flattenSlice(); 00590 00591 out << prefix 00592 << "<Slice Index=\"" << m_index 00593 << "\" Gindex=\"" << m_gindex 00594 << "\" Existing=\"" << m_existing 00595 << "\" Depth=\"" << getDepth() 00596 << "\">" << endl; 00597 00598 for (int i=0; i < m_slice->dcov; i++) 00599 { 00600 out << prefix 00601 << " <Base i=\"" << i 00602 << "\" bc=\"" << m_slice->bc[i] 00603 << "\" qv=\"" << (int) m_slice->qv[i] 00604 << "\" rc=\"" << (int) m_slice->rc[i] 00605 << "\" />" << endl; 00606 } 00607 00608 out << prefix << "</Slice>" << endl; 00609 } 00610 00612 00617 void SliceData::printXML(ostream & out, const string & prefix) 00618 { 00619 out << prefix 00620 << "<Slice Index=\"" << m_index 00621 << "\" Gindex=\"" << m_gindex 00622 << "\" Existing=\"" << m_existing 00623 << "\">" << endl; 00624 00625 out << prefix << " <Nuc>" << m_nucData << "</Nuc>" << endl; 00626 out << prefix << " <Qualval>" << m_qualValData << "</Qualval>" << endl; 00627 out << prefix << " <ReadID>" << m_readIdData << "</ReadID>" << endl; 00628 00629 out << prefix << "</Slice>" << endl; 00630 } 00631 00633 00641 void SliceData::resetReadId() 00642 { 00643 m_readIdData = ""; 00644 00645 if (m_slice) 00646 { 00647 delete m_slice; 00648 m_slice = NULL; 00649 } 00650 } 00651 00653 00657 void SliceData::resetQualVal() 00658 { 00659 m_qualValData = ""; 00660 00661 if (m_slice) 00662 { 00663 delete m_slice; 00664 m_slice = NULL; 00665 } 00666 } 00667 00669 00673 void SliceData::resetNuc() 00674 { 00675 m_nucData = ""; 00676 00677 if (m_slice) 00678 { 00679 delete m_slice; 00680 m_slice = NULL; 00681 } 00682 } 00683 00685 00689 void SliceData::resetIndex(long index, long gindex) 00690 { 00691 m_index = index; 00692 m_gindex = gindex; 00693 } 00694 00696 const string & SliceData::getQualValData() const 00697 { 00698 return m_qualValData; 00699 } 00700 00702 const string & SliceData::getReadIdData() const 00703 { 00704 return m_readIdData; 00705 } 00706 00708 const string & SliceData::getNucData() const 00709 { 00710 return m_nucData; 00711 } 00712 00714 00718 void SliceData::printError(ostream & out) const 00719 { 00720 if (m_errorCode != SLICE_NOERROR) 00721 { 00722 out << " Error Slice index=" << m_index 00723 << " code=" << m_errorCode 00724 << " msg="; 00725 00726 switch (m_errorCode) 00727 { 00728 case SLICE_NOERROR: 00729 out << "No Error"; 00730 break; 00731 00732 case SLICE_ERR_QV_NONNUMERIC: 00733 out << "Non Numberic values in Quality Value"; 00734 break; 00735 00736 case SLICE_ERR_UNDERFLOW: 00737 out << "Less data than required in operation"; 00738 break; 00739 00740 case SLICE_ERR_OVERFLOW: 00741 out << "More data than required in operation"; 00742 break; 00743 00744 case SLICE_ERR_NO_READID: 00745 out << "ReadId not found" 00746 << " readid=" << m_errorData; 00747 break; 00748 00749 case SLICE_ERR_GETCONSENSUS: 00750 out << "Internal error in getConsensus"; 00751 break; 00752 00753 case SLICE_ERR_CALCULATION_NOT_DONE: 00754 out << "Results requested before calculations performed"; 00755 break; 00756 }; 00757 00758 if (m_errorCode == SLICE_ERR_OVERFLOW || m_errorCode == SLICE_ERR_UNDERFLOW) 00759 { 00760 switch (m_errorOperation) 00761 { 00762 case STROP_GET_QUAL_VAL: 00763 out << " operation=" << m_errorOperation 00764 << " op_str=" << "getQualval"; 00765 break; 00766 00767 case STROP_IS_REVERSE_COMPLIMENT: 00768 out << " operation=" << m_errorOperation 00769 << " op_str=" << "getReverseCompliment"; 00770 break; 00771 00772 case STROP_NONE: 00773 break; 00774 }; 00775 } 00776 00777 // out << endl; 00778 } 00779 } 00780 00782 int SliceData::getErrorCode() const 00783 { 00784 return m_errorCode; 00785 } 00786 00788 int SliceData::getErrorOperation() const 00789 { 00790 return m_errorOperation; 00791 } 00792 00794 00799 void SliceData::setDoAmbiguity(int doAmbiguity) 00800 { 00801 m_doAmbiguity = doAmbiguity; 00802 } 00803 00805 char SliceData::getBaseCompliment(char base) 00806 { 00807 return libSlice_getCompliment(base); 00808 } 00809 00810 void SliceData::setScoringModel(int scoringModel) 00811 { 00812 m_scoringModel = scoringModel; 00813 } 00814 00815 } 00816 00818 00821 ostream & operator<< (ostream & os, libSlice::SliceData & slice) 00822 { 00823 slice.print(os); 00824 return os; 00825 } 00826 00827 // Convienent way to print *slices out 00831 ostream & operator<< (ostream & os, libSlice::SliceData * slice) 00832 { 00833 slice->print(os); 00834 return os; 00835 } |