Slice Tools
  • Home
  • SourceForge Page


  • libSlice
  • Home
  • Modules
  • Files
  • Examples
  • Additional Information


  • SourceForge.net Logo
     

    SliceData.cc

    Go to the documentation of this file.
    00001 #include "SliceData.hh"
    00002 #include <stdlib.h>  // for atol
    00003 
    00008 namespace libSlice
    00009 {
    00010   using std::string;
    00011 
    00013 
    00023 SliceData::SliceData(const ReadManager & readManager,
    00024                      const char * index, 
    00025                      const char * gindex,
    00026                      const char * existing,
    00027                      long sliceRangeIndex,
    00028                      long sliceRangeGindex)
    00029  : m_readManager(readManager)
    00030 {
    00031   m_slice = NULL;
    00032   m_highQualityThreshold = 0;
    00033 
    00034   m_nucData     = "";
    00035   m_readIdData  = "";
    00036   m_qualValData = "";
    00037 
    00038   m_errorData = "";
    00039   m_errorCode = SLICE_NOERROR;
    00040   m_errorOperation = STROP_NONE;
    00041 
    00042   m_doAmbiguity = 0;
    00043   m_scoringModel = 0;
    00044 
    00045   m_index    = 0;
    00046   m_gindex   = 0;
    00047   m_indexstr = "Unknown";
    00048   m_existing = (existing) ? existing :  "-";
    00049 
    00050   if (index)
    00051   {
    00052     m_index = atol(index);
    00053     m_index += sliceRangeIndex;
    00054 
    00055     m_indexstr = index;
    00056   }
    00057 
    00058   if (gindex)
    00059   {
    00060     m_gindex = atol(gindex);
    00061     m_gindex += sliceRangeGindex;
    00062   }
    00063 
    00064 
    00065   double normal = 0.2;
    00066   setDistributionTable(normal, normal, normal, normal, normal);
    00067 }
    00068 
    00069 
    00071 
    00074 SliceData::~SliceData()
    00075 {
    00076   if (m_slice)
    00077   {
    00078     if (m_slice->rc) delete [] m_slice->rc;
    00079     if (m_slice->qv) delete [] m_slice->qv;
    00080     if (m_slice->bc) delete [] m_slice->bc;
    00081 
    00082     delete m_slice;
    00083     m_slice = NULL;
    00084   }
    00085 }
    00086 
    00088 
    00090 void SliceData::setDistributionTable(double freqA, double freqC,
    00091                                      double freqG, double freqT,
    00092                                      double freqGap)
    00093 {
    00094   m_distribution.freqA   = freqA;
    00095   m_distribution.freqC   = freqC;
    00096   m_distribution.freqG   = freqG;
    00097   m_distribution.freqT   = freqT;
    00098   m_distribution.freqGap = freqGap;
    00099 }
    00100 
    00102 void SliceData::setHighQualityThreshold(int highQualityThreshold)
    00103 {
    00104   m_highQualityThreshold = highQualityThreshold;
    00105 }
    00106 
    00108 
    00111 void SliceData::addNuc(const char * nucData)
    00112 {
    00113   if (nucData) m_nucData.append(nucData);
    00114 }
    00115 
    00117 
    00120 void SliceData::addQualVal(const char * qualVal)
    00121 {
    00122   if (qualVal) m_qualValData.append(qualVal);
    00123 }
    00124 
    00126 
    00129 void SliceData::addReadId(const char * readId)
    00130 {
    00131   if (readId) m_readIdData.append(readId);
    00132 }
    00133 
    00135 void SliceData::setConsensus(char consensus)
    00136 {
    00137   m_existing = consensus;
    00138   if (m_slice)
    00139   {
    00140     m_slice->c = consensus;
    00141 
    00142   }
    00143 }
    00144 
    00146 
    00157 // TODO: Investigate strtok
    00158 char * SliceData::foreach(const string & data, int numresults,
    00159                           StringOperation operation,
    00160                           const char * underflowError,
    00161                           const char * overflowError)
    00162 {
    00163   const char * databuf = data.c_str();
    00164   char * retval = new char [numresults+1];
    00165   retval[numresults] = '\0';
    00166 
    00167   string::size_type startpos = 0;
    00168   string::size_type endpos = 0;
    00169 
    00170   for (int i=0; i < numresults; i++)
    00171   {
    00172     endpos = data.find(' ', startpos);
    00173 
    00174     if (endpos == data.npos)
    00175     {
    00176       if (i != (numresults-1))
    00177       {
    00178         m_errorOperation = operation;
    00179         m_errorCode = SLICE_ERR_UNDERFLOW;
    00180 
    00181         throw Exception(underflowError, "SliceData::foreach");
    00182       }
    00183 
    00184       endpos = data.length() + 1;
    00185     }
    00186     
    00187     char elementResult = '\0';
    00188 
    00189     if (operation == STROP_GET_QUAL_VAL)
    00190     {
    00191       elementResult = (char) (atoi(databuf + startpos) & 0xff);
    00192     }
    00193     else if (operation == STROP_IS_REVERSE_COMPLIMENT)
    00194     {
    00195       const string & element = data.substr(startpos, (endpos - startpos));
    00196       const ReadData * rd = m_readManager.find(element.c_str());
    00197       
    00198       if (!rd)
    00199       {
    00200         m_errorOperation = operation;
    00201         m_errorCode = SLICE_ERR_NO_READID;
    00202         m_errorData = element;
    00203 
    00204         throw Exception("ReadID " + element + " not found",
    00205                             "SliceData::foreach");
    00206       }
    00207 
    00208       elementResult = rd->isReverseCompliment();
    00209     }
    00210 
    00211     retval[i] = elementResult;
    00212     startpos = endpos + 1;
    00213   }
    00214 
    00215   if (numresults && (endpos != data.length()+1))
    00216   {
    00217     m_errorOperation = operation;
    00218     m_errorCode = SLICE_ERR_OVERFLOW;
    00219 
    00220     throw Exception(overflowError, "SliceData::foreach");
    00221   }
    00222 
    00223   return retval;
    00224 }
    00225 
    00227 
    00234 char * SliceData::getQualVals(int len)
    00235 {
    00236   // There better only be number data in m_qualValData
    00237   if (m_qualValData.find_first_not_of("0123456789 ") != m_qualValData.npos)
    00238   {
    00239     m_errorCode = SLICE_ERR_QV_NONNUMERIC;
    00240     m_errorOperation = STROP_GET_QUAL_VAL;
    00241 
    00242     throw Exception("Invalid Quality Value in slice " + m_indexstr,
    00243                         "SliceData::getQualVals");
    00244   }
    00245 
    00246   return foreach(m_qualValData, len, STROP_GET_QUAL_VAL,
    00247                  "Number of Bases > Number of QualVals",
    00248                  "Number of QualVals > Number of Bases");
    00249 }
    00250 
    00252 
    00259 char * SliceData::getReverseCompliments(int len)
    00260 {
    00261   return foreach(m_readIdData, len, STROP_IS_REVERSE_COMPLIMENT,
    00262                  "Number of Bases > Number of ReadIds",
    00263                  "Number of ReadIds > Number of Bases");
    00264 }
    00265 
    00267 
    00271 char SliceData::getExisting() const
    00272 {
    00273   char retval = '-';
    00274 
    00275   if (m_existing.length() == 1)
    00276   {
    00277     retval = m_existing[0];
    00278   }
    00279   else
    00280   {
    00281     //TODO: Decide what validation should be done (if any)
    00282   }
    00283 
    00284   return retval;
    00285 }
    00286 
    00288 unsigned short SliceData::getDepth() const
    00289 {
    00290   return m_nucData.length();
    00291 }
    00292 
    00294 char * SliceData::getBases(int len) const
    00295 {
    00296   char * retval = new char [len + 1];
    00297   strcpy(retval, m_nucData.c_str());
    00298   return retval;
    00299 }
    00300 
    00302 
    00310 void SliceData::flattenSlice() 
    00311 {
    00312   if (!m_slice)
    00313   {
    00314     m_slice = new libSlice_Slice;
    00315     memset (m_slice, 0, sizeof(libSlice_Slice));
    00316 
    00317     int depth = getDepth();
    00318 
    00319     m_slice->dcov = depth;
    00320     m_slice->c    = getExisting();
    00321     m_slice->bc   = getBases(depth);
    00322     m_slice->qv   = getQualVals(depth);
    00323     m_slice->rc   = getReverseCompliments(depth);
    00324   }
    00325 }
    00326 
    00328 char SliceData::getBase(unsigned short index)
    00329 {
    00330   flattenSlice();
    00331 
    00332   if (index > getDepth())
    00333   {
    00334     throw(Exception("Index > Depth", "getBase"));
    00335   }
    00336 
    00337   return m_slice->bc[index];
    00338 }
    00339 
    00341 char SliceData::getQualVal(unsigned short index)
    00342 {
    00343   flattenSlice();
    00344 
    00345   if (index > getDepth())
    00346   {
    00347     throw(Exception("Index > Depth", "getQualVal"));
    00348   }
    00349 
    00350   return m_slice->qv[index];
    00351 }
    00352 
    00354 //TODO: Refactor to utilize foreach
    00355 ReadData * SliceData::getRead(unsigned short index) const
    00356 {
    00357   if (index > getDepth())
    00358   {
    00359     throw(Exception("Index > Depth", "getRead"));
    00360   }
    00361 
    00362   string::size_type startpos = 0;
    00363   string::size_type endpos = 0;
    00364 
    00365   while (index)
    00366   {
    00367     startpos = m_readIdData.find(' ', startpos);
    00368     startpos++;
    00369 
    00370     if (startpos == m_readIdData.npos)
    00371     {
    00372       throw Exception("Index > Number of ReadIds", "getReadId");
    00373     }
    00374 
    00375     index--;
    00376   }
    00377 
    00378   endpos = m_readIdData.find(' ', startpos);
    00379   if (endpos == m_readIdData.npos)
    00380   {
    00381     endpos = m_readIdData.length();
    00382   }
    00383 
    00384   const string & element = m_readIdData.substr(startpos, 
    00385                                                     (endpos - startpos));
    00386   ReadData * rd = m_readManager.find(element.c_str());
    00387       
    00388   if (!rd)
    00389   {
    00390     throw Exception("ReadID " + element + " not found",
    00391                         "SliceData::getRead");
    00392   }
    00393 
    00394   return rd;
    00395 }
    00396 
    00397 /*
    00398  * This turned out to be slower than getRead
    00399  * 
    00400 const ReadData * SliceData::getRead2(unsigned short index)
    00401 {
    00402   int depth = getDepth();
    00403 
    00404   if (index > depth)
    00405   {
    00406     throw(Exception("Index > Depth", "getRead2"));
    00407   }
    00408 
    00409   if (m_readVector.empty()) 
    00410   {
    00411     if (depth > 1)
    00412     {
    00413       int len = m_readIdData.length();
    00414       char buffer[len+1];
    00415       strcpy(buffer, m_readIdData.c_str());
    00416 
    00417       char * start = NULL;
    00418 
    00419       for (int i = 0; i < len; i++)
    00420       {
    00421         if (buffer[i] != ' ' && start == NULL)
    00422         {
    00423           start = &buffer[i];
    00424         }
    00425         else if (buffer[i] == ' ')
    00426         {
    00427           buffer[i] = '\0';
    00428 
    00429           if (start)
    00430           {
    00431             // Start points to the beginning of the null terminated string to
    00432             // extract
    00433             
    00434             m_readVector.push_back(m_readManager.find(start));
    00435             start = NULL;
    00436           }
    00437         }
    00438       }
    00439     }
    00440     else
    00441     {
    00442       m_readVector.push_back(m_readManager.find(m_readIdData.c_str()));
    00443     }
    00444 
    00445     if (m_readVector.size() != depth)
    00446     {
    00447       throw Exception("Number of reads doesn't match number of bases",
    00448                           "SliceData::getRead2");
    00449 
    00450     }
    00451   }
    00452 
    00453   return m_readVector[index];
    00454 }
    00455 
    00456 */
    00457 
    00459 
    00461 long SliceData::getIndex() const
    00462 {
    00463   return m_index;
    00464 }
    00465 
    00467 
    00469 long SliceData::getGindex() const
    00470 {
    00471   return m_gindex;
    00472 }
    00473 
    00475 char SliceData::getConsensus()
    00476 {
    00477   flattenSlice();
    00478 
    00479   return m_slice->c;
    00480 }
    00481 
    00482 
    00484 
    00492 ConsensusData SliceData::doCalculations(bool recallEmpty)
    00493 {
    00494   flattenSlice();
    00495   int errCode;
    00496 
    00497   libSlice_Consensus resultConsensus;
    00498 
    00499   // This should probably never be set
    00500   if (recallEmpty) { libSlice_setRecallEmpty(recallEmpty); }
    00501 
    00502 
    00503   // Don't do ambiguity here, get that separately
    00504   if ((errCode = libSlice_getConsensusParam(m_slice, 
    00505                                             &resultConsensus, 
    00506                                             &m_distribution, 
    00507                                             m_highQualityThreshold,
    00508                                             0)))
    00509   {
    00510     m_errorCode = SLICE_ERR_GETCONSENSUS;
    00511 
    00512     throw Exception("Error Calculating consensus",
    00513                         "_getConsensus");
    00514   }
    00515   
    00516   if (m_doAmbiguity == 2)
    00517   {
    00518     char origFlags = resultConsensus.ambiguityFlags;
    00519     libSlice_updateAmbiguity(m_slice, &resultConsensus, m_highQualityThreshold);
    00520 
    00521     if (0 && origFlags != resultConsensus.ambiguityFlags)
    00522     {
    00523       cerr << "Annotation: Updating Ambiguity " 
    00524            << libSlice_convertAmbiguityFlags(origFlags)
    00525            << " to " 
    00526            << libSlice_convertAmbiguityFlags(resultConsensus.ambiguityFlags)
    00527            << " at gindex " << m_gindex << endl;
    00528     }
    00529   }
    00530   else if (m_doAmbiguity == 3)
    00531   {
    00532     char origFlags = resultConsensus.ambiguityFlags;
    00533     libSlice_updateAmbiguityConic(m_slice, 
    00534                                   &resultConsensus, 
    00535                                   m_highQualityThreshold);
    00536                                   
    00537     if (0 && origFlags != resultConsensus.ambiguityFlags)
    00538     {
    00539       cerr << "Conic: Updating Ambiguity " 
    00540            << libSlice_convertAmbiguityFlags(origFlags)
    00541            << " to " 
    00542            << libSlice_convertAmbiguityFlags(resultConsensus.ambiguityFlags)
    00543            << " at gindex " << m_gindex << endl;
    00544     }
    00545   }
    00546 
    00547   char ambiguityCode;
    00548 
    00549   if (m_slice->dcov == 0 && !recallEmpty)
    00550   {
    00551     // Don't update the slice
    00552     ambiguityCode = m_slice->c;
    00553   }
    00554   else
    00555   {
    00556     ambiguityCode = libSlice_convertAmbiguityFlags(resultConsensus.ambiguityFlags);
    00557     if (m_doAmbiguity)
    00558     {
    00559       m_slice->c = ambiguityCode;
    00560     }
    00561     else
    00562     {
    00563       m_slice->c = resultConsensus.consensus;
    00564     }
    00565   }
    00566 
    00567   m_existing = (string) "" + m_slice->c;
    00568 
    00569   char qualityClass = libSlice_getConsQC(m_slice, m_highQualityThreshold);
    00570 
    00571   return ConsensusData(&resultConsensus, 
    00572                        ambiguityCode, 
    00573                        qualityClass, 
    00574                        m_doAmbiguity);
    00575 }
    00576 
    00578 
    00587 void SliceData::print(ostream & out, const string & prefix)
    00588 {
    00589   flattenSlice();
    00590 
    00591   out << prefix
    00592       << "<Slice Index=\"" << m_index
    00593       << "\" Gindex=\""    << m_gindex
    00594       << "\" Existing=\""  << m_existing
    00595       << "\" Depth=\""     << getDepth()
    00596       << "\">" << endl;
    00597 
    00598   for (int i=0; i < m_slice->dcov; i++)
    00599   {
    00600     out << prefix
    00601         << "  <Base i=\"" << i
    00602         << "\" bc=\"" <<       m_slice->bc[i] 
    00603         << "\" qv=\"" << (int) m_slice->qv[i]
    00604         << "\" rc=\"" << (int) m_slice->rc[i]
    00605         << "\" />" << endl;
    00606   }
    00607 
    00608   out << prefix << "</Slice>" << endl;
    00609 }
    00610 
    00612 
    00617 void SliceData::printXML(ostream & out, const string & prefix)
    00618 {
    00619   out << prefix
    00620       << "<Slice Index=\"" << m_index
    00621       << "\" Gindex=\""    << m_gindex
    00622       << "\" Existing=\""  << m_existing
    00623       << "\">" << endl;
    00624 
    00625   out << prefix << "  <Nuc>"     << m_nucData     << "</Nuc>"     << endl;
    00626   out << prefix << "  <Qualval>" << m_qualValData << "</Qualval>" << endl;
    00627   out << prefix << "  <ReadID>"  << m_readIdData  << "</ReadID>"  << endl;
    00628 
    00629   out << prefix << "</Slice>" << endl;
    00630 }
    00631 
    00633 
    00641 void SliceData::resetReadId()
    00642 {
    00643   m_readIdData = "";
    00644   
    00645   if (m_slice)
    00646   {
    00647     delete m_slice;
    00648     m_slice = NULL;
    00649   }
    00650 }
    00651 
    00653 
    00657 void SliceData::resetQualVal()
    00658 {
    00659   m_qualValData = "";
    00660 
    00661   if (m_slice)
    00662   {
    00663     delete m_slice;
    00664     m_slice = NULL;
    00665   }
    00666 }
    00667 
    00669 
    00673 void SliceData::resetNuc()
    00674 {
    00675   m_nucData = "";
    00676 
    00677   if (m_slice)
    00678   {
    00679     delete m_slice;
    00680     m_slice = NULL;
    00681   }
    00682 }
    00683 
    00685 
    00689 void SliceData::resetIndex(long index, long gindex)
    00690 {
    00691   m_index = index;
    00692   m_gindex = gindex;
    00693 }
    00694 
    00696 const string & SliceData::getQualValData() const
    00697 {
    00698   return m_qualValData;
    00699 }
    00700 
    00702 const string & SliceData::getReadIdData() const
    00703 {
    00704   return m_readIdData;
    00705 }
    00706 
    00708 const string & SliceData::getNucData() const
    00709 {
    00710   return m_nucData;
    00711 }
    00712 
    00714 
    00718 void SliceData::printError(ostream & out) const
    00719 {
    00720   if (m_errorCode != SLICE_NOERROR)
    00721   {
    00722     out << "  Error Slice index=" << m_index
    00723         << " code=" << m_errorCode
    00724         << " msg=";
    00725 
    00726     switch (m_errorCode)
    00727     {
    00728       case SLICE_NOERROR:
    00729          out << "No Error";
    00730          break;
    00731 
    00732       case SLICE_ERR_QV_NONNUMERIC:
    00733          out << "Non Numberic values in Quality Value";
    00734          break;
    00735 
    00736       case SLICE_ERR_UNDERFLOW:
    00737          out << "Less data than required in operation";
    00738          break;
    00739 
    00740       case SLICE_ERR_OVERFLOW:
    00741          out << "More data than required in operation";
    00742          break;
    00743 
    00744       case SLICE_ERR_NO_READID:
    00745          out << "ReadId not found"
    00746              << " readid=" << m_errorData;
    00747          break;
    00748 
    00749       case SLICE_ERR_GETCONSENSUS:
    00750          out << "Internal error in getConsensus";
    00751          break;
    00752 
    00753       case SLICE_ERR_CALCULATION_NOT_DONE:
    00754          out << "Results requested before calculations performed";
    00755          break;
    00756     };
    00757 
    00758     if (m_errorCode == SLICE_ERR_OVERFLOW || m_errorCode == SLICE_ERR_UNDERFLOW)
    00759     {
    00760       switch (m_errorOperation)
    00761       {
    00762         case STROP_GET_QUAL_VAL:
    00763           out << " operation=" << m_errorOperation
    00764               << " op_str="    << "getQualval";
    00765           break;
    00766 
    00767         case STROP_IS_REVERSE_COMPLIMENT:
    00768           out << " operation=" << m_errorOperation
    00769               << " op_str="    << "getReverseCompliment";
    00770           break;
    00771 
    00772         case STROP_NONE:
    00773           break;
    00774       };
    00775     }
    00776    
    00777 //    out << endl;
    00778   }
    00779 }
    00780 
    00782 int SliceData::getErrorCode() const
    00783 {
    00784   return m_errorCode;
    00785 }
    00786 
    00788 int SliceData::getErrorOperation() const
    00789 {
    00790   return m_errorOperation;
    00791 }
    00792 
    00794 
    00799 void SliceData::setDoAmbiguity(int doAmbiguity)
    00800 {
    00801   m_doAmbiguity = doAmbiguity;
    00802 }
    00803 
    00805 char SliceData::getBaseCompliment(char base)
    00806 {
    00807   return libSlice_getCompliment(base);
    00808 }
    00809 
    00810 void SliceData::setScoringModel(int scoringModel)
    00811 {
    00812   m_scoringModel = scoringModel;
    00813 }
    00814 
    00815 }
    00816 
    00818 
    00821 ostream & operator<< (ostream & os, libSlice::SliceData & slice)
    00822 {
    00823   slice.print(os);
    00824   return os;
    00825 }
    00826 
    00827 // Convienent way to print *slices out
    00831 ostream & operator<< (ostream & os, libSlice::SliceData * slice)
    00832 {
    00833   slice->print(os);
    00834   return os;
    00835 }