Main Page | Modules | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members | Related Pages

NCExtractionMDA.cxx

Go to the documentation of this file.
00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 #include "NCUtils/Extraction/NCExtractionMDA.h"
00018 #include "NCUtils/NCType.h"
00019 
00020 #include "MessageService/MsgService.h"
00021 #include "Conventions/Detector.h"
00022 #include "Conventions/BeamType.h"
00023 #include "Conventions/ReleaseType.h"
00024 #include "Conventions/SimFlag.h"
00025 #include "Util/UtilString.h"
00026 #include "AnalysisNtuples/ANtpDefaultValue.h"
00027 
00028 #include "NCUtils/Cuts/NCAnalysisCutsNC.h"
00029 
00030 #include "TDirectory.h"
00031 #include "TLeaf.h"
00032 #include "TObjArray.h"
00033 
00034 #include <algorithm>
00035 #include <cassert>
00036 
00037 #include <iostream>
00038 #include <fstream>
00039 using std::cout;
00040 using std::endl;
00041 
00042 #include <fstream>
00043 using std::ifstream;
00044 
00045 using std::string;
00046 
00047 ClassImp(NCExtractionMDA)
00048 
00049 CVSID("$Id: NCExtractionMDA.cxx,v 1.8 2009/09/16 13:51:16 bckhouse Exp $");
00050 
00051 #include "NCUtils/Extraction/MicroDSTMaker.h"
00052 REGISTER_NCEXTRACTION(NCExtractionMDA, MDA)
00053 
00054 //----------------------------------------------------------------------
00055 NCExtractionMDA::NCExtractionMDA(NCAnalysisCuts* cuts,
00056                                  const Registry& r) :
00057   NCExtraction(cuts, r),
00058   fIsFilled(false),
00059   fThreshCut(ANtpDefaultValue::kDouble)
00060 {
00061   //Get macro config parameters
00062   int tmpi;
00063   const char* tmps;
00064 
00065   // Need this key for some reason
00066   assert(r.KeyExists("MDAMCPath"));
00067 
00068   if (r.Get("TreeName",   tmps)) fTreeName   = tmps;
00069   if (r.Get("MDAMCPath",  tmps)) fFilePath   = tmps;
00070   if (r.Get("TrainTrueE", tmpi)) fTrainTrueE = tmpi;
00071 }
00072 
00073 //-----------------------------------------------------------------------
00074 bool NCExtractionMDA::ReadCalibInfoFromFile(NCEventInfo& evtInfo)
00075 {
00076 
00077   // Try $SRT_LOCAL, $SRT_PRIVATE_CONTEXT then $SRT_PUBLIC_CONTEXT
00078   TString basedir(getenv("SRT_LOCAL"));
00079   if(basedir=="") basedir=getenv("SRT_PRIVATE_CONTEXT");
00080   // SRT_PRIVATE_CONTEXT gets set to '.' instead of empty, for some reason
00081   if(basedir==".") basedir=getenv("SRT_PUBLIC_CONTEXT");
00082 
00083   TString dataDir = basedir+"/NCUtils/data/";
00084 
00085   Detector::Detector_t detType = (Detector::Detector_t)evtInfo.header->detector;
00086 
00087   fTrainTrueE=false;
00088 
00089   if (fTrainTrueE){
00090     fCoeffFileName = dataDir +
00091       "Mda_Coeff_Near_L010z185i_CedarDaikon_TTE.dat";
00092 
00093     fHistDefFileName = dataDir +
00094       "Mda_HistDef_Near_L010z185i_CedarDaikon_TTE.dat";
00095   }else{
00096     fCoeffFileName = dataDir +
00097       WhichMdaCoeffHistDef(true, detType, fReleaseType);
00098 
00099     fHistDefFileName = dataDir +
00100       WhichMdaCoeffHistDef(false, detType, fReleaseType);
00101   }
00102 
00103   //Setup chain for P Vector here, since we only run this once.
00104   //Could go in constructor but would have to multiply constructor
00105   //arguments, cleaner this way.
00106   fChain = new TChain(fTreeName);
00107 
00108   MSG("NCExtractionMDA",Msg::kInfo) << "Trying to add " << fFilePath
00109                                     << " to MDA chain" << endl;
00110   fChain->Add(fFilePath);
00111 
00112 
00113   //Reads the SAS coefficient input file
00114   //(containing quadratic, linear and constant coeffs)
00115   //and fills the necessary matrices and vectors
00116   //Also fills a vector
00117 
00118   MSG("NCExtractionMDA",Msg::kDebug)<<"Reading Calibration Info! "
00119                                     << " Should be done once per file. "
00120                                     << endl;
00121 
00122   static const Int_t kLineSize=4000;
00123 
00124   ifstream sasInput;
00125 
00126   MSG("NCExtractionMDA",Msg::kInfo)<< "MDA Calib File: "
00127                                    << fCoeffFileName
00128                                    << endl;
00129 
00130   sasInput.open(fCoeffFileName);
00131   if (sasInput.fail()) {
00132 
00133     MAXMSG("NCExtractionMDA",Msg::kInfo, 20)<< "SAS coefficient file "
00134                                             << "not found! "
00135                                             << "Not calculating MDA PID..."
00136                                             << endl;
00137     return false;
00138   }
00139 
00140   Char_t oneLine[kLineSize];
00141 
00142   string sepaCol=" ";
00143   vector<string> oneVec;
00144 
00145   //Read the first line and get the Threshold cut value
00146   sasInput.getline(oneLine,kLineSize);
00147   string topLine=oneLine;
00148   UtilString::StringTok(oneVec,topLine,sepaCol);
00149   // The file has 0=CC, 1=NC - NCUtils convention is the other way round
00150   fThreshCut = 1 - atof((oneVec.at(2)).c_str());
00151 
00152   MSG("NCExtractionMDA",Msg::kInfo) << "MDA Threshold cut: "
00153                                     << fThreshCut
00154                                     << endl;
00155 
00156   //Reset the line vector
00157   oneVec.clear();
00158 
00159   //Read the second line (mean values) and get the number
00160   //of variables
00161   sasInput.getline(oneLine,kLineSize);
00162   topLine=oneLine;
00163   UtilString::StringTok(oneVec,topLine,sepaCol);
00164 
00165   //Read the mean values into a vector
00166   TVectorD meanTemp(oneVec.size()-2);
00167   for(UInt_t col=2; col < oneVec.size(); col++){
00168     meanTemp(col-2)=(atof((oneVec.at(col)).c_str()));
00169   }
00170 
00171   for(Int_t m=0; m < meanTemp.GetNoElements(); m++){
00172     MSG("NCExtractionMDA",Msg::kDebug)<< "Mean:"
00173                                       << m
00174                                       << "="
00175                                       << meanTemp(m)
00176                                       << endl;
00177   }
00178   fMeanVec.push_back(meanTemp);
00179 
00180   //Declare matrices and vectors for coefficient reading
00181   TMatrixD quadTemp(oneVec.size()-2, oneVec.size()-2);
00182 
00183   TVectorD linTemp(oneVec.size()-2);
00184 
00185   TVectorD constTemp(oneVec.size()-2);
00186 
00187   vector <string> tempClassVec;
00188 
00189   //Count the rows for each matrix
00190   Int_t rowCounter=0;
00191 
00192   //Reset the number of classes counter
00193   fNClass=0;
00194 
00195   //Reset the ifstream pointer to the beginning of the file
00196   // sasInput.seekg(0,ios::beg);
00197 
00198   //Reset the line vector
00199   oneVec.clear();
00200 
00201   while (! sasInput.eof() ){
00202     sasInput.getline(oneLine,kLineSize);
00203     topLine=oneLine;
00204     UtilString::StringTok(oneVec,topLine,sepaCol);
00205 
00206     string coeffType;
00207 
00208     for(UInt_t col=0; col < oneVec.size(); col++){
00209 
00210       if (col==0) {
00211 
00212         tempClassVec.push_back(oneVec.at(col));
00213 
00214       }else if (col==1){
00215 
00216         coeffType=oneVec.at(col);
00217 
00218         if(coeffType!="_LINEAR_" && coeffType !="_CONST_"){
00219           if (fVarNameVec.size()<(oneVec.size()-2))
00220             fVarNameVec.push_back(oneVec.at(col));
00221           fTypeCoeffVec.push_back("_QUAD_");
00222         }else fTypeCoeffVec.push_back(oneVec.at(col));
00223 
00224       }else{
00225 
00226         if (coeffType=="_LINEAR_"){
00227 
00228           linTemp(col-2)=(atof((oneVec.at(col)).c_str()));
00229 
00230         }else if (coeffType=="_CONST_"){
00231 
00232           constTemp(col-2)=(atof((oneVec.at(col)).c_str()));
00233 
00234         }else{
00235           quadTemp(rowCounter,(col-2))
00236             =(atof((oneVec.at(col)).c_str()));
00237         }
00238       }
00239     }
00240     oneVec.clear();
00241     rowCounter++;
00242 
00243     if (coeffType=="_CONST_"){
00244       fNClass++;
00245       rowCounter=0;
00246       fTypeClassVec.push_back(tempClassVec);
00247       fQuadCoeff.push_back(quadTemp);
00248       fLinCoeff.push_back(linTemp);
00249       fConstCoeff.push_back(constTemp);
00250 
00251       tempClassVec.clear();
00252 
00253     }else continue;
00254   }
00255 
00256   if(sasInput) sasInput.close();
00257 
00258   MSG("NCExtractionMDA",Msg::kDebug)<<"Completed file Reading" << endl;
00259 
00260   // TODO: Replace this with NCEventInfo's SetChainBranches or whatever it's called
00261   //Associate branches with chain
00262   fChain->SetBranchAddress("header.", &evtInfo.header);
00263   fChain->SetBranchAddress("event.",  &evtInfo.event);
00264   fChain->SetBranchAddress("shower.", &evtInfo.shower);
00265   fChain->SetBranchAddress("track.",  &evtInfo.track);
00266 
00267   //Recover ANtp style variable names from SAS format.
00268   //Best to do this outside main loop
00269   std::vector < std::string> varNameVecDot(fVarNameVec.size());
00270 
00271   for(UInt_t i = 0; i < fVarNameVec.size();i++){
00272     string varNameTemp=fVarNameVec.at(i);
00273 
00274     //Replace "_" with "." in the ntuple variable names
00275     if (varNameTemp.length() > varNameTemp.find_first_of("_"))
00276       varNameTemp.replace(varNameTemp.find_first_of("_"),1,".");
00277 
00278     //Handle var names with additional "Info_"
00279     if (varNameTemp.length() > varNameTemp.find("Info_"))
00280       varNameTemp.replace(varNameTemp.find_first_of("_"),1,".");
00281 
00282     varNameVecDot.at(i)=(varNameTemp);
00283 
00284   }
00285 
00286   // Loop once over all chain leaves and get index vector for the
00287   // relevant variables
00288 
00289   // Get complete list of variables.
00290   TObjArray *fListLeaves;
00291 
00292   fListLeaves=fChain->GetListOfLeaves();
00293 
00294   //Get Iterator on list of leaves
00295   TIter *leafIter= new TIter(fListLeaves);
00296   TLeaf* thisLeaf;
00297   Int_t iterIndex=0;
00298 
00299   leafIter->Reset();
00300 
00301   fLeafIndexVec.resize(varNameVecDot.size());
00302 
00303   // Note: Order of variables as read into fVarNameVec must be
00304   // preserved or P vector will not match calib coefficients
00305   while ((thisLeaf = dynamic_cast<TLeaf*>(leafIter->Next()))){
00306 
00307     string varName=thisLeaf->GetName();
00308 
00309     for(UInt_t i = 0; i < varNameVecDot.size();i++){
00310 
00311       if(varName == varNameVecDot.at(i)){
00312         fLeafIndexVec.at(i)=iterIndex;
00313       }
00314     }
00315     iterIndex++;
00316   }
00317 
00318   return true;
00319 }
00320 
00321 void NCExtractionMDA::FillPVector(NCEventInfo& evtInfo)
00322 {
00323 
00324   MSG("NCExtractionMDA",Msg::kDebug)<<"Filling P Vector" << endl;
00325 
00326   //Now use fVarNameVec to fill the variable values p-vector
00327   if(fVarNameVec.size() == 0) return;
00328 
00329   Double_t valueF;
00330   Int_t valueI;
00331 
00332   TVectorD valueTemp(fVarNameVec.size());
00333 
00334   //Associate branches with chain
00335   // TODO replcce with the NCEventInfo member function
00336   fChain->SetBranchAddress("header.", &evtInfo.header);
00337   fChain->SetBranchAddress("event.",  &evtInfo.event);
00338   fChain->SetBranchAddress("shower.", &evtInfo.shower);
00339   fChain->SetBranchAddress("track.",  &evtInfo.track);
00340 
00341   //Get complete list of variables.
00342   TObjArray *fListLeaves;
00343   fListLeaves=fChain->GetListOfLeaves();
00344 
00345   //Get relevant leaves from index vector
00346   TLeaf* thisLeaf;
00347 
00348   for(UInt_t i = 0; i < fLeafIndexVec.size();i++){
00349     thisLeaf = dynamic_cast<TLeaf*>(fListLeaves->At(fLeafIndexVec.at(i)));
00350 
00351     MSG("NCExtractionMDA",Msg::kDebug)<< i <<" - Found variable "
00352                                       << "ANtp: " << thisLeaf->GetName()
00353                                       << endl;
00354 
00355 
00356     if(strcmp(thisLeaf->GetTypeName(),"Float_t")==0
00357        || strcmp(thisLeaf->GetTypeName(),"float")==0
00358        || strcmp(thisLeaf->GetTypeName(),"Double_t")==0
00359        || strcmp(thisLeaf->GetTypeName(),"double")==0){
00360 
00361       valueF=thisLeaf->GetValue();
00362 
00363       if (valueF > 1e+8 || ANtpDefVal::IsDefault(valueF)
00364           || ANtpDefVal::IsDefault(static_cast<Float_t>(valueF))
00365           || !TMath::Finite(valueF) || TMath::IsNaN(valueF)
00366           || valueF==99999){
00367         MSG("NCExtractionMDA",Msg::kDebug) << "Found "
00368                                            << "default value"
00369                                            << endl;
00370         valueTemp(i)=(fMeanVec.at(0))(i);
00371 
00372       } else {
00373 
00374         valueTemp(i)=valueF;
00375 
00376       }//if(defaultF)
00377     }//if(float)
00378     else if(strcmp(thisLeaf->GetTypeName(),"Int_t")==0
00379             || strcmp(thisLeaf->GetTypeName(),"UInt_t")==0
00380             || strcmp(thisLeaf->GetTypeName(),"int")==0){
00381       valueI=static_cast<Int_t>(thisLeaf->GetValue());
00382 
00383       if (valueI > 1e+8 || ANtpDefVal::IsDefault(valueI)
00384           || !TMath::Finite(valueI) || TMath::IsNaN(valueI)
00385           || valueI==99999){
00386         MSG("NCExtractionMDA",Msg::kDebug) << "Found default "
00387                                            << "value"
00388                                            << endl;
00389         valueTemp(i)=(fMeanVec.at(0))(i);
00390 
00391       } else {
00392 
00393         valueTemp(i)=static_cast <Double_t> (valueI);
00394 
00395       }//if(default)
00396     }//if(int)
00397     MSG("NCExtractionMDA",Msg::kDebug) << "Final value:"
00398                                        <<thisLeaf->GetName() << ", "
00399                                        << valueTemp(i) << endl;
00400 
00401 
00402   }//for fLeafIndexVec
00403 
00404   varPVector.push_back(valueTemp);
00405   valueTemp.Clear();
00406 }
00407 
00408 
00409 //-----------------------------------------------------------------------
00410 double NCExtractionMDA::GetIdProbability(NCEventInfo& evtInfo, int)
00411 {
00412   //Necessary for data running because Prepare now only runs for MC
00413   if (!fIsFilled) fIsFilled = ReadCalibInfoFromFile(evtInfo);
00414   assert(fIsFilled);
00415   //If event length is larger than 40 planes, classify as CC
00416   if ( (evtInfo.event->endPlane - evtInfo.event->begPlane+1) > 40 )
00417     return 1.0;
00418 
00419   //Fill P vector of variables for event
00420   FillPVector(evtInfo);
00421 
00422   VecDouble_t probClass(fNClass);
00423   VecDouble_t discrimValue(fNClass);
00424   //Calculate discriminant function Q for each class
00425   for (Int_t n=0; n < fNClass; n++){
00426     //Create an auxiliary copy of the Pvector
00427     TVectorD varPClone=varPVector.at(0);
00428 
00429     //Note that varPClone is modified by the "*=" operator
00430     Double_t quadTerm=((varPClone)*=(fQuadCoeff.at(n)))*(varPVector.at(0));
00431 
00432     Double_t linTerm=(fLinCoeff.at(n))*(varPVector.at(0));
00433 
00434     Double_t constTerm=(fConstCoeff.at(n))(0);
00435 
00436     discrimValue.at(n)=quadTerm+linTerm+constTerm;
00437 
00438     MSG("NCExtractionMDA",Msg::kDebug)<< "Q("
00439                                       << (fTypeClassVec.at(n)).at(0)
00440                                       <<") = "
00441                                       << discrimValue.at(n) <<endl;
00442   }
00443 
00444   TVectorD tempDiff(fNClass-1);
00445   VecTVectorD discrimDiff;
00446 
00447   //Calculate discriminator difference vectors
00448   //P(a) = e^a/(e^(a)+e^(b)+e^(c)+...) = 1/(1+e^(b-a)+e^(c-a)+...)
00449   //Avoids e^X where X is big => machine-dep floating exception.
00450 
00451   for (Int_t n=0; n < fNClass; n++){
00452 
00453     Int_t diffCount=0;
00454 
00455     for (Int_t m=0; m < fNClass; m++){
00456       if(m!=n){
00457         tempDiff(diffCount)=(discrimValue.at(m)-discrimValue.at(n));
00458         diffCount++;
00459       } else continue;
00460     }
00461 
00462     if (TMath::Abs(tempDiff.Min())>700. || TMath::Abs(tempDiff.Max())>700){
00463       MSG("NCExtractionMDA",Msg::kDebug)<<"Bypass floating exception "
00464                                         << endl;
00465       break;
00466     }else discrimDiff.push_back(tempDiff);
00467 
00468     tempDiff.Zero();
00469   }
00470 
00471   if(discrimDiff.size()!=static_cast<UInt_t>(fNClass)){
00472     varPVector.clear();
00473     probClass.clear();
00474 
00475     return ANtpDefVal::kFloat;
00476   }
00477 
00478   double pidNC = 0;
00479 
00480   //Calculate probability value for each class
00481   for (Int_t n=0; n < fNClass; n++){
00482 
00483     Double_t denomSum=0.0;
00484 
00485     for (Int_t m=0; m < (fNClass-1); m++){
00486       denomSum+=TMath::Exp((discrimDiff.at(n))(m));
00487     }
00488 
00489     probClass.at(n)=(1.0/(1.0+denomSum));
00490 
00491     MSG("NCExtractionMDA",Msg::kDebug) << "P("
00492                                        << (fTypeClassVec.at(n)).at(0)
00493                                        <<") = "
00494                                        << probClass.at(n) <<endl;
00495 
00496 
00497     if((fTypeClassVec.at(n)).at(0)=="ncu")
00498       pidNC=probClass.at(n);
00499 
00500   }
00501 
00502   varPVector.clear();
00503   probClass.clear();
00504 
00505   MSG("NCExtractionMDA",Msg::kDebug)<<"Calculating Prob(NC) done" << endl;
00506 
00507   //NCUtils assumes NC has low values of separation parameter
00508   //and CC high values, so this makes it happy.
00509   if (pidNC < 0.000001) return 1.0;
00510   else return (1.0-pidNC);
00511 
00512 }
00513 
00514 TString NCExtractionMDA::WhichMdaCoeffHistDef(Bool_t isCoeff,
00515                                               Detector::Detector_t detType,
00516                                               ReleaseType::Release_t mcType)
00517 {
00518   assert(mcType != ReleaseType::kUnknown);
00519 
00520   assert(!ReleaseType::IsCarrot(mcType));
00521 
00522   if(detType == Detector::kFar){
00523     if(ReleaseType::IsDaikon(mcType)){
00524       TString s = isCoeff ? "Coeff" : "HistDef";
00525       return "Mda_"+s+"_Far_L010z185i_CedarDaikon.dat";
00526     }
00527     return "";
00528   }
00529 
00530   if(detType == Detector::kNear){
00531     if(ReleaseType::IsDaikon(mcType)){
00532       TString s = isCoeff ? "Coeff" : "HistDef";
00533       return "Mda_"+s+"_Near_L010z185i_CedarDaikon.dat";
00534     }
00535     return "";
00536   }
00537 
00538   return "";
00539 }

Generated on Mon Feb 15 11:07:03 2010 for loon by  doxygen 1.3.9.1