00001
00002
00003
00004 #include <algorithm>
00005 #include <cmath>
00006 #include <iomanip>
00007 #include <iostream>
00008 #include <stdio.h>
00009 #include <stdlib.h>
00010 #include <sstream>
00011
00012
00013 #include "TFile.h"
00014 #include "TTree.h"
00015
00016
00017 #include "LikeModule.h"
00018
00019 using namespace std;
00020
00021
00022 Lit::LikeModule::LikeModule()
00023 :fDimn(0),
00024 fTree(0)
00025 {
00026 }
00027
00028
00029 Lit::LikeModule::~LikeModule()
00030 {
00031 if(fTree)
00032 {
00033 delete fTree;
00034 }
00035 }
00036
00037
00038 bool Lit::LikeModule::Write(const string &filepath, const string &treename)
00039 {
00040 TFile *file = new TFile(filepath.c_str(), "UPDATE");
00041 if(!file || !file -> IsOpen())
00042 {
00043 cout << "LikeModule::Write() - failed to recreate ROOT file:\n" << filepath << endl;
00044 return false;
00045 }
00046
00047
00048 Write(file, treename);
00049
00050 file -> Write();
00051 file -> Close();
00052
00053 return true;
00054 }
00055
00056
00057 void Lit::LikeModule::Write(TDirectory *dir, const string &treename)
00058 {
00059 if(!dir)
00060 {
00061 cerr << "LikeModule::Write() - null TDirectory pointer" << endl;
00062 return;
00063 }
00064
00065 if(dir -> Get(treename.c_str()))
00066 {
00067 const string treename_tmp = treename + ";*";
00068 cout << "LikeModule::Write - delete object with name " << treename_tmp << endl;
00069 dir -> Delete(treename_tmp.c_str());
00070 }
00071
00072 Event *event = new Event();
00073 TTree *tree = new TTree(treename.c_str(), "event tree");
00074 tree -> SetDirectory(dir);
00075 tree -> Branch("event", "Lit::Event", &event);
00076
00077 double size = 0.0;
00078 for(EventVec::const_iterator it = fEvent.begin(); it != fEvent.end(); ++it)
00079 {
00080 (*event) = (*it);
00081 size += tree -> Fill();
00082 }
00083
00084 cout << "LikeModule::Write() - filled tree with " << size/1048576.0 << "MB and "
00085 << fEvent.size () << " events" << endl;
00086
00087 delete event;
00088 }
00089
00090
00091 bool Lit::LikeModule::Read(const string &filepath, const int nevent, const string &treename)
00092 {
00093 TFile *file = new TFile(filepath.c_str(), "READ");
00094 if(!file || !file -> IsOpen())
00095 {
00096 cout << "LikeModule::Read() - failed to read ROOT file:\n" << filepath << endl;
00097 return false;
00098 }
00099
00100 const bool result = Read(file, nevent, treename);
00101
00102 file -> Close();
00103
00104 return result;
00105 }
00106
00107
00108 bool Lit::LikeModule::Read(TDirectory *dir, int nevent, const string &treename)
00109 {
00110 if(!dir)
00111 {
00112 cerr << "LikeModule::Read - invalid TDirectory pointer" << endl;
00113 return false;
00114 }
00115
00116 TTree *tree = dynamic_cast<TTree *>(dir -> Get(treename.c_str()));
00117 if(!tree)
00118 {
00119 cout << "LikeModule::Read() - failed to find " << treename << " tree" << endl;
00120 return false;
00121 }
00122
00123 Event *event = new Event();
00124 tree -> SetBranchAddress("event", &event);
00125
00126 if(nevent < 1 || nevent > tree -> GetEntries())
00127 {
00128 nevent = tree -> GetEntries();
00129 }
00130
00131 double size = 0.0;
00132 for(int i = 0; i < nevent; ++i)
00133 {
00134 size += tree -> GetEntry(i);
00135 Add(*event);
00136 }
00137
00138 cout << "LikeModule::Read() - read " << size/1048576.0 << "MB and "
00139 << fEvent.size () << " events" << endl;
00140
00141 delete event;
00142
00143 return true;
00144 }
00145
00146
00147 void Lit::LikeModule::Add(const Event &event)
00148 {
00149 if(fTree)
00150 {
00151 cerr << "LikeModule::Add() - can not add event: tree is already built" << endl;
00152 return;
00153 }
00154
00155 if(fDimn < 1)
00156 {
00157 fDimn = event.GetNVar();
00158 }
00159 else if(fDimn != event.GetNVar())
00160 {
00161 cerr << "LikeModule::Add() - number of dimensions does not match previous events" << endl;
00162 return;
00163 }
00164
00165 fEvent.push_back(event);
00166
00167 for(unsigned int ivar = 0; ivar < fDimn; ++ivar)
00168 {
00169 fVar[ivar].push_back(event.GetVar(ivar));
00170 }
00171
00172 std::map<short, unsigned int>::iterator cit = fCount.find(event.GetType());
00173 if(cit == fCount.end())
00174 {
00175 fCount[event.GetType()] = 1;
00176 }
00177 else
00178 {
00179 ++(cit -> second);
00180 }
00181 }
00182
00183
00184 bool Lit::LikeModule::Fill(const unsigned short optimize_depth, const std::string &option)
00185 {
00186 if(fTree)
00187 {
00188 cerr << "LikeModule::Fill - tree already have been created" << endl;
00189 return false;
00190 }
00191
00192
00193
00194 unsigned int min = 0;
00195 if(option.find("trim") != string::npos)
00196 {
00197 for(map<short, unsigned int>::const_iterator it = fCount.begin(); it != fCount.end(); ++it)
00198 {
00199 if(min == 0 || min > it -> second)
00200 {
00201 min = it -> second;
00202 }
00203 }
00204
00205 cout << "LikeModule::Fill - trimming all classes to " << min << " events" << endl;
00206 for(map<short, unsigned int>::const_iterator it = fCount.begin(); it != fCount.end(); ++it)
00207 {
00208 cout << "class " << it -> first << " has "
00209 << setfill(' ') << setw(8) << it -> second << " events" << endl;
00210 }
00211
00212 fCount.clear();
00213 fVar.clear();
00214
00215 EventVec evec;
00216
00217 for(EventVec::const_iterator event = fEvent.begin(); event != fEvent.end(); ++event)
00218 {
00219 std::map<short, unsigned int>::iterator cit = fCount.find(event -> GetType());
00220 if(cit == fCount.end())
00221 {
00222 fCount[event -> GetType()] = 1;
00223 }
00224 else if(cit -> second < min)
00225 {
00226 ++(cit -> second);
00227 }
00228 else
00229 {
00230 continue;
00231 }
00232
00233 for(unsigned int d = 0; d < fDimn; ++d)
00234 {
00235 fVar[d].push_back(event -> GetVar(d));
00236 }
00237
00238 evec.push_back(*event);
00239 }
00240
00241 cout << "LikeModule::Fill - erased " << fEvent.size() - evec.size() << " events" << endl;
00242
00243 fEvent = evec;
00244 }
00245
00246
00247 fCount.clear();
00248
00249
00250 for(VarMap::iterator it = fVar.begin(); it != fVar.end(); ++it)
00251 {
00252 std::sort((it -> second).begin(), (it -> second).end());
00253 }
00254
00255 if(option.find("metric") != string::npos)
00256 {
00257 ComputeMetric();
00258
00259
00260
00261 for(VarMap::iterator it = fVar.begin(); it != fVar.end(); ++it)
00262 {
00263 std::sort((it -> second).begin(), (it -> second).end());
00264 }
00265 }
00266
00267
00268
00269
00270
00271 fTree = Balance(optimize_depth);
00272
00273 if(!fTree)
00274 {
00275 cout << "LikeModule::Fill - failed to create tree" << endl;
00276 return false;
00277 }
00278
00279 for(EventVec::const_iterator event = fEvent.begin(); event != fEvent.end(); ++event)
00280 {
00281 fTree -> Add(*event, 0);
00282
00283 std::map<short, unsigned int>::iterator cit = fCount.find(event -> GetType());
00284 if(cit == fCount.end())
00285 {
00286 fCount[event -> GetType()] = 1;
00287 }
00288 else
00289 {
00290 ++(cit -> second);
00291 }
00292 }
00293
00294 for(map<short, unsigned int>::const_iterator it = fCount.begin(); it != fCount.end(); ++it)
00295 {
00296 cout << "class " << it -> first << " has "
00297 << setfill(' ') << setw(8) << it -> second << " events" << endl;
00298 }
00299
00300 if(option.find("erase") != string::npos)
00301 {
00302 cout << "Erasing event vector with size " << fEvent.size() << endl;
00303 fEvent.clear();
00304 fVar.clear();
00305 }
00306
00307 return true;
00308 }
00309
00310
00311 bool Lit::LikeModule::Find(Event event, const unsigned int nfind) const
00312 {
00313
00314
00315
00316
00317 if(!fTree)
00318 {
00319 cerr << "LikeModule::Find() - tree has not been filled" << endl;
00320 return false;
00321 }
00322 if(fDimn != event.GetNVar())
00323 {
00324 cerr << "LikeModule::Find() - number of dimension does not match training events" << endl;
00325 return false;
00326 }
00327 if(nfind < 1)
00328 {
00329 cerr << "LikeModule::Find() - requested 0 nearest neighbors" << endl;
00330 return false;
00331 }
00332
00333
00334
00335 if(!fVarScale.empty())
00336 {
00337 event = Scale(event);
00338 }
00339
00340 fResult.Clear();
00341
00342 fResult.fEvent = event;
00343
00344
00345 Lit::Find(fResult.fList, fTree, event, nfind);
00346
00347 for(List::const_iterator lit = fResult.GetList().begin(); lit != fResult.GetList().end(); ++lit)
00348 {
00349 const Node *node = lit -> first;
00350
00351 map<short, double>::iterator pit = fResult.fProbNN.find(node -> GetType());
00352 if(pit == fResult.fProbNN.end())
00353 {
00354 fResult.fProbNN[node -> GetType()] = node -> GetWeight()/double(nfind);
00355 }
00356 else
00357 {
00358 pit -> second += node -> GetWeight()/double(nfind);
00359 }
00360 }
00361
00362 return true;
00363 }
00364
00365
00366 bool Lit::LikeModule::Find(const unsigned int nfind, const string &option) const
00367 {
00368 if(fCount.empty() || !fTree)
00369 {
00370 return false;
00371 }
00372
00373 static std::map<short, unsigned int>::const_iterator cit = fCount.end();
00374
00375 if(cit == fCount.end())
00376 {
00377 cit = fCount.begin();
00378 }
00379
00380 const short etype = (cit++) -> first;
00381
00382 if(option == "flat")
00383 {
00384 VarVec dvec;
00385 for(unsigned int d = 0; d < fDimn; ++d)
00386 {
00387 VarMap::const_iterator vit = fVar.find(d);
00388 if(vit == fVar.end())
00389 {
00390 return false;
00391 }
00392
00393 const std::vector<double> &vvec = vit -> second;
00394
00395 if(vvec.empty())
00396 {
00397 return false;
00398 }
00399
00400
00401 const VarType min = vvec.front();
00402 const VarType max = vvec.back();
00403 const VarType width = max - min;
00404
00405 if(width < 0.0 || width > 0.0)
00406 {
00407 dvec.push_back(min + width*double(std::rand())/double(RAND_MAX));
00408 }
00409 else
00410 {
00411 return false;
00412 }
00413 }
00414
00415 const Event event(dvec, 1.0, etype);
00416
00417 Find(event, nfind);
00418
00419 return true;
00420 }
00421
00422 return false;
00423 }
00424
00425
00426 Lit::Node* Lit::LikeModule::Balance(const unsigned int bdepth)
00427 {
00428
00429
00430
00431
00432 if(fVar.empty() || fDimn != fVar.size())
00433 {
00434 cout << "LikeModule::Balance - can not build a tree" << endl;
00435 return 0;
00436 }
00437
00438 const unsigned int size = (fVar.begin() -> second).size();
00439 if(size < 1)
00440 {
00441 cout << "LikeModule::Balance - can not build a tree without events" << endl;
00442 return 0;
00443 }
00444
00445 for(VarMap::const_iterator it = fVar.begin(); it != fVar.end(); ++it)
00446 {
00447 if((it -> second).size() != size)
00448 {
00449 cout << "LikeModule::Balance - # of variables doesn't match between dimensions" << endl;
00450 return 0;
00451 }
00452 }
00453
00454 if(double(fDimn*size) < pow(2.0, double(bdepth)))
00455 {
00456 cout << "LikeModule::Balance - balance depth exceeds number of events" << endl;
00457 return 0;
00458 }
00459
00460 cout << "Balancing tree for " << fDimn << " variables with " << size << " values" << endl;
00461
00462 vector<Node *> pvec, cvec;
00463
00464 VarMap::const_iterator it = fVar.find(0);
00465 if(it == fVar.end() || (it -> second).size() < 2)
00466 {
00467 cout << "Missing 0 variable" << endl;
00468 return 0;
00469 }
00470
00471 const Event pevent(VarVec(fDimn, (it -> second)[size/2]), -1.0, -1);
00472
00473 Node *tree = new Node(0, pevent, 0);
00474
00475 pvec.push_back(tree);
00476
00477 for(unsigned int depth = 1; depth < bdepth; ++depth)
00478 {
00479 const unsigned int mod = depth % fDimn;
00480
00481 VarMap::const_iterator vit = fVar.find(mod);
00482 if(vit == fVar.end())
00483 {
00484 cerr << "Missing " << mod << " variable" << endl;
00485 return 0;
00486 }
00487 const vector<double> &dvec = vit -> second;
00488
00489 if(dvec.size() < 2)
00490 {
00491 cerr << "Missing " << mod << " variable" << endl;
00492 return 0;
00493 }
00494
00495
00496
00497 unsigned int ichild = 1;
00498 for(vector<Node *>::iterator pit = pvec.begin(); pit != pvec.end(); ++pit)
00499 {
00500 Node *parent = *pit;
00501
00502 const VarType lmedian = dvec[size*ichild/(2*pvec.size() + 1)];
00503
00504 ++ichild;
00505
00506 const VarType rmedian = dvec[size*ichild/(2*pvec.size() + 1)];
00507
00508 ++ichild;
00509
00510 const Event levent(VarVec(fDimn, lmedian), -1.0, -1);
00511 const Event revent(VarVec(fDimn, rmedian), -1.0, -1);
00512
00513 Node *lchild = new Node(parent, levent, mod);
00514 Node *rchild = new Node(parent, revent, mod);
00515
00516 parent -> SetNodeL(lchild);
00517 parent -> SetNodeR(rchild);
00518
00519 cvec.push_back(lchild);
00520 cvec.push_back(rchild);
00521 }
00522
00523 pvec = cvec;
00524 cvec.clear();
00525 }
00526
00527 return tree;
00528 }
00529
00530
00531 void Lit::LikeModule::ComputeMetric(const unsigned int ifrac)
00532 {
00533
00534
00535
00536
00537
00538 if(ifrac > 49)
00539 {
00540 cerr << "LikeModule::ComputeMetric() - fraction can not exceed 50%" << endl;
00541 return;
00542 }
00543 if(fEvent.empty())
00544 {
00545 cerr << "LikeModule::ComputeMetric() - event vector is empty" << endl;
00546 return;
00547 }
00548 if(!fVarScale.empty())
00549 {
00550 cerr << "LikeModule::ComputeMetric() - metric is already computed" << endl;
00551 return;
00552 }
00553
00554 cout << "Computing scale for 1d axes: ifrac = " << ifrac << endl;
00555
00556 fVarScale.clear();
00557
00558 for(VarMap::const_iterator vit = fVar.begin(); vit != fVar.end(); ++vit)
00559 {
00560 const vector<double> &dvec = vit -> second;
00561
00562 vector<double>::const_iterator beg_it = dvec.end();
00563 vector<double>::const_iterator end_it = dvec.end();
00564 for(vector<double>::const_iterator dit = dvec.begin(); dit != dvec.end(); ++dit)
00565 {
00566 const int dist = distance(dvec.begin(), dit);
00567
00568 if((100*dist)/dvec.size() == ifrac && beg_it == dvec.end())
00569 {
00570 beg_it = dit;
00571 }
00572
00573 if((100*dist)/dvec.size() == 100 - ifrac && end_it == dvec.end())
00574 {
00575 end_it = dit;
00576 }
00577 }
00578
00579 if(beg_it == dvec.end() || end_it == dvec.end())
00580 {
00581 cerr << "LikeModule::ComputeMetric() - failed to determine scale " << vit -> first << endl;
00582 continue;
00583 }
00584
00585 const double lpos = *beg_it;
00586 const double rpos = *end_it;
00587
00588 if(!(lpos < rpos))
00589 {
00590 cerr << "LikeModule::ComputeMetric() - min value is greater than max value" << endl;
00591 continue;
00592 }
00593
00594 cout << "Variable " << vit -> first
00595 << " included " << distance(beg_it, end_it) + 1
00596 << " events: width = " << setfill(' ') << setw(5) << setprecision(3) << rpos - lpos
00597 << ", (min, max) = (" << setfill(' ') << setw(5) << setprecision(3) << lpos
00598 << ", " << setfill(' ') << setw(5) << setprecision(3) << rpos << ")" << endl;
00599
00600 fVarScale[vit -> first] = rpos - lpos;
00601 }
00602
00603 fVar.clear();
00604
00605 for(unsigned int ievent = 0; ievent < fEvent.size(); ++ievent)
00606 {
00607 fEvent[ievent] = Scale(fEvent[ievent]);
00608
00609 for(unsigned int ivar = 0; ivar < fDimn; ++ivar)
00610 {
00611 fVar[ivar].push_back(fEvent[ievent].GetVar(ivar));
00612 }
00613 }
00614 }
00615
00616
00617 const Lit::Event Lit::LikeModule::Scale(const Event &event) const
00618 {
00619
00620
00621
00622 if(fVarScale.empty())
00623 {
00624 return event;
00625 }
00626
00627 if(event.GetNVar() != fVarScale.size())
00628 {
00629 cerr << "LikeModule::Scale() - mismatched metric and event size" << endl;
00630 return event;
00631 }
00632
00633 VarVec vvec(event.GetNVar(), 0.0);
00634
00635 for(unsigned int ivar = 0; ivar < event.GetNVar(); ++ivar)
00636 {
00637 map<int, double>::const_iterator fit = fVarScale.find(ivar);
00638 if(fit == fVarScale.end())
00639 {
00640 cerr << "LikeModule::Scale() - failed to find scale for " << ivar << endl;
00641 continue;
00642 }
00643
00644 if(fit -> second > 0.0)
00645 {
00646 vvec[ivar] = event.GetVar(ivar)/fit -> second;
00647 }
00648 else
00649 {
00650 cerr << "Variable " << ivar << " has zero width" << endl;
00651 }
00652 }
00653
00654 return Event(vvec, event.GetWeight(), event.GetType(), event.GetTgtVec());
00655 }
00656
00657
00658 void Lit::LikeModule::Print() const
00659 {
00660 Print(std::cout);
00661 }
00662
00663
00664 void Lit::LikeModule::Print(std::ostream &os) const
00665 {
00666 os << "---------------------------------------------------------------------------------"<< endl;
00667 os << "Dumping stringstream content of LikeModule" << endl;
00668 os << "---------------------------------------------------------------------------------"<< endl;
00669 }