00001
00002
00003
00004 #include <cassert>
00005 #include <climits>
00006 #include <cmath>
00007 #include <iterator>
00008 #include <iomanip>
00009 #include <sstream>
00010
00011
00012 #include "TFile.h"
00013 #include "TMath.h"
00014
00015
00016 #include "Registry/Registry.h"
00017 #include "Util/UtilString.h"
00018
00019
00020 #include "PhysicsNtuple/Default.h"
00021 #include "PhysicsNtuple/Factory.h"
00022 #include "LikeModule.h"
00023
00024
00025 #include "FillkNN.h"
00026
00027 REGISTER_ANP_OBJECT(AlgEvent,FillkNN)
00028 REGISTER_ANP_OBJECT(AlgSnarl,FillkNN)
00029
00030 using namespace std;
00031
00032
00033 Anp::FillkNN::FillkNN()
00034 :fNAbortFind(0),
00035 fNTrackMiss(0),
00036 fNTrackFill(0),
00037 fNEventMiss(0),
00038 fNEventFill(0),
00039 fDirName("fill_knn"),
00040 fFilePath(),
00041 fMaxNEvent(0),
00042 fKeyBase(5200),
00043 fKeySignal(1),
00044 fKeyTruth(7000),
00045 fTrimDelta(0),
00046 fKNeighbor(81),
00047 fKNeighborMod(20),
00048 fAddAll(false),
00049 fAddEvent(true),
00050 fCreateNew(false),
00051 fPrint(false),
00052 fRegression(false),
00053 fTrim(true),
00054 fUseKernel(false),
00055 fUseTrack(true),
00056 fModule(0)
00057 {
00058 }
00059
00060
00061 Anp::FillkNN::~FillkNN()
00062 {
00063 if(fModule)
00064 {
00065 delete fModule;
00066 fModule = 0;
00067 }
00068 }
00069
00070
00071 bool Anp::FillkNN::Run(Record &record)
00072 {
00073
00074
00075
00076 if(!fModule) return true;
00077
00078 map<short, const DataVec> dmap;
00079
00080 if(fUseTrack)
00081 {
00082 for(TrackIterator itrack = record.TrackBegIterator(); itrack != record.TrackEndIterator(); ++itrack)
00083 {
00084
00085 if(fCreateNew)
00086 {
00087 if(FillkNN::Fill(itrack -> DataBeg(), itrack -> DataEnd(), itrack -> Weight()))
00088 {
00089 ++fNTrackFill;
00090 }
00091 else
00092 {
00093 ++fNTrackMiss;
00094 }
00095 }
00096 else
00097 {
00098 const DataVec dvec = FillkNN::Find(itrack -> DataBeg(), itrack -> DataEnd(), fKeyBase);
00099
00100 if(dvec.empty())
00101 {
00102 ++fNTrackMiss;
00103 }
00104 else
00105 {
00106 ++fNTrackFill;
00107 }
00108
00109 for(DataVec::const_iterator dit = dvec.begin(); dit != dvec.end(); ++dit)
00110 {
00111 itrack -> Add(dit -> Key(), dit -> Data());
00112 }
00113
00114 if(!dmap.insert(map<short, const DataVec>::value_type(itrack -> TrackIndex(), dvec)).second)
00115 {
00116 cerr << "FillkNN::Run - duplicate track index " << itrack -> TrackIndex() << endl;
00117 }
00118 }
00119 }
00120 }
00121
00122 for(EventIterator event = record.EventBegIterator(); event != record.EventEndIterator(); ++event)
00123 {
00124 if(fUseTrack)
00125 {
00126 if(!fCreateNew && fAddEvent)
00127 {
00128 const TrackIter track = Anp::LongestTrack(*event, record);
00129 if(track == record.TrackEnd())
00130 {
00131 continue;
00132 }
00133
00134 const map<short, const DataVec>::const_iterator dit = dmap.find(track -> TrackIndex());
00135 if(dit == dmap.end())
00136 {
00137 cerr << "FillkNN::Run - failed to find track index " << track -> TrackIndex() << endl;
00138 continue;
00139 }
00140
00141 const DataVec &dvec = dit -> second;
00142
00143 for(DataVec::const_iterator dit = dvec.begin(); dit != dvec.end(); ++dit)
00144 {
00145 event -> Add(dit -> Key(), dit -> Data());
00146 }
00147 }
00148 }
00149 else if(fCreateNew)
00150 {
00151 if(FillkNN::Fill(event -> DataBeg(), event -> DataEnd(), event -> Weight()))
00152 {
00153 ++fNEventFill;
00154 }
00155 else
00156 {
00157 ++fNEventMiss;
00158 }
00159 }
00160 else
00161 {
00162 const DataVec dvec = FillkNN::Find(event -> DataBeg(), event -> DataEnd(), fKeyBase);
00163
00164 if(dvec.empty())
00165 {
00166 ++fNEventMiss;
00167 }
00168 else
00169 {
00170 ++fNEventFill;
00171 }
00172
00173 for(DataVec::const_iterator dit = dvec.begin(); dit != dvec.end(); ++dit)
00174 {
00175 event -> Add(dit -> Key(), dit -> Data());
00176 }
00177 }
00178 }
00179
00180 return true;
00181 }
00182
00183
00184 bool Anp::FillkNN::Run(Event &event, const Record &record, const bool pass)
00185 {
00186
00187
00188
00189 if(!pass || !fModule) return true;
00190
00191 if(fUseTrack)
00192 {
00193 const TrackIter track = Anp::LongestTrack(event, record);
00194 if(track == record.TrackEnd())
00195 {
00196 return true;
00197 }
00198
00199 if(fCreateNew)
00200 {
00201 if(FillkNN::Fill(track -> DataBeg(), track -> DataEnd(), event.Weight()))
00202 {
00203 ++fNTrackFill;
00204 }
00205 else
00206 {
00207 ++fNTrackMiss;
00208 }
00209 }
00210 else
00211 {
00212 const DataVec dvec = FillkNN::Find(track -> DataBeg(), track -> DataEnd(), fKeyBase);
00213
00214 if(dvec.empty())
00215 {
00216 ++fNTrackMiss;
00217 }
00218 else
00219 {
00220 ++fNTrackFill;
00221 }
00222
00223 for(DataVec::const_iterator dit = dvec.begin(); dit != dvec.end(); ++dit)
00224 {
00225 event.Add(dit -> Key(), dit -> Data());
00226 }
00227 }
00228 }
00229 else
00230 {
00231 if(fCreateNew)
00232 {
00233 if(FillkNN::Fill(event.DataBeg(), event.DataEnd(), event.Weight()))
00234 {
00235 ++fNEventFill;
00236 }
00237 else
00238 {
00239 ++fNEventMiss;
00240 }
00241 }
00242 else
00243 {
00244 const DataVec dvec = FillkNN::Find(event.DataBeg(), event.DataEnd(), fKeyBase);
00245
00246 if(dvec.empty())
00247 {
00248 ++fNEventMiss;
00249 }
00250 else
00251 {
00252 ++fNEventFill;
00253 }
00254
00255 for(DataVec::const_iterator dit = dvec.begin(); dit != dvec.end(); ++dit)
00256 {
00257 event.Add(dit -> Key(), dit -> Data());
00258 }
00259 }
00260 }
00261
00262 return true;
00263 }
00264
00265
00266 void Anp::FillkNN::Config(const Registry ®)
00267 {
00268
00269
00270
00271
00272 fKeys = Anp::ReadList<short>(reg, "FillkNNKeyList", ", ");
00273 fTgts = Anp::ReadList<short>(reg, "FillkNNTgtList", ", ");
00274
00275
00276
00277
00278 Anp::Read(reg, "FillkNNFilePath", fFilePath);
00279 Anp::Read(reg, "FillkNNDirName", fDirName);
00280
00281 if((fDirName == "fill_knn" || fDirName.empty()) && !fKeys.empty())
00282 {
00283
00284
00285
00286 stringstream dname;
00287 dname << "knn";
00288 for(ShortVec::const_iterator it = fKeys.begin(); it != fKeys.end(); ++it)
00289 {
00290 dname << "_" << *it;
00291 }
00292
00293 fDirName = dname.str();
00294 }
00295
00296
00297
00298
00299 reg.Get("FillkNNMaxNEvent", fMaxNEvent);
00300 reg.Get("FillkNNKeyBase", fKeyBase);
00301 reg.Get("FillkNNKeySignal", fKeySignal);
00302 reg.Get("FillkNNKeyTruth", fKeyTruth);
00303 reg.Get("FillkNNTrimDelta", fTrimDelta);
00304
00305 int value_int = 0;
00306 if(reg.Get("FillkNNKNeighbor", value_int) && value_int >= 0)
00307 {
00308 fKNeighbor = static_cast<unsigned int>(value_int);
00309 }
00310
00311 value_int = 0;
00312 if(reg.Get("FillkNNKNeighborMod", value_int) && value_int > 0)
00313 {
00314 fKNeighborMod = static_cast<unsigned int>(value_int);
00315 }
00316
00317
00318
00319
00320 Anp::Read(reg, "FillkNNAddAll", fAddAll);
00321 Anp::Read(reg, "FillkNNAddEvent", fAddEvent);
00322 Anp::Read(reg, "FillkNN", fCreateNew);
00323 Anp::Read(reg, "FillkNNPrint", fPrint);
00324 Anp::Read(reg, "FillkNNRegression", fRegression);
00325 Anp::Read(reg, "FillkNNTrim", fTrim);
00326 Anp::Read(reg, "FillkNNUseKernel", fUseKernel);
00327 Anp::Read(reg, "FillkNNUseTrack", fUseTrack);
00328
00329 if(reg.KeyExists("PrintConfig"))
00330 {
00331 cout << "FillkNN::Config" << endl;
00332
00333 Anp::PrintList<short>(" KeyList", fKeys);
00334 Anp::PrintList<short>(" TgtList", fTgts);
00335
00336 cout << " DirName = " << fDirName << endl
00337 << " FilePath = " << fFilePath << endl
00338 << " MaxNEvent = " << fMaxNEvent << endl
00339 << " KeyBase = " << fKeyBase << endl
00340 << " KeySignal = " << fKeySignal << endl
00341 << " KeyTruth = " << fKeyTruth << endl
00342 << " KNeighbor = " << fKNeighbor << endl
00343 << " KNeighborMod = " << fKNeighborMod << endl
00344 << " AddAll = " << fAddAll << endl
00345 << " AddEvent = " << fAddEvent << endl
00346 << " CreateNew = " << fCreateNew << endl
00347 << " Print = " << fPrint << endl
00348 << " Regression = " << fRegression << endl
00349 << " Trim = " << fTrim << endl
00350 << " TrimDelta = " << fTrimDelta << endl
00351 << " UseKernel = " << fUseKernel << endl
00352 << " UseTrack = " << fUseTrack << endl;
00353 }
00354
00355
00356
00357
00358 if(fModule)
00359 {
00360 delete fModule;
00361 }
00362 }
00363
00364
00365 bool Anp::FillkNN::Init(const Header &)
00366 {
00367
00368
00369
00370
00371 if(fModule)
00372 {
00373 cerr << "FillkNN::Init - knn module already exists" << endl;
00374 return true;
00375 }
00376 else
00377 {
00378 fModule = new Lit::LikeModule();
00379 }
00380
00381
00382
00383
00384 if(fCreateNew) return true;
00385
00386 cout << "FillkNN::Init - attempting to read prototype from:\n "
00387 << fFilePath << ":" << fDirName << endl;
00388
00389 TFile *file = TFile::Open(fFilePath.c_str(), "READ");
00390 if(!file || !(file -> IsOpen()))
00391 {
00392 cerr << "FillkNN::Init - failed to open ROOT file" << endl;
00393 return false;
00394 }
00395
00396
00397
00398
00399 TDirectory *dir = dynamic_cast<TDirectory *>(file -> Get(fDirName.c_str()));
00400
00401 if(dir && fModule -> Read(dir, fMaxNEvent))
00402 {
00403
00404
00405
00406
00407
00408
00409
00410 const Lit::EventVec &evec = fModule -> GetEventVec();
00411
00412 for(Lit::EventVec::const_iterator it = evec.begin(); it != evec.end(); ++it)
00413 {
00414 if(std::find(fTypes.begin(), fTypes.end(), it -> GetType()) == fTypes.end())
00415 {
00416 fTypes.push_back(it -> GetType());
00417 }
00418 }
00419
00420 std::sort(fTypes.begin(), fTypes.end());
00421
00422 for(unsigned int i = 0; i < fTypes.size(); ++i)
00423 {
00424 if(i == 0)
00425 {
00426 cout << "FillkNN::Init - prototypes: ";
00427 }
00428 if(i + 1 != fTypes.size())
00429 {
00430 cout << fTypes[i] << ", ";
00431 }
00432 else
00433 {
00434 cout << fTypes[i] << endl;
00435 }
00436 }
00437
00438 if(fTrim)
00439 {
00440 fModule -> Fill(7, "metric trim erase");
00441 }
00442 else
00443 {
00444 fModule -> Fill(7, "metric erase");
00445 }
00446 }
00447 else
00448 {
00449 cerr << "FillkNN::Init - failed to read LikeModule" << endl;
00450 delete fModule; fModule = 0;
00451 }
00452
00453
00454
00455
00456 file -> Close();
00457
00458 return true;
00459 }
00460
00461
00462 void Anp::FillkNN::End(const DataBlock &)
00463 {
00464 if(fModule && fCreateNew)
00465 {
00466 TFile *file = TFile::Open(fFilePath.c_str(), "UPDATE");
00467
00468 if(file && file -> IsOpen())
00469 {
00470
00471
00472
00473
00474
00475
00476 if(fTrim)
00477 {
00478 fModule -> Fill(7, "trim");
00479 }
00480 else
00481 {
00482 fModule -> Fill(7, "");
00483 }
00484
00485 cout << "FillkNN::End - writing prototype to\n "
00486 << fFilePath << ":" << fDirName << endl;
00487
00488 if(file -> Get(fDirName.c_str()))
00489 {
00490 const string delname = fDirName + ";1";
00491 file -> Delete(delname.c_str());
00492 cout << "FillkNN::End - deleted TDirectory " << delname << endl;
00493 }
00494
00495 fModule -> Write(file -> mkdir(fDirName.c_str()));
00496
00497 file -> Write();
00498 file -> Close();
00499 }
00500 }
00501
00502 if(fModule)
00503 {
00504 delete fModule;
00505 fModule = 0;
00506 }
00507
00508 cout << " NAbortFind = " << fNAbortFind << endl
00509 << " NTrackMiss = " << fNTrackMiss << endl
00510 << " NTrackFill = " << fNTrackFill << endl
00511 << " NEventMiss = " << fNEventMiss << endl
00512 << " NEventFill = " << fNEventFill << endl;
00513 }
00514
00515
00516 bool Anp::FillkNN::Fill(DataIter beg, DataIter end, const double weight)
00517 {
00518
00519
00520
00521
00522 if(!fModule)
00523 {
00524 cerr << "FillkNN::Fill - knn module does not exist" << endl;
00525 return false;
00526 }
00527
00528 short etype = 0;
00529 if(!fRegression)
00530 {
00531 assert(fKeyTruth < SHRT_MAX && "int to short overflow");
00532
00533 const DataIter fit = std::find(beg, end, static_cast<short>(fKeyTruth));
00534 if(fit == end)
00535 {
00536 return false;
00537 }
00538
00539 assert(fit -> Data() < SHRT_MAX && "int to short overflow");
00540 etype = static_cast<short>(fit -> Data());
00541
00542 if(fTrim && fTrimDelta > 0)
00543 {
00544
00545
00546
00547 ShortMap::iterator cur_it = fCount.insert(ShortMap::value_type(etype, 0)).first;
00548
00549
00550
00551
00552 ShortMap::const_iterator min_it = fCount.end();
00553
00554 for(ShortMap::const_iterator cit = fCount.begin(); cit != fCount.end(); ++cit)
00555 {
00556 if(min_it == fCount.end())
00557 {
00558 min_it = cit;
00559 }
00560 else if(cit -> second < min_it -> second)
00561 {
00562 min_it = cit;
00563 }
00564 }
00565
00566 if(min_it != fCount.end() && cur_it -> second > fTrimDelta + min_it -> second)
00567 {
00568 return false;
00569 }
00570
00571 ++(cur_it -> second);
00572 }
00573 }
00574
00575 const vector<float> dvec = FillkNN::Get(beg, end, fKeys);
00576 const vector<float> tvec = FillkNN::Get(beg, end, fTgts);
00577
00578 if(dvec.size() != fKeys.size())
00579 {
00580 return false;
00581 }
00582
00583 fModule -> Add(Lit::Event(dvec, weight, etype, tvec));
00584
00585 return true;
00586 }
00587
00588
00589 const Anp::DataVec Anp::FillkNN::Find(DataIter beg, DataIter end, const short keybase) const
00590 {
00591
00592
00593
00594 DataVec dvec;
00595
00596 if(!fModule)
00597 {
00598 cerr << "FillkNN::Fill - knn module does not exist" << endl;
00599 return dvec;
00600 }
00601
00602
00603
00604
00605 if(std::find(dvec.begin(), dvec.end(), keybase) != dvec.end())
00606 {
00607 ++fNAbortFind;
00608 return dvec;
00609 }
00610
00611
00612
00613
00614 const vector<float> kvec = FillkNN::Get(beg, end, fKeys);
00615 if(kvec.size() != fKeys.size())
00616 {
00617 return dvec;
00618 }
00619
00620 fModule -> Find(Lit::Event(kvec, 1.0, 0), fKNeighbor);
00621
00622 if(fPrint)
00623 {
00624 cout << "Printing kNN result for key " << keybase << endl;
00625 fModule -> GetResult().Print();
00626 }
00627
00628 const Lit::List &rlist = fModule -> GetResult().GetList();
00629
00630 if(rlist.empty())
00631 {
00632 cerr << "FillkNN::Find - kNN result list is empty" << endl;
00633 return dvec;
00634 }
00635
00636 if(fRegression)
00637 {
00638 double totalw = 0.0;
00639 unsigned int icount = 0;
00640 vector<double> tvec;
00641
00642 for(Lit::List::const_iterator lit = rlist.begin(); lit != rlist.end(); ++lit)
00643 {
00644 const Lit::Event &event_ = lit->first->GetEvent();
00645
00646 ++icount;
00647 totalw += event_.GetWeight();
00648
00649 if(tvec.empty())
00650 {
00651 tvec.insert(tvec.end(), event_.GetNTgt(), 0.0);
00652 }
00653
00654 if(event_.GetNTgt() < 1)
00655 {
00656 cerr << "FillkNN::Find - target list is empty" << endl;
00657 }
00658
00659 for(unsigned int ivar = 0; ivar < event_.GetNTgt(); ++ivar)
00660 {
00661 tvec[ivar] += event_.GetTgt(ivar)*event_.GetWeight();
00662 }
00663
00664
00665
00666
00667 if(icount % fKNeighborMod != 0) continue;
00668
00669
00670
00671
00672 if(!(totalw > 0.0))
00673 {
00674 cerr << "FillkNN::Find - non positive total weight" << endl;
00675 continue;
00676 }
00677
00678 if(fPrint)
00679 {
00680 cout << "FillkNN::Find - saving regression result for icount = " << icount << endl;
00681 }
00682
00683 for(unsigned int ivar = 0; ivar < tvec.size(); ++ivar)
00684 {
00685 dvec.push_back(Anp::Data(keybase + icount + ivar, tvec[ivar]/totalw));
00686 }
00687 }
00688 }
00689 else
00690 {
00691 double totalw = 0.0;
00692 unsigned int count = 0;
00693 map<short, double> pmap;
00694
00695 for(Lit::List::const_iterator lit = rlist.begin(); lit != rlist.end(); ++lit)
00696 {
00697 const Lit::Node &node = *(lit -> first);
00698
00699
00700
00701
00702 if(!(lit -> second > 0.0)) continue;
00703
00704
00705
00706
00707 map<short, double>::iterator pit =
00708 pmap.insert(map<short, double>::value_type(node.GetType(), 0.0)).first;
00709
00710 pit -> second += node.GetWeight();
00711 totalw += node.GetWeight();
00712
00713 ++count;
00714
00715
00716
00717
00718 if(count % fKNeighborMod != 0) continue;
00719
00720 if(!(totalw > 0.0))
00721 {
00722 cerr << "FillkNN::Find - non positive total weight" << endl;
00723 continue;
00724 }
00725
00726
00727
00728
00729 for(ShortVec::const_iterator itype = fTypes.begin(); itype != fTypes.end(); ++itype)
00730 {
00731 const short type = *itype;
00732 double prob = 0.0;
00733
00734 map<short, double>::const_iterator pit = pmap.find(type);
00735 if(pit != pmap.end())
00736 {
00737 prob = (pit -> second)/totalw;
00738 }
00739
00740 if(fAddAll)
00741 {
00742 dvec.push_back(Anp::Data(keybase + count + type, prob));
00743 }
00744 else if(type == fKeySignal)
00745 {
00746 dvec.push_back(Anp::Data(keybase + count, prob));
00747 }
00748 }
00749 }
00750
00751
00752
00753
00754 dvec.push_back(Anp::Data(keybase, std::sqrt(rlist.back().second)));
00755 }
00756
00757 return dvec;
00758 }
00759
00760
00761 double Anp::FillkNN:: PidKer(Lit::List::const_iterator beg, Lit::List::const_iterator end) const
00762 {
00763
00764
00765
00766 double maxradius = -1.0;
00767
00768 for(Lit::List::const_iterator lit = beg; lit != end; ++lit)
00769 {
00770 if(maxradius < lit -> second)
00771 {
00772 maxradius = lit -> second;
00773 }
00774 }
00775
00776 if(!(maxradius > 0.0))
00777 {
00778 cerr << "FillkNN::PidKer - maximum radius is not positive" << endl;
00779 return -100.0;
00780 }
00781
00782 maxradius = 1.0/std::sqrt(maxradius);
00783
00784 double count_signal = 0.0, count_all = 0.0;
00785
00786 for(Lit::List::const_iterator lit = beg; lit != end; ++lit)
00787 {
00788 const Lit::Node &node = *(lit -> first);
00789
00790 const double weight = Kernel(std::sqrt(lit -> second)*maxradius) * node.GetWeight();
00791
00792 if(weight < 0.0)
00793 {
00794 cerr << "FillkNN::PidKer - negative event weight " << weight << endl;
00795 continue;
00796 }
00797
00798 count_all += weight;
00799
00800 if(node.GetType() == fKeySignal)
00801 {
00802 count_signal += weight;
00803 }
00804 }
00805
00806 if(count_all > 0.0)
00807 {
00808 return count_signal/count_all;
00809 }
00810
00811 cerr << "FillkNN:: PidKer - total event count is not positive" << endl;
00812
00813 return -100.0;
00814 }
00815
00816
00817 const vector<float> Anp::FillkNN::Get(DataIter beg, DataIter end, const ShortVec &kvec) const
00818 {
00819
00820
00821
00822
00823
00824
00825 map<short, float> fmap;
00826
00827 for(DataIter dit = beg; dit != end; ++dit)
00828 {
00829 if(find(kvec.begin(), kvec.end(), dit -> Key()) != kvec.end())
00830 {
00831 if(!fmap.insert(map<short, float>::value_type(dit -> Key(), dit -> Data())).second)
00832 {
00833 cerr << "FillkNN::Get - failed to add data due to duplicate key" << endl;
00834 }
00835 }
00836 }
00837
00838 if(fPrint && !kvec.empty() && fmap.size() != kvec.size())
00839 {
00840 cout << "FillkNN::Get - failed to read all keys" << endl;
00841
00842 for(unsigned int i = 0; i < kvec.size(); ++i)
00843 {
00844 map<short, float>::const_iterator fit = fmap.find(kvec[i]);
00845 if(fit != fmap.end())
00846 {
00847 cout << " (key, data) = (" << kvec[i] << ", " << fit -> second << ")" << endl;
00848 }
00849 else
00850 {
00851 cout << " (key, data) = (" << kvec[i] << ", unknown)" << endl;
00852 }
00853 }
00854 }
00855
00856 vector<float> fvec;
00857
00858 for(map<short, float>::const_iterator it = fmap.begin(); it != fmap.end(); ++it)
00859 {
00860 fvec.push_back(it -> second);
00861 }
00862
00863 return fvec;
00864 }
00865
00866
00867 double Anp::FillkNN::Kernel(const double value) const
00868 {
00869
00870
00871
00872
00873 const double avalue = std::fabs(value);
00874
00875 if(!(avalue < 1.0))
00876 {
00877 return 0.0;
00878 }
00879
00880 const double prod = 1.0 - avalue*avalue;
00881
00882 return 15.0*prod*prod/16.0;
00883 }