Logo ROOT   6.12/06
Reference Guide
MethodBoost.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss,Or Cohen, Jan Therhaag, Eckhard von Toerne
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodCompositeBase *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Virtual base class for all MVA method *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Peter Speckmayer <Peter.Speckmazer@cern.ch> - CERN, Switzerland *
16  * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
17  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
18  * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
19  * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany *
20  * *
21  * Copyright (c) 2005-2011: *
22  * CERN, Switzerland *
23  * U. of Victoria, Canada *
24  * MPI-K Heidelberg, Germany *
25  * U. of Bonn, Germany *
26  * *
27  * Redistribution and use in source and binary forms, with or without *
28  * modification, are permitted according to the terms listed in LICENSE *
29  * (http://tmva.sourceforge.net/LICENSE) *
30  **********************************************************************************/
31 
32 /*! \class TMVA::MethodBoost
33 \ingroup TMVA
34 
35 Class for boosting a TMVA method
36 
37 This class is meant to boost a single classifier. Boosting means
38 training the classifier a few times. Every time the weights of the
39 events are modified according to how well the classifier performed
40 on the test sample.
41 
42 */
43 
44 #include "TMVA/MethodBoost.h"
45 
46 #include "TMVA/ClassifierFactory.h"
47 #include "TMVA/Config.h"
48 #include "TMVA/Configurable.h"
49 #include "TMVA/DataSet.h"
50 #include "TMVA/DataSetInfo.h"
51 #include "TMVA/IMethod.h"
52 #include "TMVA/MethodBase.h"
53 #include "TMVA/MethodCategory.h"
55 #include "TMVA/MethodDT.h"
56 #include "TMVA/MethodFisher.h"
57 #include "TMVA/PDF.h"
58 #include "TMVA/Results.h"
59 #include "TMVA/Timer.h"
60 #include "TMVA/Tools.h"
61 #include "TMVA/Types.h"
62 
63 #include "TMVA/SeparationBase.h"
65 #include "TMVA/GiniIndex.h"
66 #include "TMVA/CrossEntropy.h"
69 
70 #include "Riostream.h"
71 #include "TRandom3.h"
72 #include "TFile.h"
73 #include "TMath.h"
74 #include "TObjString.h"
75 #include "TH1F.h"
76 #include "TH2F.h"
77 #include "TGraph.h"
78 #include "TSpline.h"
79 #include "TDirectory.h"
80 #include "TTree.h"
81 
82 #include <algorithm>
83 #include <iomanip>
84 #include <vector>
85 #include <cmath>
86 
87 
88 REGISTER_METHOD(Boost)
89 
91 
92 ////////////////////////////////////////////////////////////////////////////////
93 
95  const TString& methodTitle,
96  DataSetInfo& theData,
97  const TString& theOption ) :
98  TMVA::MethodCompositeBase( jobName, Types::kBoost, methodTitle, theData, theOption)
99  , fBoostNum(0)
100  , fDetailedMonitoring(kFALSE)
101  , fAdaBoostBeta(0)
102  , fRandomSeed(0)
103  , fBaggedSampleFraction(0)
104  , fBoostedMethodTitle(methodTitle)
105  , fBoostedMethodOptions(theOption)
106  , fMonitorBoostedMethod(kFALSE)
107  , fMonitorTree(0)
108  , fBoostWeight(0)
109  , fMethodError(0)
110  , fROC_training(0.0)
111  , fOverlap_integral(0.0)
112  , fMVAvalues(0)
113 {
114  fMVAvalues = new std::vector<Float_t>;
115  fDataSetManager = NULL;
117 }
118 
119 ////////////////////////////////////////////////////////////////////////////////
120 
122  const TString& theWeightFile)
123  : TMVA::MethodCompositeBase( Types::kBoost, dsi, theWeightFile)
124  , fBoostNum(0)
125  , fDetailedMonitoring(kFALSE)
126  , fAdaBoostBeta(0)
127  , fRandomSeed(0)
128  , fBaggedSampleFraction(0)
129  , fBoostedMethodTitle("")
130  , fBoostedMethodOptions("")
131  , fMonitorBoostedMethod(kFALSE)
132  , fMonitorTree(0)
133  , fBoostWeight(0)
134  , fMethodError(0)
135  , fROC_training(0.0)
136  , fOverlap_integral(0.0)
137  , fMVAvalues(0)
138 {
139  fMVAvalues = new std::vector<Float_t>;
140  fDataSetManager = NULL;
142 }
143 
144 ////////////////////////////////////////////////////////////////////////////////
145 /// destructor
146 
148 {
149  fMethodWeight.clear();
150 
151  // the histogram themselves are deleted when the file is closed
152 
153  fTrainSigMVAHist.clear();
154  fTrainBgdMVAHist.clear();
155  fBTrainSigMVAHist.clear();
156  fBTrainBgdMVAHist.clear();
157  fTestSigMVAHist.clear();
158  fTestBgdMVAHist.clear();
159 
160  if (fMVAvalues) {
161  delete fMVAvalues;
162  fMVAvalues = 0;
163  }
164 }
165 
166 
167 ////////////////////////////////////////////////////////////////////////////////
168 /// Boost can handle classification with 2 classes and regression with one regression-target
169 
171 {
172  if (type == Types::kClassification && numberClasses == 2) return kTRUE;
173  // if (type == Types::kRegression && numberTargets == 1) return kTRUE;
174  return kFALSE;
175 }
176 
177 
178 ////////////////////////////////////////////////////////////////////////////////
179 
181 {
182  DeclareOptionRef( fBoostNum = 1, "Boost_Num",
183  "Number of times the classifier is boosted" );
184 
185  DeclareOptionRef( fMonitorBoostedMethod = kTRUE, "Boost_MonitorMethod",
186  "Write monitoring histograms for each boosted classifier" );
187 
188  DeclareOptionRef( fDetailedMonitoring = kFALSE, "Boost_DetailedMonitoring",
189  "Produce histograms for detailed boost monitoring" );
190 
191  DeclareOptionRef( fBoostType = "AdaBoost", "Boost_Type", "Boosting type for the classifiers" );
192  AddPreDefVal(TString("RealAdaBoost"));
193  AddPreDefVal(TString("AdaBoost"));
194  AddPreDefVal(TString("Bagging"));
195 
196  DeclareOptionRef(fBaggedSampleFraction=.6,"Boost_BaggedSampleFraction","Relative size of bagged event sample to original size of the data sample (used whenever bagging is used)" );
197 
198  DeclareOptionRef( fAdaBoostBeta = 1.0, "Boost_AdaBoostBeta",
199  "The ADA boost parameter that sets the effect of every boost step on the events' weights" );
200 
201  DeclareOptionRef( fTransformString = "step", "Boost_Transform",
202  "Type of transform applied to every boosted method linear, log, step" );
203  AddPreDefVal(TString("step"));
204  AddPreDefVal(TString("linear"));
205  AddPreDefVal(TString("log"));
206  AddPreDefVal(TString("gauss"));
207 
208  DeclareOptionRef( fRandomSeed = 0, "Boost_RandomSeed",
209  "Seed for random number generator used for bagging" );
210 
211  TMVA::MethodCompositeBase::fMethods.reserve(fBoostNum);
212 }
213 
214 ////////////////////////////////////////////////////////////////////////////////
215 /// options that are used ONLY for the READER to ensure backward compatibility
216 /// they are hence without any effect (the reader is only reading the training
217 /// options that HAD been used at the training of the .xml weight file at hand
218 
220 {
221 
223 
224  DeclareOptionRef( fHistoricOption = "ByError", "Boost_MethodWeightType",
225  "How to set the final weight of the boosted classifiers" );
226  AddPreDefVal(TString("ByError"));
227  AddPreDefVal(TString("Average"));
228  AddPreDefVal(TString("ByROC"));
229  AddPreDefVal(TString("ByOverlap"));
230  AddPreDefVal(TString("LastMethod"));
231 
232  DeclareOptionRef( fHistoricOption = "step", "Boost_Transform",
233  "Type of transform applied to every boosted method linear, log, step" );
234  AddPreDefVal(TString("step"));
235  AddPreDefVal(TString("linear"));
236  AddPreDefVal(TString("log"));
237  AddPreDefVal(TString("gauss"));
238 
239  // this option here
240  //DeclareOptionRef( fBoostType = "AdaBoost", "Boost_Type", "Boosting type for the classifiers" );
241  // still exists, but these two possible values
242  AddPreDefVal(TString("HighEdgeGauss"));
243  AddPreDefVal(TString("HighEdgeCoPara"));
244  // have been deleted .. hope that works :)
245 
246  DeclareOptionRef( fHistoricBoolOption, "Boost_RecalculateMVACut",
247  "Recalculate the classifier MVA Signallike cut at every boost iteration" );
248 
249 }
250 
251 ////////////////////////////////////////////////////////////////////////////////
252 /// just registering the string from which the boosted classifier will be created
253 
254 Bool_t TMVA::MethodBoost::BookMethod( Types::EMVA theMethod, TString methodTitle, TString theOption )
255 {
256  fBoostedMethodName = Types::Instance().GetMethodName( theMethod );
257  fBoostedMethodTitle = methodTitle;
258  fBoostedMethodOptions = theOption;
259  TString opts=theOption;
260  opts.ToLower();
261  // if (opts.Contains("vartransform")) Log() << kFATAL << "It is not possible to use boost in conjunction with variable transform. Please remove either Boost_Num or VarTransform from the option string"<< methodTitle<<Endl;
262 
263  return kTRUE;
264 }
265 
266 ////////////////////////////////////////////////////////////////////////////////
267 
269 {
270 }
271 
272 ////////////////////////////////////////////////////////////////////////////////
273 /// initialisation routine
274 
276 {
277 
278  Results* results = Data()->GetResults(GetMethodName(), Types::kTraining, GetAnalysisType());
279 
280  results->Store(new TH1F("MethodWeight","Normalized Classifier Weight",fBoostNum,0,fBoostNum),"ClassifierWeight");
281  results->Store(new TH1F("BoostWeight","Boost Weight",fBoostNum,0,fBoostNum),"BoostWeight");
282  results->Store(new TH1F("ErrFraction","Error Fraction (by boosted event weights)",fBoostNum,0,fBoostNum),"ErrorFraction");
283  if (fDetailedMonitoring){
284  results->Store(new TH1F("ROCIntegral_test","ROC integral of single classifier (testing sample)",fBoostNum,0,fBoostNum),"ROCIntegral_test");
285  results->Store(new TH1F("ROCIntegralBoosted_test","ROC integral of boosted method (testing sample)",fBoostNum,0,fBoostNum),"ROCIntegralBoosted_test");
286  results->Store(new TH1F("ROCIntegral_train","ROC integral of single classifier (training sample)",fBoostNum,0,fBoostNum),"ROCIntegral_train");
287  results->Store(new TH1F("ROCIntegralBoosted_train","ROC integral of boosted method (training sample)",fBoostNum,0,fBoostNum),"ROCIntegralBoosted_train");
288  results->Store(new TH1F("OverlapIntegal_train","Overlap integral (training sample)",fBoostNum,0,fBoostNum),"Overlap");
289  }
290 
291 
292  results->GetHist("ClassifierWeight")->GetXaxis()->SetTitle("Index of boosted classifier");
293  results->GetHist("ClassifierWeight")->GetYaxis()->SetTitle("Classifier Weight");
294  results->GetHist("BoostWeight")->GetXaxis()->SetTitle("Index of boosted classifier");
295  results->GetHist("BoostWeight")->GetYaxis()->SetTitle("Boost Weight");
296  results->GetHist("ErrorFraction")->GetXaxis()->SetTitle("Index of boosted classifier");
297  results->GetHist("ErrorFraction")->GetYaxis()->SetTitle("Error Fraction");
298  if (fDetailedMonitoring){
299  results->GetHist("ROCIntegral_test")->GetXaxis()->SetTitle("Index of boosted classifier");
300  results->GetHist("ROCIntegral_test")->GetYaxis()->SetTitle("ROC integral of single classifier");
301  results->GetHist("ROCIntegralBoosted_test")->GetXaxis()->SetTitle("Number of boosts");
302  results->GetHist("ROCIntegralBoosted_test")->GetYaxis()->SetTitle("ROC integral boosted");
303  results->GetHist("ROCIntegral_train")->GetXaxis()->SetTitle("Index of boosted classifier");
304  results->GetHist("ROCIntegral_train")->GetYaxis()->SetTitle("ROC integral of single classifier");
305  results->GetHist("ROCIntegralBoosted_train")->GetXaxis()->SetTitle("Number of boosts");
306  results->GetHist("ROCIntegralBoosted_train")->GetYaxis()->SetTitle("ROC integral boosted");
307  results->GetHist("Overlap")->GetXaxis()->SetTitle("Index of boosted classifier");
308  results->GetHist("Overlap")->GetYaxis()->SetTitle("Overlap integral");
309  }
310 
311  results->Store(new TH1F("SoverBtotal","S/B in reweighted training sample",fBoostNum,0,fBoostNum),"SoverBtotal");
312  results->GetHist("SoverBtotal")->GetYaxis()->SetTitle("S/B (boosted sample)");
313  results->GetHist("SoverBtotal")->GetXaxis()->SetTitle("Index of boosted classifier");
314 
315  results->Store(new TH1F("SeparationGain","SeparationGain",fBoostNum,0,fBoostNum),"SeparationGain");
316  results->GetHist("SeparationGain")->GetYaxis()->SetTitle("SeparationGain");
317  results->GetHist("SeparationGain")->GetXaxis()->SetTitle("Index of boosted classifier");
318 
319 
320 
321  fMonitorTree= new TTree("MonitorBoost","Boost variables");
322  fMonitorTree->Branch("iMethod",&fCurrentMethodIdx,"iMethod/I");
323  fMonitorTree->Branch("boostWeight",&fBoostWeight,"boostWeight/D");
324  fMonitorTree->Branch("errorFraction",&fMethodError,"errorFraction/D");
325  fMonitorBoostedMethod = kTRUE;
326 
327 }
328 
329 
330 ////////////////////////////////////////////////////////////////////////////////
331 
333 {
334  Log() << kDEBUG << "CheckSetup: fBoostType="<<fBoostType << Endl;
335  Log() << kDEBUG << "CheckSetup: fAdaBoostBeta="<<fAdaBoostBeta<<Endl;
336  Log() << kDEBUG << "CheckSetup: fBoostWeight="<<fBoostWeight<<Endl;
337  Log() << kDEBUG << "CheckSetup: fMethodError="<<fMethodError<<Endl;
338  Log() << kDEBUG << "CheckSetup: fBoostNum="<<fBoostNum << Endl;
339  Log() << kDEBUG << "CheckSetup: fRandomSeed=" << fRandomSeed<< Endl;
340  Log() << kDEBUG << "CheckSetup: fTrainSigMVAHist.size()="<<fTrainSigMVAHist.size()<<Endl;
341  Log() << kDEBUG << "CheckSetup: fTestSigMVAHist.size()="<<fTestSigMVAHist.size()<<Endl;
342  Log() << kDEBUG << "CheckSetup: fMonitorBoostedMethod=" << (fMonitorBoostedMethod? "true" : "false") << Endl;
343  Log() << kDEBUG << "CheckSetup: MName=" << fBoostedMethodName << " Title="<< fBoostedMethodTitle<< Endl;
344  Log() << kDEBUG << "CheckSetup: MOptions="<< fBoostedMethodOptions << Endl;
345  Log() << kDEBUG << "CheckSetup: fMonitorTree=" << fMonitorTree <<Endl;
346  Log() << kDEBUG << "CheckSetup: fCurrentMethodIdx=" <<fCurrentMethodIdx << Endl;
347  if (fMethods.size()>0) Log() << kDEBUG << "CheckSetup: fMethods[0]" <<fMethods[0]<<Endl;
348  Log() << kDEBUG << "CheckSetup: fMethodWeight.size()" << fMethodWeight.size() << Endl;
349  if (fMethodWeight.size()>0) Log() << kDEBUG << "CheckSetup: fMethodWeight[0]="<<fMethodWeight[0]<<Endl;
350  Log() << kDEBUG << "CheckSetup: trying to repair things" << Endl;
351 
352 }
353 ////////////////////////////////////////////////////////////////////////////////
354 
356 {
357  TDirectory* methodDir( 0 );
358  TString dirName,dirTitle;
359  Int_t StopCounter=0;
360  Results* results = Data()->GetResults(GetMethodName(), Types::kTraining, GetAnalysisType());
361 
362 
363  InitHistos();
364 
365  if (Data()->GetNTrainingEvents()==0) Log() << kFATAL << "<Train> Data() has zero events" << Endl;
366  Data()->SetCurrentType(Types::kTraining);
367 
368  if (fMethods.size() > 0) fMethods.clear();
369  fMVAvalues->resize(Data()->GetNTrainingEvents(), 0.0);
370 
371  Log() << kINFO << "Training "<< fBoostNum << " " << fBoostedMethodName << " with title " << fBoostedMethodTitle << " Classifiers ... patience please" << Endl;
372  Timer timer( fBoostNum, GetName() );
373 
374  ResetBoostWeights();
375 
376  // clean boosted method options
377  CleanBoostOptions();
378 
379 
380  // remove transformations for individual boosting steps
381  // the transformation of the main method will be rerouted to each of the boost steps
382  Ssiz_t varTrafoStart=fBoostedMethodOptions.Index("~VarTransform=");
383  if (varTrafoStart >0) {
384  Ssiz_t varTrafoEnd =fBoostedMethodOptions.Index(":",varTrafoStart);
385  if (varTrafoEnd<varTrafoStart)
386  varTrafoEnd=fBoostedMethodOptions.Length();
387  fBoostedMethodOptions.Remove(varTrafoStart,varTrafoEnd-varTrafoStart);
388  }
389 
390  //
391  // training and boosting the classifiers
392  for (fCurrentMethodIdx=0;fCurrentMethodIdx<fBoostNum;fCurrentMethodIdx++) {
393  // the first classifier shows the option string output, the rest not
394  if (fCurrentMethodIdx>0) TMVA::MsgLogger::InhibitOutput();
395 
397  fBoostedMethodName.Data(), GetJobName(), Form("%s_B%04i", fBoostedMethodTitle.Data(), fCurrentMethodIdx),
398  DataInfo(), fBoostedMethodOptions);
400 
401  // suppressing the rest of the classifier output the right way
402  fCurrentMethod = (dynamic_cast<MethodBase*>(method));
403 
404  if (fCurrentMethod==0) {
405  Log() << kFATAL << "uups.. guess the booking of the " << fCurrentMethodIdx << "-th classifier somehow failed" << Endl;
406  return; // hope that makes coverity happy (as if fears I might use the pointer later on, not knowing that FATAL exits
407  }
408 
409  // set fDataSetManager if MethodCategory (to enable Category to create datasetinfo objects) // DSMTEST
410  if (fCurrentMethod->GetMethodType() == Types::kCategory) { // DSMTEST
411  MethodCategory *methCat = (dynamic_cast<MethodCategory*>(fCurrentMethod)); // DSMTEST
412  if (!methCat) // DSMTEST
413  Log() << kFATAL << "Method with type kCategory cannot be casted to MethodCategory. /MethodBoost" << Endl; // DSMTEST
414  methCat->fDataSetManager = fDataSetManager; // DSMTEST
415  } // DSMTEST
416 
417  fCurrentMethod->SetMsgType(kWARNING);
418  fCurrentMethod->SetupMethod();
419  fCurrentMethod->ParseOptions();
420  // put SetAnalysisType here for the needs of MLP
421  fCurrentMethod->SetAnalysisType( GetAnalysisType() );
422  fCurrentMethod->ProcessSetup();
423  fCurrentMethod->CheckSetup();
424 
425 
426  // reroute transformationhandler
427  fCurrentMethod->RerouteTransformationHandler (&(this->GetTransformationHandler()));
428 
429 
430  // creating the directory of the classifier
431  if(!IsSilentFile())
432  {
433  if (fMonitorBoostedMethod) {
434  methodDir=GetFile()->GetDirectory(dirName=Form("%s_B%04i",fBoostedMethodName.Data(),fCurrentMethodIdx));
435  if (methodDir==0) {
436  methodDir=BaseDir()->mkdir(dirName,dirTitle=Form("Directory Boosted %s #%04i", fBoostedMethodName.Data(),fCurrentMethodIdx));
437  }
438  fCurrentMethod->SetMethodDir(methodDir);
439  fCurrentMethod->BaseDir()->cd();
440  }
441  }
442 
443  // training
444  TMVA::MethodCompositeBase::fMethods.push_back(method);
445  timer.DrawProgressBar( fCurrentMethodIdx );
446  if (fCurrentMethodIdx==0) MonitorBoost(Types::kBoostProcBegin,fCurrentMethodIdx);
447  MonitorBoost(Types::kBeforeTraining,fCurrentMethodIdx);
448  TMVA::MsgLogger::InhibitOutput(); //suppressing Logger outside the method
449  if (fBoostType=="Bagging") Bagging(); // you want also to train the first classifier on a bagged sample
450  SingleTrain();
452  if(!IsSilentFile())fCurrentMethod->WriteMonitoringHistosToFile();
453 
454  // calculate MVA values of current method for all events in training sample
455  // (used later on to get 'misclassified events' etc for the boosting
456  CalcMVAValues();
457 
458  if(!IsSilentFile()) if (fCurrentMethodIdx==0 && fMonitorBoostedMethod) CreateMVAHistorgrams();
459 
460  // get ROC integral and overlap integral for single method on
461  // training sample if fMethodWeightType == "ByROC" or the user
462  // wants detailed monitoring
463 
464  // boosting (reweight training sample)
465  MonitorBoost(Types::kBeforeBoosting,fCurrentMethodIdx);
466  SingleBoost(fCurrentMethod);
467 
468  MonitorBoost(Types::kAfterBoosting,fCurrentMethodIdx);
469  results->GetHist("BoostWeight")->SetBinContent(fCurrentMethodIdx+1,fBoostWeight);
470  results->GetHist("ErrorFraction")->SetBinContent(fCurrentMethodIdx+1,fMethodError);
471 
472  if (fDetailedMonitoring) {
473  fROC_training = GetBoostROCIntegral(kTRUE, Types::kTraining, kTRUE);
474  results->GetHist("ROCIntegral_test")->SetBinContent(fCurrentMethodIdx+1, GetBoostROCIntegral(kTRUE, Types::kTesting));
475  results->GetHist("ROCIntegralBoosted_test")->SetBinContent(fCurrentMethodIdx+1, GetBoostROCIntegral(kFALSE, Types::kTesting));
476  results->GetHist("ROCIntegral_train")->SetBinContent(fCurrentMethodIdx+1, fROC_training);
477  results->GetHist("ROCIntegralBoosted_train")->SetBinContent(fCurrentMethodIdx+1, GetBoostROCIntegral(kFALSE, Types::kTraining));
478  results->GetHist("Overlap")->SetBinContent(fCurrentMethodIdx+1, fOverlap_integral);
479  }
480 
481 
482 
483  fMonitorTree->Fill();
484 
485  // stop boosting if needed when error has reached 0.5
486  // thought of counting a few steps, but it doesn't seem to be necessary
487  Log() << kDEBUG << "AdaBoost (methodErr) err = " << fMethodError << Endl;
488  if (fMethodError > 0.49999) StopCounter++;
489  if (StopCounter > 0 && fBoostType != "Bagging") {
490  timer.DrawProgressBar( fBoostNum );
491  fBoostNum = fCurrentMethodIdx+1;
492  Log() << kINFO << "Error rate has reached 0.5 ("<< fMethodError<<"), boosting process stopped at #" << fBoostNum << " classifier" << Endl;
493  if (fBoostNum < 5)
494  Log() << kINFO << "The classifier might be too strong to boost with Beta = " << fAdaBoostBeta << ", try reducing it." <<Endl;
495  break;
496  }
497  }
498 
499  //as MethodBoost acts not on a private event sample (like MethodBDT does), we need to remember not
500  // to leave "boosted" events to the next classifier in the factory
501 
502  ResetBoostWeights();
503 
504  Timer* timer1= new Timer( fBoostNum, GetName() );
505  // normalizing the weights of the classifiers
506  for (fCurrentMethodIdx=0;fCurrentMethodIdx<fBoostNum;fCurrentMethodIdx++) {
507  // performing post-boosting actions
508 
509  timer1->DrawProgressBar( fCurrentMethodIdx );
510 
511  if (fCurrentMethodIdx==fBoostNum) {
512  Log() << kINFO << "Elapsed time: " << timer1->GetElapsedTime()
513  << " " << Endl;
514  }
515 
516  TH1F* tmp = dynamic_cast<TH1F*>( results->GetHist("ClassifierWeight") );
517  if (tmp) tmp->SetBinContent(fCurrentMethodIdx+1,fMethodWeight[fCurrentMethodIdx]);
518 
519  }
520 
521  // Ensure that in case of only 1 boost the method weight equals
522  // 1.0. This avoids unexpected behaviour in case of very bad
523  // classifiers which have fBoostWeight=1 or fMethodError=0.5,
524  // because their weight would be set to zero. This behaviour is
525  // not ok if one boosts just one time.
526  if (fMethods.size()==1) fMethodWeight[0] = 1.0;
527 
528  MonitorBoost(Types::kBoostProcEnd);
529 
530  delete timer1;
531 }
532 
533 ////////////////////////////////////////////////////////////////////////////////
534 
536 {
537  fBoostedMethodOptions=GetOptions();
538 }
539 
540 ////////////////////////////////////////////////////////////////////////////////
541 
543 {
544  if (fBoostNum <=0) Log() << kFATAL << "CreateHistograms called before fBoostNum is initialized" << Endl;
545  // calculating histograms boundaries and creating histograms..
546  // nrms = number of rms around the average to use for outline (of the 0 classifier)
547  Double_t meanS, meanB, rmsS, rmsB, xmin, xmax, nrms = 10;
548  Int_t signalClass = 0;
549  if (DataInfo().GetClassInfo("Signal") != 0) {
550  signalClass = DataInfo().GetClassInfo("Signal")->GetNumber();
551  }
552  gTools().ComputeStat( GetEventCollection( Types::kMaxTreeType ), fMVAvalues,
553  meanS, meanB, rmsS, rmsB, xmin, xmax, signalClass );
554 
556  xmin = TMath::Max( TMath::Min(meanS - nrms*rmsS, meanB - nrms*rmsB ), xmin );
557  xmax = TMath::Min( TMath::Max(meanS + nrms*rmsS, meanB + nrms*rmsB ), xmax ) + 0.00001;
558 
559  // creating all the histograms
560  for (UInt_t imtd=0; imtd<fBoostNum; imtd++) {
561  fTrainSigMVAHist .push_back( new TH1F( Form("MVA_Train_S_%04i",imtd), "MVA_Train_S", fNbins, xmin, xmax ) );
562  fTrainBgdMVAHist .push_back( new TH1F( Form("MVA_Train_B%04i", imtd), "MVA_Train_B", fNbins, xmin, xmax ) );
563  fBTrainSigMVAHist.push_back( new TH1F( Form("MVA_BTrain_S%04i",imtd), "MVA_BoostedTrain_S", fNbins, xmin, xmax ) );
564  fBTrainBgdMVAHist.push_back( new TH1F( Form("MVA_BTrain_B%04i",imtd), "MVA_BoostedTrain_B", fNbins, xmin, xmax ) );
565  fTestSigMVAHist .push_back( new TH1F( Form("MVA_Test_S%04i", imtd), "MVA_Test_S", fNbins, xmin, xmax ) );
566  fTestBgdMVAHist .push_back( new TH1F( Form("MVA_Test_B%04i", imtd), "MVA_Test_B", fNbins, xmin, xmax ) );
567  }
568 }
569 
570 ////////////////////////////////////////////////////////////////////////////////
571 /// resetting back the boosted weights of the events to 1
572 
574 {
575  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
576  const Event *ev = Data()->GetEvent(ievt);
577  ev->SetBoostWeight( 1.0 );
578  }
579 }
580 
581 ////////////////////////////////////////////////////////////////////////////////
582 
584 {
585  TDirectory* dir=0;
586  if (fMonitorBoostedMethod) {
587  for (UInt_t imtd=0;imtd<fBoostNum;imtd++) {
588 
589  //writing the histograms in the specific classifier's directory
590  MethodBase* m = dynamic_cast<MethodBase*>(fMethods[imtd]);
591  if (!m) continue;
592  dir = m->BaseDir();
593  dir->cd();
594  fTrainSigMVAHist[imtd]->SetDirectory(dir);
595  fTrainSigMVAHist[imtd]->Write();
596  fTrainBgdMVAHist[imtd]->SetDirectory(dir);
597  fTrainBgdMVAHist[imtd]->Write();
598  fBTrainSigMVAHist[imtd]->SetDirectory(dir);
599  fBTrainSigMVAHist[imtd]->Write();
600  fBTrainBgdMVAHist[imtd]->SetDirectory(dir);
601  fBTrainBgdMVAHist[imtd]->Write();
602  }
603  }
604 
605  // going back to the original folder
606  BaseDir()->cd();
607 
608  fMonitorTree->Write();
609 }
610 
611 ////////////////////////////////////////////////////////////////////////////////
612 
614 {
616  if (fMonitorBoostedMethod) {
617  UInt_t nloop = fTestSigMVAHist.size();
618  if (fMethods.size()<nloop) nloop = fMethods.size();
619  //running over all the events and populating the test MVA histograms
620  Data()->SetCurrentType(Types::kTesting);
621  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
622  const Event* ev = GetEvent(ievt);
623  Float_t w = ev->GetWeight();
624  if (DataInfo().IsSignal(ev)) {
625  for (UInt_t imtd=0; imtd<nloop; imtd++) {
626  fTestSigMVAHist[imtd]->Fill(fMethods[imtd]->GetMvaValue(),w);
627  }
628  }
629  else {
630  for (UInt_t imtd=0; imtd<nloop; imtd++) {
631  fTestBgdMVAHist[imtd]->Fill(fMethods[imtd]->GetMvaValue(),w);
632  }
633  }
634  }
635  Data()->SetCurrentType(Types::kTraining);
636  }
637 }
638 
639 ////////////////////////////////////////////////////////////////////////////////
640 
642 {
644  if (treetype==Types::kTraining) return;
645  UInt_t nloop = fTestSigMVAHist.size();
646  if (fMethods.size()<nloop) nloop = fMethods.size();
647  if (fMonitorBoostedMethod) {
648  TDirectory* dir=0;
649  for (UInt_t imtd=0;imtd<nloop;imtd++) {
650  //writing the histograms in the specific classifier's directory
651  MethodBase* mva = dynamic_cast<MethodBase*>(fMethods[imtd]);
652  if (!mva) continue;
653  dir = mva->BaseDir();
654  if (dir==0) continue;
655  dir->cd();
656  fTestSigMVAHist[imtd]->SetDirectory(dir);
657  fTestSigMVAHist[imtd]->Write();
658  fTestBgdMVAHist[imtd]->SetDirectory(dir);
659  fTestBgdMVAHist[imtd]->Write();
660  }
661  }
662 }
663 
664 ////////////////////////////////////////////////////////////////////////////////
665 /// process user options
666 
668 {
669 }
670 
671 ////////////////////////////////////////////////////////////////////////////////
672 /// initialization
673 
675 {
676  Data()->SetCurrentType(Types::kTraining);
677  MethodBase* meth = dynamic_cast<MethodBase*>(GetLastMethod());
678  if (meth){
679  meth->SetSilentFile(IsSilentFile());
680  if(IsModelPersistence()){
681  TString _fFileDir= DataInfo().GetName();
682  _fFileDir+="/"+gConfig().GetIONames().fWeightFileDir;
683  meth->SetWeightFileDir(_fFileDir);
684  }
685  meth->SetModelPersistence(IsModelPersistence());
686  meth->TrainMethod();
687  }
688 }
689 
690 ////////////////////////////////////////////////////////////////////////////////
691 /// find the CUT on the individual MVA that defines an event as
692 /// correct or misclassified (to be used in the boosting process)
693 
695 {
696  if (!method || method->GetMethodType() == Types::kDT ){ return;}
697 
698  // creating a fine histograms containing the error rate
699  const Int_t nBins=10001;
700  Double_t minMVA=150000;
701  Double_t maxMVA=-150000;
702  for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
703  GetEvent(ievt);
704  Double_t val=method->GetMvaValue();
705  //Helge .. I think one could very well use fMVAValues for that ... -->to do
706  if (val>maxMVA) maxMVA=val;
707  if (val<minMVA) minMVA=val;
708  }
709  maxMVA = maxMVA+(maxMVA-minMVA)/nBins;
710 
711  Double_t sum = 0.;
712 
713  TH1D *mvaS = new TH1D(Form("MVAS_%d",fCurrentMethodIdx) ,"",nBins,minMVA,maxMVA);
714  TH1D *mvaB = new TH1D(Form("MVAB_%d",fCurrentMethodIdx) ,"",nBins,minMVA,maxMVA);
715  TH1D *mvaSC = new TH1D(Form("MVASC_%d",fCurrentMethodIdx),"",nBins,minMVA,maxMVA);
716  TH1D *mvaBC = new TH1D(Form("MVABC_%d",fCurrentMethodIdx),"",nBins,minMVA,maxMVA);
717 
718 
719  Results* results = Data()->GetResults(GetMethodName(), Types::kTraining, GetAnalysisType());
720  if (fDetailedMonitoring){
721  results->Store(mvaS, Form("MVAS_%d",fCurrentMethodIdx));
722  results->Store(mvaB, Form("MVAB_%d",fCurrentMethodIdx));
723  results->Store(mvaSC,Form("MVASC_%d",fCurrentMethodIdx));
724  results->Store(mvaBC,Form("MVABC_%d",fCurrentMethodIdx));
725  }
726 
727  for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
728 
729  Double_t weight = GetEvent(ievt)->GetWeight();
730  Double_t mvaVal=method->GetMvaValue();
731  sum +=weight;
732  if (DataInfo().IsSignal(GetEvent(ievt))){
733  mvaS->Fill(mvaVal,weight);
734  }else {
735  mvaB->Fill(mvaVal,weight);
736  }
737  }
738  SeparationBase *sepGain;
739 
740 
741  // Boosting should use Misclassification not Gini Index (changed, Helge 31.5.2013)
742  // WARNING! It works with Misclassification only if you fix the signal to
743  // background at every step. Strangely enough, there are better results
744  // ( as seen with BDT ) if you use Gini Index, and accept that sometimes no
745  // sensible cut is found - i.e. the cut is then outside the MVA value range,
746  // all events are classified as background and then according to the Boost
747  // algorithm something is renormed 'automatically' ... so that in the next
748  // step again the result is something sensible.
749  // Strange ... that THIS is supposed to be right?
750 
751  // SeparationBase *sepGain2 = new MisClassificationError();
752  //sepGain = new MisClassificationError();
753  sepGain = new GiniIndex();
754  //sepGain = new CrossEntropy();
755 
756  Double_t sTot = mvaS->GetSum();
757  Double_t bTot = mvaB->GetSum();
758 
759  mvaSC->SetBinContent(1,mvaS->GetBinContent(1));
760  mvaBC->SetBinContent(1,mvaB->GetBinContent(1));
761  Double_t sSel=0;
762  Double_t bSel=0;
763  Double_t separationGain=sepGain->GetSeparationGain(sSel,bSel,sTot,bTot);
764  Double_t mvaCut=mvaSC->GetBinLowEdge(1);
765  Double_t sSelCut=sSel;
766  Double_t bSelCut=bSel;
767  // std::cout << "minMVA =" << minMVA << " maxMVA = " << maxMVA << " width = " << mvaSC->GetBinWidth(1) << std::endl;
768 
769  // for (Int_t ibin=1;ibin<=nBins;ibin++) std::cout << " cutvalues[" << ibin<<"]="<<mvaSC->GetBinLowEdge(ibin) << " " << mvaSC->GetBinCenter(ibin) << std::endl;
770  Double_t mvaCutOrientation=1; // 1 if mva > mvaCut --> Signal and -1 if mva < mvaCut (i.e. mva*-1 > mvaCut*-1) --> Signal
771  for (Int_t ibin=1;ibin<=nBins;ibin++){
772  mvaSC->SetBinContent(ibin,mvaS->GetBinContent(ibin)+mvaSC->GetBinContent(ibin-1));
773  mvaBC->SetBinContent(ibin,mvaB->GetBinContent(ibin)+mvaBC->GetBinContent(ibin-1));
774 
775  sSel=mvaSC->GetBinContent(ibin);
776  bSel=mvaBC->GetBinContent(ibin);
777 
778  // if (ibin==nBins){
779  // std::cout << "Last bin s="<< sSel <<" b="<<bSel << " s="<< sTot-sSel <<" b="<<bTot-bSel << endl;
780  // }
781 
782  if (separationGain < sepGain->GetSeparationGain(sSel,bSel,sTot,bTot)
783  // && (mvaSC->GetBinCenter(ibin) >0 || (fCurrentMethodIdx+1)%2 )
784  ){
785  separationGain = sepGain->GetSeparationGain(sSel,bSel,sTot,bTot);
786  // mvaCut=mvaSC->GetBinCenter(ibin);
787  mvaCut=mvaSC->GetBinLowEdge(ibin+1);
788  // if (sSel/bSel > (sTot-sSel)/(bTot-bSel)) mvaCutOrientation=-1;
789  if (sSel*(bTot-bSel) > (sTot-sSel)*bSel) mvaCutOrientation=-1;
790  else mvaCutOrientation=1;
791  sSelCut=sSel;
792  bSelCut=bSel;
793  // std::cout << "new cut at " << mvaCut << "with s="<<sTot-sSel << " b="<<bTot-bSel << std::endl;
794  }
795  /*
796  Double_t ori;
797  if (sSel/bSel > (sTot-sSel)/(bTot-bSel)) ori=-1;
798  else ori=1;
799  std::cout << ibin << " mvacut="<<mvaCut
800  << " sTot=" << sTot
801  << " bTot=" << bTot
802  << " sSel=" << sSel
803  << " bSel=" << bSel
804  << " s/b(1)=" << sSel/bSel
805  << " s/b(2)=" << (sTot-sSel)/(bTot-bSel)
806  << " sepGain="<<sepGain->GetSeparationGain(sSel,bSel,sTot,bTot)
807  << " sepGain2="<<sepGain2->GetSeparationGain(sSel,bSel,sTot,bTot)
808  << " " <<ori
809  << std::endl;
810  */
811 
812  }
813 
814  if (0){
815  double parentIndex=sepGain->GetSeparationIndex(sTot,bTot);
816  double leftIndex =sepGain->GetSeparationIndex(sSelCut,bSelCut);
817  double rightIndex =sepGain->GetSeparationIndex(sTot-sSelCut,bTot-bSelCut);
818  std::cout
819  << " sTot=" << sTot
820  << " bTot=" << bTot
821  << " s="<<sSelCut
822  << " b="<<bSelCut
823  << " s2="<<(sTot-sSelCut)
824  << " b2="<<(bTot-bSelCut)
825  << " s/b(1)=" << sSelCut/bSelCut
826  << " s/b(2)=" << (sTot-sSelCut)/(bTot-bSelCut)
827  << " index before cut=" << parentIndex
828  << " after: left=" << leftIndex
829  << " after: right=" << rightIndex
830  << " sepGain=" << parentIndex-( (sSelCut+bSelCut) * leftIndex + (sTot-sSelCut+bTot-bSelCut) * rightIndex )/(sTot+bTot)
831  << " sepGain="<<separationGain
832  << " sepGain="<<sepGain->GetSeparationGain(sSelCut,bSelCut,sTot,bTot)
833  << " cut=" << mvaCut
834  << " idx="<<fCurrentMethodIdx
835  << " cutOrientation="<<mvaCutOrientation
836  << std::endl;
837  }
838  method->SetSignalReferenceCut(mvaCut);
839  method->SetSignalReferenceCutOrientation(mvaCutOrientation);
840 
841  results->GetHist("SeparationGain")->SetBinContent(fCurrentMethodIdx+1,separationGain);
842 
843 
844  Log() << kDEBUG << "(old step) Setting method cut to " <<method->GetSignalReferenceCut()<< Endl;
845 
846  if(IsSilentFile())
847  {
848  mvaS ->Delete();
849  mvaB ->Delete();
850  mvaSC->Delete();
851  mvaBC->Delete();
852  }
853 }
854 
855 ////////////////////////////////////////////////////////////////////////////////
856 
858 {
859  Double_t returnVal=-1;
860 
861 
862  if (fBoostType=="AdaBoost") returnVal = this->AdaBoost (method,1);
863  else if (fBoostType=="RealAdaBoost") returnVal = this->AdaBoost (method,0);
864  else if (fBoostType=="Bagging") returnVal = this->Bagging ();
865  else{
866  Log() << kFATAL << "<Boost> unknown boost option " << fBoostType<< " called" << Endl;
867  }
868  fMethodWeight.push_back(returnVal);
869  return returnVal;
870 }
871 ////////////////////////////////////////////////////////////////////////////////
872 /// the standard (discrete or real) AdaBoost algorithm
873 
875 {
876  if (!method) {
877  Log() << kWARNING << " AdaBoost called without classifier reference - needed for calculating AdaBoost " << Endl;
878  return 0;
879  }
880 
881  Float_t w,v; Bool_t sig=kTRUE;
882  Double_t sumAll=0, sumWrong=0;
883  Bool_t* WrongDetection=new Bool_t[GetNEvents()];
884  QuickMVAProbEstimator *MVAProb=NULL;
885 
886  if (discreteAdaBoost) {
887  FindMVACut(method);
888  Log() << kDEBUG << " individual mva cut value = " << method->GetSignalReferenceCut() << Endl;
889  } else {
890  MVAProb=new TMVA::QuickMVAProbEstimator();
891  // the RealAdaBoost does use a simple "yes (signal)" or "no (background)"
892  // answer from your single MVA, but a "signal probability" instead (in the BDT case,
893  // that would be the 'purity' in the leaf node. For some MLP parameter, the MVA output
894  // can also interpreted as a probability, but here I try a general approach to get this
895  // probability from the MVA distributions...
896 
897  for (Long64_t evt=0; evt<GetNEvents(); evt++) {
898  const Event* ev = Data()->GetEvent(evt);
899  MVAProb->AddEvent(fMVAvalues->at(evt),ev->GetWeight(),ev->GetClass());
900  }
901  }
902 
903 
904  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) WrongDetection[ievt]=kTRUE;
905 
906  // finding the wrong events and calculating their total weights
907  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
908  const Event* ev = GetEvent(ievt);
909  sig=DataInfo().IsSignal(ev);
910  v = fMVAvalues->at(ievt);
911  w = ev->GetWeight();
912  sumAll += w;
913  if(!IsSilentFile())
914  {
915  if (fMonitorBoostedMethod) {
916  if (sig) {
917  fBTrainSigMVAHist[fCurrentMethodIdx]->Fill(v,w);
918  fTrainSigMVAHist[fCurrentMethodIdx]->Fill(v,ev->GetOriginalWeight());
919  }
920  else {
921  fBTrainBgdMVAHist[fCurrentMethodIdx]->Fill(v,w);
922  fTrainBgdMVAHist[fCurrentMethodIdx]->Fill(v,ev->GetOriginalWeight());
923  }
924  }
925  }
926 
927  if (discreteAdaBoost){
928  if (sig == method->IsSignalLike(fMVAvalues->at(ievt))){
929  WrongDetection[ievt]=kFALSE;
930  }else{
931  WrongDetection[ievt]=kTRUE;
932  sumWrong+=w;
933  }
934  }else{
935  Double_t mvaProb = MVAProb->GetMVAProbAt((Float_t)fMVAvalues->at(ievt));
936  mvaProb = 2*(mvaProb-0.5);
937  Int_t trueType;
938  if (DataInfo().IsSignal(ev)) trueType = 1;
939  else trueType = -1;
940  sumWrong+= w*trueType*mvaProb;
941  }
942  }
943 
944  fMethodError=sumWrong/sumAll;
945 
946  // calculating the fMethodError and the boostWeight out of it uses the formula
947  // w = ((1-err)/err)^beta
948 
949  Double_t boostWeight=0;
950 
951  if (fMethodError == 0) { //no misclassification made.. perfect, no boost ;)
952  Log() << kWARNING << "Your classifier worked perfectly on the training sample --> serious overtraining expected and no boosting done " << Endl;
953  }else{
954 
955  if (discreteAdaBoost)
956  boostWeight = TMath::Log((1.-fMethodError)/fMethodError)*fAdaBoostBeta;
957  else
958  boostWeight = TMath::Log((1.+fMethodError)/(1-fMethodError))*fAdaBoostBeta;
959 
960 
961  // std::cout << "boostweight = " << boostWeight << std::endl;
962 
963  // ADA boosting, rescaling the weight of the wrong events according to the error level
964  // over the entire test sample rescaling all the weights to have the same sum, but without
965  // touching the original weights (changing only the boosted weight of all the events)
966  // first reweight
967 
968  Double_t newSum=0., oldSum=0.;
969 
970 
971  Double_t boostfactor = TMath::Exp(boostWeight);
972 
973 
974  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
975  const Event* ev = Data()->GetEvent(ievt);
976  oldSum += ev->GetWeight();
977  if (discreteAdaBoost){
978  // events are classified as Signal OR background .. right or wrong
979  if (WrongDetection[ievt] && boostWeight != 0) {
980  if (ev->GetWeight() > 0) ev->ScaleBoostWeight(boostfactor);
981  else ev->ScaleBoostWeight(1./boostfactor);
982  }
983  // if (ievt<30) std::cout<<ievt<<" var0="<<ev->GetValue(0)<<" var1="<<ev->GetValue(1)<<" weight="<<ev->GetWeight() << " boostby:"<<boostfactor<<std::endl;
984 
985  }else{
986  // events are classified by their probability of being signal or background
987  // (eventually you should write this one - i.e. re-use the MVA value that were already
988  // calculated and stored.. however ,for the moment ..
989  Double_t mvaProb = MVAProb->GetMVAProbAt((Float_t)fMVAvalues->at(ievt));
990  mvaProb = 2*(mvaProb-0.5);
991  // mvaProb = (1-mvaProb);
992 
993  Int_t trueType=1;
994  if (DataInfo().IsSignal(ev)) trueType = 1;
995  else trueType = -1;
996 
997  boostfactor = TMath::Exp(-1*boostWeight*trueType*mvaProb);
998  if (ev->GetWeight() > 0) ev->ScaleBoostWeight(boostfactor);
999  else ev->ScaleBoostWeight(1./boostfactor);
1000 
1001  }
1002  newSum += ev->GetWeight();
1003  }
1004 
1005  Double_t normWeight = oldSum/newSum;
1006  // next normalize the weights
1007  Double_t normSig=0, normBkg=0;
1008  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1009  const Event* ev = Data()->GetEvent(ievt);
1010  ev->ScaleBoostWeight(normWeight);
1011  if (ev->GetClass()) normSig+=ev->GetWeight();
1012  else normBkg+=ev->GetWeight();
1013  }
1014 
1015  Results* results = Data()->GetResults(GetMethodName(), Types::kTraining, GetAnalysisType());
1016  results->GetHist("SoverBtotal")->SetBinContent(fCurrentMethodIdx+1, normSig/normBkg);
1017 
1018  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1019  const Event* ev = Data()->GetEvent(ievt);
1020 
1021  if (ev->GetClass()) ev->ScaleBoostWeight(oldSum/normSig/2);
1022  else ev->ScaleBoostWeight(oldSum/normBkg/2);
1023  }
1024  }
1025 
1026  delete[] WrongDetection;
1027  if (MVAProb) delete MVAProb;
1028 
1029  fBoostWeight = boostWeight; // used ONLY for the monitoring tree
1030 
1031  return boostWeight;
1032 }
1033 
1034 
1035 ////////////////////////////////////////////////////////////////////////////////
1036 /// Bagging or Bootstrap boosting, gives new random poisson weight for every event
1037 
1039 {
1040  TRandom3 *trandom = new TRandom3(fRandomSeed+fMethods.size());
1041  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1042  const Event* ev = Data()->GetEvent(ievt);
1043  ev->SetBoostWeight(trandom->PoissonD(fBaggedSampleFraction));
1044  }
1045  fBoostWeight = 1; // used ONLY for the monitoring tree
1046  return 1.;
1047 }
1048 
1049 
1050 ////////////////////////////////////////////////////////////////////////////////
1051 /// Get help message text
1052 ///
1053 /// typical length of text line:
1054 /// "|--------------------------------------------------------------|"
1055 
1057 {
1058  Log() << Endl;
1059  Log() << gTools().Color("bold") << "--- Short description:" << gTools().Color("reset") << Endl;
1060  Log() << Endl;
1061  Log() << "This method combines several classifier of one species in a "<<Endl;
1062  Log() << "single multivariate quantity via the boost algorithm." << Endl;
1063  Log() << "the output is a weighted sum over all individual classifiers" <<Endl;
1064  Log() << "By default, the AdaBoost method is employed, which gives " << Endl;
1065  Log() << "events that were misclassified in the previous tree a larger " << Endl;
1066  Log() << "weight in the training of the following classifier."<<Endl;
1067  Log() << "Optionally, Bagged boosting can also be applied." << Endl;
1068  Log() << Endl;
1069  Log() << gTools().Color("bold") << "--- Performance tuning via configuration options:" << gTools().Color("reset") << Endl;
1070  Log() << Endl;
1071  Log() << "The most important parameter in the configuration is the "<<Endl;
1072  Log() << "number of boosts applied (Boost_Num) and the choice of boosting"<<Endl;
1073  Log() << "(Boost_Type), which can be set to either AdaBoost or Bagging." << Endl;
1074  Log() << "AdaBoosting: The most important parameters in this configuration" <<Endl;
1075  Log() << "is the beta parameter (Boost_AdaBoostBeta) " << Endl;
1076  Log() << "When boosting a linear classifier, it is sometimes advantageous"<<Endl;
1077  Log() << "to transform the MVA output non-linearly. The following options" <<Endl;
1078  Log() << "are available: step, log, and minmax, the default is no transform."<<Endl;
1079  Log() <<Endl;
1080  Log() << "Some classifiers are hard to boost and do not improve much in"<<Endl;
1081  Log() << "their performance by boosting them, some even slightly deteriorate"<< Endl;
1082  Log() << "due to the boosting." <<Endl;
1083  Log() << "The booking of the boost method is special since it requires"<<Endl;
1084  Log() << "the booing of the method to be boosted and the boost itself."<<Endl;
1085  Log() << "This is solved by booking the method to be boosted and to add"<<Endl;
1086  Log() << "all Boost parameters, which all begin with \"Boost_\" to the"<<Endl;
1087  Log() << "options string. The factory separates the options and initiates"<<Endl;
1088  Log() << "the boost process. The TMVA macro directory contains the example"<<Endl;
1089  Log() << "macro \"Boost.C\"" <<Endl;
1090 }
1091 
1092 ////////////////////////////////////////////////////////////////////////////////
1093 
1095 {
1096  return 0;
1097 }
1098 
1099 ////////////////////////////////////////////////////////////////////////////////
1100 /// return boosted MVA response
1101 
1103 {
1104  Double_t mvaValue = 0;
1105  Double_t norm = 0;
1106  Double_t epsilon = TMath::Exp(-1.);
1107  //Double_t fact = TMath::Exp(-1.)+TMath::Exp(1.);
1108  for (UInt_t i=0;i< fMethods.size(); i++){
1109  MethodBase* m = dynamic_cast<MethodBase*>(fMethods[i]);
1110  if (m==0) continue;
1111  Double_t val = fTmpEvent ? m->GetMvaValue(fTmpEvent) : m->GetMvaValue();
1112  Double_t sigcut = m->GetSignalReferenceCut();
1113 
1114  // default is no transform
1115  if (fTransformString == "linear"){
1116 
1117  }
1118  else if (fTransformString == "log"){
1119  if (val < sigcut) val = sigcut;
1120 
1121  val = TMath::Log((val-sigcut)+epsilon);
1122  }
1123  else if (fTransformString == "step" ){
1124  if (m->IsSignalLike(val)) val = 1.;
1125  else val = -1.;
1126  }
1127  else if (fTransformString == "gauss"){
1128  val = TMath::Gaus((val-sigcut),1);
1129  }
1130  else {
1131  Log() << kFATAL << "error unknown transformation " << fTransformString<<Endl;
1132  }
1133  mvaValue+=val*fMethodWeight[i];
1134  norm +=fMethodWeight[i];
1135  // std::cout << "mva("<<i<<") = "<<val<<" " << valx<< " " << mvaValue<<" and sigcut="<<sigcut << std::endl;
1136  }
1137  mvaValue/=norm;
1138  // cannot determine error
1139  NoErrorCalc(err, errUpper);
1140 
1141  return mvaValue;
1142 }
1143 
1144 ////////////////////////////////////////////////////////////////////////////////
1145 /// Calculate the ROC integral of a single classifier or even the
1146 /// whole boosted classifier. The tree type (training or testing
1147 /// sample) is specified by 'eTT'.
1148 ///
1149 /// If tree type kTraining is set, the original training sample is
1150 /// used to compute the ROC integral (original weights).
1151 ///
1152 /// - singleMethod - if kTRUE, return ROC integral of single (last
1153 /// trained) classifier; if kFALSE, return ROC
1154 /// integral of full classifier
1155 ///
1156 /// - eTT - tree type (Types::kTraining / Types::kTesting)
1157 ///
1158 /// - CalcOverlapIntergral - if kTRUE, the overlap integral of the
1159 /// signal/background MVA distributions
1160 /// is calculated and stored in
1161 /// 'fOverlap_integral'
1162 
1164 {
1165  // set data sample training / testing
1166  Data()->SetCurrentType(eTT);
1167 
1168  MethodBase* method = singleMethod ? dynamic_cast<MethodBase*>(fMethods.back()) : 0; // ToDo CoVerity flags this line as there is no protection against a zero-pointer delivered by dynamic_cast
1169  // to make CoVerity happy (although, OF COURSE, the last method in the committee
1170  // has to be also of type MethodBase as ANY method is... hence the dynamic_cast
1171  // will never by "zero" ...
1172  if (singleMethod && !method) {
1173  Log() << kFATAL << " What do you do? Your method:"
1174  << fMethods.back()->GetName()
1175  << " seems not to be a propper TMVA method"
1176  << Endl;
1177  std::exit(1);
1178  }
1179  Double_t err = 0.0;
1180 
1181  // temporary renormalize the method weights in case of evaluation
1182  // of full classifier.
1183  // save the old normalization of the methods
1184  std::vector<Double_t> OldMethodWeight(fMethodWeight);
1185  if (!singleMethod) {
1186  // calculate sum of weights of all methods
1187  Double_t AllMethodsWeight = 0;
1188  for (UInt_t i=0; i<=fCurrentMethodIdx; i++)
1189  AllMethodsWeight += fMethodWeight.at(i);
1190  // normalize the weights of the classifiers
1191  if (AllMethodsWeight != 0.0) {
1192  for (UInt_t i=0; i<=fCurrentMethodIdx; i++)
1193  fMethodWeight[i] /= AllMethodsWeight;
1194  }
1195  }
1196 
1197  // calculate MVA values
1198  Double_t meanS, meanB, rmsS, rmsB, xmin, xmax, nrms = 10;
1199  std::vector <Float_t>* mvaRes;
1200  if (singleMethod && eTT==Types::kTraining)
1201  mvaRes = fMVAvalues; // values already calculated
1202  else {
1203  mvaRes = new std::vector <Float_t>(GetNEvents());
1204  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1205  GetEvent(ievt);
1206  (*mvaRes)[ievt] = singleMethod ? method->GetMvaValue(&err) : GetMvaValue(&err);
1207  }
1208  }
1209 
1210  // restore the method weights
1211  if (!singleMethod)
1212  fMethodWeight = OldMethodWeight;
1213 
1214  // now create histograms for calculation of the ROC integral
1215  Int_t signalClass = 0;
1216  if (DataInfo().GetClassInfo("Signal") != 0) {
1217  signalClass = DataInfo().GetClassInfo("Signal")->GetNumber();
1218  }
1219  gTools().ComputeStat( GetEventCollection(eTT), mvaRes,
1220  meanS, meanB, rmsS, rmsB, xmin, xmax, signalClass );
1221 
1223  xmin = TMath::Max( TMath::Min(meanS - nrms*rmsS, meanB - nrms*rmsB ), xmin );
1224  xmax = TMath::Min( TMath::Max(meanS + nrms*rmsS, meanB + nrms*rmsB ), xmax ) + 0.0001;
1225 
1226  // calculate ROC integral
1227  TH1* mva_s = new TH1F( "MVA_S", "MVA_S", fNbins, xmin, xmax );
1228  TH1* mva_b = new TH1F( "MVA_B", "MVA_B", fNbins, xmin, xmax );
1229  TH1 *mva_s_overlap=0, *mva_b_overlap=0;
1230  if (CalcOverlapIntergral) {
1231  mva_s_overlap = new TH1F( "MVA_S_OVERLAP", "MVA_S_OVERLAP", fNbins, xmin, xmax );
1232  mva_b_overlap = new TH1F( "MVA_B_OVERLAP", "MVA_B_OVERLAP", fNbins, xmin, xmax );
1233  }
1234  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1235  const Event* ev = GetEvent(ievt);
1236  Float_t w = (eTT==Types::kTesting ? ev->GetWeight() : ev->GetOriginalWeight());
1237  if (DataInfo().IsSignal(ev)) mva_s->Fill( (*mvaRes)[ievt], w );
1238  else mva_b->Fill( (*mvaRes)[ievt], w );
1239 
1240  if (CalcOverlapIntergral) {
1241  Float_t w_ov = ev->GetWeight();
1242  if (DataInfo().IsSignal(ev))
1243  mva_s_overlap->Fill( (*mvaRes)[ievt], w_ov );
1244  else
1245  mva_b_overlap->Fill( (*mvaRes)[ievt], w_ov );
1246  }
1247  }
1248  gTools().NormHist( mva_s );
1249  gTools().NormHist( mva_b );
1250  PDF *fS = new PDF( "PDF Sig", mva_s, PDF::kSpline2 );
1251  PDF *fB = new PDF( "PDF Bkg", mva_b, PDF::kSpline2 );
1252 
1253  // calculate ROC integral from fS, fB
1254  Double_t ROC = MethodBase::GetROCIntegral(fS, fB);
1255 
1256  // calculate overlap integral
1257  if (CalcOverlapIntergral) {
1258  gTools().NormHist( mva_s_overlap );
1259  gTools().NormHist( mva_b_overlap );
1260 
1261  fOverlap_integral = 0.0;
1262  for (Int_t bin=1; bin<=mva_s_overlap->GetNbinsX(); bin++){
1263  Double_t bc_s = mva_s_overlap->GetBinContent(bin);
1264  Double_t bc_b = mva_b_overlap->GetBinContent(bin);
1265  if (bc_s > 0.0 && bc_b > 0.0)
1266  fOverlap_integral += TMath::Min(bc_s, bc_b);
1267  }
1268 
1269  delete mva_s_overlap;
1270  delete mva_b_overlap;
1271  }
1272 
1273  delete mva_s;
1274  delete mva_b;
1275  delete fS;
1276  delete fB;
1277  if (!(singleMethod && eTT==Types::kTraining)) delete mvaRes;
1278 
1279  Data()->SetCurrentType(Types::kTraining);
1280 
1281  return ROC;
1282 }
1283 
1285 {
1286  // Calculate MVA values of current method fMethods.back() on
1287  // training sample
1288 
1289  Data()->SetCurrentType(Types::kTraining);
1290  MethodBase* method = dynamic_cast<MethodBase*>(fMethods.back());
1291  if (!method) {
1292  Log() << kFATAL << "dynamic cast to MethodBase* failed" <<Endl;
1293  return;
1294  }
1295  // calculate MVA values
1296  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1297  GetEvent(ievt);
1298  fMVAvalues->at(ievt) = method->GetMvaValue();
1299  }
1300 
1301  // fill cumulative mva distribution
1302 
1303 
1304 }
1305 
1306 ////////////////////////////////////////////////////////////////////////////////
1307 /// fill various monitoring histograms from information of the individual classifiers that
1308 /// have been boosted.
1309 /// of course.... this depends very much on the individual classifiers, and so far, only for
1310 /// Decision Trees, this monitoring is actually implemented
1311 
1313 {
1314  Results* results = Data()->GetResults(GetMethodName(), Types::kTraining, GetAnalysisType());
1315 
1316  if (GetCurrentMethod(methodIndex)->GetMethodType() == TMVA::Types::kDT) {
1317  TMVA::MethodDT* currentDT=dynamic_cast<TMVA::MethodDT*>(GetCurrentMethod(methodIndex));
1318  if (currentDT){
1319  if (stage == Types::kBoostProcBegin){
1320  results->Store(new TH1I("NodesBeforePruning","nodes before pruning",this->GetBoostNum(),0,this->GetBoostNum()),"NodesBeforePruning");
1321  results->Store(new TH1I("NodesAfterPruning","nodes after pruning",this->GetBoostNum(),0,this->GetBoostNum()),"NodesAfterPruning");
1322  }
1323 
1324  if (stage == Types::kBeforeTraining){
1325  }
1326  else if (stage == Types::kBeforeBoosting){
1327  results->GetHist("NodesBeforePruning")->SetBinContent(methodIndex+1,currentDT->GetNNodesBeforePruning());
1328  results->GetHist("NodesAfterPruning")->SetBinContent(methodIndex+1,currentDT->GetNNodes());
1329  }
1330  else if (stage == Types::kAfterBoosting){
1331 
1332  }
1333  else if (stage != Types::kBoostProcEnd){
1334  Log() << kINFO << "<Train> average number of nodes before/after pruning : "
1335  << results->GetHist("NodesBeforePruning")->GetMean() << " / "
1336  << results->GetHist("NodesAfterPruning")->GetMean()
1337  << Endl;
1338  }
1339  }
1340 
1341  }else if (GetCurrentMethod(methodIndex)->GetMethodType() == TMVA::Types::kFisher) {
1342  if (stage == Types::kAfterBoosting){
1344  }
1345  }else{
1346  if (methodIndex < 3){
1347  Log() << kDEBUG << "No detailed boost monitoring for "
1348  << GetCurrentMethod(methodIndex)->GetMethodName()
1349  << " yet available " << Endl;
1350  }
1351  }
1352 
1353  //boosting plots universal for all classifiers 'typically for debug purposes only as they are not general enough'
1354 
1355  if (stage == Types::kBeforeBoosting){
1356  // if you want to display the weighted events for 2D case at each boost step:
1357  if (fDetailedMonitoring){
1358  // the following code is useful only for 2D examples - mainly illustration for debug/educational purposes:
1359  if (DataInfo().GetNVariables() == 2) {
1360  results->Store(new TH2F(Form("EventDistSig_%d",methodIndex),Form("EventDistSig_%d",methodIndex),100,0,7,100,0,7));
1361  results->GetHist(Form("EventDistSig_%d",methodIndex))->SetMarkerColor(4);
1362  results->Store(new TH2F(Form("EventDistBkg_%d",methodIndex),Form("EventDistBkg_%d",methodIndex),100,0,7,100,0,7));
1363  results->GetHist(Form("EventDistBkg_%d",methodIndex))->SetMarkerColor(2);
1364 
1365  Data()->SetCurrentType(Types::kTraining);
1366  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1367  const Event* ev = GetEvent(ievt);
1368  Float_t w = ev->GetWeight();
1369  Float_t v0= ev->GetValue(0);
1370  Float_t v1= ev->GetValue(1);
1371  // if (ievt<3) std::cout<<ievt<<" var0="<<v0<<" var1="<<v1<<" weight="<<w<<std::endl;
1372  TH2* h;
1373  if (DataInfo().IsSignal(ev)) h=results->GetHist2D(Form("EventDistSig_%d",methodIndex));
1374  else h=results->GetHist2D(Form("EventDistBkg_%d",methodIndex));
1375  if (h) h->Fill(v0,v1,w);
1376  }
1377  }
1378  }
1379  }
1380 
1381  return;
1382 }
1383 
1384 
IMethod * Create(const std::string &name, const TString &job, const TString &title, DataSetInfo &dsi, const TString &option)
creates the method if needed based on the method name using the creator function the factory has stor...
static ClassifierFactory & Instance()
access to the ClassifierFactory singleton creates the instance if needed
void SetModelPersistence(Bool_t status)
Definition: MethodBase.h:371
std::string GetName(const std::string &scope_name)
Definition: Cppyy.cxx:145
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
Definition: TH1.cxx:3242
virtual Double_t GetSeparationGain(const Double_t nSelS, const Double_t nSelB, const Double_t nTotS, const Double_t nTotB)
Separation Gain: the measure of how the quality of separation of the sample increases by splitting th...
static long int sum(long int i)
Definition: Factory.cxx:2173
Double_t GetBoostROCIntegral(Bool_t, Types::ETreeType, Bool_t CalcOverlapIntergral=kFALSE)
Calculate the ROC integral of a single classifier or even the whole boosted classifier.
float xmin
Definition: THbookFile.cxx:93
Random number generator class based on M.
Definition: TRandom3.h:27
void MonitorBoost(Types::EBoostStage stage, UInt_t methodIdx=0)
fill various monitoring histograms from information of the individual classifiers that have been boos...
std::vector< Float_t > * fMVAvalues
Definition: MethodBoost.h:191
THist< 1, int, THistStatContent > TH1I
Definition: THist.hxx:287
virtual Double_t PoissonD(Double_t mean)
Generates a random number according to a Poisson law.
Definition: TRandom.cxx:435
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
Singleton class for Global types used by TMVA.
Definition: Types.h:73
long long Long64_t
Definition: RtypesCore.h:69
auto * m
Definition: textangle.C:8
void SingleTrain()
initialization
Stat_t GetSum() const
Definition: TArrayD.h:46
TString GetMethodName(Types::EMVA method) const
Definition: Types.cxx:136
Double_t Bagging()
Bagging or Bootstrap boosting, gives new random poisson weight for every event.
Double_t Log(Double_t x)
Definition: TMath.h:648
virtual Double_t GetMvaValue(Double_t *errLower=0, Double_t *errUpper=0)=0
float Float_t
Definition: RtypesCore.h:53
Double_t AdaBoost(MethodBase *method, Bool_t useYesNoLeaf)
the standard (discrete or real) AdaBoost algorithm
void WriteMonitoringHistosToFile(void) const
write special monitoring histograms to file dummy implementation here --------------— ...
static Types & Instance()
the the single instance of "Types" if existing already, or create it (Singleton)
Definition: Types.cxx:70
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
THist< 1, float, THistStatContent, THistStatUncertainty > TH1F
Definition: THist.hxx:285
Config & gConfig()
TH1 * h
Definition: legend2.C:5
virtual Double_t GetBinContent(Int_t bin) const
Return content of bin number bin.
Definition: TH1.cxx:4761
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t)
Boost can handle classification with 2 classes and regression with one regression-target.
EAnalysisType
Definition: Types.h:125
MethodBoost(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
Definition: MethodBoost.cxx:94
Virtual base Class for all MVA method.
Definition: MethodBase.h:109
void SetSignalReferenceCutOrientation(Double_t cutOrientation)
Definition: MethodBase.h:354
virtual Double_t GetMean(Int_t axis=1) const
For axis = 1,2 or 3 returns the mean value of the histogram along X,Y or Z axis.
Definition: TH1.cxx:6890
Basic string class.
Definition: TString.h:125
1-D histogram with a float per channel (see TH1 documentation)}
Definition: TH1.h:556
Ranking for variables in method (implementation)
Definition: Ranking.h:48
Short_t Min(Short_t a, Short_t b)
Definition: TMathBase.h:168
void ToLower()
Change string to lower-case.
Definition: TString.cxx:1099
int Int_t
Definition: RtypesCore.h:41
virtual TDirectory * mkdir(const char *name, const char *title="")
Create a sub-directory "a" or a hierarchy of sub-directories "a/b/c/...".
bool Bool_t
Definition: RtypesCore.h:59
const Ranking * CreateRanking()
virtual Double_t GetBinLowEdge(Int_t bin) const
Return bin lower edge for 1D histogram.
Definition: TH1.cxx:8406
void SetSilentFile(Bool_t status)
Definition: MethodBase.h:367
void ResetBoostWeights()
resetting back the boosted weights of the events to 1
virtual Double_t GetROCIntegral(TH1D *histS, TH1D *histB) const
calculate the area (integral) under the ROC curve as a overall quality measure of the classification ...
virtual Bool_t IsSignalLike()
uses a pre-set cut on the MVA output (SetSignalReferenceCut and SetSignalReferenceCutOrientation) for...
Definition: MethodBase.cxx:847
static void InhibitOutput()
Definition: MsgLogger.cxx:74
void FindMVACut(MethodBase *method)
find the CUT on the individual MVA that defines an event as correct or misclassified (to be used in t...
void AddEvent(Double_t val, Double_t weight, Int_t type)
void ProcessOptions()
process user options
Double_t SingleBoost(MethodBase *method)
Virtual base class for combining several TMVA method.
virtual ~MethodBoost(void)
destructor
TString fWeightFileDir
Definition: Config.h:107
UInt_t GetClass() const
Definition: Event.h:81
virtual void SetMarkerColor(Color_t mcolor=1)
Set the marker color.
Definition: TAttMarker.h:38
Int_t GetNNodes()
Definition: MethodDT.h:97
IONames & GetIONames()
Definition: Config.h:85
Double_t NormHist(TH1 *theHist, Double_t norm=1.0)
normalises histogram
Definition: Tools.cxx:394
Class that contains all the data information.
Definition: DataSetInfo.h:60
PDF wrapper for histograms; uses user-defined spline interpolation.
Definition: PDF.h:63
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not...
Definition: Event.cxx:382
Class for boosting a TMVA method.
Definition: MethodBoost.h:58
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
Definition: Timer.cxx:134
virtual void Delete(Option_t *option="")
Delete this object.
Definition: TObject.cxx:169
Bool_t BookMethod(Types::EMVA theMethod, TString methodTitle, TString theOption)
just registering the string from which the boosted classifier will be created
RooCmdArg Timer(Bool_t flag=kTRUE)
std::string GetMethodName(TCppMethod_t)
Definition: Cppyy.cxx:733
Service class for 2-Dim histogram classes.
Definition: TH2.h:30
SVector< double, 2 > v
Definition: Dict.h:5
class TMVA::Config::VariablePlotting fVariablePlotting
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
Implementation of the GiniIndex as separation criterion.
Definition: GiniIndex.h:63
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content see convention for numbering bins in TH1::GetBin In case the bin number is greater th...
Definition: TH1.cxx:8475
unsigned int UInt_t
Definition: RtypesCore.h:42
char * Form(const char *fmt,...)
DataSetManager * fDataSetManager
void ScaleBoostWeight(Double_t s) const
Definition: Event.h:107
TAxis * GetYaxis()
Definition: TH1.h:306
float xmax
Definition: THbookFile.cxx:93
Tools & gTools()
An interface to calculate the "SeparationGain" for different separation criteria used in various trai...
1-D histogram with a double per channel (see TH1 documentation)}
Definition: TH1.h:599
REAL epsilon
Definition: triangle.c:617
virtual Double_t GetSeparationIndex(const Double_t s, const Double_t b)=0
void CreateMVAHistorgrams()
Double_t Gaus(Double_t x, Double_t mean=0, Double_t sigma=1, Bool_t norm=kFALSE)
Calculate a gaussian function with mean and sigma.
Definition: TMath.cxx:452
const Bool_t kFALSE
Definition: RtypesCore.h:88
Float_t GetValue(UInt_t ivar) const
return value of i&#39;th variable
Definition: Event.cxx:237
Class for categorizing the phase space.
int Ssiz_t
Definition: RtypesCore.h:63
void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
Double_t Exp(Double_t x)
Definition: TMath.h:621
#define ClassImp(name)
Definition: Rtypes.h:359
void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
double Double_t
Definition: RtypesCore.h:55
Describe directory structure in memory.
Definition: TDirectory.h:34
TH1 * GetHist(const TString &alias) const
Definition: Results.cxx:130
void SetBoostWeight(Double_t w) const
Definition: Event.h:106
The TH1 histogram class.
Definition: TH1.h:56
void GetHelpMessage() const
Get help message text.
Int_t GetNNodesBeforePruning()
Definition: MethodDT.h:96
void ComputeStat(const std::vector< TMVA::Event *> &, std::vector< Float_t > *, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Int_t signalClass, Bool_t norm=kFALSE)
sanity check
Definition: Tools.cxx:214
virtual void TestClassification()
initialization
const TString & Color(const TString &)
human readable color strings
Definition: Tools.cxx:839
Interface for all concrete MVA method implementations.
Definition: IMethod.h:54
#define REGISTER_METHOD(CLASS)
for example
std::vector< IMethod * > fMethods
Abstract ClassifierFactory template that handles arbitrary types.
Double_t GetMVAProbAt(Double_t value)
TH2 * GetHist2D(const TString &alias) const
Definition: Results.cxx:139
DataSetManager * fDataSetManager
Definition: MethodBoost.h:193
virtual Bool_t cd(const char *path=0)
Change current directory to "this" directory.
Definition: TDirectory.cxx:497
TDirectory * BaseDir() const
returns the ROOT directory where info/histograms etc of the corresponding MVA method instance are sto...
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
Definition: MethodBase.cxx:601
Class that is the base-class for a vector of result.
Definition: Results.h:57
Short_t Max(Short_t a, Short_t b)
Definition: TMathBase.h:200
void SetWeightFileDir(TString fileDir)
set directory of weight file
Double_t GetOriginalWeight() const
Definition: Event.h:79
Bool_t fHistoricBoolOption
Definition: MethodBoost.h:195
void InitHistos()
initialisation routine
Double_t GetSignalReferenceCut() const
Definition: MethodBase.h:349
virtual TDirectory * GetDirectory(const char *namecycle, Bool_t printError=false, const char *funcname="GetDirectory")
Find a directory using apath.
Definition: TDirectory.cxx:400
THist< 1, double, THistStatContent, THistStatUncertainty > TH1D
Definition: THist.hxx:284
void DrawProgressBar(Int_t, const TString &comment="")
draws progress bar in color or B&W caution:
Definition: Timer.cxx:190
A TTree object has a header with a name and a title.
Definition: TTree.h:70
void Store(TObject *obj, const char *alias=0)
Definition: Results.cxx:86
virtual Int_t GetNbinsX() const
Definition: TH1.h:282
static void EnableOutput()
Definition: MsgLogger.cxx:75
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
Definition: TNamed.cxx:164
THist< 2, float, THistStatContent, THistStatUncertainty > TH2F
Definition: THist.hxx:291
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
return boosted MVA response
const Bool_t kTRUE
Definition: RtypesCore.h:87
Types::EMVA GetMethodType() const
Definition: MethodBase.h:322
Timing information for training and evaluation of MVA methods.
Definition: Timer.h:58
virtual void TestClassification()
initialization
TAxis * GetXaxis()
Definition: TH1.h:305
Analysis of Boosted Decision Trees.
Definition: MethodDT.h:49
void SetSignalReferenceCut(Double_t cut)
Definition: MethodBase.h:353