149 #include <unordered_map> 153 using std::make_pair;
171 , fSigToBkgFraction(0)
176 , fBaggedGradBoost(
kFALSE)
180 , fMinNodeSizeS(
"5%")
183 , fMinLinCorrForFisher(.8)
184 , fUseExclusiveVars(0)
186 , fNodePurityLimit(0)
191 , fFValidationEvents(0)
193 , fRandomisedTrees(
kFALSE)
195 , fUsePoissonNvars(0)
196 , fUseNTrainEvents(0)
197 , fBaggedSampleFraction(0)
198 , fNoNegWeightsInTraining(
kFALSE)
199 , fInverseBoostNegWeights(
kFALSE)
200 , fPairNegWeightsGlobal(
kFALSE)
201 , fTrainWithNegWeights(
kFALSE)
211 , fSkipNormalization(
kFALSE)
226 , fSigToBkgFraction(0)
231 , fBaggedGradBoost(
kFALSE)
235 , fMinNodeSizeS(
"5%")
238 , fMinLinCorrForFisher(.8)
239 , fUseExclusiveVars(0)
241 , fNodePurityLimit(0)
246 , fFValidationEvents(0)
248 , fRandomisedTrees(
kFALSE)
250 , fUsePoissonNvars(0)
251 , fUseNTrainEvents(0)
252 , fBaggedSampleFraction(0)
253 , fNoNegWeightsInTraining(
kFALSE)
254 , fInverseBoostNegWeights(
kFALSE)
255 , fPairNegWeightsGlobal(
kFALSE)
256 , fTrainWithNegWeights(
kFALSE)
266 , fSkipNormalization(
kFALSE)
336 DeclareOptionRef(fNTrees,
"NTrees",
"Number of trees in the forest");
337 if (DoRegression()) {
338 DeclareOptionRef(fMaxDepth=50,
"MaxDepth",
"Max depth of the decision tree allowed");
340 DeclareOptionRef(fMaxDepth=3,
"MaxDepth",
"Max depth of the decision tree allowed");
343 TString tmp=
"5%";
if (DoRegression()) tmp=
"0.2%";
344 DeclareOptionRef(fMinNodeSizeS=tmp,
"MinNodeSize",
"Minimum percentage of training events required in a leaf node (default: Classification: 5%, Regression: 0.2%)");
346 DeclareOptionRef(fNCuts,
"nCuts",
"Number of grid points in variable range used in finding optimal cut in node splitting");
348 DeclareOptionRef(fBoostType,
"BoostType",
"Boosting type for the trees in the forest (note: AdaCost is still experimental)");
350 AddPreDefVal(
TString(
"AdaBoost"));
351 AddPreDefVal(
TString(
"RealAdaBoost"));
352 AddPreDefVal(
TString(
"AdaCost"));
353 AddPreDefVal(
TString(
"Bagging"));
355 AddPreDefVal(
TString(
"AdaBoostR2"));
357 if (DoRegression()) {
358 fBoostType =
"AdaBoostR2";
360 fBoostType =
"AdaBoost";
362 DeclareOptionRef(fAdaBoostR2Loss=
"Quadratic",
"AdaBoostR2Loss",
"Type of Loss function in AdaBoostR2");
363 AddPreDefVal(
TString(
"Linear"));
364 AddPreDefVal(
TString(
"Quadratic"));
365 AddPreDefVal(
TString(
"Exponential"));
367 DeclareOptionRef(fBaggedBoost=
kFALSE,
"UseBaggedBoost",
"Use only a random subsample of all events for growing the trees in each boost iteration.");
368 DeclareOptionRef(fShrinkage=1.0,
"Shrinkage",
"Learning rate for GradBoost algorithm");
369 DeclareOptionRef(fAdaBoostBeta=.5,
"AdaBoostBeta",
"Learning rate for AdaBoost algorithm");
370 DeclareOptionRef(fRandomisedTrees,
"UseRandomisedTrees",
"Determine at each node splitting the cut variable only as the best out of a random subset of variables (like in RandomForests)");
371 DeclareOptionRef(fUseNvars,
"UseNvars",
"Size of the subset of variables used with RandomisedTree option");
372 DeclareOptionRef(fUsePoissonNvars,
"UsePoissonNvars",
"Interpret \"UseNvars\" not as fixed number but as mean of a Poisson distribution in each split with RandomisedTree option");
373 DeclareOptionRef(fBaggedSampleFraction=.6,
"BaggedSampleFraction",
"Relative size of bagged event sample to original size of the data sample (used whenever bagging is used (i.e. UseBaggedBoost, Bagging,)" );
375 DeclareOptionRef(fUseYesNoLeaf=
kTRUE,
"UseYesNoLeaf",
376 "Use Sig or Bkg categories, or the purity=S/(S+B) as classification of the leaf node -> Real-AdaBoost");
377 if (DoRegression()) {
381 DeclareOptionRef(fNegWeightTreatment=
"InverseBoostNegWeights",
"NegWeightTreatment",
"How to treat events with negative weights in the BDT training (particular the boosting) : IgnoreInTraining; Boost With inverse boostweight; Pair events with negative and positive weights in training sample and *annihilate* them (experimental!)");
382 AddPreDefVal(
TString(
"InverseBoostNegWeights"));
383 AddPreDefVal(
TString(
"IgnoreNegWeightsInTraining"));
384 AddPreDefVal(
TString(
"NoNegWeightsInTraining"));
385 AddPreDefVal(
TString(
"PairNegWeightsGlobal"));
390 DeclareOptionRef(fCss=1.,
"Css",
"AdaCost: cost of true signal selected signal");
391 DeclareOptionRef(fCts_sb=1.,
"Cts_sb",
"AdaCost: cost of true signal selected bkg");
392 DeclareOptionRef(fCtb_ss=1.,
"Ctb_ss",
"AdaCost: cost of true bkg selected signal");
393 DeclareOptionRef(fCbb=1.,
"Cbb",
"AdaCost: cost of true bkg selected bkg ");
395 DeclareOptionRef(fNodePurityLimit=0.5,
"NodePurityLimit",
"In boosting/pruning, nodes with purity > NodePurityLimit are signal; background otherwise.");
398 DeclareOptionRef(fSepTypeS,
"SeparationType",
"Separation criterion for node splitting");
399 AddPreDefVal(
TString(
"CrossEntropy"));
400 AddPreDefVal(
TString(
"GiniIndex"));
401 AddPreDefVal(
TString(
"GiniIndexWithLaplace"));
402 AddPreDefVal(
TString(
"MisClassificationError"));
403 AddPreDefVal(
TString(
"SDivSqrtSPlusB"));
404 AddPreDefVal(
TString(
"RegressionVariance"));
405 if (DoRegression()) {
406 fSepTypeS =
"RegressionVariance";
408 fSepTypeS =
"GiniIndex";
411 DeclareOptionRef(fRegressionLossFunctionBDTGS =
"Huber",
"RegressionLossFunctionBDTG",
"Loss function for BDTG regression.");
412 AddPreDefVal(
TString(
"Huber"));
413 AddPreDefVal(
TString(
"AbsoluteDeviation"));
414 AddPreDefVal(
TString(
"LeastSquares"));
416 DeclareOptionRef(fHuberQuantile = 0.7,
"HuberQuantile",
"In the Huber loss function this is the quantile that separates the core from the tails in the residuals distribution.");
418 DeclareOptionRef(fDoBoostMonitor=
kFALSE,
"DoBoostMonitor",
"Create control plot with ROC integral vs tree number");
420 DeclareOptionRef(fUseFisherCuts=
kFALSE,
"UseFisherCuts",
"Use multivariate splits using the Fisher criterion");
421 DeclareOptionRef(fMinLinCorrForFisher=.8,
"MinLinCorrForFisher",
"The minimum linear correlation between two variables demanded for use in Fisher criterion in node splitting");
422 DeclareOptionRef(fUseExclusiveVars=
kFALSE,
"UseExclusiveVars",
"Variables already used in fisher criterion are not anymore analysed individually for node splitting");
425 DeclareOptionRef(fDoPreselection=
kFALSE,
"DoPreselection",
"and and apply automatic pre-selection for 100% efficient signal (bkg) cuts prior to training");
428 DeclareOptionRef(fSigToBkgFraction=1,
"SigToBkgFraction",
"Sig to Bkg ratio used in Training (similar to NodePurityLimit, which cannot be used in real adaboost");
430 DeclareOptionRef(fPruneMethodS,
"PruneMethod",
"Note: for BDTs use small trees (e.g.MaxDepth=3) and NoPruning: Pruning: Method used for pruning (removal) of statistically insignificant branches ");
431 AddPreDefVal(
TString(
"NoPruning"));
432 AddPreDefVal(
TString(
"ExpectedError"));
433 AddPreDefVal(
TString(
"CostComplexity"));
435 DeclareOptionRef(fPruneStrength,
"PruneStrength",
"Pruning strength");
437 DeclareOptionRef(fFValidationEvents=0.5,
"PruningValFraction",
"Fraction of events to use for optimizing automatic pruning.");
439 DeclareOptionRef(fSkipNormalization=
kFALSE,
"SkipNormalization",
"Skip normalization at initialization, to keep expectation value of BDT output according to the fraction of events");
442 DeclareOptionRef(fMinNodeEvents=0,
"nEventsMin",
"deprecated: Use MinNodeSize (in % of training events) instead");
444 DeclareOptionRef(fBaggedGradBoost=
kFALSE,
"UseBaggedGrad",
"deprecated: Use *UseBaggedBoost* instead: Use only a random subsample of all events for growing the trees in each iteration.");
445 DeclareOptionRef(fBaggedSampleFraction,
"GradBaggingFraction",
"deprecated: Use *BaggedSampleFraction* instead: Defines the fraction of events to be used in each iteration, e.g. when UseBaggedGrad=kTRUE. ");
446 DeclareOptionRef(fUseNTrainEvents,
"UseNTrainEvents",
"deprecated: Use *BaggedSampleFraction* instead: Number of randomly picked training events used in randomised (and bagged) trees");
447 DeclareOptionRef(fNNodesMax,
"NNodesMax",
"deprecated: Use MaxDepth instead to limit the tree size" );
459 DeclareOptionRef(fHistoricBool=
kTRUE,
"UseWeightedTrees",
460 "Use weighted trees or simple average in classification from the forest");
461 DeclareOptionRef(fHistoricBool=
kFALSE,
"PruneBeforeBoost",
"Flag to prune the tree before applying boosting algorithm");
462 DeclareOptionRef(fHistoricBool=
kFALSE,
"RenormByClass",
"Individually re-normalize each event class to the original size after boosting");
464 AddPreDefVal(
TString(
"NegWeightTreatment"),
TString(
"IgnoreNegWeights"));
475 else if (fSepTypeS ==
"giniindex") fSepType =
new GiniIndex();
477 else if (fSepTypeS ==
"crossentropy") fSepType =
new CrossEntropy();
478 else if (fSepTypeS ==
"sdivsqrtsplusb") fSepType =
new SdivSqrtSplusB();
479 else if (fSepTypeS ==
"regressionvariance") fSepType = NULL;
481 Log() << kINFO << GetOptions() <<
Endl;
482 Log() << kFATAL <<
"<ProcessOptions> unknown Separation Index option " << fSepTypeS <<
" called" <<
Endl;
485 if(!(fHuberQuantile >= 0.0 && fHuberQuantile <= 1.0)){
486 Log() << kINFO << GetOptions() <<
Endl;
487 Log() << kFATAL <<
"<ProcessOptions> Huber Quantile must be in range [0,1]. Value given, " << fHuberQuantile <<
", does not match this criteria" <<
Endl;
490 fRegressionLossFunctionBDTGS.ToLower();
491 if (fRegressionLossFunctionBDTGS ==
"huber") fRegressionLossFunctionBDTG =
new HuberLossFunctionBDT(fHuberQuantile);
495 Log() << kINFO << GetOptions() <<
Endl;
496 Log() << kFATAL <<
"<ProcessOptions> unknown Regression Loss Function BDT option " << fRegressionLossFunctionBDTGS <<
" called" <<
Endl;
499 fPruneMethodS.ToLower();
504 Log() << kINFO << GetOptions() <<
Endl;
505 Log() << kFATAL <<
"<ProcessOptions> unknown PruneMethod " << fPruneMethodS <<
" option called" <<
Endl;
511 <<
"Sorry automatic pruning strength determination is not implemented yet for ExpectedErrorPruning" <<
Endl;
515 if (fMinNodeEvents > 0){
516 fMinNodeSize =
Double_t(fMinNodeEvents*100.) / Data()->GetNTrainingEvents();
517 Log() << kWARNING <<
"You have explicitly set ** nEventsMin = " << fMinNodeEvents<<
" ** the min absolute number \n" 518 <<
"of events in a leaf node. This is DEPRECATED, please use the option \n" 519 <<
"*MinNodeSize* giving the relative number as percentage of training \n" 520 <<
"events instead. \n" 521 <<
"nEventsMin="<<fMinNodeEvents<<
"--> MinNodeSize="<<fMinNodeSize<<
"%" 523 Log() << kWARNING <<
"Note also that explicitly setting *nEventsMin* so far OVERWRITES the option recommended \n" 524 <<
" *MinNodeSize* = " << fMinNodeSizeS <<
" option !!" <<
Endl ;
525 fMinNodeSizeS =
Form(
"%F3.2",fMinNodeSize);
528 SetMinNodeSize(fMinNodeSizeS);
532 fAdaBoostR2Loss.ToLower();
534 if (fBoostType==
"Grad") {
536 if (fNegWeightTreatment==
"InverseBoostNegWeights"){
537 Log() << kINFO <<
"the option *InverseBoostNegWeights* does not exist for BoostType=Grad --> change" <<
Endl;
538 Log() << kINFO <<
"to new default for GradBoost *Pray*" <<
Endl;
539 Log() << kDEBUG <<
"i.e. simply keep them as if which should work fine for Grad Boost" <<
Endl;
540 fNegWeightTreatment=
"Pray";
541 fNoNegWeightsInTraining=
kFALSE;
543 }
else if (fBoostType==
"RealAdaBoost"){
544 fBoostType =
"AdaBoost";
546 }
else if (fBoostType==
"AdaCost"){
550 if (fFValidationEvents < 0.0) fFValidationEvents = 0.0;
551 if (fAutomatic && fFValidationEvents > 0.5) {
552 Log() << kWARNING <<
"You have chosen to use more than half of your training sample " 553 <<
"to optimize the automatic pruning algorithm. This is probably wasteful " 554 <<
"and your overall results will be degraded. Are you sure you want this?" 559 if (this->Data()->HasNegativeEventWeights()){
560 Log() << kINFO <<
" You are using a Monte Carlo that has also negative weights. " 561 <<
"That should in principle be fine as long as on average you end up with " 562 <<
"something positive. For this you have to make sure that the minimal number " 563 <<
"of (un-weighted) events demanded for a tree node (currently you use: MinNodeSize=" 564 << fMinNodeSizeS <<
" ("<< fMinNodeSize <<
"%)" 565 <<
", (or the deprecated equivalent nEventsMin) you can set this via the " 566 <<
"BDT option string when booking the " 567 <<
"classifier) is large enough to allow for reasonable averaging!!! " 568 <<
" If this does not help.. maybe you want to try the option: IgnoreNegWeightsInTraining " 569 <<
"which ignores events with negative weight in the training. " <<
Endl 570 <<
Endl <<
"Note: You'll get a WARNING message during the training if that should ever happen" <<
Endl;
573 if (DoRegression()) {
574 if (fUseYesNoLeaf && !IsConstructedFromWeightFile()){
575 Log() << kWARNING <<
"Regression Trees do not work with fUseYesNoLeaf=TRUE --> I will set it to FALSE" <<
Endl;
579 if (fSepType != NULL){
580 Log() << kWARNING <<
"Regression Trees do not work with Separation type other than <RegressionVariance> --> I will use it instead" <<
Endl;
584 Log() << kWARNING <<
"Sorry, UseFisherCuts is not available for regression analysis, I will ignore it!" <<
Endl;
588 Log() << kWARNING <<
"Sorry, the option of nCuts<0 using a more elaborate node splitting algorithm " <<
Endl;
589 Log() << kWARNING <<
"is not implemented for regression analysis ! " <<
Endl;
590 Log() << kWARNING <<
"--> I switch do default nCuts = 20 and use standard node splitting"<<
Endl;
594 if (fRandomisedTrees){
595 Log() << kINFO <<
" Randomised trees use no pruning" <<
Endl;
600 if (fUseFisherCuts) {
601 Log() << kWARNING <<
"When using the option UseFisherCuts, the other option nCuts<0 (i.e. using" <<
Endl;
602 Log() <<
" a more elaborate node splitting algorithm) is not implemented. " <<
Endl;
609 Log() << kERROR <<
" Zero Decision Trees demanded... that does not work !! " 610 <<
" I set it to 1 .. just so that the program does not crash" 615 fNegWeightTreatment.ToLower();
616 if (fNegWeightTreatment ==
"ignorenegweightsintraining") fNoNegWeightsInTraining =
kTRUE;
617 else if (fNegWeightTreatment ==
"nonegweightsintraining") fNoNegWeightsInTraining =
kTRUE;
618 else if (fNegWeightTreatment ==
"inverseboostnegweights") fInverseBoostNegWeights =
kTRUE;
619 else if (fNegWeightTreatment ==
"pairnegweightsglobal") fPairNegWeightsGlobal =
kTRUE;
620 else if (fNegWeightTreatment ==
"pray")
Log() << kDEBUG <<
"Yes, good luck with praying " <<
Endl;
622 Log() << kINFO << GetOptions() <<
Endl;
623 Log() << kFATAL <<
"<ProcessOptions> unknown option for treating negative event weights during training " << fNegWeightTreatment <<
" requested" <<
Endl;
626 if (fNegWeightTreatment ==
"pairnegweightsglobal")
627 Log() << kWARNING <<
" you specified the option NegWeightTreatment=PairNegWeightsGlobal : This option is still considered EXPERIMENTAL !! " <<
Endl;
634 while (tmp < fNNodesMax){
638 Log() << kWARNING <<
"You have specified a deprecated option *NNodesMax="<<fNNodesMax
639 <<
"* \n this has been translated to MaxDepth="<<fMaxDepth<<
Endl;
643 if (fUseNTrainEvents>0){
644 fBaggedSampleFraction = (
Double_t) fUseNTrainEvents/Data()->GetNTrainingEvents();
645 Log() << kWARNING <<
"You have specified a deprecated option *UseNTrainEvents="<<fUseNTrainEvents
646 <<
"* \n this has been translated to BaggedSampleFraction="<<fBaggedSampleFraction<<
"(%)"<<
Endl;
649 if (fBoostType==
"Bagging") fBaggedBoost =
kTRUE;
650 if (fBaggedGradBoost){
651 fBaggedBoost =
kTRUE;
652 Log() << kWARNING <<
"You have specified a deprecated option *UseBaggedGrad* --> please use *UseBaggedBoost* instead" <<
Endl;
660 if (sizeInPercent > 0 && sizeInPercent < 50){
661 fMinNodeSize=sizeInPercent;
664 Log() << kFATAL <<
"you have demanded a minimal node size of " 665 << sizeInPercent <<
"% of the training events.. \n" 666 <<
" that somehow does not make sense "<<
Endl;
676 if (sizeInPercent.
IsFloat()) SetMinNodeSize(sizeInPercent.
Atof());
678 Log() << kFATAL <<
"I had problems reading the option MinNodeEvents, which " 679 <<
"after removing a possible % sign now reads " << sizeInPercent <<
Endl;
691 fBoostType =
"AdaBoost";
692 if(DataInfo().GetNClasses()!=0)
696 fBoostType =
"AdaBoostR2";
697 fAdaBoostR2Loss =
"Quadratic";
698 if(DataInfo().GetNClasses()!=0)
704 fPruneMethodS =
"NoPruning";
708 fFValidationEvents = 0.5;
709 fRandomisedTrees =
kFALSE;
712 fUsePoissonNvars =
kTRUE;
717 SetSignalReferenceCut( 0 );
730 for (
UInt_t i=0; i<fForest.size(); i++)
delete fForest[i];
733 fBoostWeights.clear();
734 if (fMonitorNtuple) { fMonitorNtuple->Delete(); fMonitorNtuple=NULL; }
735 fVariableImportance.clear();
737 fLossFunctionEventInfo.clear();
742 Log() << kDEBUG <<
" successfully(?) reset the method " <<
Endl;
754 for (
UInt_t i=0; i<fForest.size(); i++)
delete fForest[i];
762 if (!HasTrainingTree())
Log() << kFATAL <<
"<Init> Data().TrainingTree() is zero pointer" <<
Endl;
764 if (fEventSample.size() > 0) {
766 for (
UInt_t iev=0; iev<fEventSample.size(); iev++) fEventSample[iev]->SetBoostWeight(1.);
769 UInt_t nevents = Data()->GetNTrainingEvents();
771 std::vector<const TMVA::Event*> tmpEventSample;
772 for (
Long64_t ievt=0; ievt<nevents; ievt++) {
774 Event*
event =
new Event( *GetTrainingEvent(ievt) );
775 tmpEventSample.push_back(event);
778 if (!DoRegression()) DeterminePreselectionCuts(tmpEventSample);
779 else fDoPreselection =
kFALSE;
781 for (
UInt_t i=0; i<tmpEventSample.size(); i++)
delete tmpEventSample[i];
786 for (
Long64_t ievt=0; ievt<nevents; ievt++) {
789 Event*
event =
new Event( *GetTrainingEvent(ievt) );
790 if (fDoPreselection){
791 if (
TMath::Abs(ApplyPreselectionCuts(event)) > 0.05) {
797 if (event->GetWeight() < 0 && (IgnoreEventsWithNegWeightsInTraining() || fNoNegWeightsInTraining)){
798 if (firstNegWeight) {
799 Log() << kWARNING <<
" Note, you have events with negative event weight in the sample, but you've chosen to ignore them" <<
Endl;
803 }
else if (event->GetWeight()==0){
804 if (firstZeroWeight) {
806 Log() <<
"Events with weight == 0 are going to be simply ignored " <<
Endl;
810 if (event->GetWeight() < 0) {
811 fTrainWithNegWeights=
kTRUE;
814 if (fPairNegWeightsGlobal){
815 Log() << kWARNING <<
"Events with negative event weights are found and " 816 <<
" will be removed prior to the actual BDT training by global " 817 <<
" paring (and subsequent annihilation) with positiv weight events" 820 Log() << kWARNING <<
"Events with negative event weights are USED during " 821 <<
"the BDT training. This might cause problems with small node sizes " 822 <<
"or with the boosting. Please remove negative events from training " 823 <<
"using the option *IgnoreEventsWithNegWeightsInTraining* in case you " 824 <<
"observe problems with the boosting" 831 Double_t modulo = 1.0/(fFValidationEvents);
832 Int_t imodulo =
static_cast<Int_t>( fmod(modulo,1.0) > 0.5 ?
ceil(modulo) :
floor(modulo) );
833 if (ievt % imodulo == 0) fValidationSample.push_back( event );
834 else fEventSample.push_back( event );
837 fEventSample.push_back(event);
843 Log() << kINFO <<
"<InitEventSample> Internally I use " << fEventSample.size()
844 <<
" for Training and " << fValidationSample.size()
845 <<
" for Pruning Validation (" << ((
Float_t)fValidationSample.size())/((
Float_t)fEventSample.size()+fValidationSample.size())*100.0
846 <<
"% of training used for validation)" <<
Endl;
850 if (fPairNegWeightsGlobal) PreProcessNegativeEventWeights();
853 if (DoRegression()) {
855 }
else if (DoMulticlass()) {
857 }
else if (!fSkipNormalization) {
859 Log() << kDEBUG <<
"\t<InitEventSample> For classification trees, "<<
Endl;
860 Log() << kDEBUG <<
" \tthe effective number of backgrounds is scaled to match "<<
Endl;
861 Log() << kDEBUG <<
" \tthe signal. Otherwise the first boosting step would do 'just that'!"<<
Endl;
875 Double_t nevents = fEventSample.size();
877 Int_t sumSig=0, sumBkg=0;
878 for (
UInt_t ievt=0; ievt<fEventSample.size(); ievt++) {
879 if ((DataInfo().IsSignal(fEventSample[ievt])) ) {
880 sumSigW += fEventSample[ievt]->GetWeight();
883 sumBkgW += fEventSample[ievt]->GetWeight();
887 if (sumSigW && sumBkgW){
888 Double_t normSig = nevents/((1+fSigToBkgFraction)*sumSigW)*fSigToBkgFraction;
889 Double_t normBkg = nevents/((1+fSigToBkgFraction)*sumBkgW); ;
890 Log() << kDEBUG <<
"\tre-normalise events such that Sig and Bkg have respective sum of weights = " 891 << fSigToBkgFraction <<
Endl;
892 Log() << kDEBUG <<
" \tsig->sig*"<<normSig <<
"ev. bkg->bkg*"<<normBkg <<
"ev." <<
Endl;
893 Log() << kHEADER <<
"#events: (reweighted) sig: "<< sumSigW*normSig <<
" bkg: " << sumBkgW*normBkg <<
Endl;
894 Log() << kINFO <<
"#events: (unweighted) sig: "<< sumSig <<
" bkg: " << sumBkg <<
Endl;
895 for (
Long64_t ievt=0; ievt<nevents; ievt++) {
896 if ((DataInfo().IsSignal(fEventSample[ievt])) ) fEventSample[ievt]->SetBoostWeight(normSig);
897 else fEventSample[ievt]->SetBoostWeight(normBkg);
900 Log() << kINFO <<
"--> could not determine scaling factors as either there are " <<
Endl;
901 Log() << kINFO <<
" no signal events (sumSigW="<<sumSigW<<
") or no bkg ev. (sumBkgW="<<sumBkgW<<
")"<<
Endl;
906 fTrainSample = &fEventSample;
908 GetBaggedSubSample(fEventSample);
909 fTrainSample = &fSubSample;
935 std::vector<const Event*> negEvents;
936 for (
UInt_t iev = 0; iev < fEventSample.size(); iev++){
937 if (fEventSample[iev]->GetWeight() < 0) {
938 totalNegWeights += fEventSample[iev]->GetWeight();
939 negEvents.push_back(fEventSample[iev]);
941 totalPosWeights += fEventSample[iev]->GetWeight();
943 totalWeights += fEventSample[iev]->GetWeight();
945 if (totalNegWeights == 0 ) {
946 Log() << kINFO <<
"no negative event weights found .. no preprocessing necessary" <<
Endl;
949 Log() << kINFO <<
"found a total of " << totalNegWeights <<
" of negative event weights which I am going to try to pair with positive events to annihilate them" <<
Endl;
950 Log() << kINFO <<
"found a total of " << totalPosWeights <<
" of events with positive weights" <<
Endl;
951 Log() << kINFO <<
"--> total sum of weights = " << totalWeights <<
" = " << totalNegWeights+totalPosWeights <<
Endl;
958 for (
Int_t i=0; i<2; i++){
959 invCov = ((*cov)[i]);
961 std::cout <<
"<MethodBDT::PreProcessNeg...> matrix is almost singular with determinant=" 963 <<
" did you use the variables that are linear combinations or highly correlated?" 967 std::cout <<
"<MethodBDT::PreProcessNeg...> matrix is singular with determinant=" 969 <<
" did you use the variables that are linear combinations?" 978 Log() << kINFO <<
"Found a total of " << totalNegWeights <<
" in negative weights out of " << fEventSample.size() <<
" training events " <<
Endl;
979 Timer timer(negEvents.size(),
"Negative Event paired");
980 for (
UInt_t nev = 0; nev < negEvents.size(); nev++){
982 Double_t weight = negEvents[nev]->GetWeight();
983 UInt_t iClassID = negEvents[nev]->GetClass();
984 invCov = ((*cov)[iClassID]);
990 for (
UInt_t iev = 0; iev < fEventSample.size(); iev++){
991 if (iClassID==fEventSample[iev]->
GetClass() && fEventSample[iev]->GetWeight() > 0){
993 for (
UInt_t ivar=0; ivar < GetNvar(); ivar++){
994 for (
UInt_t jvar=0; jvar<GetNvar(); jvar++){
995 dist += (negEvents[nev]->GetValue(ivar)-fEventSample[iev]->GetValue(ivar))*
996 (*invCov)[ivar][jvar]*
997 (negEvents[nev]->GetValue(jvar)-fEventSample[iev]->GetValue(jvar));
1000 if (
dist < minDist) { iMin=iev; minDist=
dist;}
1006 Double_t newWeight = (negEvents[nev]->GetWeight() + fEventSample[iMin]->GetWeight());
1008 negEvents[nev]->SetBoostWeight( 0 );
1009 fEventSample[iMin]->SetBoostWeight( newWeight/fEventSample[iMin]->GetOriginalWeight() );
1011 negEvents[nev]->SetBoostWeight( newWeight/negEvents[nev]->GetOriginalWeight() );
1012 fEventSample[iMin]->SetBoostWeight( 0 );
1015 }
else Log() << kFATAL <<
"preprocessing didn't find event to pair with the negative weight ... probably a bug" <<
Endl;
1016 weight = negEvents[nev]->GetWeight();
1019 Log() << kINFO <<
"<Negative Event Pairing> took: " << timer.GetElapsedTime()
1023 totalNegWeights = 0;
1024 totalPosWeights = 0;
1031 std::vector<const Event*> newEventSample;
1033 for (
UInt_t iev = 0; iev < fEventSample.size(); iev++){
1034 if (fEventSample[iev]->GetWeight() < 0) {
1035 totalNegWeights += fEventSample[iev]->GetWeight();
1036 totalWeights += fEventSample[iev]->GetWeight();
1038 totalPosWeights += fEventSample[iev]->GetWeight();
1039 totalWeights += fEventSample[iev]->GetWeight();
1041 if (fEventSample[iev]->GetWeight() > 0) {
1042 newEventSample.push_back(
new Event(*fEventSample[iev]));
1043 if (fEventSample[iev]->
GetClass() == fSignalClass){
1044 sigWeight += fEventSample[iev]->GetWeight();
1047 bkgWeight += fEventSample[iev]->GetWeight();
1052 if (totalNegWeights < 0)
Log() << kFATAL <<
" compensation of negative event weights with positive ones did not work " << totalNegWeights <<
Endl;
1054 for (
UInt_t i=0; i<fEventSample.size(); i++)
delete fEventSample[i];
1055 fEventSample = newEventSample;
1057 Log() << kINFO <<
" after PreProcessing, the Event sample is left with " << fEventSample.size() <<
" events (unweighted), all with positive weights, adding up to " << totalWeights <<
Endl;
1058 Log() << kINFO <<
" nSig="<<nSig <<
" sigWeight="<<sigWeight <<
" nBkg="<<nBkg <<
" bkgWeight="<<bkgWeight <<
Endl;
1070 std::map<TString,TMVA::Interval*> tuneParameters;
1071 std::map<TString,Double_t> tunedParameters;
1080 tuneParameters.insert(std::pair<TString,Interval*>(
"NTrees",
new Interval(10,1000,5)));
1081 tuneParameters.insert(std::pair<TString,Interval*>(
"MaxDepth",
new Interval(2,4,3)));
1082 tuneParameters.insert(std::pair<TString,Interval*>(
"MinNodeSize",
new LogInterval(1,30,30)));
1087 if (fBoostType==
"AdaBoost"){
1088 tuneParameters.insert(std::pair<TString,Interval*>(
"AdaBoostBeta",
new Interval(.2,1.,5)));
1090 }
else if (fBoostType==
"Grad"){
1091 tuneParameters.insert(std::pair<TString,Interval*>(
"Shrinkage",
new Interval(0.05,0.50,5)));
1093 }
else if (fBoostType==
"Bagging" && fRandomisedTrees){
1096 tuneParameters.insert(std::pair<TString,Interval*>(
"UseNvars",
new Interval(min_var,max_var,4)));
1100 Log()<<kINFO <<
" the following BDT parameters will be tuned on the respective *grid*\n"<<
Endl;
1101 std::map<TString,TMVA::Interval*>::iterator it;
1102 for(it=tuneParameters.begin(); it!= tuneParameters.end(); it++){
1103 Log() << kWARNING << it->first <<
Endl;
1104 std::ostringstream oss;
1105 (it->second)->
Print(oss);
1111 tunedParameters=optimize.
optimize();
1113 return tunedParameters;
1122 std::map<TString,Double_t>::iterator it;
1123 for(it=tuneParameters.begin(); it!= tuneParameters.end(); it++){
1124 Log() << kWARNING << it->first <<
" = " << it->second <<
Endl;
1125 if (it->first ==
"MaxDepth" ) SetMaxDepth ((
Int_t)it->second);
1126 else if (it->first ==
"MinNodeSize" ) SetMinNodeSize (it->second);
1127 else if (it->first ==
"NTrees" ) SetNTrees ((
Int_t)it->second);
1128 else if (it->first ==
"NodePurityLimit") SetNodePurityLimit (it->second);
1129 else if (it->first ==
"AdaBoostBeta" ) SetAdaBoostBeta (it->second);
1130 else if (it->first ==
"Shrinkage" ) SetShrinkage (it->second);
1131 else if (it->first ==
"UseNvars" ) SetUseNvars ((
Int_t)it->second);
1132 else if (it->first ==
"BaggedSampleFraction" ) SetBaggedSampleFraction (it->second);
1133 else Log() << kFATAL <<
" SetParameter for " << it->first <<
" not yet implemented " <<
Endl;
1150 Log() << kERROR <<
" Zero Decision Trees demanded... that does not work !! " 1151 <<
" I set it to 1 .. just so that the program does not crash" 1156 if (fInteractive && fInteractive->NotInitialized()){
1157 std::vector<TString> titles = {
"Boost weight",
"Error Fraction"};
1158 fInteractive->Init(titles);
1160 fIPyMaxIter = fNTrees;
1161 fExitFromTraining =
false;
1165 if (IsNormalised())
Log() << kFATAL <<
"\"Normalise\" option cannot be used with BDT; " 1166 <<
"please remove the option from the configuration string, or " 1167 <<
"use \"!Normalise\"" 1171 Log() << kINFO <<
"Regression Loss Function: "<< fRegressionLossFunctionBDTG->Name() <<
Endl;
1173 Log() << kINFO <<
"Training "<< fNTrees <<
" Decision Trees ... patience please" <<
Endl;
1175 Log() << kDEBUG <<
"Training with maximal depth = " <<fMaxDepth
1176 <<
", MinNodeEvents=" << fMinNodeEvents
1177 <<
", NTrees="<<fNTrees
1178 <<
", NodePurityLimit="<<fNodePurityLimit
1179 <<
", AdaBoostBeta="<<fAdaBoostBeta
1185 TString hname =
"AdaBooost weight distribution";
1191 if (DoRegression()) {
1195 hname=
"Boost event weights distribution";
1201 TH1* nodesBeforePruningVsTree =
new TH1I(
Form(
"%s_NodesBeforePruning",DataInfo().
GetName()),
"nodes before pruning",fNTrees,0,fNTrees);
1202 TH1* nodesAfterPruningVsTree =
new TH1I(
Form(
"%s_NodesAfterPruning",DataInfo().
GetName()),
"nodes after pruning",fNTrees,0,fNTrees);
1206 if(!DoMulticlass()){
1210 results->
Store(
h,
"BoostWeights");
1214 if (fDoBoostMonitor){
1215 TH2* boostMonitor =
new TH2F(
"BoostMonitor",
"ROC Integral Vs iTree",2,0,fNTrees,2,0,1.05);
1217 boostMonitor->
SetYTitle(
"ROC Integral");
1218 results->
Store(boostMonitor,
"BoostMonitor");
1220 boostMonitorGraph->
SetName(
"BoostMonitorGraph");
1221 boostMonitorGraph->
SetTitle(
"ROCIntegralVsNTrees");
1222 results->
Store(boostMonitorGraph,
"BoostMonitorGraph");
1226 h =
new TH1F(
"BoostWeightVsTree",
"Boost weights vs tree",fNTrees,0,fNTrees);
1229 results->
Store(
h,
"BoostWeightsVsTree");
1232 h =
new TH1F(
"ErrFractHist",
"error fraction vs tree number",fNTrees,0,fNTrees);
1235 results->
Store(
h,
"ErrorFrac");
1238 nodesBeforePruningVsTree->
SetXTitle(
"#tree");
1239 nodesBeforePruningVsTree->
SetYTitle(
"#tree nodes");
1240 results->
Store(nodesBeforePruningVsTree);
1243 nodesAfterPruningVsTree->
SetXTitle(
"#tree");
1244 nodesAfterPruningVsTree->
SetYTitle(
"#tree nodes");
1245 results->
Store(nodesAfterPruningVsTree);
1249 fMonitorNtuple=
new TTree(
"MonitorNtuple",
"BDT variables");
1250 fMonitorNtuple->Branch(
"iTree",&fITree,
"iTree/I");
1251 fMonitorNtuple->Branch(
"boostWeight",&fBoostWeight,
"boostWeight/D");
1252 fMonitorNtuple->Branch(
"errorFraction",&fErrorFraction,
"errorFraction/D");
1255 Int_t nNodesBeforePruningCount = 0;
1256 Int_t nNodesAfterPruningCount = 0;
1258 Int_t nNodesBeforePruning = 0;
1259 Int_t nNodesAfterPruning = 0;
1262 if(fBoostType==
"Grad"){
1263 InitGradBoost(fEventSample);
1269 while (itree < fNTrees && continueBoost){
1270 if (fExitFromTraining)
break;
1271 fIPyCurrentIter = itree;
1284 if (fBoostType!=
"Grad"){
1285 Log() << kFATAL <<
"Multiclass is currently only supported by gradient boost. " 1286 <<
"Please change boost option accordingly (GradBoost)." 1290 UInt_t nClasses = DataInfo().GetNClasses();
1291 for (
UInt_t i=0;i<nClasses;i++){
1295 fForest.push_back(
new DecisionTree( fSepType, fMinNodeSize, fNCuts, &(DataInfo()), i,
1296 fRandomisedTrees, fUseNvars, fUsePoissonNvars, fMaxDepth,
1297 itree*nClasses+i, fNodePurityLimit, itree*nClasses+1));
1298 fForest.back()->SetNVars(GetNvar());
1299 if (fUseFisherCuts) {
1300 fForest.back()->SetUseFisherCuts();
1301 fForest.back()->SetMinLinCorrForFisher(fMinLinCorrForFisher);
1302 fForest.back()->SetUseExclusiveVars(fUseExclusiveVars);
1306 nNodesBeforePruning = fForest.back()->BuildTree(*fTrainSample);
1307 Double_t bw = this->Boost(*fTrainSample, fForest.back(),i);
1309 fBoostWeights.push_back(bw);
1311 fBoostWeights.push_back(0);
1312 Log() << kWARNING <<
"stopped boosting at itree="<<itree <<
Endl;
1319 fForest.push_back(
new DecisionTree( fSepType, fMinNodeSize, fNCuts, &(DataInfo()), fSignalClass,
1320 fRandomisedTrees, fUseNvars, fUsePoissonNvars, fMaxDepth,
1321 itree, fNodePurityLimit, itree));
1322 fForest.back()->SetNVars(GetNvar());
1323 if (fUseFisherCuts) {
1324 fForest.back()->SetUseFisherCuts();
1325 fForest.back()->SetMinLinCorrForFisher(fMinLinCorrForFisher);
1326 fForest.back()->SetUseExclusiveVars(fUseExclusiveVars);
1329 nNodesBeforePruning = fForest.back()->BuildTree(*fTrainSample);
1331 if (fUseYesNoLeaf && !DoRegression() && fBoostType!=
"Grad") {
1332 nNodesBeforePruning = fForest.back()->CleanTree();
1335 nNodesBeforePruningCount += nNodesBeforePruning;
1336 nodesBeforePruningVsTree->
SetBinContent(itree+1,nNodesBeforePruning);
1338 fForest.back()->SetPruneMethod(fPruneMethod);
1339 fForest.back()->SetPruneStrength(fPruneStrength);
1341 std::vector<const Event*> * validationSample = NULL;
1342 if(fAutomatic) validationSample = &fValidationSample;
1344 Double_t bw = this->Boost(*fTrainSample, fForest.back());
1346 fBoostWeights.push_back(bw);
1348 fBoostWeights.push_back(0);
1349 Log() << kWARNING <<
"stopped boosting at itree="<<itree <<
Endl;
1360 if (fUseYesNoLeaf && !DoRegression() && fBoostType!=
"Grad"){
1361 fForest.back()->CleanTree();
1363 nNodesAfterPruning = fForest.back()->GetNNodes();
1364 nNodesAfterPruningCount += nNodesAfterPruning;
1365 nodesAfterPruningVsTree->
SetBinContent(itree+1,nNodesAfterPruning);
1368 fInteractive->AddPoint(itree, fBoostWeight, fErrorFraction);
1371 fMonitorNtuple->Fill();
1372 if (fDoBoostMonitor){
1373 if (! DoRegression() ){
1374 if ( itree==fNTrees-1 || (!(itree%500)) ||
1375 (!(itree%250) && itree <1000)||
1376 (!(itree%100) && itree < 500)||
1377 (!(itree%50) && itree < 250)||
1378 (!(itree%25) && itree < 150)||
1379 (!(itree%10) && itree < 50)||
1380 (!(itree%5) && itree < 20)
1381 ) BoostMonitor(itree);
1392 Log() << kDEBUG <<
"\t<Train> average number of nodes (w/o pruning) : " 1393 << nNodesBeforePruningCount/GetNTrees() <<
Endl;
1396 Log() << kDEBUG <<
"\t<Train> average number of nodes before/after pruning : " 1397 << nNodesBeforePruningCount/GetNTrees() <<
" / " 1398 << nNodesAfterPruningCount/GetNTrees()
1406 Log() << kDEBUG <<
"Now I delete the privat data sample"<<
Endl;
1407 for (
UInt_t i=0; i<fEventSample.size(); i++)
delete fEventSample[i];
1408 for (
UInt_t i=0; i<fValidationSample.size(); i++)
delete fValidationSample[i];
1409 fEventSample.clear();
1410 fValidationSample.clear();
1412 if (!fExitFromTraining) fIPyMaxIter = fIPyCurrentIter;
1423 for (
UInt_t itree=0; itree<nTrees; itree++) {
1428 return 2.0/(1.0+
exp(-2.0*
sum))-1;
1436 if (DoMulticlass()) {
1437 UInt_t nClasses = DataInfo().GetNClasses();
1438 std::vector<Double_t> expCache;
1439 if (cls == nClasses - 1) {
1440 expCache.resize(nClasses);
1442 for (
auto e : eventSample) {
1443 fResiduals[
e].at(cls) += fForest.back()->CheckEvent(
e,
kFALSE);
1444 if (cls == nClasses - 1) {
1445 auto &residualsThisEvent = fResiduals[
e];
1446 std::transform(residualsThisEvent.begin(),
1447 residualsThisEvent.begin() + nClasses,
1448 expCache.begin(), [](
Double_t d) {
return exp(d); });
1449 for (
UInt_t i = 0; i < nClasses; i++) {
1451 for (
UInt_t j = 0; j < nClasses; j++) {
1453 norm += expCache[j] / expCache[i];
1456 Double_t p_cls = 1.0 / (1.0 + norm);
1457 Double_t res = (
e->GetClass() == i) ? (1.0 - p_cls) : (-p_cls);
1463 for (
auto e : eventSample) {
1464 auto &residualAt0 = fResiduals[
e].at(0);
1465 residualAt0 += fForest.back()->CheckEvent(
e,
kFALSE);
1466 Double_t p_sig = 1.0 / (1.0 +
exp(-2.0 * residualAt0));
1467 Double_t res = (DataInfo().IsSignal(
e) ? 1 : 0) - p_sig;
1479 for (std::vector<const TMVA::Event*>::const_iterator
e=fEventSample.begin();
e!=fEventSample.end();
e++) {
1480 fLossFunctionEventInfo[*
e].predictedValue += fForest.back()->CheckEvent(*
e,
kFALSE);
1484 fRegressionLossFunctionBDTG->SetTargets(eventSample, fLossFunctionEventInfo);
1497 std::unordered_map<TMVA::DecisionTreeNode*, LeafInfo> leaves;
1498 for (
auto e : eventSample) {
1501 auto &
v = leaves[node];
1502 auto target =
e->GetTarget(cls);
1503 v.sumWeightTarget += target * weight;
1504 v.sum2 +=
fabs(target) * (1.0 -
fabs(target)) * weight;
1506 for (
auto &iLeave : leaves) {
1507 constexpr
auto minValue = 1
e-30;
1508 if (iLeave.second.sum2 < minValue) {
1509 iLeave.second.sum2 = minValue;
1511 const Double_t K = DataInfo().GetNClasses();
1512 iLeave.first->SetResponse(fShrinkage * (
K - 1) /
K * iLeave.second.sumWeightTarget / iLeave.second.sum2);
1517 DoMulticlass() ? UpdateTargets(fEventSample, cls) : UpdateTargets(fEventSample);
1528 std::map<TMVA::DecisionTreeNode*,vector< TMVA::LossFunctionEventInfo > > leaves;
1529 for (std::vector<const TMVA::Event*>::const_iterator
e=eventSample.begin();
e!=eventSample.end();
e++) {
1531 (leaves[node]).push_back(fLossFunctionEventInfo[*
e]);
1536 for (std::map<
TMVA::DecisionTreeNode*,vector< TMVA::LossFunctionEventInfo > >::iterator iLeave=leaves.begin();
1537 iLeave!=leaves.end();++iLeave){
1538 Double_t fit = fRegressionLossFunctionBDTG->Fit(iLeave->second);
1539 (iLeave->first)->SetResponse(fShrinkage*fit);
1542 UpdateTargetsRegression(*fTrainSample);
1556 for (std::vector<const TMVA::Event*>::const_iterator
e=eventSample.begin();
e!=eventSample.end();
e++) {
1560 fRegressionLossFunctionBDTG->Init(fLossFunctionEventInfo, fBoostWeights);
1561 UpdateTargetsRegression(*fTrainSample,
kTRUE);
1564 else if(DoMulticlass()){
1565 UInt_t nClasses = DataInfo().GetNClasses();
1566 for (std::vector<const TMVA::Event*>::const_iterator
e=eventSample.begin();
e!=eventSample.end();
e++) {
1567 for (
UInt_t i=0;i<nClasses;i++){
1569 Double_t r = (*e)->GetClass()==i?(1-1.0/nClasses):(-1.0/nClasses);
1571 fResiduals[*
e].push_back(0);
1576 for (std::vector<const TMVA::Event*>::const_iterator
e=eventSample.begin();
e!=eventSample.end();
e++) {
1577 Double_t r = (DataInfo().IsSignal(*
e)?1:0)-0.5;
1579 fResiduals[*
e].push_back(0);
1590 for (
UInt_t ievt=0; ievt<fValidationSample.size(); ievt++) {
1591 Bool_t isSignalType= (dt->
CheckEvent(fValidationSample[ievt]) > fNodePurityLimit ) ? 1 : 0;
1593 if (isSignalType == (DataInfo().IsSignal(fValidationSample[ievt])) ) {
1594 ncorrect += fValidationSample[ievt]->GetWeight();
1597 nfalse += fValidationSample[ievt]->GetWeight();
1601 return ncorrect / (ncorrect + nfalse);
1612 if (fBoostType==
"AdaBoost") returnVal = this->AdaBoost (eventSample, dt);
1613 else if (fBoostType==
"AdaCost") returnVal = this->AdaCost (eventSample, dt);
1614 else if (fBoostType==
"Bagging") returnVal = this->Bagging ( );
1615 else if (fBoostType==
"RegBoost") returnVal = this->RegBoost (eventSample, dt);
1616 else if (fBoostType==
"AdaBoostR2") returnVal = this->AdaBoostR2(eventSample, dt);
1617 else if (fBoostType==
"Grad"){
1619 returnVal = this->GradBoostRegression(eventSample, dt);
1620 else if(DoMulticlass())
1621 returnVal = this->GradBoost (eventSample, dt, cls);
1623 returnVal = this->GradBoost (eventSample, dt);
1626 Log() << kINFO << GetOptions() <<
Endl;
1627 Log() << kFATAL <<
"<Boost> unknown boost option " << fBoostType<<
" called" <<
Endl;
1631 GetBaggedSubSample(fEventSample);
1646 TH1F *tmpS =
new TH1F(
"tmpS",
"", 100 , -1., 1.00001 );
1647 TH1F *tmpB =
new TH1F(
"tmpB",
"", 100 , -1., 1.00001 );
1651 UInt_t signalClassNr = DataInfo().GetClassInfo(
"Signal")->GetNumber();
1661 UInt_t nevents = Data()->GetNTestEvents();
1662 for (
UInt_t iev=0; iev < nevents; iev++){
1663 const Event*
event = GetTestingEvent(iev);
1665 if (event->GetClass() == signalClassNr) {tmp=tmpS;}
1667 tmp->
Fill(PrivateGetMvaValue(event),event->GetWeight());
1671 std::vector<TH1F*> hS;
1672 std::vector<TH1F*> hB;
1673 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++){
1674 hS.push_back(
new TH1F(
Form(
"SigVar%dAtTree%d",ivar,iTree),
Form(
"SigVar%dAtTree%d",ivar,iTree),100,DataInfo().GetVariableInfo(ivar).GetMin(),DataInfo().GetVariableInfo(ivar).GetMax()));
1675 hB.push_back(
new TH1F(
Form(
"BkgVar%dAtTree%d",ivar,iTree),
Form(
"BkgVar%dAtTree%d",ivar,iTree),100,DataInfo().GetVariableInfo(ivar).GetMin(),DataInfo().GetVariableInfo(ivar).GetMax()));
1676 results->
Store(hS.back(),hS.back()->GetTitle());
1677 results->
Store(hB.back(),hB.back()->GetTitle());
1681 for (
UInt_t iev=0; iev < fEventSample.size(); iev++){
1682 if (fEventSample[iev]->GetBoostWeight() > max) max = 1.01*fEventSample[iev]->GetBoostWeight();
1684 TH1F *tmpBoostWeightsS =
new TH1F(
Form(
"BoostWeightsInTreeS%d",iTree),
Form(
"BoostWeightsInTreeS%d",iTree),100,0.,max);
1685 TH1F *tmpBoostWeightsB =
new TH1F(
Form(
"BoostWeightsInTreeB%d",iTree),
Form(
"BoostWeightsInTreeB%d",iTree),100,0.,max);
1686 results->
Store(tmpBoostWeightsS,tmpBoostWeightsS->
GetTitle());
1687 results->
Store(tmpBoostWeightsB,tmpBoostWeightsB->
GetTitle());
1689 TH1F *tmpBoostWeights;
1690 std::vector<TH1F*> *
h;
1692 for (
UInt_t iev=0; iev < fEventSample.size(); iev++){
1693 if (fEventSample[iev]->
GetClass() == signalClassNr) {
1694 tmpBoostWeights=tmpBoostWeightsS;
1697 tmpBoostWeights=tmpBoostWeightsB;
1700 tmpBoostWeights->
Fill(fEventSample[iev]->GetBoostWeight());
1701 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++){
1702 (*h)[ivar]->Fill(fEventSample[iev]->GetValue(ivar),fEventSample[iev]->GetWeight());
1738 Double_t err=0, sumGlobalw=0, sumGlobalwfalse=0, sumGlobalwfalse2=0;
1740 std::vector<Double_t> sumw(DataInfo().GetNClasses(),0);
1743 for (std::vector<const TMVA::Event*>::const_iterator
e=eventSample.begin();
e!=eventSample.end();
e++) {
1746 UInt_t iclass=(*e)->GetClass();
1749 if ( DoRegression() ) {
1751 sumGlobalwfalse += w * tmpDev;
1752 sumGlobalwfalse2 += w * tmpDev*tmpDev;
1753 if (tmpDev > maxDev) maxDev = tmpDev;
1758 if (!(isSignalType == DataInfo().IsSignal(*
e))) {
1759 sumGlobalwfalse+= w;
1764 if (DataInfo().IsSignal(*
e)) trueType = 1;
1766 sumGlobalwfalse+= w*trueType*dtoutput;
1771 err = sumGlobalwfalse/sumGlobalw ;
1772 if ( DoRegression() ) {
1774 if (fAdaBoostR2Loss==
"linear"){
1775 err = sumGlobalwfalse/maxDev/sumGlobalw ;
1777 else if (fAdaBoostR2Loss==
"quadratic"){
1778 err = sumGlobalwfalse2/maxDev/maxDev/sumGlobalw ;
1780 else if (fAdaBoostR2Loss==
"exponential"){
1782 for (std::vector<const TMVA::Event*>::const_iterator
e=eventSample.begin();
e!=eventSample.end();
e++) {
1785 err += w * (1 -
exp (-tmpDev/maxDev)) / sumGlobalw;
1790 Log() << kFATAL <<
" you've chosen a Loss type for Adaboost other than linear, quadratic or exponential " 1791 <<
" namely " << fAdaBoostR2Loss <<
"\n" 1792 <<
"and this is not implemented... a typo in the options ??" <<
Endl;
1796 Log() << kDEBUG <<
"BDT AdaBoos wrong/all: " << sumGlobalwfalse <<
"/" << sumGlobalw <<
Endl;
1800 std::vector<Double_t> newSumw(sumw.size(),0);
1803 if (err >= 0.5 && fUseYesNoLeaf) {
1807 Log() << kERROR <<
" YOUR tree has only 1 Node... kind of a funny *tree*. I cannot " 1808 <<
"boost such a thing... if after 1 step the error rate is == 0.5" 1810 <<
"please check why this happens, maybe too many events per node requested ?" 1814 Log() << kERROR <<
" The error rate in the BDT boosting is > 0.5. ("<< err
1815 <<
") That should not happen, please check your code (i.e... the BDT code), I " 1816 <<
" stop boosting here" <<
Endl;
1820 }
else if (err < 0) {
1821 Log() << kERROR <<
" The error rate in the BDT boosting is < 0. That can happen" 1822 <<
" due to improper treatment of negative weights in a Monte Carlo.. (if you have" 1823 <<
" an idea on how to do it in a better way, please let me know (Helge.Voss@cern.ch)" 1824 <<
" for the time being I set it to its absolute value.. just to continue.." <<
Endl;
1828 boostWeight =
TMath::Log((1.-err)/err)*fAdaBoostBeta;
1830 boostWeight =
TMath::Log((1.+err)/(1-err))*fAdaBoostBeta;
1833 Log() << kDEBUG <<
"BDT AdaBoos wrong/all: " << sumGlobalwfalse <<
"/" << sumGlobalw <<
" 1-err/err="<<boostWeight<<
" log.."<<
TMath::Log(boostWeight)<<
Endl;
1838 for (std::vector<const TMVA::Event*>::const_iterator
e=eventSample.begin();
e!=eventSample.end();
e++) {
1840 if (fUseYesNoLeaf||DoRegression()){
1841 if ((!( (dt->
CheckEvent(*
e,fUseYesNoLeaf) > fNodePurityLimit ) == DataInfo().IsSignal(*
e))) || DoRegression()) {
1845 if ( (*e)->GetWeight() > 0 ){
1846 (*e)->SetBoostWeight( (*e)->GetBoostWeight() * boostfactor);
1848 if (DoRegression()) results->
GetHist(
"BoostWeights")->
Fill(boostfactor);
1850 if ( fInverseBoostNegWeights )(*e)->ScaleBoostWeight( 1. / boostfactor);
1851 else (*e)->SetBoostWeight( (*e)->GetBoostWeight() * boostfactor);
1859 if (DataInfo().IsSignal(*
e)) trueType = 1;
1863 if ( (*e)->GetWeight() > 0 ){
1864 (*e)->SetBoostWeight( (*e)->GetBoostWeight() * boostfactor);
1866 if (DoRegression()) results->
GetHist(
"BoostWeights")->
Fill(boostfactor);
1868 if ( fInverseBoostNegWeights )(*e)->ScaleBoostWeight( 1. / boostfactor);
1869 else (*e)->SetBoostWeight( (*e)->GetBoostWeight() * boostfactor);
1872 newSumGlobalw+=(*e)->GetWeight();
1873 newSumw[(*e)->GetClass()] += (*e)->GetWeight();
1879 Log() << kDEBUG <<
"new Nsig="<<newSumw[0]*globalNormWeight <<
" new Nbkg="<<newSumw[1]*globalNormWeight <<
Endl;
1882 for (std::vector<const TMVA::Event*>::const_iterator
e=eventSample.begin();
e!=eventSample.end();
e++) {
1886 if (DataInfo().IsSignal(*
e))(*e)->ScaleBoostWeight( globalNormWeight * fSigToBkgFraction );
1887 else (*e)->ScaleBoostWeight( globalNormWeight );
1890 if (!(DoRegression()))results->
GetHist(
"BoostWeights")->
Fill(boostWeight);
1894 fBoostWeight = boostWeight;
1895 fErrorFraction = err;
1921 Double_t err=0, sumGlobalWeights=0, sumGlobalCost=0;
1923 std::vector<Double_t> sumw(DataInfo().GetNClasses(),0);
1925 for (vector<const TMVA::Event*>::const_iterator
e=eventSample.begin();
e!=eventSample.end();
e++) {
1927 sumGlobalWeights += w;
1928 UInt_t iclass=(*e)->GetClass();
1932 if ( DoRegression() ) {
1933 Log() << kFATAL <<
" AdaCost not implemented for regression"<<
Endl;
1938 Bool_t isTrueSignal = DataInfo().IsSignal(*
e);
1939 Bool_t isSelectedSignal = (dtoutput>0);
1940 if (isTrueSignal) trueType = 1;
1944 if (isTrueSignal && isSelectedSignal) cost=Css;
1945 else if (isTrueSignal && !isSelectedSignal) cost=Cts_sb;
1946 else if (!isTrueSignal && isSelectedSignal) cost=Ctb_ss;
1947 else if (!isTrueSignal && !isSelectedSignal) cost=Cbb;
1948 else Log() << kERROR <<
"something went wrong in AdaCost" <<
Endl;
1950 sumGlobalCost+= w*trueType*dtoutput*cost;
1955 if ( DoRegression() ) {
1956 Log() << kFATAL <<
" AdaCost not implemented for regression"<<
Endl;
1961 sumGlobalCost /= sumGlobalWeights;
1966 vector<Double_t> newSumClassWeights(sumw.size(),0);
1968 Double_t boostWeight =
TMath::Log((1+sumGlobalCost)/(1-sumGlobalCost)) * fAdaBoostBeta;
1972 for (vector<const TMVA::Event*>::const_iterator
e=eventSample.begin();
e!=eventSample.end();
e++) {
1975 Bool_t isTrueSignal = DataInfo().IsSignal(*
e);
1976 Bool_t isSelectedSignal = (dtoutput>0);
1977 if (isTrueSignal) trueType = 1;
1981 if (isTrueSignal && isSelectedSignal) cost=Css;
1982 else if (isTrueSignal && !isSelectedSignal) cost=Cts_sb;
1983 else if (!isTrueSignal && isSelectedSignal) cost=Ctb_ss;
1984 else if (!isTrueSignal && !isSelectedSignal) cost=Cbb;
1985 else Log() << kERROR <<
"something went wrong in AdaCost" <<
Endl;
1988 if (DoRegression())
Log() << kFATAL <<
" AdaCost not implemented for regression"<<
Endl;
1989 if ( (*e)->GetWeight() > 0 ){
1990 (*e)->SetBoostWeight( (*e)->GetBoostWeight() * boostfactor);
1992 if (DoRegression())
Log() << kFATAL <<
" AdaCost not implemented for regression"<<
Endl;
1994 if ( fInverseBoostNegWeights )(*e)->ScaleBoostWeight( 1. / boostfactor);
1997 newSumGlobalWeights+=(*e)->GetWeight();
1998 newSumClassWeights[(*e)->GetClass()] += (*e)->GetWeight();
2003 Double_t globalNormWeight=
Double_t(eventSample.size())/newSumGlobalWeights;
2004 Log() << kDEBUG <<
"new Nsig="<<newSumClassWeights[0]*globalNormWeight <<
" new Nbkg="<<newSumClassWeights[1]*globalNormWeight <<
Endl;
2007 for (std::vector<const TMVA::Event*>::const_iterator
e=eventSample.begin();
e!=eventSample.end();
e++) {
2010 if (DataInfo().IsSignal(*
e))(*e)->ScaleBoostWeight( globalNormWeight * fSigToBkgFraction );
2011 else (*e)->ScaleBoostWeight( globalNormWeight );
2015 if (!(DoRegression()))results->
GetHist(
"BoostWeights")->
Fill(boostWeight);
2019 fBoostWeight = boostWeight;
2020 fErrorFraction = err;
2047 if (!fSubSample.empty()) fSubSample.clear();
2049 for (std::vector<const TMVA::Event*>::const_iterator
e=eventSample.begin();
e!=eventSample.end();
e++) {
2050 n = trandom->
PoissonD(fBaggedSampleFraction);
2051 for (
Int_t i=0;i<
n;i++) fSubSample.push_back(*
e);
2085 if ( !DoRegression() )
Log() << kFATAL <<
"Somehow you chose a regression boost method for a classification job" <<
Endl;
2087 Double_t err=0, sumw=0, sumwfalse=0, sumwfalse2=0;
2089 for (std::vector<const TMVA::Event*>::const_iterator
e=eventSample.begin();
e!=eventSample.end();
e++) {
2094 sumwfalse += w * tmpDev;
2095 sumwfalse2 += w * tmpDev*tmpDev;
2096 if (tmpDev > maxDev) maxDev = tmpDev;
2100 if (fAdaBoostR2Loss==
"linear"){
2101 err = sumwfalse/maxDev/sumw ;
2103 else if (fAdaBoostR2Loss==
"quadratic"){
2104 err = sumwfalse2/maxDev/maxDev/sumw ;
2106 else if (fAdaBoostR2Loss==
"exponential"){
2108 for (std::vector<const TMVA::Event*>::const_iterator
e=eventSample.begin();
e!=eventSample.end();
e++) {
2111 err += w * (1 -
exp (-tmpDev/maxDev)) / sumw;
2116 Log() << kFATAL <<
" you've chosen a Loss type for Adaboost other than linear, quadratic or exponential " 2117 <<
" namely " << fAdaBoostR2Loss <<
"\n" 2118 <<
"and this is not implemented... a typo in the options ??" <<
Endl;
2126 Log() << kERROR <<
" YOUR tree has only 1 Node... kind of a funny *tree*. I cannot " 2127 <<
"boost such a thing... if after 1 step the error rate is == 0.5" 2129 <<
"please check why this happens, maybe too many events per node requested ?" 2133 Log() << kERROR <<
" The error rate in the BDT boosting is > 0.5. ("<< err
2134 <<
") That should not happen, but is possible for regression trees, and" 2135 <<
" should trigger a stop for the boosting. please check your code (i.e... the BDT code), I " 2136 <<
" stop boosting " <<
Endl;
2140 }
else if (err < 0) {
2141 Log() << kERROR <<
" The error rate in the BDT boosting is < 0. That can happen" 2142 <<
" due to improper treatment of negative weights in a Monte Carlo.. (if you have" 2143 <<
" an idea on how to do it in a better way, please let me know (Helge.Voss@cern.ch)" 2144 <<
" for the time being I set it to its absolute value.. just to continue.." <<
Endl;
2148 Double_t boostWeight = err / (1.-err);
2153 for (std::vector<const TMVA::Event*>::const_iterator
e=eventSample.begin();
e!=eventSample.end();
e++) {
2155 results->
GetHist(
"BoostWeights")->
Fill(boostfactor);
2157 if ( (*e)->GetWeight() > 0 ){
2158 Float_t newBoostWeight = (*e)->GetBoostWeight() * boostfactor;
2159 Float_t newWeight = (*e)->GetWeight() * (*e)->GetBoostWeight() * boostfactor;
2160 if (newWeight == 0) {
2161 Log() << kINFO <<
"Weight= " << (*e)->GetWeight() <<
Endl;
2162 Log() << kINFO <<
"BoostWeight= " << (*e)->GetBoostWeight() <<
Endl;
2163 Log() << kINFO <<
"boostweight="<<boostWeight <<
" err= " <<err <<
Endl;
2164 Log() << kINFO <<
"NewBoostWeight= " << newBoostWeight <<
Endl;
2165 Log() << kINFO <<
"boostfactor= " << boostfactor <<
Endl;
2166 Log() << kINFO <<
"maxDev = " << maxDev <<
Endl;
2168 Log() << kINFO <<
"target = " << (*e)->GetTarget(0) <<
Endl;
2171 (*e)->SetBoostWeight( newBoostWeight );
2174 (*e)->SetBoostWeight( (*e)->GetBoostWeight() / boostfactor);
2176 newSumw+=(*e)->GetWeight();
2180 Double_t normWeight = sumw / newSumw;
2181 for (std::vector<const TMVA::Event*>::const_iterator
e=eventSample.begin();
e!=eventSample.end();
e++) {
2184 (*e)->SetBoostWeight( (*e)->GetBoostWeight() * normWeight );
2191 fBoostWeight = boostWeight;
2192 fErrorFraction = err;
2204 if (fDoPreselection){
2205 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++){
2206 gTools().
AddAttr( wght,
Form(
"PreselectionLowBkgVar%d",ivar), fIsLowBkgCut[ivar]);
2207 gTools().
AddAttr( wght,
Form(
"PreselectionLowBkgVar%dValue",ivar), fLowBkgCut[ivar]);
2208 gTools().
AddAttr( wght,
Form(
"PreselectionLowSigVar%d",ivar), fIsLowSigCut[ivar]);
2209 gTools().
AddAttr( wght,
Form(
"PreselectionLowSigVar%dValue",ivar), fLowSigCut[ivar]);
2210 gTools().
AddAttr( wght,
Form(
"PreselectionHighBkgVar%d",ivar), fIsHighBkgCut[ivar]);
2211 gTools().
AddAttr( wght,
Form(
"PreselectionHighBkgVar%dValue",ivar),fHighBkgCut[ivar]);
2212 gTools().
AddAttr( wght,
Form(
"PreselectionHighSigVar%d",ivar), fIsHighSigCut[ivar]);
2213 gTools().
AddAttr( wght,
Form(
"PreselectionHighSigVar%dValue",ivar),fHighSigCut[ivar]);
2219 gTools().
AddAttr( wght,
"AnalysisType", fForest.back()->GetAnalysisType() );
2221 for (
UInt_t i=0; i< fForest.size(); i++) {
2222 void* trxml = fForest[i]->AddXMLTo(wght);
2233 for (i=0; i<fForest.size(); i++)
delete fForest[i];
2235 fBoostWeights.clear();
2242 if (
gTools().HasAttr( parent,
Form(
"PreselectionLowBkgVar%d",0))) {
2243 fIsLowBkgCut.resize(GetNvar());
2244 fLowBkgCut.resize(GetNvar());
2245 fIsLowSigCut.resize(GetNvar());
2246 fLowSigCut.resize(GetNvar());
2247 fIsHighBkgCut.resize(GetNvar());
2248 fHighBkgCut.resize(GetNvar());
2249 fIsHighSigCut.resize(GetNvar());
2250 fHighSigCut.resize(GetNvar());
2254 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++){
2256 fIsLowBkgCut[ivar]=tmpBool;
2258 fLowBkgCut[ivar]=tmpDouble;
2260 fIsLowSigCut[ivar]=tmpBool;
2262 fLowSigCut[ivar]=tmpDouble;
2264 fIsHighBkgCut[ivar]=tmpBool;
2266 fHighBkgCut[ivar]=tmpDouble;
2268 fIsHighSigCut[ivar]=tmpBool;
2270 fHighSigCut[ivar]=tmpDouble;
2277 if(
gTools().HasAttr(parent,
"TreeType")) {
2288 fForest.back()->SetTreeID(i++);
2290 fBoostWeights.push_back(boostWeight);
2302 Int_t analysisType(0);
2305 istr >>
dummy >> fNTrees;
2306 Log() << kINFO <<
"Read " << fNTrees <<
" Decision trees" <<
Endl;
2308 for (
UInt_t i=0;i<fForest.size();i++)
delete fForest[i];
2310 fBoostWeights.clear();
2313 for (
int i=0;i<fNTrees;i++) {
2314 istr >>
dummy >> iTree >>
dummy >> boostWeight;
2316 fForest.back()->Print( std::cout );
2317 Log() << kFATAL <<
"Error while reading weight file; mismatch iTree=" 2318 << iTree <<
" i=" << i
2319 <<
" dummy " <<
dummy 2320 <<
" boostweight " << boostWeight
2325 fForest.back()->SetTreeID(i);
2326 fForest.back()->
Read(istr, GetTrainingTMVAVersionCode());
2327 fBoostWeights.push_back(boostWeight);
2334 return this->GetMvaValue( err, errUpper, 0 );
2344 const Event* ev = GetEvent();
2345 if (fDoPreselection) {
2346 Double_t val = ApplyPreselectionCuts(ev);
2349 return PrivateGetMvaValue(ev, err, errUpper, useNTrees);
2361 NoErrorCalc(err, errUpper);
2365 UInt_t nTrees = fForest.size();
2367 if (useNTrees > 0 ) nTrees = useNTrees;
2369 if (fBoostType==
"Grad")
return GetGradBoostMVA(ev,nTrees);
2373 for (
UInt_t itree=0; itree<nTrees; itree++) {
2375 myMVA += fBoostWeights[itree] * fForest[itree]->CheckEvent(ev,fUseYesNoLeaf);
2376 norm += fBoostWeights[itree];
2388 if (fMulticlassReturnVal == NULL) fMulticlassReturnVal =
new std::vector<Float_t>();
2389 fMulticlassReturnVal->clear();
2391 UInt_t nClasses = DataInfo().GetNClasses();
2392 std::vector<Double_t> temp(nClasses);
2393 auto forestSize = fForest.size();
2397 for (
UInt_t itree = 0; itree < forestSize; ++itree) {
2398 temp[classOfTree] += fForest[itree]->CheckEvent(
e,
kFALSE);
2399 if (++classOfTree == nClasses) classOfTree = 0;
2404 std::transform(temp.begin(), temp.end(), temp.begin(), [](
Double_t d){
return exp(d);});
2406 for(
UInt_t iClass=0; iClass<nClasses; iClass++){
2408 for(
UInt_t j=0;j<nClasses;j++){
2410 norm += temp[j] / temp[iClass];
2412 (*fMulticlassReturnVal).push_back(1.0/(1.0+norm));
2415 return *fMulticlassReturnVal;
2424 if (fRegressionReturnVal == NULL) fRegressionReturnVal =
new std::vector<Float_t>();
2425 fRegressionReturnVal->clear();
2427 const Event * ev = GetEvent();
2432 if (fBoostType==
"AdaBoostR2") {
2443 vector< Double_t > response(fForest.size());
2444 vector< Double_t > weight(fForest.size());
2447 for (
UInt_t itree=0; itree<fForest.size(); itree++) {
2448 response[itree] = fForest[itree]->CheckEvent(ev,
kFALSE);
2449 weight[itree] = fBoostWeights[itree];
2450 totalSumOfWeights += fBoostWeights[itree];
2453 std::vector< std::vector<Double_t> > vtemp;
2454 vtemp.push_back( response );
2455 vtemp.push_back( weight );
2460 while (sumOfWeights <= totalSumOfWeights/2.) {
2461 sumOfWeights += vtemp[1][t];
2475 else if(fBoostType==
"Grad"){
2476 for (
UInt_t itree=0; itree<fForest.size(); itree++) {
2477 myMVA += fForest[itree]->CheckEvent(ev,
kFALSE);
2480 evT->
SetTarget(0, myMVA+fBoostWeights[0] );
2483 for (
UInt_t itree=0; itree<fForest.size(); itree++) {
2485 myMVA += fBoostWeights[itree] * fForest[itree]->CheckEvent(ev,
kFALSE);
2486 norm += fBoostWeights[itree];
2494 const Event* evT2 = GetTransformationHandler().InverseTransform( evT );
2495 fRegressionReturnVal->push_back( evT2->
GetTarget(0) );
2500 return *fRegressionReturnVal;
2509 Log() << kDEBUG <<
"\tWrite monitoring histograms to file: " << BaseDir()->GetPath() <<
Endl;
2513 fMonitorNtuple->
Write();
2524 fVariableImportance.resize(GetNvar());
2525 for (
UInt_t ivar = 0; ivar < GetNvar(); ivar++) {
2526 fVariableImportance[ivar]=0;
2529 for (
UInt_t itree = 0; itree < GetNTrees(); itree++) {
2530 std::vector<Double_t> relativeImportance(fForest[itree]->GetVariableImportance());
2531 for (
UInt_t i=0; i< relativeImportance.size(); i++) {
2532 fVariableImportance[i] += fBoostWeights[itree] * relativeImportance[i];
2536 for (
UInt_t ivar=0; ivar< fVariableImportance.size(); ivar++){
2537 fVariableImportance[ivar] =
TMath::Sqrt(fVariableImportance[ivar]);
2538 sum += fVariableImportance[ivar];
2540 for (
UInt_t ivar=0; ivar< fVariableImportance.size(); ivar++) fVariableImportance[ivar] /=
sum;
2542 return fVariableImportance;
2552 std::vector<Double_t> relativeImportance = this->GetVariableImportance();
2553 if (ivar < (
UInt_t)relativeImportance.size())
return relativeImportance[ivar];
2554 else Log() << kFATAL <<
"<GetVariableImportance> ivar = " << ivar <<
" is out of range " <<
Endl;
2566 vector< Double_t> importance(this->GetVariableImportance());
2568 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++) {
2570 fRanking->AddRank(
Rank( GetInputLabel(ivar), importance[ivar] ) );
2584 Log() <<
"Boosted Decision Trees are a collection of individual decision" <<
Endl;
2585 Log() <<
"trees which form a multivariate classifier by (weighted) majority " <<
Endl;
2586 Log() <<
"vote of the individual trees. Consecutive decision trees are " <<
Endl;
2587 Log() <<
"trained using the original training data set with re-weighted " <<
Endl;
2588 Log() <<
"events. By default, the AdaBoost method is employed, which gives " <<
Endl;
2589 Log() <<
"events that were misclassified in the previous tree a larger " <<
Endl;
2590 Log() <<
"weight in the training of the following tree." <<
Endl;
2592 Log() <<
"Decision trees are a sequence of binary splits of the data sample" <<
Endl;
2593 Log() <<
"using a single discriminant variable at a time. A test event " <<
Endl;
2594 Log() <<
"ending up after the sequence of left-right splits in a final " <<
Endl;
2595 Log() <<
"(\"leaf\") node is classified as either signal or background" <<
Endl;
2596 Log() <<
"depending on the majority type of training events in that node." <<
Endl;
2600 Log() <<
"By the nature of the binary splits performed on the individual" <<
Endl;
2601 Log() <<
"variables, decision trees do not deal well with linear correlations" <<
Endl;
2602 Log() <<
"between variables (they need to approximate the linear split in" <<
Endl;
2603 Log() <<
"the two dimensional space by a sequence of splits on the two " <<
Endl;
2604 Log() <<
"variables individually). Hence decorrelation could be useful " <<
Endl;
2605 Log() <<
"to optimise the BDT performance." <<
Endl;
2609 Log() <<
"The two most important parameters in the configuration are the " <<
Endl;
2610 Log() <<
"minimal number of events requested by a leaf node as percentage of the " <<
Endl;
2611 Log() <<
" number of training events (option \"MinNodeSize\" replacing the actual number " <<
Endl;
2612 Log() <<
" of events \"nEventsMin\" as given in earlier versions" <<
Endl;
2613 Log() <<
"If this number is too large, detailed features " <<
Endl;
2614 Log() <<
"in the parameter space are hard to be modelled. If it is too small, " <<
Endl;
2615 Log() <<
"the risk to overtrain rises and boosting seems to be less effective" <<
Endl;
2616 Log() <<
" typical values from our current experience for best performance " <<
Endl;
2617 Log() <<
" are between 0.5(%) and 10(%) " <<
Endl;
2619 Log() <<
"The default minimal number is currently set to " <<
Endl;
2620 Log() <<
" max(20, (N_training_events / N_variables^2 / 10)) " <<
Endl;
2621 Log() <<
"and can be changed by the user." <<
Endl;
2623 Log() <<
"The other crucial parameter, the pruning strength (\"PruneStrength\")," <<
Endl;
2624 Log() <<
"is also related to overtraining. It is a regularisation parameter " <<
Endl;
2625 Log() <<
"that is used when determining after the training which splits " <<
Endl;
2626 Log() <<
"are considered statistically insignificant and are removed. The" <<
Endl;
2627 Log() <<
"user is advised to carefully watch the BDT screen output for" <<
Endl;
2628 Log() <<
"the comparison between efficiencies obtained on the training and" <<
Endl;
2629 Log() <<
"the independent test sample. They should be equal within statistical" <<
Endl;
2630 Log() <<
"errors, in order to minimize statistical fluctuations in different samples." <<
Endl;
2642 fout <<
" std::vector<"<<nodeName<<
"*> fForest; // i.e. root nodes of decision trees" << std::endl;
2643 fout <<
" std::vector<double> fBoostWeights; // the weights applied in the individual boosts" << std::endl;
2644 fout <<
"};" << std::endl << std::endl;
2645 fout <<
"double " << className <<
"::GetMvaValue__( const std::vector<double>& inputValues ) const" << std::endl;
2646 fout <<
"{" << std::endl;
2647 fout <<
" double myMVA = 0;" << std::endl;
2648 if (fDoPreselection){
2649 for (
UInt_t ivar = 0; ivar< fIsLowBkgCut.size(); ivar++){
2650 if (fIsLowBkgCut[ivar]){
2651 fout <<
" if (inputValues["<<ivar<<
"] < " << fLowBkgCut[ivar] <<
") return -1; // is background preselection cut" << std::endl;
2653 if (fIsLowSigCut[ivar]){
2654 fout <<
" if (inputValues["<<ivar<<
"] < "<< fLowSigCut[ivar] <<
") return 1; // is signal preselection cut" << std::endl;
2656 if (fIsHighBkgCut[ivar]){
2657 fout <<
" if (inputValues["<<ivar<<
"] > "<<fHighBkgCut[ivar] <<
") return -1; // is background preselection cut" << std::endl;
2659 if (fIsHighSigCut[ivar]){
2660 fout <<
" if (inputValues["<<ivar<<
"] > "<<fHighSigCut[ivar]<<
") return 1; // is signal preselection cut" << std::endl;
2665 if (fBoostType!=
"Grad"){
2666 fout <<
" double norm = 0;" << std::endl;
2668 fout <<
" for (unsigned int itree=0; itree<fForest.size(); itree++){" << std::endl;
2669 fout <<
" "<<nodeName<<
" *current = fForest[itree];" << std::endl;
2670 fout <<
" while (current->GetNodeType() == 0) { //intermediate node" << std::endl;
2671 fout <<
" if (current->GoesRight(inputValues)) current=("<<nodeName<<
"*)current->GetRight();" << std::endl;
2672 fout <<
" else current=("<<nodeName<<
"*)current->GetLeft();" << std::endl;
2673 fout <<
" }" << std::endl;
2674 if (fBoostType==
"Grad"){
2675 fout <<
" myMVA += current->GetResponse();" << std::endl;
2677 if (fUseYesNoLeaf) fout <<
" myMVA += fBoostWeights[itree] * current->GetNodeType();" << std::endl;
2678 else fout <<
" myMVA += fBoostWeights[itree] * current->GetPurity();" << std::endl;
2679 fout <<
" norm += fBoostWeights[itree];" << std::endl;
2681 fout <<
" }" << std::endl;
2682 if (fBoostType==
"Grad"){
2683 fout <<
" return 2.0/(1.0+exp(-2.0*myMVA))-1.0;" << std::endl;
2685 else fout <<
" return myMVA /= norm;" << std::endl;
2686 fout <<
"};" << std::endl << std::endl;
2687 fout <<
"void " << className <<
"::Initialize()" << std::endl;
2688 fout <<
"{" << std::endl;
2690 for (
UInt_t itree=0; itree<GetNTrees(); itree++) {
2691 fout <<
" // itree = " << itree << std::endl;
2692 fout <<
" fBoostWeights.push_back(" << fBoostWeights[itree] <<
");" << std::endl;
2693 fout <<
" fForest.push_back( " << std::endl;
2694 this->MakeClassInstantiateNode((
DecisionTreeNode*)fForest[itree]->GetRoot(), fout, className);
2695 fout <<
" );" << std::endl;
2697 fout <<
" return;" << std::endl;
2698 fout <<
"};" << std::endl;
2699 fout <<
" " << std::endl;
2700 fout <<
"// Clean up" << std::endl;
2701 fout <<
"inline void " << className <<
"::Clear() " << std::endl;
2702 fout <<
"{" << std::endl;
2703 fout <<
" for (unsigned int itree=0; itree<fForest.size(); itree++) { " << std::endl;
2704 fout <<
" delete fForest[itree]; " << std::endl;
2705 fout <<
" }" << std::endl;
2706 fout <<
"}" << std::endl;
2718 fout <<
"#define NN new "<<nodeName << std::endl;
2720 fout <<
" " << std::endl;
2721 fout <<
"#ifndef "<<nodeName<<
"__def" << std::endl;
2722 fout <<
"#define "<<nodeName<<
"__def" << std::endl;
2723 fout <<
" " << std::endl;
2724 fout <<
"class "<<nodeName<<
" {" << std::endl;
2725 fout <<
" " << std::endl;
2726 fout <<
"public:" << std::endl;
2727 fout <<
" " << std::endl;
2728 fout <<
" // constructor of an essentially \"empty\" node floating in space" << std::endl;
2729 fout <<
" "<<nodeName<<
" ( "<<nodeName<<
"* left,"<<nodeName<<
"* right," << std::endl;
2730 if (fUseFisherCuts){
2731 fout <<
" int nFisherCoeff," << std::endl;
2732 for (
UInt_t i=0;i<GetNVariables()+1;i++){
2733 fout <<
" double fisherCoeff"<<i<<
"," << std::endl;
2736 fout <<
" int selector, double cutValue, bool cutType, " << std::endl;
2737 fout <<
" int nodeType, double purity, double response ) :" << std::endl;
2738 fout <<
" fLeft ( left )," << std::endl;
2739 fout <<
" fRight ( right )," << std::endl;
2740 if (fUseFisherCuts) fout <<
" fNFisherCoeff ( nFisherCoeff )," << std::endl;
2741 fout <<
" fSelector ( selector )," << std::endl;
2742 fout <<
" fCutValue ( cutValue )," << std::endl;
2743 fout <<
" fCutType ( cutType )," << std::endl;
2744 fout <<
" fNodeType ( nodeType )," << std::endl;
2745 fout <<
" fPurity ( purity )," << std::endl;
2746 fout <<
" fResponse ( response ){" << std::endl;
2747 if (fUseFisherCuts){
2748 for (
UInt_t i=0;i<GetNVariables()+1;i++){
2749 fout <<
" fFisherCoeff.push_back(fisherCoeff"<<i<<
");" << std::endl;
2752 fout <<
" }" << std::endl << std::endl;
2753 fout <<
" virtual ~"<<nodeName<<
"();" << std::endl << std::endl;
2754 fout <<
" // test event if it descends the tree at this node to the right" << std::endl;
2755 fout <<
" virtual bool GoesRight( const std::vector<double>& inputValues ) const;" << std::endl;
2756 fout <<
" "<<nodeName<<
"* GetRight( void ) {return fRight; };" << std::endl << std::endl;
2757 fout <<
" // test event if it descends the tree at this node to the left " << std::endl;
2758 fout <<
" virtual bool GoesLeft ( const std::vector<double>& inputValues ) const;" << std::endl;
2759 fout <<
" "<<nodeName<<
"* GetLeft( void ) { return fLeft; }; " << std::endl << std::endl;
2760 fout <<
" // return S/(S+B) (purity) at this node (from training)" << std::endl << std::endl;
2761 fout <<
" double GetPurity( void ) const { return fPurity; } " << std::endl;
2762 fout <<
" // return the node type" << std::endl;
2763 fout <<
" int GetNodeType( void ) const { return fNodeType; }" << std::endl;
2764 fout <<
" double GetResponse(void) const {return fResponse;}" << std::endl << std::endl;
2765 fout <<
"private:" << std::endl << std::endl;
2766 fout <<
" "<<nodeName<<
"* fLeft; // pointer to the left daughter node" << std::endl;
2767 fout <<
" "<<nodeName<<
"* fRight; // pointer to the right daughter node" << std::endl;
2768 if (fUseFisherCuts){
2769 fout <<
" int fNFisherCoeff; // =0 if this node doesn't use fisher, else =nvar+1 " << std::endl;
2770 fout <<
" std::vector<double> fFisherCoeff; // the fisher coeff (offset at the last element)" << std::endl;
2772 fout <<
" int fSelector; // index of variable used in node selection (decision tree) " << std::endl;
2773 fout <<
" double fCutValue; // cut value applied on this node to discriminate bkg against sig" << std::endl;
2774 fout <<
" bool fCutType; // true: if event variable > cutValue ==> signal , false otherwise" << std::endl;
2775 fout <<
" int fNodeType; // Type of node: -1 == Bkg-leaf, 1 == Signal-leaf, 0 = internal " << std::endl;
2776 fout <<
" double fPurity; // Purity of node from training"<< std::endl;
2777 fout <<
" double fResponse; // Regression response value of node" << std::endl;
2778 fout <<
"}; " << std::endl;
2779 fout <<
" " << std::endl;
2780 fout <<
"//_______________________________________________________________________" << std::endl;
2781 fout <<
" "<<nodeName<<
"::~"<<nodeName<<
"()" << std::endl;
2782 fout <<
"{" << std::endl;
2783 fout <<
" if (fLeft != NULL) delete fLeft;" << std::endl;
2784 fout <<
" if (fRight != NULL) delete fRight;" << std::endl;
2785 fout <<
"}; " << std::endl;
2786 fout <<
" " << std::endl;
2787 fout <<
"//_______________________________________________________________________" << std::endl;
2788 fout <<
"bool "<<nodeName<<
"::GoesRight( const std::vector<double>& inputValues ) const" << std::endl;
2789 fout <<
"{" << std::endl;
2790 fout <<
" // test event if it descends the tree at this node to the right" << std::endl;
2791 fout <<
" bool result;" << std::endl;
2792 if (fUseFisherCuts){
2793 fout <<
" if (fNFisherCoeff == 0){" << std::endl;
2794 fout <<
" result = (inputValues[fSelector] > fCutValue );" << std::endl;
2795 fout <<
" }else{" << std::endl;
2796 fout <<
" double fisher = fFisherCoeff.at(fFisherCoeff.size()-1);" << std::endl;
2797 fout <<
" for (unsigned int ivar=0; ivar<fFisherCoeff.size()-1; ivar++)" << std::endl;
2798 fout <<
" fisher += fFisherCoeff.at(ivar)*inputValues.at(ivar);" << std::endl;
2799 fout <<
" result = fisher > fCutValue;" << std::endl;
2800 fout <<
" }" << std::endl;
2802 fout <<
" result = (inputValues[fSelector] > fCutValue );" << std::endl;
2804 fout <<
" if (fCutType == true) return result; //the cuts are selecting Signal ;" << std::endl;
2805 fout <<
" else return !result;" << std::endl;
2806 fout <<
"}" << std::endl;
2807 fout <<
" " << std::endl;
2808 fout <<
"//_______________________________________________________________________" << std::endl;
2809 fout <<
"bool "<<nodeName<<
"::GoesLeft( const std::vector<double>& inputValues ) const" << std::endl;
2810 fout <<
"{" << std::endl;
2811 fout <<
" // test event if it descends the tree at this node to the left" << std::endl;
2812 fout <<
" if (!this->GoesRight(inputValues)) return true;" << std::endl;
2813 fout <<
" else return false;" << std::endl;
2814 fout <<
"}" << std::endl;
2815 fout <<
" " << std::endl;
2816 fout <<
"#endif" << std::endl;
2817 fout <<
" " << std::endl;
2826 Log() << kFATAL <<
"MakeClassInstantiateNode: started with undefined node" <<
Endl;
2829 fout <<
"NN("<<std::endl;
2830 if (
n->GetLeft() != NULL){
2831 this->MakeClassInstantiateNode( (
DecisionTreeNode*)
n->GetLeft() , fout, className);
2836 fout <<
", " <<std::endl;
2837 if (
n->GetRight() != NULL){
2838 this->MakeClassInstantiateNode( (
DecisionTreeNode*)
n->GetRight(), fout, className );
2843 fout <<
", " << std::endl
2844 << std::setprecision(6);
2845 if (fUseFisherCuts){
2846 fout <<
n->GetNFisherCoeff() <<
", ";
2847 for (
UInt_t i=0; i< GetNVariables()+1; i++) {
2848 if (
n->GetNFisherCoeff() == 0 ){
2851 fout <<
n->GetFisherCoeff(i) <<
", ";
2855 fout <<
n->GetSelector() <<
", " 2856 <<
n->GetCutValue() <<
", " 2857 <<
n->GetCutType() <<
", " 2858 <<
n->GetNodeType() <<
", " 2859 <<
n->GetPurity() <<
"," 2860 <<
n->GetResponse() <<
") ";
2871 Int_t nTotS_unWeighted = 0, nTotB_unWeighted = 0;
2873 std::vector<TMVA::BDTEventWrapper> bdtEventSample;
2875 fIsLowSigCut.assign(GetNvar(),
kFALSE);
2876 fIsLowBkgCut.assign(GetNvar(),
kFALSE);
2877 fIsHighSigCut.assign(GetNvar(),
kFALSE);
2878 fIsHighBkgCut.assign(GetNvar(),
kFALSE);
2880 fLowSigCut.assign(GetNvar(),0.);
2881 fLowBkgCut.assign(GetNvar(),0.);
2882 fHighSigCut.assign(GetNvar(),0.);
2883 fHighBkgCut.assign(GetNvar(),0.);
2888 for( std::vector<const TMVA::Event*>::const_iterator it = eventSample.begin(); it != eventSample.end(); ++it ) {
2889 if (DataInfo().IsSignal(*it)){
2890 nTotS += (*it)->GetWeight();
2894 nTotB += (*it)->GetWeight();
2900 for(
UInt_t ivar = 0; ivar < GetNvar(); ivar++ ) {
2902 std::sort( bdtEventSample.begin(),bdtEventSample.end() );
2904 Double_t bkgWeightCtr = 0.0, sigWeightCtr = 0.0;
2905 std::vector<TMVA::BDTEventWrapper>::iterator it = bdtEventSample.begin(), it_end = bdtEventSample.end();
2906 for( ; it != it_end; ++it ) {
2907 if (DataInfo().IsSignal(**it))
2908 sigWeightCtr += (**it)->GetWeight();
2910 bkgWeightCtr += (**it)->GetWeight();
2912 it->SetCumulativeWeight(
false,bkgWeightCtr);
2913 it->SetCumulativeWeight(
true,sigWeightCtr);
2918 Double_t dVal = (DataInfo().GetVariableInfo(ivar).GetMax() - DataInfo().GetVariableInfo(ivar).GetMin())/100. ;
2919 Double_t nSelS, nSelB, effS=0.05, effB=0.05, rejS=0.05, rejB=0.05;
2920 Double_t tmpEffS, tmpEffB, tmpRejS, tmpRejB;
2925 for(
UInt_t iev = 1; iev < bdtEventSample.size(); iev++) {
2928 nSelS = bdtEventSample[iev].GetCumulativeWeight(
true);
2929 nSelB = bdtEventSample[iev].GetCumulativeWeight(
false);
2931 tmpEffS=nSelS/nTotS;
2932 tmpEffB=nSelB/nTotB;
2935 if (nSelS==0 && tmpEffB>effB) {effB=tmpEffB; fLowBkgCut[ivar] = bdtEventSample[iev].GetVal() - dVal; fIsLowBkgCut[ivar]=
kTRUE;}
2936 else if (nSelB==0 && tmpEffS>effS) {effS=tmpEffS; fLowSigCut[ivar] = bdtEventSample[iev].GetVal() - dVal; fIsLowSigCut[ivar]=
kTRUE;}
2937 else if (nSelB==nTotB && tmpRejS>rejS) {rejS=tmpRejS; fHighSigCut[ivar] = bdtEventSample[iev].GetVal() + dVal; fIsHighSigCut[ivar]=
kTRUE;}
2938 else if (nSelS==nTotS && tmpRejB>rejB) {rejB=tmpRejB; fHighBkgCut[ivar] = bdtEventSample[iev].GetVal() + dVal; fIsHighBkgCut[ivar]=
kTRUE;}
2943 Log() << kDEBUG <<
" \tfound and suggest the following possible pre-selection cuts " <<
Endl;
2944 if (fDoPreselection)
Log() << kDEBUG <<
"\tthe training will be done after these cuts... and GetMVA value returns +1, (-1) for a signal (bkg) event that passes these cuts" <<
Endl;
2945 else Log() << kDEBUG <<
"\tas option DoPreselection was not used, these cuts however will not be performed, but the training will see the full sample"<<
Endl;
2946 for (
UInt_t ivar=0; ivar < GetNvar(); ivar++ ) {
2947 if (fIsLowBkgCut[ivar]){
2948 Log() << kDEBUG <<
" \tfound cut: Bkg if var " << ivar <<
" < " << fLowBkgCut[ivar] <<
Endl;
2950 if (fIsLowSigCut[ivar]){
2951 Log() << kDEBUG <<
" \tfound cut: Sig if var " << ivar <<
" < " << fLowSigCut[ivar] <<
Endl;
2953 if (fIsHighBkgCut[ivar]){
2954 Log() << kDEBUG <<
" \tfound cut: Bkg if var " << ivar <<
" > " << fHighBkgCut[ivar] <<
Endl;
2956 if (fIsHighSigCut[ivar]){
2957 Log() << kDEBUG <<
" \tfound cut: Sig if var " << ivar <<
" > " << fHighSigCut[ivar] <<
Endl;
2972 for (
UInt_t ivar=0; ivar < GetNvar(); ivar++ ) {
2973 if (fIsLowBkgCut[ivar]){
2974 if (ev->
GetValue(ivar) < fLowBkgCut[ivar]) result = -1;
2976 if (fIsLowSigCut[ivar]){
2977 if (ev->
GetValue(ivar) < fLowSigCut[ivar]) result = 1;
2979 if (fIsHighBkgCut[ivar]){
2980 if (ev->
GetValue(ivar) > fHighBkgCut[ivar]) result = -1;
2982 if (fIsHighSigCut[ivar]){
2983 if (ev->
GetValue(ivar) > fHighSigCut[ivar]) result = 1;
void Train(void)
BDT training.
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
std::string GetName(const std::string &scope_name)
void PreProcessNegativeEventWeights()
O.k.
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
double dist(Rotation3D const &r1, Rotation3D const &r2)
void GetBaggedSubSample(std::vector< const TMVA::Event *> &)
Fills fEventSample with fBaggedSampleFraction*NEvents random training events.
static long int sum(long int i)
Random number generator class based on M.
THist< 1, int, THistStatContent > TH1I
virtual Double_t PoissonD(Double_t mean)
Generates a random number according to a Poisson law.
MsgLogger & Endl(MsgLogger &ml)
Singleton class for Global types used by TMVA.
Double_t RegBoost(std::vector< const TMVA::Event *> &, DecisionTree *dt)
A special boosting only for Regression (not implemented).
void DeclareCompatibilityOptions()
Options that are used ONLY for the READER to ensure backward compatibility.
const Ranking * CreateRanking()
Compute ranking of input variables.
Double_t CheckEvent(const TMVA::Event *, Bool_t UseYesNoLeaf=kFALSE) const
the event e is put into the decision tree (starting at the root node) and the output is NodeType (sig...
void BDT(TString dataset, const TString &fin="TMVA.root")
Absolute Deviation BDT Loss Function.
TString & ReplaceAll(const TString &s1, const TString &s2)
virtual void SetName(const char *name)
Set the name of the TNamed.
THist< 1, float, THistStatContent, THistStatUncertainty > TH1F
void DeclareOptions()
Define the options (their key words).
Bool_t IsFloat() const
Returns kTRUE if string contains a floating point or integer number.
void DeterminePreselectionCuts(const std::vector< const TMVA::Event *> &eventSample)
Find useful preselection cuts that will be applied before any decision tree training.
void MakeClassInstantiateNode(DecisionTreeNode *n, std::ostream &fout, const TString &className) const
Recursively descends a tree and writes the node instance to the output stream.
Virtual base Class for all MVA method.
Double_t Bagging()
Call it boot-strapping, re-sampling or whatever you like, in the end it is nothing else but applying ...
1-D histogram with a float per channel (see TH1 documentation).
Ranking for variables in method (implementation)
Short_t Min(Short_t a, Short_t b)
virtual void SetYTitle(const char *title)
virtual void SetTitle(const char *title="")
Set graph title.
Double_t AdaBoost(std::vector< const TMVA::Event *> &, DecisionTree *dt)
The AdaBoost implementation.
void ProcessOptions()
The option string is decoded, for available options see "DeclareOptions".
Int_t FloorNint(Double_t x)
void GetHelpMessage() const
Get help message text.
Double_t AdaCost(std::vector< const TMVA::Event *> &, DecisionTree *dt)
The AdaCost boosting algorithm takes a simple cost Matrix (currently fixed for all events...
void MakeClassSpecific(std::ostream &, const TString &) const
Make ROOT-independent C++ class for classifier response (classifier-specific implementation).
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
LongDouble_t Power(LongDouble_t x, LongDouble_t y)
Double_t GetGradBoostMVA(const TMVA::Event *e, UInt_t nTrees)
Returns MVA value: -1 for background, 1 for signal.
Implementation of the CrossEntropy as separation criterion.
Double_t GradBoostRegression(std::vector< const TMVA::Event *> &, DecisionTree *dt)
Implementation of M_TreeBoost using any loss function as described by Friedman 1999.
virtual void SetTuneParameters(std::map< TString, Double_t > tuneParameters)
Set the tuning parameters according to the argument.
void MakeClassSpecificHeader(std::ostream &, const TString &) const
Specific class header.
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
BDT can handle classification with multiple classes and regression with one regression-target.
void Reset(void)
Reset the method, as if it had just been instantiated (forget all training etc.). ...
TString & Append(const char *cs)
void SetMinNodeSize(Double_t sizeInPercent)
Double_t AdaBoostR2(std::vector< const TMVA::Event *> &, DecisionTree *dt)
Adaptation of AdaBoost to regression problems (see H. Drucker 1997).
virtual Int_t Read(const char *name)
Read contents of object with specified name from the current directory.
Class that contains all the data information.
Least Squares BDT Loss Function.
Implementation of the SdivSqrtSplusB as separation criterion.
PDF wrapper for histograms; uses user-defined spline interpolation.
const std::vector< Float_t > & GetMulticlassValues()
Get the multiclass MVA response for the BDT classifier.
Implementation of the MisClassificationError as separation criterion.
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
const std::vector< Float_t > & GetRegressionValues()
Get the regression value generated by the BDTs.
void InitEventSample()
Initialize the event sample (i.e. reset the boost-weights... etc).
void WriteMonitoringHistosToFile(void) const
Here we could write some histograms created during the processing to the output file.
virtual void Delete(Option_t *option="")
Delete this object.
VecExpr< UnaryOp< Fabs< T >, VecExpr< A, T, D >, T >, T, D > fabs(const VecExpr< A, T, D > &rhs)
virtual Double_t Determinant() const
Float_t GetTarget(UInt_t itgt) const
std::string GetMethodName(TCppMethod_t)
Service class for 2-Dim histogram classes.
std::map< TString, Double_t > optimize()
TGraph * GetGraph(const TString &alias) const
void BoostMonitor(Int_t iTree)
Fills the ROCIntegral vs Itree from the testSample for the monitoring plots during the training ...
The TMVA::Interval Class.
virtual ~MethodBDT(void)
Destructor.
Implementation of the GiniIndex as separation criterion.
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content; see the convention for numbering bins in TH1::GetBin. In case the bin number is greater th...
Double_t PrivateGetMvaValue(const TMVA::Event *ev, Double_t *err=0, Double_t *errUpper=0, UInt_t useNTrees=0)
Return the MVA value (range [-1;1]) that classifies the event according to the majority vote from the...
Implementation of a Decision Tree.
Double_t GradBoost(std::vector< const TMVA::Event *> &, DecisionTree *dt, UInt_t cls=0)
Calculate the desired response value for each region.
char * Form(const char *fmt,...)
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
SeparationBase * fSepType
void Init(void)
Common initialisation with defaults for the BDT-Method.
void ReadWeightsFromXML(void *parent)
Reads the BDT from the xml file.
TMVA::DecisionTreeNode * GetEventNode(const TMVA::Event &e) const
get the pointer to the leaf node where a particular event ends up in...
Double_t TestTreeQuality(DecisionTree *dt)
Test the tree quality in terms of misclassification.
Implementation of the GiniIndex With Laplace correction as separation criterion.
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
static void SetVarIndex(Int_t iVar)
void Print(std::ostream &os, const OptionType &opt)
void ReadWeightsFromStream(std::istream &istr)
Read the weights (BDT coefficients).
Double_t ApplyPreselectionCuts(const Event *ev)
Apply the preselection cuts before even bothering about any Decision Trees in the GetMVA ...
void UpdateTargets(std::vector< const TMVA::Event *> &, UInt_t cls=0)
Calculate residual for all events.
TH1 * GetHist(const TString &alias) const
void AddWeightsXMLTo(void *parent) const
Write weights to XML.
static DecisionTree * CreateFromXML(void *node, UInt_t tmva_Version_Code=TMVA_VERSION_CODE)
re-create a new tree (decision tree or search tree) from XML
static RooMathCoreReg dummy
you should not use this method at all Int_t Int_t Double_t Double_t Double_t e
Double_t Boost(std::vector< const TMVA::Event *> &, DecisionTree *dt, UInt_t cls=0)
Apply the boosting algorithm (the algorithm is selected via the "option" given in the constructor...
The TMVA::Interval Class.
LossFunctionBDT * fRegressionLossFunctionBDTG
TMatrixTSym< Element > & Invert(Double_t *det=0)
Invert the matrix and calculate its determinant. Notice that the LU decomposition is used instead of B...
virtual std::map< TString, Double_t > OptimizeTuningParameters(TString fomType="ROCIntegral", TString fitType="FitGA")
Call the Optimizer with the set of parameters and ranges that are meant to be tuned.
#define REGISTER_METHOD(CLASS)
for example
Abstract ClassifierFactory template that handles arbitrary types.
virtual void SetXTitle(const char *title)
virtual void SetPoint(Int_t i, Double_t x, Double_t y)
Set x and y values for point number i.
virtual void DeclareCompatibilityOptions()
Options that are used ONLY for the READER to ensure backward compatibility; they are hence without any...
Class that is the base-class for a vector of result.
Short_t Max(Short_t a, Short_t b)
A Graph is a graphics object made of two arrays X and Y with npoints each.
void DrawProgressBar(Int_t, const TString &comment="")
Draws progress bar in color or B&W. Caution:
std::vector< Double_t > GetVariableImportance()
Return the relative variable importance, normalized to all variables together having the importance 1...
Double_t Atof() const
Return floating-point value contained in string.
void UpdateTargetsRegression(std::vector< const TMVA::Event *> &, Bool_t first=kFALSE)
Calculate current residuals for all events and update targets for next iteration. ...
A TTree object has a header with a name and a title.
void Store(TObject *obj, const char *alias=0)
static const Int_t fgDebugLevel
Double_t Sqrt(Double_t x)
virtual void Set(Int_t n)
Set number of points in the graph. Existing coordinates are preserved. New coordinates above fNpoints a...
THist< 2, float, THistStatContent, THistStatUncertainty > TH2F
Timing information for training and evaluation of MVA methods.
Analysis of Boosted Decision Trees.
Int_t CeilNint(Double_t x)
void InitGradBoost(std::vector< const TMVA::Event *> &)
Initialize targets for first tree.
virtual const char * GetTitle() const
Returns title of object.
MethodBDT(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
The standard constructor for the "boosted decision trees".