81 , fGDPathStep ( 0.01 )
82 , fGDNPathSteps ( 1000 )
107 if (fNTCoeff) {
delete fNTCoeff; fNTCoeff = 0; }
108 if (fNTLinCoeff) {
delete fNTLinCoeff;fNTLinCoeff = 0; }
117 if (fRuleFit==0)
return;
118 if (fRuleFit->GetMethodRuleFit()==0) {
119 Log() << kFATAL <<
"RuleFitParams::Init() - MethodRuleFit ptr is null" <<
Endl;
121 UInt_t neve = fRuleFit->GetTrainingEvents().size();
123 fRuleEnsemble = fRuleFit->GetRuleEnsemblePtr();
124 fNRules = fRuleEnsemble->GetNRules();
125 fNLinear = fRuleEnsemble->GetNLinear();
134 fPerfIdx2 =
static_cast<UInt_t>((neve-1)*fRuleFit->GetMethodRuleFit()->GetGDValidEveFrac());
139 ofs = neve - fPerfIdx2 - 1;
149 fPathIdx2 =
static_cast<UInt_t>((neve-1)*fRuleFit->GetMethodRuleFit()->GetGDPathEveFrac());
158 for (
UInt_t ie=fPathIdx1; ie<fPathIdx2+1; ie++) {
159 fNEveEffPath += fRuleFit->GetTrainingEventWeight(ie);
163 for (
UInt_t ie=fPerfIdx1; ie<fPerfIdx2+1; ie++) {
164 fNEveEffPerf += fRuleFit->GetTrainingEventWeight(ie);
167 Log() << kVERBOSE <<
"Path constr. - event index range = [ " << fPathIdx1 <<
", " << fPathIdx2 <<
" ]" 168 <<
", effective N(events) = " << fNEveEffPath <<
Endl;
169 Log() << kVERBOSE <<
"Error estim. - event index range = [ " << fPerfIdx1 <<
", " << fPerfIdx2 <<
" ]" 170 <<
", effective N(events) = " << fNEveEffPerf <<
Endl;
172 if (fRuleEnsemble->DoRules())
173 Log() << kDEBUG <<
"Number of rules in ensemble: " << fNRules <<
Endl;
175 Log() << kDEBUG <<
"Rules are disabled " <<
Endl;
177 if (fRuleEnsemble->DoLinear())
178 Log() << kDEBUG <<
"Number of linear terms: " << fNLinear <<
Endl;
180 Log() << kDEBUG <<
"Linear terms are disabled " <<
Endl;
188 fGDNtuple=
new TTree(
"MonitorNtuple_RuleFitParams",
"RuleFit path search");
189 fGDNtuple->Branch(
"risk", &fNTRisk,
"risk/D");
190 fGDNtuple->Branch(
"error", &fNTErrorRate,
"error/D");
191 fGDNtuple->Branch(
"nuval", &fNTNuval,
"nuval/D");
192 fGDNtuple->Branch(
"coefrad", &fNTCoefRad,
"coefrad/D");
193 fGDNtuple->Branch(
"offset", &fNTOffset,
"offset/D");
195 fNTCoeff = (fNRules >0 ?
new Double_t[fNRules] : 0);
196 fNTLinCoeff = (fNLinear>0 ?
new Double_t[fNLinear] : 0);
198 for (
UInt_t i=0; i<fNRules; i++) {
199 fGDNtuple->Branch(
Form(
"a%d",i+1),&fNTCoeff[i],
Form(
"a%d/D",i+1));
201 for (
UInt_t i=0; i<fNLinear; i++) {
202 fGDNtuple->Branch(
Form(
"b%d",i+1),&fNTLinCoeff[i],
Form(
"b%d/D",i+1));
210 std::vector<Double_t> &avsel,
211 std::vector<Double_t> &avrul )
213 UInt_t neve = ind2-ind1+1;
215 Log() << kFATAL <<
"<EvaluateAverage> - no events selected for path search -> BUG!" <<
Endl;
221 if (fNLinear>0) avsel.resize(fNLinear,0);
222 if (fNRules>0) avrul.resize(fNRules,0);
223 const std::vector<UInt_t> *eventRuleMap=0;
229 if (fRuleEnsemble->IsRuleMapOK()) {
230 for (
UInt_t i=ind1; i<ind2+1; i++) {
231 ew = fRuleFit->GetTrainingEventWeight(i);
233 for (
UInt_t sel=0; sel<fNLinear; sel++ ) {
234 avsel[sel] += ew*fRuleEnsemble->EvalLinEvent(i,sel);
238 if (fRuleEnsemble->DoRules()) {
239 eventRuleMap = &(fRuleEnsemble->GetEventRuleMap(i));
240 nrules = (*eventRuleMap).size();
243 avrul[(*eventRuleMap)[
r]] += ew;
248 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
249 for (
UInt_t i=ind1; i<ind2+1; i++) {
250 ew = fRuleFit->GetTrainingEventWeight(i);
253 fRuleEnsemble->EvalLinEvent(*((*events)[i]));
254 fRuleEnsemble->EvalEvent(*((*events)[i]));
256 for (
UInt_t sel=0; sel<fNLinear; sel++ ) {
257 avsel[sel] += ew*fRuleEnsemble->GetEventLinearValNorm(sel);
261 avrul[
r] += ew*fRuleEnsemble->GetEventRuleVal(
r);
266 for (
UInt_t sel=0; sel<fNLinear; sel++ ) {
267 avsel[sel] = avsel[sel] / sumew;
271 avrul[
r] = avrul[
r] / sumew;
282 Double_t diff = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&
e)?1:-1) -
h;
284 return diff*diff*
e.GetWeight();
294 Double_t diff = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(fRuleEnsemble->GetRuleMapEvent( evtidx ))?1:-1) -
h;
296 return diff*diff*fRuleFit->GetTrainingEventWeight(evtidx);
305 Double_t e = fRuleEnsemble->EvalEvent( evtidx , fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau]);
307 Double_t diff = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(fRuleEnsemble->GetRuleMapEvent( evtidx ))?1:-1) -
h;
309 return diff*diff*fRuleFit->GetTrainingEventWeight(evtidx);
317 UInt_t neve = ind2-ind1+1;
319 Log() << kFATAL <<
"<Risk> Invalid start/end indices! BUG!!!" <<
Endl;
324 for (
UInt_t i=ind1; i<ind2+1; i++) {
337 UInt_t neve = ind2-ind1+1;
339 Log() << kFATAL <<
"<Risk> Invalid start/end indices! BUG!!!" <<
Endl;
344 for (
UInt_t i=ind1; i<ind2+1; i++) {
359 Log() << kWARNING <<
"<Penalty> Using unverified code! Check!" <<
Endl;
361 const std::vector<Double_t> *lincoeff = & (fRuleEnsemble->GetLinCoefficients());
362 for (
UInt_t i=0; i<fNRules; i++) {
363 rval +=
TMath::Abs(fRuleEnsemble->GetRules(i)->GetCoefficient());
365 for (
UInt_t i=0; i<fNLinear; i++) {
392 fGDTauVec.resize( fGDNTau );
394 fGDTauVec[0] = fGDTau;
398 Double_t dtau = (fGDTauMax - fGDTauMin)/static_cast<Double_t>(fGDNTau-1);
399 for (
UInt_t itau=0; itau<fGDNTau; itau++) {
400 fGDTauVec[itau] =
static_cast<Double_t>(itau)*dtau + fGDTauMin;
401 if (fGDTauVec[itau]>1.0) fGDTauVec[itau]=1.0;
409 fGradVecLinTst.clear();
414 fGDCoefLinTst.clear();
418 fGDCoefTst.resize(fGDNTau);
419 fGradVec.resize(fNRules,0);
420 fGradVecTst.resize(fGDNTau);
421 for (
UInt_t i=0; i<fGDNTau; i++) {
422 fGradVecTst[i].resize(fNRules,0);
423 fGDCoefTst[i].resize(fNRules,0);
428 fGDCoefLinTst.resize(fGDNTau);
429 fGradVecLin.resize(fNLinear,0);
430 fGradVecLinTst.resize(fGDNTau);
431 for (
UInt_t i=0; i<fGDNTau; i++) {
432 fGradVecLinTst[i].resize(fNLinear,0);
433 fGDCoefLinTst[i].resize(fNLinear,0);
438 fGDErrTst.resize(fGDNTau,0);
439 fGDErrTstOK.resize(fGDNTau,
kTRUE);
440 fGDOfsTst.resize(fGDNTau,0);
441 fGDNTauTstOK = fGDNTau;
452 if (fGDNTau<2)
return 0;
453 if (fGDTauScan==0)
return 0;
455 if (fGDOfsTst.size()<1)
456 Log() << kFATAL <<
"BUG! FindGDTau() has been called BEFORE InitGD()." <<
Endl;
458 Log() << kINFO <<
"Estimating the cutoff parameter tau. The estimated time is a pessimistic maximum." <<
Endl;
461 UInt_t nscan = fGDTauScan;
478 Timer timer( nscan,
"RuleFit" );
481 MakeTstGradientVector();
483 UpdateTstCoefficients();
487 if ( (ip==0) || ((ip+1)%netst==0) ) {
489 itauMin = RiskPerfTst();
490 Log() << kVERBOSE <<
Form(
"%4d",ip+1) <<
". tau = " <<
Form(
"%4.4f",fGDTauVec[itauMin])
491 <<
" => error rate = " << fGDErrTst[itauMin] <<
Endl;
494 doloop = ((ip<nscan) && (fGDNTauTstOK>3));
496 if (
Log().GetMinType()>kVERBOSE)
504 Log() << kERROR <<
"<FindGDTau> number of scanned loops is zero! Should NOT see this message." <<
Endl;
506 fGDTau = fGDTauVec[itauMin];
507 fRuleEnsemble->SetCoefficients( fGDCoefTst[itauMin] );
508 fRuleEnsemble->SetLinCoefficients( fGDCoefLinTst[itauMin] );
509 fRuleEnsemble->SetOffset( fGDOfsTst[itauMin] );
510 Log() << kINFO <<
"Best path found with tau = " <<
Form(
"%4.4f",fGDTau)
541 Log() << kINFO <<
"GD path scan - the scan stops when the max num. of steps is reached or a min is found" 543 Log() << kVERBOSE <<
"Number of events used per path step = " << fPathIdx2-fPathIdx1+1 <<
Endl;
544 Log() << kVERBOSE <<
"Number of events used for error estimation = " << fPerfIdx2-fPerfIdx1+1 <<
Endl;
547 const Bool_t isVerbose = (
Log().GetMinType()<=kVERBOSE);
548 const Bool_t isDebug = (
Log().GetMinType()<=kDEBUG);
554 EvaluateAveragePath();
555 EvaluateAveragePerf();
558 Log() << kVERBOSE <<
"Creating GD path" <<
Endl;
559 Log() << kVERBOSE <<
" N(steps) = " << fGDNPathSteps <<
Endl;
560 Log() << kVERBOSE <<
" step = " << fGDPathStep <<
Endl;
561 Log() << kVERBOSE <<
" N(tau) = " << fGDNTau <<
Endl;
562 Log() << kVERBOSE <<
" N(tau steps) = " << fGDTauScan <<
Endl;
563 Log() << kVERBOSE <<
" tau range = [ " << fGDTauVec[0] <<
" , " << fGDTauVec[fGDNTau-1] <<
" ]" <<
Endl;
566 if (isDebug) InitNtuple();
578 std::vector<Double_t> coefsMin;
579 std::vector<Double_t> lincoefsMin;
594 std::vector<Double_t> valx;
595 std::vector<Double_t> valy;
596 std::vector<Double_t> valxy;
606 int imod = fGDNPathSteps/100;
607 if (imod<100) imod = std::min(100,fGDNPathSteps);
608 if (imod>100) imod=100;
611 fAverageTruth = -CalcAverageTruth();
612 offsetMin = fAverageTruth;
613 fRuleEnsemble->SetOffset(offsetMin);
614 fRuleEnsemble->ClearCoefficients(0);
615 fRuleEnsemble->ClearLinCoefficients(0);
616 for (
UInt_t i=0; i<fGDOfsTst.size(); i++) {
617 fGDOfsTst[i] = offsetMin;
619 Log() << kVERBOSE <<
"Obtained initial offset = " << offsetMin <<
Endl;
622 Int_t nprescan = FindGDTau();
637 Int_t stopCondition=0;
639 Log() << kINFO <<
"Fitting model..." <<
Endl;
641 Timer timer( fGDNPathSteps,
"RuleFit" );
645 if (isVerbose) t0 = clock();
646 MakeGradientVector();
648 tgradvec =
Double_t(clock()-t0)/CLOCKS_PER_SEC;
649 stgradvec += tgradvec;
653 if (isVerbose) t0 = clock();
654 UpdateCoefficients();
656 tupgrade =
Double_t(clock()-t0)/CLOCKS_PER_SEC;
657 stupgrade += tupgrade;
661 docheck = ((iloop==0) ||((iloop+1)%imod==0));
667 fNTNuval =
Double_t(iloop)*fGDPathStep;
672 if (isDebug) FillCoefficients();
673 fNTCoefRad = fRuleEnsemble->CoefficientRadius();
677 fNTRisk = RiskPath();
678 trisk =
Double_t(clock()-t0)/CLOCKS_PER_SEC;
685 if (fNTRisk>=
rprev) {
688 Log() <<
"Risk(i+1)>=Risk(i) in path" <<
Endl;
689 riskFlat=(nbadrisk>3);
691 Log() << kWARNING <<
"Chaotic behaviour of risk evolution" <<
Endl;
692 Log() <<
"--- STOPPING MINIMISATION ---" <<
Endl;
693 Log() <<
"This may be OK if minimum is already found" <<
Endl;
703 if (isVerbose) t0 = clock();
713 fNTErrorRate = errroc;
716 tperf =
Double_t(clock()-t0)/CLOCKS_PER_SEC;
723 if (fNTErrorRate<=errmin) {
724 errmin = fNTErrorRate;
727 fRuleEnsemble->GetCoefficients(coefsMin);
728 lincoefsMin = fRuleEnsemble->GetLinCoefficients();
729 offsetMin = fRuleEnsemble->GetOffset();
731 if ( fNTErrorRate > fGDErrScale*errmin) found =
kTRUE;
735 if (valx.size()==npreg) {
736 valx.erase(valx.begin());
737 valy.erase(valy.begin());
738 valxy.erase(valxy.begin());
740 valx.push_back(fNTNuval);
741 valy.push_back(fNTErrorRate);
742 valxy.push_back(fNTErrorRate*fNTNuval);
747 if (isDebug) fGDNtuple->Fill();
749 Log() << kVERBOSE <<
"ParamsIRE : " 751 <<
Form(
"%8d",iloop+1) <<
" " 752 <<
Form(
"%4.4f",fNTRisk) <<
" " 753 <<
Form(
"%4.4f",riskPerf) <<
" " 754 <<
Form(
"%4.4f",fNTRisk+riskPerf) <<
" " 770 Bool_t endOfLoop = (iloop==fGDNPathSteps);
771 if ( ((riskFlat) || (endOfLoop)) && (!found) ) {
775 else if (endOfLoop) {
779 Log() << kWARNING <<
"BUG TRAP: should not be here - still, this bug is harmless;)" <<
Endl;
780 errmin = fNTErrorRate;
783 fRuleEnsemble->GetCoefficients(coefsMin);
784 lincoefsMin = fRuleEnsemble->GetLinCoefficients();
785 offsetMin = fRuleEnsemble->GetOffset();
793 Log() << kINFO <<
"----------------------------------------------------------------" <<
Endl;
794 Log() << kINFO <<
"Found minimum at step " << indMin+1 <<
" with error = " << errmin <<
Endl;
795 Log() << kINFO <<
"Reason for ending loop: ";
796 switch (stopCondition) {
798 Log() << kINFO <<
"clear minima found";
801 Log() << kINFO <<
"chaotic behaviour of risk";
804 Log() << kINFO <<
"end of loop reached";
807 Log() << kINFO <<
"unknown!";
811 Log() << kINFO <<
"----------------------------------------------------------------" <<
Endl;
815 Log() << kWARNING <<
"Reached minimum early in the search" <<
Endl;
816 Log() <<
"Check results and maybe decrease GDStep size" <<
Endl;
826 Log() << kINFO <<
"The error rate was still decreasing at the end of the path" <<
Endl;
827 Log() << kINFO <<
"Increase number of steps (GDNSteps)." <<
Endl;
833 fRuleEnsemble->SetCoefficients( coefsMin );
834 fRuleEnsemble->SetLinCoefficients( lincoefsMin );
835 fRuleEnsemble->SetOffset( offsetMin );
838 Log() << kFATAL <<
"BUG TRAP: minimum not found in MakeGDPath()" <<
Endl;
845 Double_t stloop = strisk +stupgrade + stgradvec + stperf;
846 Log() << kVERBOSE <<
"Timing per loop (ms):" <<
Endl;
847 Log() << kVERBOSE <<
" gradvec = " << 1000*stgradvec/iloop <<
Endl;
848 Log() << kVERBOSE <<
" upgrade = " << 1000*stupgrade/iloop <<
Endl;
849 Log() << kVERBOSE <<
" risk = " << 1000*strisk/iloop <<
Endl;
850 Log() << kVERBOSE <<
" perf = " << 1000*stperf/iloop <<
Endl;
851 Log() << kVERBOSE <<
" loop = " << 1000*stloop/iloop <<
Endl;
854 Log() << kVERBOSE <<
" GDPtr = " << 1000*
gGDPtr/iloop <<
Endl;
863 if (isDebug) fGDNtuple->
Write();
871 fNTOffset = fRuleEnsemble->GetOffset();
873 for (
UInt_t i=0; i<fNRules; i++) {
874 fNTCoeff[i] = fRuleEnsemble->GetRules(i)->GetCoefficient();
876 for (
UInt_t i=0; i<fNLinear; i++) {
877 fNTLinCoeff[i] = fRuleEnsemble->GetLinCoefficients(i);
888 Log() << kWARNING <<
"<CalcFStar> Using unverified code! Check!" <<
Endl;
889 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
891 Log() << kFATAL <<
"<CalcFStar> Invalid start/end indices!" <<
Endl;
895 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
898 std::vector<Double_t> fstarSorted;
901 for (
UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
902 const Event&
e = *(*events)[i];
903 fstarVal = fRuleEnsemble->FStar(
e);
904 fFstar.push_back(fstarVal);
905 fstarSorted.push_back(fstarVal);
909 std::sort( fstarSorted.begin(), fstarSorted.end() );
912 fFstarMedian = 0.5*(fstarSorted[ind]+fstarSorted[ind-1]);
915 fFstarMedian = fstarSorted[ind];
928 Log() << kWARNING <<
"<Optimism> Using unverified code! Check!" <<
Endl;
929 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
931 Log() << kFATAL <<
"<Optimism> Invalid start/end indices!" <<
Endl;
934 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
945 for (
UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
946 const Event&
e = *(*events)[i];
947 yhat = fRuleEnsemble->EvalEvent(i);
948 y = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&
e) ? 1.0:-1.0);
949 w = fRuleFit->GetTrainingEventWeight(i)/fNEveEffPerf;
952 sumyhaty += w*yhat*
y;
957 Double_t cov = sumyhaty - sumyhat*sumy;
969 Log() << kWARNING <<
"<ErrorRateReg> Using unverified code! Check!" <<
Endl;
970 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
972 Log() << kFATAL <<
"<ErrorRateReg> Invalid start/end indices!" <<
Endl;
974 if (fFstar.size()!=neve) {
975 Log() << kFATAL <<
"--- RuleFitParams::ErrorRateReg() - F* not initialized! BUG!!!" 976 <<
" Fstar.size() = " << fFstar.size() <<
" , N(events) = " << neve <<
Endl;
981 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
990 for (
UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
991 const Event&
e = *(*events)[i];
992 sF = fRuleEnsemble->EvalEvent(
e );
994 sumdf +=
TMath::Abs(fFstar[i-fPerfIdx1] - sF);
995 sumdfmed +=
TMath::Abs(fFstar[i-fPerfIdx1] - fFstarMedian);
1000 return sumdf/sumdfmed;
1013 Log() << kWARNING <<
"<ErrorRateBin> Using unverified code! Check!" <<
Endl;
1014 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
1016 Log() << kFATAL <<
"<ErrorRateBin> Invalid start/end indices!" <<
Endl;
1019 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
1026 for (
UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
1027 const Event&
e = *(*events)[i];
1028 sF = fRuleEnsemble->EvalEvent(
e );
1030 signF = (sF>0 ? +1:-1);
1032 signy = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&
e) ? +1:-1);
1046 std::vector<Double_t> & sFbkg )
1049 std::sort(sFsig.begin(), sFsig.end());
1050 std::sort(sFbkg.begin(), sFbkg.end());
1051 const Double_t minsig = sFsig.front();
1052 const Double_t minbkg = sFbkg.front();
1053 const Double_t maxsig = sFsig.back();
1054 const Double_t maxbkg = sFbkg.back();
1055 const Double_t minf = std::min(minsig,minbkg);
1056 const Double_t maxf = std::max(maxsig,maxbkg);
1059 const Int_t np = std::min((nsig+nbkg)/4,50);
1060 const Double_t df = (maxf-minf)/(np-1);
1065 std::vector<Double_t>::const_iterator indit;
1080 for (
Int_t i=0; i<np; i++) {
1082 indit = std::find_if(sFsig.begin(), sFsig.end(),
1083 std::bind(std::greater_equal<Double_t>(), std::placeholders::_1, fcut));
1084 nesig = sFsig.end()-indit;
1087 indit = std::find_if(sFbkg.begin(), sFbkg.end(),
1088 std::bind(std::greater_equal<Double_t>(), std::placeholders::_1, fcut));
1089 nrbkg = indit-sFbkg.begin();
1101 area += 0.5*(1+rejb)*effs;
1114 Log() << kWARNING <<
"<ErrorRateRoc> Should not be used in the current version! Check!" <<
Endl;
1115 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
1117 Log() << kFATAL <<
"<ErrorRateRoc> Invalid start/end indices!" <<
Endl;
1120 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
1124 std::vector<Double_t> sFsig;
1125 std::vector<Double_t> sFbkg;
1131 for (
UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
1132 const Event&
e = *(*events)[i];
1133 sF = fRuleEnsemble->EvalEvent(i);
1134 if (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&
e)) {
1135 sFsig.push_back(sF);
1140 sFbkg.push_back(sF);
1145 fsigave = sumfsig/sFsig.size();
1146 fbkgave = sumfbkg/sFbkg.size();
1150 return ErrorRateRocRaw( sFsig, sFbkg );
1162 Log() << kWARNING <<
"<ErrorRateRocTst> Should not be used in the current version! Check!" <<
Endl;
1163 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
1165 Log() << kFATAL <<
"<ErrorRateRocTst> Invalid start/end indices!" <<
Endl;
1169 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
1173 std::vector< std::vector<Double_t> > sFsig;
1174 std::vector< std::vector<Double_t> > sFbkg;
1176 sFsig.resize( fGDNTau );
1177 sFbkg.resize( fGDNTau );
1180 for (
UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
1181 for (
UInt_t itau=0; itau<fGDNTau; itau++) {
1184 sF = fRuleEnsemble->EvalEvent( i, fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
1185 if (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])) {
1186 sFsig[itau].push_back(sF);
1189 sFbkg[itau].push_back(sF);
1195 for (
UInt_t itau=0; itau<fGDNTau; itau++) {
1196 err = ErrorRateRocRaw( sFsig[itau], sFbkg[itau] );
1197 fGDErrTst[itau] = err;
1208 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
1210 Log() << kFATAL <<
"<ErrorRateRocTst> Invalid start/end indices!" <<
Endl;
1220 for (
UInt_t itau=0; itau<fGDNTau; itau++) {
1221 if (fGDErrTstOK[itau]) {
1223 fGDErrTst[itau] = RiskPerf(itau);
1224 sumx += fGDErrTst[itau];
1225 sumx2 += fGDErrTst[itau]*fGDErrTst[itau];
1226 if (fGDErrTst[itau]>maxx) maxx=fGDErrTst[itau];
1227 if (fGDErrTst[itau]<minx) {
1228 minx=fGDErrTst[itau];
1238 for (
UInt_t itau=0; itau<fGDNTau; itau++) {
1239 if (fGDErrTstOK[itau]) {
1240 if (fGDErrTst[itau] > maxacc) {
1241 fGDErrTstOK[itau] =
kFALSE;
1250 Log() << kVERBOSE <<
"TAU: " 1266 UInt_t neve = fPathIdx1-fPathIdx2+1;
1268 Log() << kFATAL <<
"<MakeTstGradientVector> Invalid start/end indices!" <<
Endl;
1274 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
1277 for (
UInt_t itau=0; itau<fGDNTau; itau++) {
1278 if (fGDErrTstOK[itau]) {
1279 for (
UInt_t ir=0; ir<fNRules; ir++) {
1280 fGradVecTst[itau][ir]=0;
1282 for (
UInt_t il=0; il<fNLinear; il++) {
1283 fGradVecLinTst[itau][il]=0;
1292 const std::vector<UInt_t> *eventRuleMap=0;
1298 for (
UInt_t i=fPathIdx1; i<fPathIdx2+1; i++) {
1299 const Event *
e = (*events)[i];
1301 if (fRuleEnsemble->DoRules()) {
1302 eventRuleMap = &(fRuleEnsemble->GetEventRuleMap(i));
1303 nrules = (*eventRuleMap).size();
1305 for (
UInt_t itau=0; itau<fGDNTau; itau++) {
1308 if (fGDErrTstOK[itau]) {
1309 sF = fRuleEnsemble->EvalEvent( i, fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
1313 y = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(
e)?1.0:-1.0);
1314 r = norm*(
y - sF) * fRuleFit->GetTrainingEventWeight(i);
1316 for (
UInt_t ir=0; ir<nrules; ir++) {
1317 rind = (*eventRuleMap)[ir];
1318 fGradVecTst[itau][rind] +=
r;
1321 for (
UInt_t il=0; il<fNLinear; il++) {
1322 fGradVecLinTst[itau][il] +=
r*fRuleEnsemble->EvalLinEventRaw( il,i,
kTRUE );
1338 for (
UInt_t itau=0; itau<fGDNTau; itau++) {
1339 if (fGDErrTstOK[itau]) {
1341 maxr = ( (fNRules>0 ?
1342 TMath::Abs(*(std::max_element( fGradVecTst[itau].begin(), fGradVecTst[itau].end(),
AbsValue()))):0) );
1343 maxl = ( (fNLinear>0 ?
1344 TMath::Abs(*(std::max_element( fGradVecLinTst[itau].begin(), fGradVecLinTst[itau].end(),
AbsValue()))):0) );
1347 Double_t maxv = (maxr>maxl ? maxr:maxl);
1348 cthresh = maxv * fGDTauVec[itau];
1358 for (
UInt_t i=0; i<fNRules; i++) {
1359 val = fGradVecTst[itau][i];
1362 fGDCoefTst[itau][i] += fGDPathStep*val*stepScale;
1366 for (
UInt_t i=0; i<fNLinear; i++) {
1367 val = fGradVecLinTst[itau][i];
1369 fGDCoefLinTst[itau][i] += fGDPathStep*val*stepScale/fRuleEnsemble->GetLinNorm(i);
1376 CalcTstAverageResponse();
1388 UInt_t neve = fPathIdx2-fPathIdx1+1;
1390 Log() << kFATAL <<
"<MakeGradientVector> Invalid start/end indices!" <<
Endl;
1394 const Double_t norm = 2.0/fNEveEffPath;
1396 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
1399 for (
UInt_t ir=0; ir<fNRules; ir++) {
1402 for (
UInt_t il=0; il<fNLinear; il++) {
1410 const std::vector<UInt_t> *eventRuleMap=0;
1415 for (
UInt_t i=fPathIdx1; i<fPathIdx2+1; i++) {
1416 const Event *
e = (*events)[i];
1419 sF = fRuleEnsemble->EvalEvent( i );
1423 if (fRuleEnsemble->DoRules()) {
1424 eventRuleMap = &(fRuleEnsemble->GetEventRuleMap(i));
1425 nrules = (*eventRuleMap).size();
1427 y = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(
e)?1.0:-1.0);
1428 r = norm*(
y - sF) * fRuleFit->GetTrainingEventWeight(i);
1430 for (
UInt_t ir=0; ir<nrules; ir++) {
1431 rind = (*eventRuleMap)[ir];
1432 fGradVec[rind] +=
r;
1437 for (
UInt_t il=0; il<fNLinear; il++) {
1438 fGradVecLin[il] +=
r*fRuleEnsemble->EvalLinEventRaw( il, i,
kTRUE );
1450 Double_t maxr = ( (fRuleEnsemble->DoRules() ?
1452 Double_t maxl = ( (fRuleEnsemble->DoLinear() ?
1453 TMath::Abs(*(std::max_element( fGradVecLin.begin(), fGradVecLin.end(),
AbsValue()))):0) );
1455 Double_t maxv = (maxr>maxl ? maxr:maxl);
1463 useRThresh = cthresh;
1464 useLThresh = cthresh;
1472 for (
UInt_t i=0; i<fGradVec.size(); i++) {
1475 coef = fRuleEnsemble->GetRulesConst(i)->GetCoefficient() + fGDPathStep*gval;
1476 fRuleEnsemble->GetRules(i)->SetCoefficient(coef);
1481 for (
UInt_t i=0; i<fGradVecLin.size(); i++) {
1482 lval = fGradVecLin[i];
1484 lcoef = fRuleEnsemble->GetLinCoefficients(i) + (fGDPathStep*lval/fRuleEnsemble->GetLinNorm(i));
1485 fRuleEnsemble->SetLinCoefficient(i,lcoef);
1489 Double_t offset = CalcAverageResponse();
1490 fRuleEnsemble->SetOffset( offset );
1500 for (
UInt_t itau=0; itau<fGDNTau; itau++) {
1501 if (fGDErrTstOK[itau]) {
1502 fGDOfsTst[itau] = 0;
1504 fGDOfsTst[itau] -= fGDCoefLinTst[itau][
s] * fAverageSelectorPath[
s];
1507 fGDOfsTst[itau] -= fGDCoefTst[itau][
r] * fAverageRulePath[
r];
1523 ofs -= fRuleEnsemble->GetLinCoefficients(
s) * fAverageSelectorPath[
s];
1526 ofs -= fRuleEnsemble->GetRules(
r)->GetCoefficient() * fAverageRulePath[
r];
1536 if (fPathIdx2<=fPathIdx1) {
1537 Log() << kFATAL <<
"<CalcAverageTruth> Invalid start/end indices!" <<
Endl;
1543 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
1544 for (
UInt_t i=fPathIdx1; i<fPathIdx2+1; i++) {
1545 Double_t ew = fRuleFit->GetTrainingEventWeight(i);
1546 if (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])) ensig += ew;
1548 sum += ew*(fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])?1.0:-1.0);
1550 Log() << kVERBOSE <<
"Effective number of signal / background = " << ensig <<
" / " << enbkg <<
Endl;
1552 return sum/fNEveEffPath;
1558 return (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(
e) ? 1:-1);
1564 fLogger->SetMinType(t);
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
static long int sum(long int i)
MsgLogger & Endl(MsgLogger &ml)
void MakeGradientVector()
make gradient vector
void FillCoefficients()
helper function to store the rule coefficients in local arrays
Short_t Min(Short_t a, Short_t b)
void ErrorRateRocTst()
Estimates the error rate with the current set of parameters.
void MakeTstGradientVector()
make test gradient vector for all tau same algorithm as MakeGradientVector()
#define rprev(otri1, otri2)
Int_t FindGDTau()
This finds the cutoff parameter tau by scanning several different paths.
Double_t Risk(UInt_t ind1, UInt_t ind2, Double_t neff) const
risk assessment
Double_t CalcAverageTruth()
calculate the average truth
Double_t ErrorRateBin()
Estimates the error rate with the current set of parameters It uses a binary estimate of (y-F*(x)) (y...
void SetMsgType(EMsgType t)
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
Double_t LossFunction(const Event &e) const
Implementation of squared-error ramp loss function (eq 39,40 in ref 1) This is used for binary Classi...
Double_t ErrorRateReg()
Estimates the error rate with the current set of parameters This code is pretty messy at the moment...
Double_t ErrorRateRoc()
Estimates the error rate with the current set of parameters.
Double_t Optimism()
implementation of eq.
char * Form(const char *fmt,...)
Double_t Penalty() const
This is the "lasso" penalty To be used for regression.
void InitNtuple()
initializes the ntuple
void CalcFStar()
Estimates F* (optimum scoring function) for all events for the given sets.
static constexpr double s
virtual ~RuleFitParams()
destructor
you should not use this method at all Int_t Int_t Double_t Double_t Double_t e
void UpdateCoefficients()
Establish maximum gradient for rules, linear terms and the offset.
ostringstream derivative to redirect and format output
Double_t ErrorRateRocRaw(std::vector< Double_t > &sFsig, std::vector< Double_t > &sFbkg)
Estimates the error rate with the current set of parameters.
RuleFitParams()
constructor
Short_t Max(Short_t a, Short_t b)
void Init()
Initializes all parameters using the RuleEnsemble and the training tree.
Double_t CalcAverageResponse()
calculate the average response - TODO : rewrite bad dependancy on EvaluateAverage() ! ...
void DrawProgressBar(Int_t, const TString &comment="")
draws progress bar in color or B&W caution:
void InitGD()
Initialize GD path search.
A TTree object has a header with a name and a title.
void UpdateTstCoefficients()
Establish maximum gradient for rules, linear terms and the offset for all taus TODO: do not need inde...
UInt_t RiskPerfTst()
Estimates the error rate with the current set of parameters.
Double_t Sqrt(Double_t x)
void EvaluateAverage(UInt_t ind1, UInt_t ind2, std::vector< Double_t > &avsel, std::vector< Double_t > &avrul)
evaluate the average of each variable and f(x) in the given range
void MakeGDPath()
The following finds the gradient directed path in parameter space.
Timing information for training and evaluation of MVA methods.
Int_t Type(const Event *e) const
void CalcTstAverageResponse()
calc average response for all test paths - TODO: see comment under CalcAverageResponse() note that 0 ...