Logo ROOT   6.12/06
Reference Guide
TDFInterface.hxx
Go to the documentation of this file.
1 // Author: Enrico Guiraud, Danilo Piparo CERN 03/2017
2 
3 /*************************************************************************
4  * Copyright (C) 1995-2016, Rene Brun and Fons Rademakers. *
5  * All rights reserved. *
6  * *
7  * For the licensing terms see $ROOTSYS/LICENSE. *
8  * For the list of contributors see $ROOTSYS/README/CREDITS. *
9  *************************************************************************/
10 
11 #ifndef ROOT_TDF_TINTERFACE
12 #define ROOT_TDF_TINTERFACE
13 
14 #include "ROOT/TResultProxy.hxx"
15 #include "ROOT/TDataSource.hxx"
16 #include "ROOT/TDFNodes.hxx"
18 #include "ROOT/TDFHistoModels.hxx"
19 #include "ROOT/TDFUtils.hxx"
20 #include "RtypesCore.h" // for ULong64_t
21 #include "TChain.h"
22 #include "TH1.h" // For Histo actions
23 #include "TH2.h" // For Histo actions
24 #include "TH3.h" // For Histo actions
25 #include "TInterpreter.h"
26 #include "TProfile.h" // For Histo actions
27 #include "TProfile2D.h" // For Histo actions
28 #include "TRegexp.h"
29 #include "TROOT.h" // IsImplicitMTEnabled
30 #include "TTreeReader.h"
31 
32 #include <initializer_list>
33 #include <memory>
34 #include <string>
35 #include <sstream>
36 #include <typeinfo>
37 #include <type_traits> // is_same, enable_if
38 
39 namespace ROOT {
40 
41 namespace Experimental {
42 namespace TDF {
43 template <typename T>
44 class TInterface;
45 
46 } // end NS TDF
47 } // end NS Experimental
48 
49 namespace Internal {
50 namespace TDF {
51 using namespace ROOT::Experimental::TDF;
52 using namespace ROOT::Detail::TDF;
53 
54 // CACHE --------------
55 
56 template <typename T>
58 public:
59  using value_type = T; // A shortcut to the value_type of the vector
60  std::vector<value_type> fContent;
61  // This method returns a pointer since we treat these columns as if they come
62  // from a data source, i.e. we forward an entry point to valid memory rather
63  // than a value.
64  value_type *operator()(unsigned int /*slot*/, ULong64_t iEvent) { return &fContent[iEvent]; };
65 };
66 
67 // ENDCACHE------------
68 
69 /****** BuildAndBook overloads *******/
70 // BuildAndBook builds a TAction with the right operation and books it with the TLoopManager
71 
72 // Generic filling (covers Histo2D, Histo3D, Profile1D and Profile2D actions, with and without weights)
73 template <typename... BranchTypes, typename ActionType, typename ActionResultType, typename PrevNodeType>
74 TActionBase *BuildAndBook(const ColumnNames_t &bl, const std::shared_ptr<ActionResultType> &h,
75  const unsigned int nSlots, TLoopManager &loopManager, PrevNodeType &prevNode, ActionType *)
76 {
77  using Helper_t = FillTOHelper<ActionResultType>;
78  using Action_t = TAction<Helper_t, PrevNodeType, TTraits::TypeList<BranchTypes...>>;
79  auto action = std::make_shared<Action_t>(Helper_t(h, nSlots), bl, prevNode);
80  loopManager.Book(action);
81  return action.get();
82 }
83 
84 // Histo1D filling (must handle the special case of distinguishing FillTOHelper and FillHelper
85 template <typename... BranchTypes, typename PrevNodeType>
86 TActionBase *BuildAndBook(const ColumnNames_t &bl, const std::shared_ptr<::TH1D> &h, const unsigned int nSlots,
87  TLoopManager &loopManager, PrevNodeType &prevNode, ActionTypes::Histo1D *)
88 {
89  auto hasAxisLimits = HistoUtils<::TH1D>::HasAxisLimits(*h);
90 
91  TActionBase *actionBase;
92  if (hasAxisLimits) {
93  using Helper_t = FillTOHelper<::TH1D>;
94  using Action_t = TAction<Helper_t, PrevNodeType, TTraits::TypeList<BranchTypes...>>;
95  auto action = std::make_shared<Action_t>(Helper_t(h, nSlots), bl, prevNode);
96  loopManager.Book(action);
97  actionBase = action.get();
98  } else {
99  using Helper_t = FillHelper;
100  using Action_t = TAction<Helper_t, PrevNodeType, TTraits::TypeList<BranchTypes...>>;
101  auto action = std::make_shared<Action_t>(Helper_t(h, nSlots), bl, prevNode);
102  loopManager.Book(action);
103  actionBase = action.get();
104  }
105 
106  return actionBase;
107 }
108 
109 // Min action
110 template <typename BranchType, typename PrevNodeType, typename ActionResultType>
111 TActionBase *
112 BuildAndBook(const ColumnNames_t &bl, const std::shared_ptr<ActionResultType> &minV, const unsigned int nSlots,
113  TLoopManager &loopManager, PrevNodeType &prevNode, ActionTypes::Min *)
114 {
115  using Helper_t = MinHelper<ActionResultType>;
117  auto action = std::make_shared<Action_t>(Helper_t(minV, nSlots), bl, prevNode);
118  loopManager.Book(action);
119  return action.get();
120 }
121 
122 // Max action
123 template <typename BranchType, typename PrevNodeType, typename ActionResultType>
124 TActionBase *
125 BuildAndBook(const ColumnNames_t &bl, const std::shared_ptr<ActionResultType> &maxV, const unsigned int nSlots,
126  TLoopManager &loopManager, PrevNodeType &prevNode, ActionTypes::Max *)
127 {
128  using Helper_t = MaxHelper<ActionResultType>;
130  auto action = std::make_shared<Action_t>(Helper_t(maxV, nSlots), bl, prevNode);
131  loopManager.Book(action);
132  return action.get();
133 }
134 
135 // Sum action
136 template <typename BranchType, typename PrevNodeType, typename ActionResultType>
137 TActionBase *
138 BuildAndBook(const ColumnNames_t &bl, const std::shared_ptr<ActionResultType> &sumV, const unsigned int nSlots,
139  TLoopManager &loopManager, PrevNodeType &prevNode, ActionTypes::Sum *)
140 {
141  using Helper_t = SumHelper<ActionResultType>;
143  auto action = std::make_shared<Action_t>(Helper_t(sumV, nSlots), bl, prevNode);
144  loopManager.Book(action);
145  return action.get();
146 }
147 
148 // Mean action
149 template <typename BranchType, typename PrevNodeType>
150 TActionBase *BuildAndBook(const ColumnNames_t &bl, const std::shared_ptr<double> &meanV, const unsigned int nSlots,
151  TLoopManager &loopManager, PrevNodeType &prevNode, ActionTypes::Mean *)
152 {
153  using Helper_t = MeanHelper;
155  auto action = std::make_shared<Action_t>(Helper_t(meanV, nSlots), bl, prevNode);
156  loopManager.Book(action);
157  return action.get();
158 }
159 /****** end BuildAndBook ******/
160 
161 std::vector<std::string> FindUsedColumnNames(std::string_view, TObjArray *, const std::vector<std::string> &);
162 
163 using TmpBranchBasePtr_t = std::shared_ptr<TCustomColumnBase>;
164 
165 Long_t JitTransformation(void *thisPtr, std::string_view methodName, std::string_view interfaceTypeName,
167  const std::map<std::string, std::string> &aliasMap, const ColumnNames_t &branches,
168  const std::vector<std::string> &customColumns,
169  const std::map<std::string, TmpBranchBasePtr_t> &tmpBookedBranches, TTree *tree,
170  std::string_view returnTypeName, TDataSource *ds);
171 
172 std::string JitBuildAndBook(const ColumnNames_t &bl, const std::string &prevNodeTypename, void *prevNode,
173  const std::type_info &art, const std::type_info &at, const void *r, TTree *tree,
174  const unsigned int nSlots, const std::map<std::string, TmpBranchBasePtr_t> &customColumns,
175  TDataSource *ds, const std::shared_ptr<TActionBase *> *const actionPtrPtr);
176 
177 // allocate a shared_ptr on the heap, return a reference to it. the user is responsible of deleting the shared_ptr*.
178 // this function is meant to only be used by TInterface's action methods, and should be deprecated as soon as we find
179 // a better way to make jitting work: the problem it solves is that we need to pass the same shared_ptr to the Helper
180 // object of each action and to the TResultProxy returned by the action. While the former is only instantiated when
181 // the event loop is about to start, the latter has to be returned to the user as soon as the action is booked.
182 // a heap allocated shared_ptr will stay alive long enough that at jitting time its address is still valid.
183 template <typename T>
184 std::shared_ptr<T> *MakeSharedOnHeap(const std::shared_ptr<T> &shPtr)
185 {
186  return new std::shared_ptr<T>(shPtr);
187 }
188 
189 bool AtLeastOneEmptyString(const std::vector<std::string_view> strings);
190 
191 /* The following functions upcast shared ptrs to TFilter, TCustomColumn, TRange to their parent class (***Base).
192  * Shared ptrs to TLoopManager are just copied, as well as shared ptrs to ***Base classes. */
193 std::shared_ptr<TFilterBase> UpcastNode(const std::shared_ptr<TFilterBase> ptr);
194 std::shared_ptr<TCustomColumnBase> UpcastNode(const std::shared_ptr<TCustomColumnBase> ptr);
195 std::shared_ptr<TRangeBase> UpcastNode(const std::shared_ptr<TRangeBase> ptr);
196 std::shared_ptr<TLoopManager> UpcastNode(const std::shared_ptr<TLoopManager> ptr);
197 
198 ColumnNames_t GetValidatedColumnNames(TLoopManager &lm, const unsigned int nColumns, const ColumnNames_t &columns,
199  const ColumnNames_t &validCustomColumns, TDataSource *ds);
200 
201 std::vector<bool> FindUndefinedDSColumns(const ColumnNames_t &requestedCols, const ColumnNames_t &definedDSCols);
202 
203 /// Helper function to be used by `DefineDataSourceColumns`
204 template <typename T>
206 {
207  auto readers = ds.GetColumnReaders<T>(name);
208  auto getValue = [readers](unsigned int slot) { return *readers[slot]; };
210  lm.Book(std::make_shared<NewCol_t>(name, std::move(getValue), ColumnNames_t{}, &lm, /*isDSColumn=*/true));
212 }
213 
214 /// Take a list of data-source column names and define the ones that haven't been defined yet.
215 template <typename... ColumnTypes, int... S>
216 void DefineDataSourceColumns(const std::vector<std::string> &columns, TLoopManager &lm, StaticSeq<S...>,
217  TTraits::TypeList<ColumnTypes...>, TDataSource &ds)
218 {
219  const auto mustBeDefined = FindUndefinedDSColumns(columns, lm.GetCustomColumnNames());
220  if (std::none_of(mustBeDefined.begin(), mustBeDefined.end(), [](bool b) { return b; })) {
221  // no need to define any column
222  return;
223  } else {
224  // hack to expand a template parameter pack without c++17 fold expressions.
225  std::initializer_list<int> expander{
226  (mustBeDefined[S] ? DefineDSColumnHelper<ColumnTypes>(columns[S], lm, ds) : /*no-op*/ ((void)0), 0)...};
227  (void)expander; // avoid unused variable warnings
228  }
229 }
230 
231 /// Convenience function invoked by jitted code to build action nodes at runtime
232 template <typename ActionType, typename... BranchTypes, typename PrevNodeType, typename ActionResultType>
233 void CallBuildAndBook(PrevNodeType &prevNode, const ColumnNames_t &bl, const unsigned int nSlots,
234  const std::shared_ptr<ActionResultType> *rOnHeap,
235  const std::shared_ptr<TActionBase *> *actionPtrPtrOnHeap)
236 {
237  // if we are here it means we are jitting, if we are jitting the loop manager must be alive
238  auto &loopManager = *prevNode.GetImplPtr();
239  using ColTypes_t = TypeList<BranchTypes...>;
240  constexpr auto nColumns = ColTypes_t::list_size;
241  auto ds = loopManager.GetDataSource();
242  if (ds)
243  DefineDataSourceColumns(bl, loopManager, GenStaticSeq_t<nColumns>(), ColTypes_t(), *ds);
244  TActionBase *actionPtr =
245  BuildAndBook<BranchTypes...>(bl, *rOnHeap, nSlots, loopManager, prevNode, (ActionType *)nullptr);
246  **actionPtrPtrOnHeap = actionPtr;
247  delete rOnHeap;
248  delete actionPtrPtrOnHeap;
249 }
250 
251 /// The contained `type` alias is `double` if `T == TInferType`, `U` if `T == std::container<U>`, `T` otherwise.
252 template <typename T, bool Container = TTraits::IsContainer<T>::value>
254  using type = T;
255 };
256 
257 template <>
258 struct TMinReturnType<TDFDetail::TInferType, false> {
259  using type = double;
260 };
261 
262 template <typename T>
263 struct TMinReturnType<T, true> {
265 };
266 
267 } // namespace TDF
268 } // namespace Internal
269 
270 namespace Detail {
271 namespace TDF {
272 
273 /// The aliased type is `double` if `T == TInferType`, `U` if `T == container<U>`, `T` otherwise.
274 template <typename T>
276 
277 template <typename T>
279 
280 template <typename T>
282 
283 template <typename T, typename COLL = std::vector<T>>
285  // We cannot put in the output collection C arrays: the ownership is not defined.
286  // We therefore proceed to check if T is an TArrayBranch
287  // If yes, we check what type is the output collection and we rebuild it.
288  // E.g. if a vector<V> was the selected collection, where V is TArrayBranch<T>,
289  // the collection becomes vector<vector<T>>.
290  static constexpr auto isAB = TDFInternal::IsTArrayBranch_t<T>::value;
291  using RealT_t = typename TDFInternal::ValueType<T>::value_type;
292  using VTColl_t = std::vector<RealT_t>;
293 
294  using NewC0_t =
295  typename std::conditional<isAB && TDFInternal::IsVector_t<COLL>::value, std::vector<VTColl_t>, COLL>::type;
296  using NewC1_t =
297  typename std::conditional<isAB && TDFInternal::IsList_t<NewC0_t>::value, std::list<VTColl_t>, NewC0_t>::type;
298  using NewC2_t =
299  typename std::conditional<isAB && TDFInternal::IsDeque_t<NewC1_t>::value, std::deque<VTColl_t>, NewC1_t>::type;
301 };
302 template <typename T, typename C>
304 } // namespace TDF
305 } // namespace Detail
306 
307 namespace Experimental {
308 
309 // forward declarations
310 class TDataFrame;
311 
312 } // namespace Experimental
313 } // namespace ROOT
314 
315 namespace cling {
316 std::string printValue(ROOT::Experimental::TDataFrame *tdf); // For a nice printing at the prompt
317 }
318 
319 namespace ROOT {
320 namespace Experimental {
321 namespace TDF {
322 namespace TDFDetail = ROOT::Detail::TDF;
323 namespace TDFInternal = ROOT::Internal::TDF;
324 namespace TTraits = ROOT::TypeTraits;
325 
326 /**
327 * \class ROOT::Experimental::TDF::TInterface
328 * \ingroup dataframe
329 * \brief The public interface to the TDataFrame federation of classes
330 * \tparam T One of the "node" base types (e.g. TLoopManager, TFilterBase). The user never specifies this type manually.
331 */
332 template <typename Proxied>
333 class TInterface {
334  using ColumnNames_t = TDFDetail::ColumnNames_t;
339  friend std::string cling::printValue(::ROOT::Experimental::TDataFrame *tdf); // For a nice printing at the prompt
340  template <typename T>
341  friend class TInterface;
342 
343  const std::shared_ptr<Proxied> fProxiedPtr; ///< Smart pointer to the graph node encapsulated by this TInterface.
344  const std::weak_ptr<TLoopManager> fImplWeakPtr; ///< Weak pointer to the TLoopManager at the root of the graph.
345  ColumnNames_t fValidCustomColumns; ///< Names of columns `Define`d for this branch of the functional graph.
346  /// Non-owning pointer to a data-source object. Null if no data-source. TLoopManager has ownership of the object.
347  TDataSource *const fDataSource = nullptr;
348 
349 public:
350  /// \cond HIDDEN_SYMBOLS
351  // Template conversion operator, meant to use to convert TInterfaces of certain node types to TInterfaces of base
352  // classes of those node types, e.g. TInterface<TFilter<F,P>> -> TInterface<TFilterBase>.
353  // It is used implicitly when a call to Filter or Define is jitted: the jitted call must convert the
354  // TInterface returned by the jitted transformations to a TInterface<***Base> before returning.
355  // Must be public because it is cling that uses it.
356  template <typename NewProxied>
357  operator TInterface<NewProxied>()
358  {
359  static_assert(std::is_base_of<NewProxied, Proxied>::value,
360  "TInterface<T> can only be converted to TInterface<BaseOfT>");
361  return TInterface<NewProxied>(fProxiedPtr, fImplWeakPtr, fValidCustomColumns, fDataSource);
362  }
364  /// \endcond
365 
366  ////////////////////////////////////////////////////////////////////////////
367  /// \brief Copy-assignment operator for TInterface.
368  TInterface &operator=(const TInterface &) = default;
369 
370  ////////////////////////////////////////////////////////////////////////////
371  /// \brief Copy-ctor for TInterface.
372  TInterface(const TInterface &) = default;
373 
374  ////////////////////////////////////////////////////////////////////////////
375  /// \brief Move-ctor for TInterface.
376  TInterface(TInterface &&) = default;
377 
378  ////////////////////////////////////////////////////////////////////////////
379  /// \brief Append a filter to the call graph.
380  /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
381  /// signalling whether the event has passed the selection (true) or not (false).
382  /// \param[in] columns Names of the columns/branches in input to the filter function.
383  /// \param[in] name Optional name of this filter. See `Report`.
384  ///
385  /// Append a filter node at the point of the call graph corresponding to the
386  /// object this method is called on.
387  /// The callable `f` should not have side-effects (e.g. modification of an
388  /// external or static variable) to ensure correct results when implicit
389  /// multi-threading is active.
390  ///
391  /// TDataFrame only evaluates filters when necessary: if multiple filters
392  /// are chained one after another, they are executed in order and the first
393  /// one returning false causes the event to be discarded.
394  /// Even if multiple actions or transformations depend on the same filter,
395  /// it is executed once per entry. If its result is requested more than
396  /// once, the cached result is served.
397  template <typename F, typename std::enable_if<!std::is_convertible<F, std::string>::value, int>::type = 0>
399  {
400  TDFInternal::CheckFilter(f);
401  auto loopManager = GetDataFrameChecked();
402  using ColTypes_t = typename TTraits::CallableTraits<F>::arg_types;
403  constexpr auto nColumns = ColTypes_t::list_size;
404  const auto validColumnNames =
405  TDFInternal::GetValidatedColumnNames(*loopManager, nColumns, columns, fValidCustomColumns, fDataSource);
406  if (fDataSource)
407  TDFInternal::DefineDataSourceColumns(validColumnNames, *loopManager, TDFInternal::GenStaticSeq_t<nColumns>(),
408  ColTypes_t(), *fDataSource);
409  using F_t = TDFDetail::TFilter<F, Proxied>;
410  auto FilterPtr = std::make_shared<F_t>(std::move(f), validColumnNames, *fProxiedPtr, name);
411  loopManager->Book(FilterPtr);
412  return TInterface<F_t>(FilterPtr, fImplWeakPtr, fValidCustomColumns, fDataSource);
413  }
414 
415  ////////////////////////////////////////////////////////////////////////////
416  /// \brief Append a filter to the call graph.
417  /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
418  /// signalling whether the event has passed the selection (true) or not (false).
419  /// \param[in] name Optional name of this filter. See `Report`.
420  ///
421  /// Refer to the first overload of this method for the full documentation.
422  template <typename F, typename std::enable_if<!std::is_convertible<F, std::string>::value, int>::type = 0>
424  {
425  // The sfinae is there in order to pick up the overloaded method which accepts two strings
426  // rather than this template method.
427  return Filter(f, {}, name);
428  }
429 
430  ////////////////////////////////////////////////////////////////////////////
431  /// \brief Append a filter to the call graph.
432  /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
433  /// signalling whether the event has passed the selection (true) or not (false).
434  /// \param[in] columns Names of the columns/branches in input to the filter function.
435  ///
436  /// Refer to the first overload of this method for the full documentation.
437  template <typename F>
438  TInterface<TDFDetail::TFilter<F, Proxied>> Filter(F f, const std::initializer_list<std::string> &columns)
439  {
440  return Filter(f, ColumnNames_t{columns});
441  }
442 
443  ////////////////////////////////////////////////////////////////////////////
444  /// \brief Append a filter to the call graph.
445  /// \param[in] expression The filter expression in C++
446  /// \param[in] name Optional name of this filter. See `Report`.
447  ///
448  /// The expression is just-in-time compiled and used to filter entries. It must
449  /// be valid C++ syntax in which variable names are substituted with the names
450  /// of branches/columns.
451  ///
452  /// Refer to the first overload of this method for the full documentation.
454  {
455  auto retVal = CallJitTransformation("Filter", name, expression, "ROOT::Detail::TDF::TFilterBase");
456  return *(TInterface<TFilterBase> *)retVal;
457  }
458 
459  // clang-format off
460  ////////////////////////////////////////////////////////////////////////////
461  /// \brief Creates a custom column
462  /// \param[in] name The name of the custom column.
463  /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the temporary value. Returns the value that will be assigned to the custom column.
464  /// \param[in] columns Names of the columns/branches in input to the producer function.
465  ///
466  /// Create a custom column that will be visible from all subsequent nodes
467  /// of the functional chain. The `expression` is only evaluated for entries that pass
468  /// all the preceding filters.
469  /// A new variable is created called `name`, accessible as if it was contained
470  /// in the dataset from subsequent transformations/actions.
471  ///
472  /// Use cases include:
473  ///
474  /// * caching the results of complex calculations for easy and efficient multiple access
475  /// * extraction of quantities of interest from complex objects
476  /// * column aliasing, i.e. changing the name of a branch/column
477  ///
478  /// An exception is thrown if the name of the new column is already in use.
479  template <typename F, typename std::enable_if<!std::is_convertible<F, std::string>::value, int>::type = 0>
480  TInterface<Proxied> Define(std::string_view name, F expression, const ColumnNames_t &columns = {})
481  {
482  return DefineImpl<F, TDFDetail::TCCHelperTypes::TNothing>(name, std::move(expression), columns);
483  }
484  // clang-format on
485 
486  // clang-format off
487  ////////////////////////////////////////////////////////////////////////////
488  /// \brief Creates a custom column with a value dependent on the processing slot.
489  /// \param[in] name The name of the custom column.
490  /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the temporary value. Returns the value that will be assigned to the custom column.
491  /// \param[in] columns Names of the columns/branches in input to the producer function (excluding the slot number).
492  ///
493  /// This alternative implementation of `Define` is meant as a helper in writing thread-safe custom columns.
494  /// The expression must be a callable of signature R(unsigned int, T1, T2, ...) where `T1, T2...` are the types
495  /// of the columns that the expression takes as input. The first parameter is reserved for an unsigned integer
496  /// representing a "slot number". TDataFrame guarantees that different threads will invoke the expression with
497  /// different slot numbers - slot numbers will range from zero to ROOT::GetImplicitMTPoolSize()-1.
498  ///
499  /// The following two calls are equivalent, although `DefineSlot` is slightly more performant:
500  /// ~~~{.cpp}
501  /// int function(unsigned int, double, double);
502  /// Define("x", function, {"tdfslot_", "column1", "column2"})
503  /// DefineSlot("x", function, {"column1", "column2"})
504  /// ~~~
505  ///
506  /// See Define for more information.
507  template <typename F>
509  {
510  return DefineImpl<F, TDFDetail::TCCHelperTypes::TSlot>(name, std::move(expression), columns);
511  }
512  // clang-format on
513 
514  // clang-format off
515  ////////////////////////////////////////////////////////////////////////////
516  /// \brief Creates a custom column with a value dependent on the processing slot and the current entry.
517  /// \param[in] name The name of the custom column.
518  /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the temporary value. Returns the value that will be assigned to the custom column.
519  /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot and entry).
520  ///
521  /// This alternative implementation of `Define` is meant as a helper in writing entry-specific, thread-safe custom
522  /// columns. The expression must be a callable of signature R(unsigned int, ULong64_t, T1, T2, ...) where `T1, T2...`
523  /// are the types of the columns that the expression takes as input. The first parameter is reserved for an unsigned
524  /// integer representing a "slot number". TDataFrame guarantees that different threads will invoke the expression with
525  /// different slot numbers - slot numbers will range from zero to ROOT::GetImplicitMTPoolSize()-1. The second parameter
526  /// is reserved for a `ULong64_t` representing the current entry being processed by the current thread.
527  ///
528  /// The following two `Define`s are equivalent, although `DefineSlotEntry` is slightly more performant:
529  /// ~~~{.cpp}
530  /// int function(unsigned int, ULong64_t, double, double);
531  /// Define("x", function, {"tdfslot_", "tdfentry_", "column1", "column2"})
532  /// DefineSlotEntry("x", function, {"column1", "column2"})
533  /// ~~~
534  ///
535  /// See Define for more information.
536  template <typename F>
538  {
539  return DefineImpl<F, TDFDetail::TCCHelperTypes::TSlotAndEntry>(name, std::move(expression), columns);
540  }
541  // clang-format on
542 
543  ////////////////////////////////////////////////////////////////////////////
544  /// \brief Creates a custom column
545  /// \param[in] name The name of the custom column.
546  /// \param[in] expression An expression in C++ which represents the temporary value
547  ///
548  /// The expression is just-in-time compiled and used to produce the column entries. The
549  /// It must be valid C++ syntax in which variable names are substituted with the names
550  /// of branches/columns.
551  ///
552  /// Refer to the first overload of this method for the full documentation.
553  TInterfaceJittedDefine Define(std::string_view name, std::string_view expression)
554  {
555  auto loopManager = GetDataFrameChecked();
556  // this check must be done before jitting lest we throw exceptions in jitted code
557  TDFInternal::CheckCustomColumn(name, loopManager->GetTree(), loopManager->GetCustomColumnNames(),
558  fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{});
559  auto retVal = CallJitTransformation("Define", name, expression, TInterfaceJittedDefine::GetNodeTypeName());
560  auto retInterface = reinterpret_cast<TInterfaceJittedDefine *>(retVal);
561  return *retInterface;
562  }
563 
564  ////////////////////////////////////////////////////////////////////////////
565  /// \brief Allow to refer to a column with a different name
566  /// \param[in] alias name of the column alias
567  /// \param[in] columnName of the column to be aliased
568  /// Aliasing an alias is supported.
570  {
571  // The symmetry with Define is clear. We want to:
572  // - Create globally the alias and return this very node, unchanged
573  // - Make aliases accessible based on chains and not globally
574  auto loopManager = GetDataFrameChecked();
575 
576  // Helper to find out if a name is a column
577  auto &dsColumnNames = fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{};
578 
579  // If the alias name is a column name, there is a problem
580  TDFInternal::CheckCustomColumn(alias, loopManager->GetTree(), fValidCustomColumns, dsColumnNames);
581 
582  const auto validColumnName = TDFInternal::GetValidatedColumnNames(*loopManager, 1, {std::string(columnName)},
583  fValidCustomColumns, fDataSource)[0];
584 
585  loopManager->AddColumnAlias(std::string(alias), validColumnName);
586  TInterface<Proxied> newInterface(fProxiedPtr, fImplWeakPtr, fValidCustomColumns, fDataSource);
587  newInterface.fValidCustomColumns.emplace_back(alias);
588  return newInterface;
589  }
590 
591  ////////////////////////////////////////////////////////////////////////////
592  /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
593  /// \tparam BranchTypes variadic list of branch/column types
594  /// \param[in] treename The name of the output TTree
595  /// \param[in] filename The name of the output TFile
596  /// \param[in] columnList The list of names of the columns/branches to be written
597  /// \param[in] options TSnapshotOptions struct with extra options to pass to TFile and TTree
598  ///
599  /// This function returns a `TDataFrame` built with the output tree as a source.
600  template <typename... BranchTypes>
602  Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList,
603  const TSnapshotOptions &options = TSnapshotOptions())
604  {
605  return SnapshotImpl<BranchTypes...>(treename, filename, columnList, options);
606  }
607 
608  ////////////////////////////////////////////////////////////////////////////
609  /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
610  /// \param[in] treename The name of the output TTree
611  /// \param[in] filename The name of the output TFile
612  /// \param[in] columnList The list of names of the columns/branches to be written
613  /// \param[in] options TSnapshotOptions struct with extra options to pass to TFile and TTree
614  ///
615  /// This function returns a `TDataFrame` built with the output tree as a source.
616  /// The types of the columns are automatically inferred and do not need to be specified.
618  const ColumnNames_t &columnList,
619  const TSnapshotOptions &options = TSnapshotOptions())
620  {
621  // Early return: if the list of columns is empty, just return an empty TDF
622  // If we proceed, the jitted call will not compile!
623  if (columnList.empty()) {
624  auto nEntries = *this->Count();
625  TInterface<TLoopManager> emptyTDF(std::make_shared<TLoopManager>(nEntries));
626  return emptyTDF;
627  }
628  auto df = GetDataFrameChecked();
629  auto tree = df->GetTree();
630  std::stringstream snapCall;
631  auto upcastNode = TDFInternal::UpcastNode(fProxiedPtr);
632  TInterface<TTraits::TakeFirstParameter_t<decltype(upcastNode)>> upcastInterface(fProxiedPtr, fImplWeakPtr,
633  fValidCustomColumns, fDataSource);
634  // build a string equivalent to
635  // "(TInterface<nodetype*>*)(this)->Snapshot<Ts...>(treename,filename,*(ColumnNames_t*)(&columnList), options)"
636  snapCall << "reinterpret_cast<ROOT::Experimental::TDF::TInterface<" << upcastInterface.GetNodeTypeName() << ">*>("
637  << &upcastInterface << ")->Snapshot<";
638  bool first = true;
639  for (auto &b : columnList) {
640  if (!first)
641  snapCall << ", ";
642  snapCall << TDFInternal::ColumnName2ColumnTypeName(b, tree, df->GetBookedBranch(b), fDataSource);
643  first = false;
644  };
645  snapCall << ">(\"" << treename << "\", \"" << filename << "\", "
646  << "*reinterpret_cast<std::vector<std::string>*>(" // vector<string> should be ColumnNames_t
647  << &columnList << "),"
648  << "*reinterpret_cast<ROOT::Experimental::TDF::TSnapshotOptions*>(" << &options << "));";
649  // jit snapCall, return result
650  TInterpreter::EErrorCode errorCode;
651  auto newTDFPtr = gInterpreter->Calc(snapCall.str().c_str(), &errorCode);
652  if (TInterpreter::EErrorCode::kNoError != errorCode) {
653  std::string msg = "Cannot jit Snapshot call. Interpreter error code is " + std::to_string(errorCode) + ".";
654  throw std::runtime_error(msg);
655  }
656  return *reinterpret_cast<TInterface<TLoopManager> *>(newTDFPtr);
657  }
658 
659  // clang-format off
660  ////////////////////////////////////////////////////////////////////////////
661  /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
662  /// \param[in] treename The name of the output TTree
663  /// \param[in] filename The name of the output TFile
664  /// \param[in] columnNameRegexp The regular expression to match the column names to be selected. The presence of a '^' and a '$' at the end of the string is implicitly assumed if they are not specified. See the documentation of TRegexp for more details. An empty string signals the selection of all columns.
665  /// \param[in] options TSnapshotOptions struct with extra options to pass to TFile and TTree
666  ///
667  /// This function returns a `TDataFrame` built with the output tree as a source.
668  /// The types of the columns are automatically inferred and do not need to be specified.
670  std::string_view columnNameRegexp = "",
671  const TSnapshotOptions &options = TSnapshotOptions())
672  {
673  auto selectedColumns = ConvertRegexToColumns(columnNameRegexp, "Snapshot");
674  return Snapshot(treename, filename, selectedColumns, options);
675  }
676  // clang-format on
677 
678  ////////////////////////////////////////////////////////////////////////////
679  /// \brief Save selected columns in memory
680  /// \param[in] columns to be cached in memory
681  ///
682  /// The content of the selected columns is saved in memory exploiting the functionality offered by
683  /// the Take action. No extra copy is carried out when serving cached data to the actions and
684  /// transformations requesting it.
685  template <typename... BranchTypes>
687  {
688  auto staticSeq = TDFInternal::GenStaticSeq_t<sizeof...(BranchTypes)>();
689  return CacheImpl<BranchTypes...>(columnList, staticSeq);
690  }
691 
692  ////////////////////////////////////////////////////////////////////////////
693  /// \brief Save selected columns in memory
694  /// \param[in] columns to be cached in memory
695  ///
696  /// The content of the selected columns is saved in memory exploiting the functionality offered by
697  /// the Take action. No extra copy is carried out when serving cached data to the actions and
698  /// transformations requesting it.
700  {
701  // Early return: if the list of columns is empty, just return an empty TDF
702  // If we proceed, the jitted call will not compile!
703  if (columnList.empty()) {
704  auto nEntries = *this->Count();
705  TInterface<TLoopManager> emptyTDF(std::make_shared<TLoopManager>(nEntries));
706  return emptyTDF;
707  }
708 
709  auto df = GetDataFrameChecked();
710  auto tree = df->GetTree();
711  std::stringstream snapCall;
712  auto upcastNode = TDFInternal::UpcastNode(fProxiedPtr);
713  TInterface<TTraits::TakeFirstParameter_t<decltype(upcastNode)>> upcastInterface(fProxiedPtr, fImplWeakPtr,
714  fValidCustomColumns, fDataSource);
715  // build a string equivalent to
716  // "(TInterface<nodetype*>*)(this)->Cache<Ts...>(*(ColumnNames_t*)(&columnList))"
717  snapCall << "reinterpret_cast<ROOT::Experimental::TDF::TInterface<" << upcastInterface.GetNodeTypeName() << ">*>("
718  << &upcastInterface << ")->Cache<";
719  bool first = true;
720  for (auto &b : columnList) {
721  if (!first)
722  snapCall << ", ";
723  snapCall << TDFInternal::ColumnName2ColumnTypeName(b, tree, df->GetBookedBranch(b), fDataSource);
724  first = false;
725  };
726  snapCall << ">(*reinterpret_cast<std::vector<std::string>*>(" // vector<string> should be ColumnNames_t
727  << &columnList << "));";
728  // jit snapCall, return result
729  TInterpreter::EErrorCode errorCode;
730  auto newTDFPtr = gInterpreter->Calc(snapCall.str().c_str(), &errorCode);
731  if (TInterpreter::EErrorCode::kNoError != errorCode) {
732  std::string msg = "Cannot jit Cache call. Interpreter error code is " + std::to_string(errorCode) + ".";
733  throw std::runtime_error(msg);
734  }
735  return *reinterpret_cast<TInterface<TLoopManager> *>(newTDFPtr);
736  }
737 
738  ////////////////////////////////////////////////////////////////////////////
739  /// \brief Save selected columns in memory
740  /// \param[in] a regular expression to select the columns
741  ///
742  /// The existing columns are matched against the regeular expression. If the string provided
743  /// is empty, all columns are selected.
745  {
746  auto selectedColumns = ConvertRegexToColumns(columnNameRegexp, "Cache");
747  return Cache(selectedColumns);
748  }
749 
750  ////////////////////////////////////////////////////////////////////////////
751  /// \brief Creates a node that filters entries based on range
752  /// \param[in] start How many entries to discard before resuming processing.
753  /// \param[in] stop Total number of entries that will be processed before stopping. 0 means "never stop".
754  /// \param[in] stride Process one entry every `stride` entries. Must be strictly greater than 0.
755  ///
756  /// Ranges are only available if EnableImplicitMT has _not_ been called. Multi-thread ranges are not supported.
757  TInterface<TDFDetail::TRange<Proxied>> Range(unsigned int start, unsigned int stop, unsigned int stride = 1)
758  {
759  // check invariants
760  if (stride == 0 || (stop != 0 && stop < start))
761  throw std::runtime_error("Range: stride must be strictly greater than 0 and stop must be greater than start.");
763  throw std::runtime_error("Range was called with ImplicitMT enabled. Multi-thread ranges are not supported.");
764 
765  auto df = GetDataFrameChecked();
767  auto RangePtr = std::make_shared<Range_t>(start, stop, stride, *fProxiedPtr);
768  df->Book(RangePtr);
769  TInterface<TDFDetail::TRange<Proxied>> tdf_r(RangePtr, fImplWeakPtr, fValidCustomColumns, fDataSource);
770  return tdf_r;
771  }
772 
773  ////////////////////////////////////////////////////////////////////////////
774  /// \brief Creates a node that filters entries based on range
775  /// \param[in] stop Total number of entries that will be processed before stopping. 0 means "never stop".
776  ///
777  /// See the other Range overload for a detailed description.
778  TInterface<TDFDetail::TRange<Proxied>> Range(unsigned int stop) { return Range(0, stop, 1); }
779 
780  ////////////////////////////////////////////////////////////////////////////
781  /// \brief Execute a user-defined function on each entry (*instant action*)
782  /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined
783  /// calculations.
784  /// \param[in] columns Names of the columns/branches in input to the user function.
785  ///
786  /// The callable `f` is invoked once per entry. This is an *instant action*:
787  /// upon invocation, an event loop as well as execution of all scheduled actions
788  /// is triggered.
789  /// Users are responsible for the thread-safety of this callable when executing
790  /// with implicit multi-threading enabled (i.e. ROOT::EnableImplicitMT).
791  template <typename F>
792  void Foreach(F f, const ColumnNames_t &columns = {})
793  {
795  using ret_type = typename TTraits::CallableTraits<decltype(f)>::ret_type;
796  ForeachSlot(TDFInternal::AddSlotParameter<ret_type>(f, arg_types()), columns);
797  }
798 
799  ////////////////////////////////////////////////////////////////////////////
800  /// \brief Execute a user-defined function requiring a processing slot index on each entry (*instant action*)
801  /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined
802  /// calculations.
803  /// \param[in] columns Names of the columns/branches in input to the user function.
804  ///
805  /// Same as `Foreach`, but the user-defined function takes an extra
806  /// `unsigned int` as its first parameter, the *processing slot index*.
807  /// This *slot index* will be assigned a different value, `0` to `poolSize - 1`,
808  /// for each thread of execution.
809  /// This is meant as a helper in writing thread-safe `Foreach`
810  /// actions when using `TDataFrame` after `ROOT::EnableImplicitMT()`.
811  /// The user-defined processing callable is able to follow different
812  /// *streams of processing* indexed by the first parameter.
813  /// `ForeachSlot` works just as well with single-thread execution: in that
814  /// case `slot` will always be `0`.
815  template <typename F>
816  void ForeachSlot(F f, const ColumnNames_t &columns = {})
817  {
818  auto loopManager = GetDataFrameChecked();
820  constexpr auto nColumns = ColTypes_t::list_size;
821  const auto validColumnNames =
822  TDFInternal::GetValidatedColumnNames(*loopManager, nColumns, columns, fValidCustomColumns, fDataSource);
823  if (fDataSource)
824  TDFInternal::DefineDataSourceColumns(validColumnNames, *loopManager, TDFInternal::GenStaticSeq_t<nColumns>(),
825  ColTypes_t(), *fDataSource);
826  using Helper_t = TDFInternal::ForeachSlotHelper<F>;
828  loopManager->Book(std::make_shared<Action_t>(Helper_t(std::move(f)), validColumnNames, *fProxiedPtr));
829  loopManager->Run();
830  }
831 
832  ////////////////////////////////////////////////////////////////////////////
833  /// \brief Execute a user-defined reduce operation on the values of a column.
834  /// \tparam F The type of the reduce callable. Automatically deduced.
835  /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
836  /// \param[in] f A callable with signature `T(T,T)`
837  /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead.
838  ///
839  /// A reduction takes two values of a column and merges them into one (e.g.
840  /// by summing them, taking the maximum, etc). This action performs the
841  /// specified reduction operation on all processed column values, returning
842  /// a single value of the same type. The callable f must satisfy the general
843  /// requirements of a *processing function* besides having signature `T(T,T)`
844  /// where `T` is the type of column columnName.
845  ///
846  /// The returned reduced value of each thread (e.g. the initial value of a sum) is initialized to a
847  /// default-constructed T object. This is commonly expected to be the neutral/identity element for the specific
848  /// reduction operation `f` (e.g. 0 for a sum, 1 for a product). If a default-constructed T does not satisfy this
849  /// requirement, users should explicitly specify an initialization value for T by calling the appropriate `Reduce`
850  /// overload.
851  ///
852  /// This action is *lazy*: upon invocation of this method the calculation is
853  /// booked but not executed. See TResultProxy documentation.
854  template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type>
856  {
857  static_assert(
858  std::is_default_constructible<T>::value,
859  "reduce object cannot be default-constructed. Please provide an initialisation value (redIdentity)");
860  return Reduce(std::move(f), columnName, T());
861  }
862 
863  ////////////////////////////////////////////////////////////////////////////
864  /// \brief Execute a user-defined reduce operation on the values of a column.
865  /// \tparam F The type of the reduce callable. Automatically deduced.
866  /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
867  /// \param[in] f A callable with signature `T(T,T)`
868  /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead.
869  /// \param[in] redIdentity The reduced object of each thread is initialised to this value.
870  ///
871  /// See the description of the first Reduce overload for more information.
872  template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type>
873  TResultProxy<T> Reduce(F f, std::string_view columnName, const T &redIdentity)
874  {
875  using arg_types = typename TTraits::CallableTraits<F>::arg_types;
876  TDFInternal::CheckReduce(f, arg_types());
877  auto loopManager = GetDataFrameChecked();
878  const auto columns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
879  constexpr auto nColumns = arg_types::list_size;
880  const auto validColumnNames =
881  TDFInternal::GetValidatedColumnNames(*loopManager, 1, columns, fValidCustomColumns, fDataSource);
882  if (fDataSource)
883  TDFInternal::DefineDataSourceColumns(validColumnNames, *loopManager, TDFInternal::GenStaticSeq_t<nColumns>(),
884  arg_types(), *fDataSource);
885  auto redObjPtr = std::make_shared<T>(redIdentity);
886  using Helper_t = TDFInternal::ReduceHelper<F, T>;
887  using Action_t = typename TDFInternal::TAction<Helper_t, Proxied>;
888  auto action = std::make_shared<Action_t>(Helper_t(std::move(f), redObjPtr, fProxiedPtr->GetNSlots()),
889  validColumnNames, *fProxiedPtr);
890  loopManager->Book(action);
891  return MakeResultProxy(redObjPtr, loopManager, action.get());
892  }
893 
894  ////////////////////////////////////////////////////////////////////////////
895  /// \brief Return the number of entries processed (*lazy action*)
896  ///
897  /// Useful e.g. for counting the number of entries passing a certain filter (see also `Report`).
898  /// This action is *lazy*: upon invocation of this method the calculation is
899  /// booked but not executed. See TResultProxy documentation.
901  {
902  auto df = GetDataFrameChecked();
903  const auto nSlots = fProxiedPtr->GetNSlots();
904  auto cSPtr = std::make_shared<ULong64_t>(0);
905  using Helper_t = TDFInternal::CountHelper;
907  auto action = std::make_shared<Action_t>(Helper_t(cSPtr, nSlots), ColumnNames_t({}), *fProxiedPtr);
908  df->Book(action);
909  return MakeResultProxy(cSPtr, df, action.get());
910  }
911 
912  ////////////////////////////////////////////////////////////////////////////
913  /// \brief Return a collection of values of a column (*lazy action*, returns a std::vector by default)
914  /// \tparam T The type of the column.
915  /// \tparam COLL The type of collection used to store the values.
916  /// \param[in] column The name of the column to collect the values of.
917  ///
918  /// If the type of the column is `TArrayBranch<T>`, i.e. in the ROOT dataset this is
919  /// a C-style array, the type stored in the return container is a `std::vector<T>` to
920  /// guarantee the lifetime of the data involved.
921  /// This action is *lazy*: upon invocation of this method the calculation is
922  /// booked but not executed. See TResultProxy documentation.
923  template <typename T, typename COLL = std::vector<T>>
925  {
926  auto loopManager = GetDataFrameChecked();
927  const auto columns = column.empty() ? ColumnNames_t() : ColumnNames_t({std::string(column)});
928  const auto validColumnNames =
929  TDFInternal::GetValidatedColumnNames(*loopManager, 1, columns, fValidCustomColumns, fDataSource);
930  if (fDataSource)
931  TDFInternal::DefineDataSourceColumns(validColumnNames, *loopManager, TDFInternal::GenStaticSeq_t<1>(),
932  TTraits::TypeList<T>(), *fDataSource);
933 
934  using RealT_t = typename TDFDetail::TTakeRealTypes<T, COLL>::RealT_t;
935  using RealColl_t = typename TDFDetail::TTakeRealTypes<T, COLL>::RealColl_t;
936 
937  using Helper_t = TDFInternal::TakeHelper<RealT_t, T, RealColl_t>;
939  auto valuesPtr = std::make_shared<RealColl_t>();
940  const auto nSlots = fProxiedPtr->GetNSlots();
941  auto action = std::make_shared<Action_t>(Helper_t(valuesPtr, nSlots), validColumnNames, *fProxiedPtr);
942  loopManager->Book(action);
943  return MakeResultProxy(valuesPtr, loopManager, action.get());
944  }
945 
946  ////////////////////////////////////////////////////////////////////////////
947  /// \brief Fill and return a one-dimensional histogram with the values of a column (*lazy action*)
948  /// \tparam V The type of the column used to fill the histogram.
949  /// \param[in] model The returned histogram will be constructed using this as a model.
950  /// \param[in] vName The name of the column that will fill the histogram.
951  ///
952  /// Columns can be of a container type (e.g. `std::vector<double>`), in which case the histogram
953  /// is filled with each one of the elements of the container. In case multiple columns of container type
954  /// are provided (e.g. values and weights) they must have the same length for each one of the events (but
955  /// possibly different lengths between events).
956  /// This action is *lazy*: upon invocation of this method the calculation is
957  /// booked but not executed. See TResultProxy documentation.
958  /// The user gives up ownership of the model histogram.
959  template <typename V = TDFDetail::TInferType>
960  TResultProxy<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.}, std::string_view vName = "")
961  {
962  const auto userColumns = vName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(vName)});
963  std::shared_ptr<::TH1D> h(nullptr);
964  {
965  ROOT::Internal::TDF::TIgnoreErrorLevelRAII iel(kError);
966  if (model.fBinXEdges.empty()) {
967  h = std::make_shared<::TH1D>(model.fName, model.fTitle, model.fNbinsX, model.fXLow, model.fXUp);
968  } else {
969  h = std::make_shared<::TH1D>(model.fName, model.fTitle, model.fNbinsX, model.fBinXEdges.data());
970  }
971  }
972 
973  if (h->GetXaxis()->GetXmax() == h->GetXaxis()->GetXmin())
974  TDFInternal::HistoUtils<::TH1D>::SetCanExtendAllAxes(*h);
975  return CreateAction<TDFInternal::ActionTypes::Histo1D, V>(userColumns, h);
976  }
977 
978  template <typename V = TDFDetail::TInferType>
980  {
981  return Histo1D<V>({"", "", 128u, 0., 0.}, vName);
982  }
983 
984  ////////////////////////////////////////////////////////////////////////////
985  /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*)
986  /// \tparam V The type of the column used to fill the histogram.
987  /// \tparam W The type of the column used as weights.
988  /// \param[in] model The returned histogram will be constructed using this as a model.
989  /// \param[in] vName The name of the column that will fill the histogram.
990  /// \param[in] wName The name of the column that will provide the weights.
991  ///
992  /// See the description of the first Histo1D overload for more details.
993  template <typename V = TDFDetail::TInferType, typename W = TDFDetail::TInferType>
995  {
996  auto columnViews = {vName, wName};
997  const auto userColumns = TDFInternal::AtLeastOneEmptyString(columnViews)
998  ? ColumnNames_t()
999  : ColumnNames_t(columnViews.begin(), columnViews.end());
1000  std::shared_ptr<::TH1D> h(nullptr);
1001  {
1002  ROOT::Internal::TDF::TIgnoreErrorLevelRAII iel(kError);
1003  if (model.fBinXEdges.empty()) {
1004  h = std::make_shared<::TH1D>(model.fName, model.fTitle, model.fNbinsX, model.fXLow, model.fXUp);
1005  } else {
1006  h = std::make_shared<::TH1D>(model.fName, model.fTitle, model.fNbinsX, model.fBinXEdges.data());
1007  }
1008  }
1009  return CreateAction<TDFInternal::ActionTypes::Histo1D, V, W>(userColumns, h);
1010  }
1011 
1012  ////////////////////////////////////////////////////////////////////////////
1013  /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*)
1014  /// \tparam V The type of the column used to fill the histogram.
1015  /// \tparam W The type of the column used as weights.
1016  /// \param[in] vName The name of the column that will fill the histogram.
1017  /// \param[in] wName The name of the column that will provide the weights.
1018  ///
1019  /// This overload uses a default model histogram TH1D("", "", 128u, 0., 0.).
1020  /// See the description of the first Histo1D overload for more details.
1021  template <typename V = TDFDetail::TInferType, typename W = TDFDetail::TInferType>
1023  {
1024  return Histo1D<V, W>({"", "", 128u, 0., 0.}, vName, wName);
1025  }
1026 
1027  ////////////////////////////////////////////////////////////////////////////
1028  /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*)
1029  /// \tparam V The type of the column used to fill the histogram.
1030  /// \tparam W The type of the column used as weights.
1031  /// \param[in] model The returned histogram will be constructed using this as a model.
1032  ///
1033  /// This overload will use the first two default columns as column names.
1034  /// See the description of the first Histo1D overload for more details.
1035  template <typename V, typename W>
1036  TResultProxy<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.})
1037  {
1038  return Histo1D<V, W>(model, "", "");
1039  }
1040 
1041  ////////////////////////////////////////////////////////////////////////////
1042  /// \brief Fill and return a two-dimensional histogram (*lazy action*)
1043  /// \tparam V1 The type of the column used to fill the x axis of the histogram.
1044  /// \tparam V2 The type of the column used to fill the y axis of the histogram.
1045  /// \param[in] model The returned histogram will be constructed using this as a model.
1046  /// \param[in] v1Name The name of the column that will fill the x axis.
1047  /// \param[in] v2Name The name of the column that will fill the y axis.
1048  ///
1049  /// Columns can be of a container type (e.g. std::vector<double>), in which case the histogram
1050  /// is filled with each one of the elements of the container. In case multiple columns of container type
1051  /// are provided (e.g. values and weights) they must have the same length for each one of the events (but
1052  /// possibly different lengths between events).
1053  /// This action is *lazy*: upon invocation of this method the calculation is
1054  /// booked but not executed. See TResultProxy documentation.
1055  /// The user gives up ownership of the model histogram.
1056  template <typename V1 = TDFDetail::TInferType, typename V2 = TDFDetail::TInferType>
1058  {
1059  std::shared_ptr<::TH2D> h(nullptr);
1060  {
1061  ROOT::Internal::TDF::TIgnoreErrorLevelRAII iel(kError);
1062  if (model.fBinXEdges.empty() && model.fBinYEdges.empty()) {
1063  h = std::make_shared<::TH2D>(model.fName, model.fTitle, model.fNbinsX, model.fXLow, model.fXUp,
1064  model.fNbinsY, model.fYLow, model.fYUp);
1065  } else if (!model.fBinXEdges.empty() && model.fBinYEdges.empty()) {
1066  h = std::make_shared<::TH2D>(model.fName, model.fTitle, model.fNbinsX, model.fBinXEdges.data(),
1067  model.fNbinsY, model.fYLow, model.fYUp);
1068  } else {
1069  h = std::make_shared<::TH2D>(model.fName, model.fTitle, model.fNbinsX, model.fBinXEdges.data(),
1070  model.fNbinsY, model.fBinYEdges.data());
1071  }
1072  }
1073  if (!TDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) {
1074  throw std::runtime_error("2D histograms with no axes limits are not supported yet.");
1075  }
1076  auto columnViews = {v1Name, v2Name};
1077  const auto userColumns = TDFInternal::AtLeastOneEmptyString(columnViews)
1078  ? ColumnNames_t()
1079  : ColumnNames_t(columnViews.begin(), columnViews.end());
1080  return CreateAction<TDFInternal::ActionTypes::Histo2D, V1, V2>(userColumns, h);
1081  }
1082 
1083  ////////////////////////////////////////////////////////////////////////////
1084  /// \brief Fill and return a weighted two-dimensional histogram (*lazy action*)
1085  /// \tparam V1 The type of the column used to fill the x axis of the histogram.
1086  /// \tparam V2 The type of the column used to fill the y axis of the histogram.
1087  /// \tparam W The type of the column used for the weights of the histogram.
1088  /// \param[in] model The returned histogram will be constructed using this as a model.
1089  /// \param[in] v1Name The name of the column that will fill the x axis.
1090  /// \param[in] v2Name The name of the column that will fill the y axis.
1091  /// \param[in] wName The name of the column that will provide the weights.
1092  ///
1093  /// This action is *lazy*: upon invocation of this method the calculation is
1094  /// booked but not executed. See TResultProxy documentation.
1095  /// The user gives up ownership of the model histogram.
1096  template <typename V1 = TDFDetail::TInferType, typename V2 = TDFDetail::TInferType,
1097  typename W = TDFDetail::TInferType>
1100  {
1101  std::shared_ptr<::TH2D> h(nullptr);
1102  {
1103  ROOT::Internal::TDF::TIgnoreErrorLevelRAII iel(kError);
1104  if (model.fBinXEdges.empty() && model.fBinYEdges.empty()) {
1105  h = std::make_shared<::TH2D>(model.fName, model.fTitle, model.fNbinsX, model.fXLow, model.fXUp,
1106  model.fNbinsY, model.fYLow, model.fYUp);
1107  } else if (!model.fBinXEdges.empty() && model.fBinYEdges.empty()) {
1108  h = std::make_shared<::TH2D>(model.fName, model.fTitle, model.fNbinsX, model.fBinXEdges.data(),
1109  model.fNbinsY, model.fYLow, model.fYUp);
1110  } else {
1111  h = std::make_shared<::TH2D>(model.fName, model.fTitle, model.fNbinsX, model.fBinXEdges.data(),
1112  model.fNbinsY, model.fBinYEdges.data());
1113  }
1114  }
1115  if (!TDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) {
1116  throw std::runtime_error("2D histograms with no axes limits are not supported yet.");
1117  }
1118  auto columnViews = {v1Name, v2Name, wName};
1119  const auto userColumns = TDFInternal::AtLeastOneEmptyString(columnViews)
1120  ? ColumnNames_t()
1121  : ColumnNames_t(columnViews.begin(), columnViews.end());
1122  return CreateAction<TDFInternal::ActionTypes::Histo2D, V1, V2, W>(userColumns, h);
1123  }
1124 
1125  template <typename V1, typename V2, typename W>
1127  {
1128  return Histo2D<V1, V2, W>(model, "", "", "");
1129  }
1130 
1131  ////////////////////////////////////////////////////////////////////////////
1132  /// \brief Fill and return a three-dimensional histogram (*lazy action*)
1133  /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1134  /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1135  /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1136  /// \param[in] model The returned histogram will be constructed using this as a model.
1137  /// \param[in] v1Name The name of the column that will fill the x axis.
1138  /// \param[in] v2Name The name of the column that will fill the y axis.
1139  /// \param[in] v3Name The name of the column that will fill the z axis.
1140  ///
1141  /// This action is *lazy*: upon invocation of this method the calculation is
1142  /// booked but not executed. See TResultProxy documentation.
1143  /// The user gives up ownership of the model histogram.
1144  template <typename V1 = TDFDetail::TInferType, typename V2 = TDFDetail::TInferType,
1145  typename V3 = TDFDetail::TInferType>
1147  std::string_view v3Name = "")
1148  {
1149  std::shared_ptr<::TH3D> h(nullptr);
1150  {
1151  ROOT::Internal::TDF::TIgnoreErrorLevelRAII iel(kError);
1152  if (model.fBinXEdges.empty() && model.fBinYEdges.empty() && model.fBinZEdges.empty()) {
1153  h =
1154  std::make_shared<::TH3D>(model.fName, model.fTitle, model.fNbinsX, model.fXLow, model.fXUp,
1155  model.fNbinsY, model.fYLow, model.fYUp, model.fNbinsZ, model.fZLow, model.fZUp);
1156  } else {
1157  h =
1158  std::make_shared<::TH3D>(model.fName, model.fTitle, model.fNbinsX, model.fBinXEdges.data(),
1159  model.fNbinsY, model.fBinYEdges.data(), model.fNbinsZ, model.fBinZEdges.data());
1160  }
1161  }
1162  if (!TDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) {
1163  throw std::runtime_error("3D histograms with no axes limits are not supported yet.");
1164  }
1165  auto columnViews = {v1Name, v2Name, v3Name};
1166  const auto userColumns = TDFInternal::AtLeastOneEmptyString(columnViews)
1167  ? ColumnNames_t()
1168  : ColumnNames_t(columnViews.begin(), columnViews.end());
1169  return CreateAction<TDFInternal::ActionTypes::Histo3D, V1, V2, V3>(userColumns, h);
1170  }
1171 
1172  ////////////////////////////////////////////////////////////////////////////
1173  /// \brief Fill and return a three-dimensional histogram (*lazy action*)
1174  /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1175  /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1176  /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1177  /// \tparam W The type of the column used for the weights of the histogram. Inferred if not present.
1178  /// \param[in] model The returned histogram will be constructed using this as a model.
1179  /// \param[in] v1Name The name of the column that will fill the x axis.
1180  /// \param[in] v2Name The name of the column that will fill the y axis.
1181  /// \param[in] v3Name The name of the column that will fill the z axis.
1182  /// \param[in] wName The name of the column that will provide the weights.
1183  ///
1184  /// This action is *lazy*: upon invocation of this method the calculation is
1185  /// booked but not executed. See TResultProxy documentation.
1186  /// The user gives up ownership of the model histogram.
1187  template <typename V1 = TDFDetail::TInferType, typename V2 = TDFDetail::TInferType,
1188  typename V3 = TDFDetail::TInferType, typename W = TDFDetail::TInferType>
1190  std::string_view v3Name, std::string_view wName)
1191  {
1192  std::shared_ptr<::TH3D> h(nullptr);
1193  {
1194  ROOT::Internal::TDF::TIgnoreErrorLevelRAII iel(kError);
1195  if (model.fBinXEdges.empty() && model.fBinYEdges.empty() && model.fBinZEdges.empty()) {
1196  h =
1197  std::make_shared<::TH3D>(model.fName, model.fTitle, model.fNbinsX, model.fXLow, model.fXUp,
1198  model.fNbinsY, model.fYLow, model.fYUp, model.fNbinsZ, model.fZLow, model.fZUp);
1199  } else {
1200  h =
1201  std::make_shared<::TH3D>(model.fName, model.fTitle, model.fNbinsX, model.fBinXEdges.data(),
1202  model.fNbinsY, model.fBinYEdges.data(), model.fNbinsZ, model.fBinZEdges.data());
1203  }
1204  }
1205  if (!TDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) {
1206  throw std::runtime_error("3D histograms with no axes limits are not supported yet.");
1207  }
1208  auto columnViews = {v1Name, v2Name, v3Name, wName};
1209  const auto userColumns = TDFInternal::AtLeastOneEmptyString(columnViews)
1210  ? ColumnNames_t()
1211  : ColumnNames_t(columnViews.begin(), columnViews.end());
1212  return CreateAction<TDFInternal::ActionTypes::Histo3D, V1, V2, V3, W>(userColumns, h);
1213  }
1214 
1215  template <typename V1, typename V2, typename V3, typename W>
1217  {
1218  return Histo3D<V1, V2, V3, W>(model, "", "", "", "");
1219  }
1220 
1221  ////////////////////////////////////////////////////////////////////////////
1222  /// \brief Fill and return a one-dimensional profile (*lazy action*)
1223  /// \tparam V1 The type of the column the values of which are used to fill the profile. Inferred if not present.
1224  /// \tparam V2 The type of the column the values of which are used to fill the profile. Inferred if not present.
1225  /// \param[in] model The model to be considered to build the new return value.
1226  /// \param[in] v1Name The name of the column that will fill the x axis.
1227  /// \param[in] v2Name The name of the column that will fill the y axis.
1228  ///
1229  /// This action is *lazy*: upon invocation of this method the calculation is
1230  /// booked but not executed. See TResultProxy documentation.
1231  /// The user gives up ownership of the model profile object.
1232  template <typename V1 = TDFDetail::TInferType, typename V2 = TDFDetail::TInferType>
1235  {
1236  std::shared_ptr<::TProfile> h(nullptr);
1237  {
1238  ROOT::Internal::TDF::TIgnoreErrorLevelRAII iel(kError);
1239  h = std::make_shared<::TProfile>(model.fName, model.fTitle, model.fNbinsX, model.fXLow, model.fXUp,
1240  model.fYLow, model.fYUp, model.fOption);
1241  }
1242 
1243  if (!TDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) {
1244  throw std::runtime_error("Profiles with no axes limits are not supported yet.");
1245  }
1246  auto columnViews = {v1Name, v2Name};
1247  const auto userColumns = TDFInternal::AtLeastOneEmptyString(columnViews)
1248  ? ColumnNames_t()
1249  : ColumnNames_t(columnViews.begin(), columnViews.end());
1250  return CreateAction<TDFInternal::ActionTypes::Profile1D, V1, V2>(userColumns, h);
1251  }
1252 
1253  ////////////////////////////////////////////////////////////////////////////
1254  /// \brief Fill and return a one-dimensional profile (*lazy action*)
1255  /// \tparam V1 The type of the column the values of which are used to fill the profile. Inferred if not present.
1256  /// \tparam V2 The type of the column the values of which are used to fill the profile. Inferred if not present.
1257  /// \tparam W The type of the column the weights of which are used to fill the profile. Inferred if not present.
1258  /// \param[in] model The model to be considered to build the new return value.
1259  /// \param[in] v1Name The name of the column that will fill the x axis.
1260  /// \param[in] v2Name The name of the column that will fill the y axis.
1261  /// \param[in] wName The name of the column that will provide the weights.
1262  ///
1263  /// This action is *lazy*: upon invocation of this method the calculation is
1264  /// booked but not executed. See TResultProxy documentation.
1265  /// The user gives up ownership of the model profile object.
1266  template <typename V1 = TDFDetail::TInferType, typename V2 = TDFDetail::TInferType,
1267  typename W = TDFDetail::TInferType>
1270  {
1271  std::shared_ptr<::TProfile> h(nullptr);
1272  {
1273  ROOT::Internal::TDF::TIgnoreErrorLevelRAII iel(kError);
1274  h = std::make_shared<::TProfile>(model.fName, model.fTitle, model.fNbinsX, model.fXLow, model.fXUp,
1275  model.fYLow, model.fYUp, model.fOption);
1276  }
1277 
1278  if (!TDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) {
1279  throw std::runtime_error("Profile histograms with no axes limits are not supported yet.");
1280  }
1281  auto columnViews = {v1Name, v2Name, wName};
1282  const auto userColumns = TDFInternal::AtLeastOneEmptyString(columnViews)
1283  ? ColumnNames_t()
1284  : ColumnNames_t(columnViews.begin(), columnViews.end());
1285  return CreateAction<TDFInternal::ActionTypes::Profile1D, V1, V2, W>(userColumns, h);
1286  }
1287 
1288  template <typename V1, typename V2, typename W>
1290  {
1291  return Profile1D<V1, V2, W>(model, "", "", "");
1292  }
1293 
1294  ////////////////////////////////////////////////////////////////////////////
1295  /// \brief Fill and return a two-dimensional profile (*lazy action*)
1296  /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1297  /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1298  /// \tparam V2 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1299  /// \param[in] model The returned profile will be constructed using this as a model.
1300  /// \param[in] v1Name The name of the column that will fill the x axis.
1301  /// \param[in] v2Name The name of the column that will fill the y axis.
1302  /// \param[in] v3Name The name of the column that will fill the z axis.
1303  ///
1304  /// This action is *lazy*: upon invocation of this method the calculation is
1305  /// booked but not executed. See TResultProxy documentation.
1306  /// The user gives up ownership of the model profile.
1307  template <typename V1 = TDFDetail::TInferType, typename V2 = TDFDetail::TInferType,
1308  typename V3 = TDFDetail::TInferType>
1310  std::string_view v2Name = "", std::string_view v3Name = "")
1311  {
1312  std::shared_ptr<::TProfile2D> h(nullptr);
1313  {
1314  ROOT::Internal::TDF::TIgnoreErrorLevelRAII iel(kError);
1315  h = std::make_shared<::TProfile2D>(model.fName, model.fTitle, model.fNbinsX, model.fXLow, model.fXUp,
1316  model.fNbinsY, model.fYLow, model.fYUp, model.fZLow, model.fZUp,
1317  model.fOption);
1318  }
1319 
1320  if (!TDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) {
1321  throw std::runtime_error("2D profiles with no axes limits are not supported yet.");
1322  }
1323  auto columnViews = {v1Name, v2Name, v3Name};
1324  const auto userColumns = TDFInternal::AtLeastOneEmptyString(columnViews)
1325  ? ColumnNames_t()
1326  : ColumnNames_t(columnViews.begin(), columnViews.end());
1327  return CreateAction<TDFInternal::ActionTypes::Profile2D, V1, V2, V3>(userColumns, h);
1328  }
1329 
1330  ////////////////////////////////////////////////////////////////////////////
1331  /// \brief Fill and return a two-dimensional profile (*lazy action*)
1332  /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1333  /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1334  /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1335  /// \tparam W The type of the column used for the weights of the histogram. Inferred if not present.
1336  /// \param[in] model The returned histogram will be constructed using this as a model.
1337  /// \param[in] v1Name The name of the column that will fill the x axis.
1338  /// \param[in] v2Name The name of the column that will fill the y axis.
1339  /// \param[in] v3Name The name of the column that will fill the z axis.
1340  /// \param[in] wName The name of the column that will provide the weights.
1341  ///
1342  /// This action is *lazy*: upon invocation of this method the calculation is
1343  /// booked but not executed. See TResultProxy documentation.
1344  /// The user gives up ownership of the model profile.
1345  template <typename V1 = TDFDetail::TInferType, typename V2 = TDFDetail::TInferType,
1346  typename V3 = TDFDetail::TInferType, typename W = TDFDetail::TInferType>
1348  std::string_view v3Name, std::string_view wName)
1349  {
1350  std::shared_ptr<::TProfile2D> h(nullptr);
1351  {
1352  ROOT::Internal::TDF::TIgnoreErrorLevelRAII iel(kError);
1353  h = std::make_shared<::TProfile2D>(model.fName, model.fTitle, model.fNbinsX, model.fXLow, model.fXUp,
1354  model.fNbinsY, model.fYLow, model.fYUp, model.fZLow, model.fZUp,
1355  model.fOption);
1356  }
1357 
1358  if (!TDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) {
1359  throw std::runtime_error("2D profiles with no axes limits are not supported yet.");
1360  }
1361  auto columnViews = {v1Name, v2Name, v3Name, wName};
1362  const auto userColumns = TDFInternal::AtLeastOneEmptyString(columnViews)
1363  ? ColumnNames_t()
1364  : ColumnNames_t(columnViews.begin(), columnViews.end());
1365  return CreateAction<TDFInternal::ActionTypes::Profile2D, V1, V2, V3, W>(userColumns, h);
1366  }
1367 
1368  template <typename V1, typename V2, typename V3, typename W>
1370  {
1371  return Profile2D<V1, V2, V3, W>(model, "", "", "", "");
1372  }
1373 
1374  ////////////////////////////////////////////////////////////////////////////
1375  /// \brief Return an object of type T on which `T::Fill` will be called once per event (*lazy action*)
1376  ///
1377  /// T must be a type that provides a copy- or move-constructor and a `T::Fill` method that takes as many arguments
1378  /// as the column names pass as columnList. The arguments of `T::Fill` must have type equal to the one of the
1379  /// specified columns (these types are passed as template parameters to this method).
1380  /// \tparam FirstColumn The first type of the column the values of which are used to fill the object.
1381  /// \tparam OtherColumns A list of the other types of the columns the values of which are used to fill the object.
1382  /// \tparam T The type of the object to fill. Automatically deduced.
1383  /// \param[in] model The model to be considered to build the new return value.
1384  /// \param[in] columnList A list containing the names of the columns that will be passed when calling `Fill`
1385  ///
1386  /// The user gives up ownership of the model object.
1387  /// The list of column names to be used for filling must always be specified.
1388  /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed.
1389  /// See TResultProxy documentation.
1390  template <typename FirstColumn, typename... OtherColumns, typename T> // need FirstColumn to disambiguate overloads
1391  TResultProxy<T> Fill(T &&model, const ColumnNames_t &columnList)
1392  {
1393  auto h = std::make_shared<T>(std::move(model));
1394  if (!TDFInternal::HistoUtils<T>::HasAxisLimits(*h)) {
1395  throw std::runtime_error("The absence of axes limits is not supported yet.");
1396  }
1397  return CreateAction<TDFInternal::ActionTypes::Fill, FirstColumn, OtherColumns...>(columnList, h);
1398  }
1399 
1400  ////////////////////////////////////////////////////////////////////////////
1401  /// \brief Return an object of type T on which `T::Fill` will be called once per event (*lazy action*)
1402  ///
1403  /// This overload infers the types of the columns specified in columnList at runtime and just-in-time compiles the
1404  /// method with these types. See previous overload for more information.
1405  /// \tparam T The type of the object to fill. Automatically deduced.
1406  /// \param[in] model The model to be considered to build the new return value.
1407  /// \param[in] columnList The name of the columns read to fill the object.
1408  ///
1409  /// This overload of `Fill` infers the type of the specified columns at runtime and just-in-time compiles the
1410  /// previous overload. Check the previous overload for more details on `Fill`.
1411  template <typename T>
1413  {
1414  auto h = std::make_shared<T>(std::move(model));
1415  if (!TDFInternal::HistoUtils<T>::HasAxisLimits(*h)) {
1416  throw std::runtime_error("The absence of axes limits is not supported yet.");
1417  }
1418  return CreateAction<TDFInternal::ActionTypes::Fill, TDFDetail::TInferType>(bl, h, bl.size());
1419  }
1420 
1421  ////////////////////////////////////////////////////////////////////////////
1422  /// \brief Return the minimum of processed column values (*lazy action*)
1423  /// \tparam T The type of the branch/column.
1424  /// \param[in] columnName The name of the branch/column to be treated.
1425  ///
1426  /// If T is not specified, TDataFrame will infer it from the data and just-in-time compile the correct
1427  /// template specialization of this method.
1428  /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
1429  ///
1430  /// This action is *lazy*: upon invocation of this method the calculation is
1431  /// booked but not executed. See TResultProxy documentation.
1432  template <typename T = TDFDetail::TInferType>
1434  {
1435  const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
1436  using RetType_t = TDFDetail::MinReturnType_t<T>;
1437  auto minV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::max());
1438  return CreateAction<TDFInternal::ActionTypes::Min, T>(userColumns, minV);
1439  }
1440 
1441  ////////////////////////////////////////////////////////////////////////////
1442  /// \brief Return the maximum of processed column values (*lazy action*)
1443  /// \tparam T The type of the branch/column.
1444  /// \param[in] columnName The name of the branch/column to be treated.
1445  ///
1446  /// If T is not specified, TDataFrame will infer it from the data and just-in-time compile the correct
1447  /// template specialization of this method.
1448  /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
1449  ///
1450  /// This action is *lazy*: upon invocation of this method the calculation is
1451  /// booked but not executed. See TResultProxy documentation.
1452  template <typename T = TDFDetail::TInferType>
1454  {
1455  const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
1456  using RetType_t = TDFDetail::MaxReturnType_t<T>;
1457  auto maxV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::lowest());
1458  return CreateAction<TDFInternal::ActionTypes::Max, T>(userColumns, maxV);
1459  }
1460 
1461  ////////////////////////////////////////////////////////////////////////////
1462  /// \brief Return the mean of processed column values (*lazy action*)
1463  /// \tparam T The type of the branch/column.
1464  /// \param[in] columnName The name of the branch/column to be treated.
1465  ///
1466  /// If T is not specified, TDataFrame will infer it from the data and just-in-time compile the correct
1467  /// template specialization of this method.
1468  ///
1469  /// This action is *lazy*: upon invocation of this method the calculation is
1470  /// booked but not executed. See TResultProxy documentation.
1471  template <typename T = TDFDetail::TInferType>
1473  {
1474  const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
1475  auto meanV = std::make_shared<double>(0);
1476  return CreateAction<TDFInternal::ActionTypes::Mean, T>(userColumns, meanV);
1477  }
1478 
1479  // clang-format off
1480  ////////////////////////////////////////////////////////////////////////////
1481  /// \brief Return the sum of processed column values (*lazy action*)
1482  /// \tparam T The type of the branch/column.
1483  /// \param[in] columnName The name of the branch/column.
1484  /// \param[in] initValue Optional initial value for the sum. If not present, the column values must be default-constructible.
1485  ///
1486  /// If T is not specified, TDataFrame will infer it from the data and just-in-time compile the correct
1487  /// template specialization of this method.
1488  /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
1489  ///
1490  /// This action is *lazy*: upon invocation of this method the calculation is
1491  /// booked but not executed. See TResultProxy documentation.
1492  template <typename T = TDFDetail::TInferType>
1494  Sum(std::string_view columnName = "",
1496  {
1497  const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
1498  auto sumV = std::make_shared<TDFDetail::SumReturnType_t<T>>(initValue);
1499  return CreateAction<TDFInternal::ActionTypes::Sum, T>(userColumns, sumV);
1500  }
1501  // clang-format on
1502 
1503  ////////////////////////////////////////////////////////////////////////////
1504  /// \brief Print filtering statistics on screen
1505  ///
1506  /// Calling `Report` on the main `TDataFrame` object prints stats for
1507  /// all named filters in the call graph. Calling this method on a
1508  /// stored chain state (i.e. a graph node different from the first) prints
1509  /// the stats for all named filters in the chain section between the original
1510  /// `TDataFrame` and that node (included). Stats are printed in the same
1511  /// order as the named filters have been added to the graph.
1512  void Report()
1513  {
1514  // if this is a TInterface<TLoopManager> on which `Define` has been called, users
1515  // are calling `Report` on a chain of the form LoopManager->Define->Define->..., which
1516  // certainly does not contain named filters.
1517  // The number 2 takes into account the implicit columns for entry and slot number
1518  if (std::is_same<Proxied, TLoopManager>::value && fValidCustomColumns.size() > 2)
1519  return;
1520 
1521  auto df = GetDataFrameChecked();
1522  // TODO: we could do better and check if the Report was asked for Filters that have already run
1523  // and in that case skip the event-loop
1524  if (df->MustRunNamedFilters())
1525  df->Run();
1526  fProxiedPtr->Report();
1527  }
1528 
1529  /////////////////////////////////////////////////////////////////////////////
1530  /// \brief Returns the names of the available columns
1531  ///
1532  /// This is not an action nor a transformation, just a simple utility to
1533  /// get columns names out of the TDataFrame nodes.
1535  {
1536  ColumnNames_t allColumns;
1537 
1538  auto addIfNotInternal = [&allColumns](std::string_view colName) {
1539  if (!TDFInternal::IsInternalColumn(colName))
1540  allColumns.emplace_back(colName);
1541  };
1542 
1543  std::for_each(fValidCustomColumns.begin(), fValidCustomColumns.end(), addIfNotInternal);
1544 
1545  auto df = GetDataFrameChecked();
1546  auto tree = df->GetTree();
1547  if (tree) {
1548  auto branchNames = TDFInternal::GetBranchNames(*tree);
1549  allColumns.insert(allColumns.end(), branchNames.begin(), branchNames.end());
1550  }
1551 
1552  if (fDataSource) {
1553  auto &dsColNames = fDataSource->GetColumnNames();
1554  allColumns.insert(allColumns.end(), dsColNames.begin(), dsColNames.end());
1555  }
1556 
1557  return allColumns;
1558  }
1559 
1560 private:
1562  {
1563  auto lm = GetDataFrameChecked();
1564  ColumnNames_t validColNames = {};
1565 
1566  // Entry number column
1567  auto entryColGen = [](unsigned int, ULong64_t entry) { return entry; };
1568  const auto entryColName = "tdfentry_";
1570  lm->Book(std::make_shared<EntryCol_t>(entryColName, std::move(entryColGen), validColNames, lm.get()));
1571  fValidCustomColumns.emplace_back(entryColName);
1572 
1573  // Slot number column
1574  auto slotColGen = [](unsigned int slot) { return slot; };
1575  const auto slotColName = "tdfslot_";
1577  lm->Book(std::make_shared<SlotCol_t>(slotColName, std::move(slotColGen), validColNames, lm.get()));
1578  fValidCustomColumns.emplace_back(slotColName);
1579  }
1580 
1582  {
1583  const auto theRegexSize = columnNameRegexp.size();
1584  std::string theRegex(columnNameRegexp);
1585 
1586  const auto isEmptyRegex = 0 == theRegexSize;
1587  // This is to avoid cases where branches called b1, b2, b3 are all matched by expression "b"
1588  if (theRegexSize > 0 && theRegex[0] != '^')
1589  theRegex = "^" + theRegex;
1590  if (theRegexSize > 0 && theRegex[theRegexSize - 1] != '$')
1591  theRegex = theRegex + "$";
1592 
1593  ColumnNames_t selectedColumns;
1594  selectedColumns.reserve(32);
1595 
1596  // Since we support gcc48 and it does not provide in its stl std::regex,
1597  // we need to use TRegexp
1598  TRegexp regexp(theRegex);
1599  int dummy;
1600  for (auto &&branchName : fValidCustomColumns) {
1601  if ((isEmptyRegex || -1 != regexp.Index(branchName.c_str(), &dummy)) &&
1603  selectedColumns.emplace_back(branchName);
1604  }
1605  }
1606 
1607  auto df = GetDataFrameChecked();
1608  auto tree = df->GetTree();
1609  if (tree) {
1610  auto branchNames = TDFInternal::GetBranchNames(*tree);
1611  for (auto &branchName : branchNames) {
1612  if (isEmptyRegex || -1 != regexp.Index(branchName, &dummy)) {
1613  selectedColumns.emplace_back(branchName);
1614  }
1615  }
1616  }
1617 
1618  if (fDataSource) {
1619  auto &dsColNames = fDataSource->GetColumnNames();
1620  for (auto &dsColName : dsColNames) {
1621  if (isEmptyRegex || -1 != regexp.Index(dsColName, &dummy)) {
1622  selectedColumns.emplace_back(dsColName);
1623  }
1624  }
1625  }
1626 
1627  if (selectedColumns.empty()) {
1628  std::string text(callerName);
1629  if (columnNameRegexp.empty()) {
1630  text = ": there is no column available to match.";
1631  } else {
1632  text = ": regex \"" + columnNameRegexp + "\" did not match any column.";
1633  }
1634  throw std::runtime_error(text);
1635  }
1636  return selectedColumns;
1637  }
1638 
1640  std::string_view returnTypeName)
1641  {
1642  auto df = GetDataFrameChecked();
1643  auto &aliasMap = df->GetAliasMap();
1644  auto tree = df->GetTree();
1646  const auto &customColumns = df->GetCustomColumnNames();
1647  auto tmpBookedBranches = df->GetBookedColumns();
1648  auto upcastNode = TDFInternal::UpcastNode(fProxiedPtr);
1650  upcastNode, fImplWeakPtr, fValidCustomColumns, fDataSource);
1651  const auto thisTypeName = "ROOT::Experimental::TDF::TInterface<" + upcastInterface.GetNodeTypeName() + ">";
1652  return TDFInternal::JitTransformation(&upcastInterface, transformation, thisTypeName, nodeName, expression,
1653  aliasMap, branches, customColumns, tmpBookedBranches, tree, returnTypeName,
1654  fDataSource);
1655  }
1656 
1657  /// Return string containing fully qualified type name of the node pointed by fProxied.
1658  /// The method is only defined for TInterface<{TFilterBase,TCustomColumnBase,TRangeBase,TLoopManager}> as it should
1659  /// only be called on "upcast" TInterfaces.
1660  inline static std::string GetNodeTypeName();
1661 
1662  // Type was specified by the user, no need to infer it
1663  template <typename ActionType, typename... BranchTypes, typename ActionResultType,
1664  typename std::enable_if<!TDFInternal::TNeedJitting<BranchTypes...>::value, int>::type = 0>
1665  TResultProxy<ActionResultType> CreateAction(const ColumnNames_t &columns, const std::shared_ptr<ActionResultType> &r)
1666  {
1667  auto lm = GetDataFrameChecked();
1668  constexpr auto nColumns = sizeof...(BranchTypes);
1669  const auto selectedCols =
1670  TDFInternal::GetValidatedColumnNames(*lm, nColumns, columns, fValidCustomColumns, fDataSource);
1671  if (fDataSource)
1672  TDFInternal::DefineDataSourceColumns(selectedCols, *lm, TDFInternal::GenStaticSeq_t<nColumns>(),
1673  TDFInternal::TypeList<BranchTypes...>(), *fDataSource);
1674  const auto nSlots = fProxiedPtr->GetNSlots();
1675  auto actionPtr =
1676  TDFInternal::BuildAndBook<BranchTypes...>(selectedCols, r, nSlots, *lm, *fProxiedPtr, (ActionType *)nullptr);
1677  return MakeResultProxy(r, lm, actionPtr);
1678  }
1679 
1680  // User did not specify type, do type inference
1681  // This version of CreateAction has a `nColumns` optional argument. If present, the number of required columns for
1682  // this action is taken equal to nColumns, otherwise it is assumed to be sizeof...(BranchTypes)
1683  template <typename ActionType, typename... BranchTypes, typename ActionResultType,
1684  typename std::enable_if<TDFInternal::TNeedJitting<BranchTypes...>::value, int>::type = 0>
1686  CreateAction(const ColumnNames_t &columns, const std::shared_ptr<ActionResultType> &r, const int nColumns = -1)
1687  {
1688  auto lm = GetDataFrameChecked();
1689  auto realNColumns = (nColumns > -1 ? nColumns : sizeof...(BranchTypes));
1690  const auto validColumnNames =
1691  TDFInternal::GetValidatedColumnNames(*lm, realNColumns, columns, fValidCustomColumns, fDataSource);
1692  const unsigned int nSlots = fProxiedPtr->GetNSlots();
1693  const auto &customColumns = lm->GetBookedColumns();
1694  auto tree = lm->GetTree();
1695  auto rOnHeap = TDFInternal::MakeSharedOnHeap(r);
1696  auto upcastNode = TDFInternal::UpcastNode(fProxiedPtr);
1698  upcastNode, fImplWeakPtr, fValidCustomColumns, fDataSource);
1699  auto resultProxyAndActionPtrPtr = MakeResultProxy(r, lm);
1700  auto &resultProxy = resultProxyAndActionPtrPtr.first;
1701  auto actionPtrPtrOnHeap = TDFInternal::MakeSharedOnHeap(resultProxyAndActionPtrPtr.second);
1702  auto toJit = TDFInternal::JitBuildAndBook(validColumnNames, upcastInterface.GetNodeTypeName(), upcastNode.get(),
1703  typeid(std::shared_ptr<ActionResultType>), typeid(ActionType), rOnHeap,
1704  tree, nSlots, customColumns, fDataSource, actionPtrPtrOnHeap);
1705  lm->Jit(toJit);
1706  return resultProxy;
1707  }
1708 
1709  template <typename F, typename ExtraArgs>
1710  typename std::enable_if<std::is_default_constructible<typename TTraits::CallableTraits<F>::ret_type>::value,
1711  TInterface<Proxied>>::type
1712  DefineImpl(std::string_view name, F &&expression, const ColumnNames_t &columns)
1713  {
1714  auto loopManager = GetDataFrameChecked();
1715  TDFInternal::CheckCustomColumn(name, loopManager->GetTree(), loopManager->GetCustomColumnNames(),
1716  fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{});
1717 
1718  using ArgTypes_t = typename TTraits::CallableTraits<F>::arg_types;
1719  using ColTypesTmp_t =
1720  typename TDFInternal::RemoveFirstParameterIf<std::is_same<ExtraArgs, TDFDetail::TCCHelperTypes::TSlot>::value,
1721  ArgTypes_t>::type;
1722  using ColTypes_t = typename TDFInternal::RemoveFirstTwoParametersIf<
1723  std::is_same<ExtraArgs, TDFDetail::TCCHelperTypes::TSlotAndEntry>::value, ColTypesTmp_t>::type;
1724 
1725  constexpr auto nColumns = ColTypes_t::list_size;
1726  const auto validColumnNames =
1727  TDFInternal::GetValidatedColumnNames(*loopManager, nColumns, columns, fValidCustomColumns, fDataSource);
1728  if (fDataSource)
1729  TDFInternal::DefineDataSourceColumns(validColumnNames, *loopManager, TDFInternal::GenStaticSeq_t<nColumns>(),
1730  ColTypes_t(), *fDataSource);
1731  using NewCol_t = TDFDetail::TCustomColumn<F, ExtraArgs>;
1732 
1733  // Here we check if the return type is a pointer. In this case, we assume it points to valid memory
1734  // and we treat the column as if it came from a data source, i.e. it points to valid memory.
1735  loopManager->Book(std::make_shared<NewCol_t>(name, std::move(expression), validColumnNames, loopManager.get()));
1736  TInterface<Proxied> newInterface(fProxiedPtr, fImplWeakPtr, fValidCustomColumns, fDataSource);
1737  newInterface.fValidCustomColumns.emplace_back(name);
1738  return newInterface;
1739  }
1740 
1741  // This overload is chosen when the callable passed to Define or DefineSlot returns void.
1742  // It simply fires a compile-time error. This is preferable to a static_assert in the main `Define` overload because
1743  // this way compilation of `Define` has no way to continue after throwing the error.
1744  template <
1745  typename F, typename ExtraArgs,
1746  typename std::enable_if<!std::is_convertible<F, std::string>::value &&
1747  !std::is_default_constructible<typename TTraits::CallableTraits<F>::ret_type>::value,
1748  int>::type = 0>
1750  {
1751  static_assert(std::is_default_constructible<typename TTraits::CallableTraits<F>::ret_type>::value,
1752  "Error in `Define`: type returned by expression is not default-constructible");
1753  return *this; // never reached
1754  }
1755 
1756  ////////////////////////////////////////////////////////////////////////////
1757  /// \brief Implementation of snapshot
1758  /// \param[in] treename The name of the TTree
1759  /// \param[in] filename The name of the TFile
1760  /// \param[in] columnList The list of names of the branches to be written
1761  /// The implementation exploits Foreach. The association of the addresses to
1762  /// the branches takes place at the first event. This is possible because
1763  /// since there are no copies, the address of the value passed by reference
1764  /// is the address pointing to the storage of the read/created object in/by
1765  /// the TTreeReaderValue/TemporaryBranch
1766  template <typename... BranchTypes>
1768  const ColumnNames_t &columnList, const TSnapshotOptions &options)
1769  {
1770  TDFInternal::CheckSnapshot(sizeof...(BranchTypes), columnList.size());
1771 
1772  auto df = GetDataFrameChecked();
1773  auto validCols =
1774  TDFInternal::GetValidatedColumnNames(*df, columnList.size(), columnList, fValidCustomColumns, fDataSource);
1775 
1776  if (fDataSource)
1777  TDFInternal::DefineDataSourceColumns(validCols, *df, TDFInternal::GenStaticSeq_t<sizeof...(BranchTypes)>(),
1778  TTraits::TypeList<BranchTypes...>(), *fDataSource);
1779 
1780  const std::string fullTreename(treename);
1781  // split name into directory and treename if needed
1782  const auto lastSlash = treename.rfind('/');
1783  std::string_view dirname = "";
1784  if (std::string_view::npos != lastSlash) {
1785  dirname = treename.substr(0, lastSlash);
1786  treename = treename.substr(lastSlash + 1, treename.size());
1787  }
1788 
1789  // add action node to functional graph and run event loop
1790  std::shared_ptr<TDFInternal::TActionBase> actionPtr;
1791  if (!ROOT::IsImplicitMTEnabled()) {
1792  // single-thread snapshot
1793  using Helper_t = TDFInternal::SnapshotHelper<BranchTypes...>;
1794  using Action_t = TDFInternal::TAction<Helper_t, Proxied, TTraits::TypeList<BranchTypes...>>;
1795  actionPtr.reset(new Action_t(Helper_t(filename, dirname, treename, validCols, columnList, options), validCols,
1796  *fProxiedPtr));
1797  } else {
1798  // multi-thread snapshot
1799  using Helper_t = TDFInternal::SnapshotHelperMT<BranchTypes...>;
1800  using Action_t = TDFInternal::TAction<Helper_t, Proxied>;
1801  actionPtr.reset(new Action_t(
1802  Helper_t(fProxiedPtr->GetNSlots(), filename, dirname, treename, validCols, columnList, options), validCols,
1803  *fProxiedPtr));
1804  }
1805  df->Book(std::move(actionPtr));
1806  df->Run();
1807 
1808  // create new TDF
1810  // Now we mimic a constructor for the TDataFrame. We cannot invoke it here
1811  // since this would introduce a cyclic headers dependency.
1812  TInterface<TLoopManager> snapshotTDF(std::make_shared<TLoopManager>(nullptr, validCols));
1813  auto chain = std::make_shared<TChain>(fullTreename.c_str());
1814  chain->Add(std::string(filename).c_str());
1815  snapshotTDF.fProxiedPtr->SetTree(chain);
1816 
1817  return snapshotTDF;
1818  }
1819 
1820  ////////////////////////////////////////////////////////////////////////////
1821  /// \brief Implementation of cache
1822  template <typename... BranchTypes, int... S>
1823  TInterface<TLoopManager> CacheImpl(const ColumnNames_t &columnList, TDFInternal::StaticSeq<S...> s)
1824  {
1825 
1826  // Check at compile time that the columns types are copy constructible
1827  constexpr bool areCopyConstructible =
1828  TDFInternal::TEvalAnd<std::is_copy_constructible<BranchTypes>::value...>::value;
1829  static_assert(areCopyConstructible, "Columns of a type which is not copy constructible cannot be cached yet.");
1830 
1831  // We share bits and pieces with snapshot. De facto this is a snapshot
1832  // in memory!
1833  TDFInternal::CheckSnapshot(sizeof...(BranchTypes), columnList.size());
1834  if (fDataSource) {
1835  auto lm = GetDataFrameChecked();
1836  TDFInternal::DefineDataSourceColumns(columnList, *lm, s, TTraits::TypeList<BranchTypes...>(), *fDataSource);
1837  }
1838  std::tuple<
1840  colHolders;
1841 
1842  // TODO: really fix the type of the Take....
1843  std::initializer_list<int> expander0{(
1844  // This gets expanded
1845  std::get<S>(colHolders).fContent = std::move(
1846  Take<typename std::decay<decltype(std::get<S>(colHolders))>::type::value_type>(columnList[S]).GetValue()),
1847  0)...};
1848  (void)expander0;
1849 
1850  auto nEntries = std::get<0>(colHolders).fContent.size();
1851 
1852  TInterface<TLoopManager> cachedTDF(std::make_shared<TLoopManager>(nEntries));
1853  const ColumnNames_t noCols = {};
1854 
1855  // Now we define the data columns. We add the name of the valid custom columns by hand later.
1856  auto lm = cachedTDF.GetDataFrameChecked();
1857  std::initializer_list<int> expander1{(
1858  // This gets expanded
1859  lm->Book(
1860  std::make_shared<TDFDetail::TCustomColumn<typename std::decay<decltype(std::get<S>(colHolders))>::type,
1861  TDFDetail::TCCHelperTypes::TSlotAndEntry>>(
1862  columnList[S], std::move(std::get<S>(colHolders)), noCols, lm.get(), true)),
1863  0)...};
1864  (void)expander1;
1865 
1866  // Add the defined columns
1867  auto &vc = cachedTDF.fValidCustomColumns;
1868  vc.insert(vc.end(), columnList.begin(), columnList.end());
1869  return cachedTDF;
1870  }
1871 
1872 protected:
1873  /// Get the TLoopManager if reachable. If not, throw.
1874  std::shared_ptr<TLoopManager> GetDataFrameChecked()
1875  {
1876  auto df = fImplWeakPtr.lock();
1877  if (!df) {
1878  throw std::runtime_error("The main TDataFrame is not reachable: did it go out of scope?");
1879  }
1880  return df;
1881  }
1882 
1883  TInterface(const std::shared_ptr<Proxied> &proxied, const std::weak_ptr<TLoopManager> &impl,
1884  const ColumnNames_t &validColumns, TDataSource *ds)
1885  : fProxiedPtr(proxied), fImplWeakPtr(impl), fValidCustomColumns(validColumns), fDataSource(ds)
1886  {
1887  }
1888 
1889  /// Only enabled when building a TInterface<TLoopManager>
1890  template <typename T = Proxied, typename std::enable_if<std::is_same<T, TLoopManager>::value, int>::type = 0>
1891  TInterface(const std::shared_ptr<Proxied> &proxied)
1892  : fProxiedPtr(proxied), fImplWeakPtr(proxied->GetSharedPtr()), fValidCustomColumns(),
1893  fDataSource(proxied->GetDataSource())
1894  {
1895  AddDefaultColumns();
1896  }
1897 
1898  const std::shared_ptr<Proxied> &GetProxiedPtr() const { return fProxiedPtr; }
1899 };
1900 
1901 template <>
1903 {
1904  return "ROOT::Detail::TDF::TFilterBase";
1905 }
1906 
1907 template <>
1909 {
1910  return "ROOT::Detail::TDF::TLoopManager";
1911 }
1912 
1913 template <>
1915 {
1916  return "ROOT::Detail::TDF::TRangeBase";
1917 }
1918 
1919 } // end NS TDF
1920 } // end NS Experimental
1921 } // end NS ROOT
1922 
1923 #endif // ROOT_TDF_INTERFACE
ColumnNames_t ConvertRegexToColumns(std::string_view columnNameRegexp, std::string_view callerName)
TResultProxy< TDFDetail::SumReturnType_t< T > > Sum(std::string_view columnName="", const TDFDetail::SumReturnType_t< T > &initValue=TDFDetail::SumReturnType_t< T >{})
Return the sum of processed column values (lazy action)
typename RemoveFirstParameter< T >::type RemoveFirstParameter_t
Definition: TypeTraits.hxx:185
TInterface(const std::shared_ptr< Proxied > &proxied, const std::weak_ptr< TLoopManager > &impl, const ColumnNames_t &validColumns, TDataSource *ds)
TTraits::TakeFirstParameter_t< T > type
std::vector< bool > FindUndefinedDSColumns(const ColumnNames_t &requestedCols, const ColumnNames_t &definedDSCols)
Return a bitset each element of which indicates whether the corresponding element in selectedColumns ...
std::shared_ptr< TFilterBase > UpcastNode(const std::shared_ptr< TFilterBase > ptr)
The contained type alias is double if T == TInferType, U if T == std::container<U>, T otherwise.
TResultProxy< T > Reduce(F f, std::string_view columnName, const T &redIdentity)
Execute a user-defined reduce operation on the values of a column.
A collection of options to steer the creation of the dataset on file.
Definition: TDFUtils.hxx:38
An array of TObjects.
Definition: TObjArray.h:37
A struct which stores the parameters of a TH2D.
std::enable_if< std::is_default_constructible< typename TTraits::CallableTraits< F >::ret_type >::value, TInterface< Proxied > >::type DefineImpl(std::string_view name, F &&expression, const ColumnNames_t &columns)
std::vector< value_type > fContent
TInterface< TLoopManager > CacheImpl(const ColumnNames_t &columnList, TDFInternal::StaticSeq< S... > s)
Implementation of cache.
basic_string_view< char > string_view
Definition: RStringView.h:35
Namespace for new ROOT classes and functions.
Definition: StringConv.hxx:21
TInterface< TLoopManager > Cache(std::string_view columnNameRegexp="")
Save selected columns in memory.
typename TDFInternal::ValueType< T >::value_type RealT_t
const std::shared_ptr< Proxied > & GetProxiedPtr() const
TResultProxy< ULong64_t > Count()
Return the number of entries processed (lazy action)
typename TakeFirstParameter< T >::type TakeFirstParameter_t
Definition: TypeTraits.hxx:171
TResultProxy< T > MakeResultProxy(const std::shared_ptr< T > &r, const std::shared_ptr< TLoopManager > &df, TDFInternal::TActionBase *actionPtr)
Create a TResultProxy and set its pointer to the corresponding TAction This overload is invoked by no...
std::pair< Double_t, Double_t > Range_t
Definition: TGLUtil.h:1193
double T(double x)
Definition: ChebyshevPol.h:34
bool AtLeastOneEmptyString(const std::vector< std::string_view > strings)
TInterface< TLoopManager > Cache(const ColumnNames_t &columnList)
Save selected columns in memory.
TH1 * h
Definition: legend2.C:5
TResultProxy< T > Reduce(F f, std::string_view columnName="")
Execute a user-defined reduce operation on the values of a column.
void CheckCustomColumn(std::string_view definedCol, TTree *treePtr, const ColumnNames_t &customCols, const ColumnNames_t &dataSourceColumns)
Definition: TDFUtils.cxx:274
TResultProxy< ActionResultType > CreateAction(const ColumnNames_t &columns, const std::shared_ptr< ActionResultType > &r)
TInterface< TLoopManager > Snapshot(std::string_view treename, std::string_view filename, std::string_view columnNameRegexp="", const TSnapshotOptions &options=TSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
Regular expression class.
Definition: TRegexp.h:31
std::shared_ptr< TLoopManager > UpcastNode(const std::shared_ptr< TLoopManager > ptr)
Long_t CallJitTransformation(std::string_view transformation, std::string_view nodeName, std::string_view expression, std::string_view returnTypeName)
TActionBase * BuildAndBook(const ColumnNames_t &bl, const std::shared_ptr< double > &meanV, const unsigned int nSlots, TLoopManager &loopManager, PrevNodeType &prevNode, ActionTypes::Mean *)
TResultProxy< T > Fill(T &&model, const ColumnNames_t &columnList)
Return an object of type T on which T::Fill will be called once per event (lazy action) ...
Short_t Min(Short_t a, Short_t b)
Definition: TMathBase.h:168
#define gInterpreter
Definition: TInterpreter.h:526
void ForeachSlot(F f, const ColumnNames_t &columns={})
Execute a user-defined function requiring a processing slot index on each entry (instant action) ...
TInterface< Proxied > DefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns={})
Creates a custom column with a value dependent on the processing slot and the current entry...
A struct which stores the parameters of a TProfile2D.
void DefineDSColumnHelper(std::string_view name, TLoopManager &lm, TDataSource &ds)
Helper function to be used by DefineDataSourceColumns
TResultProxy<::TProfile > Profile1D(const TProfile1DModel &model)
A struct which stores the parameters of a TH3D.
void Book(const ActionBasePtr_t &actionPtr)
Definition: TDFNodes.cxx:426
TInterface< Proxied > Alias(std::string_view alias, std::string_view columnName)
Allow to refer to a column with a different name.
std::vector< std::string > FindUsedColumnNames(std::string_view, TObjArray *, const std::vector< std::string > &)
void AddDataSourceColumn(std::string_view name)
Definition: TDFNodes.hxx:198
MinReturnType_t< T > MaxReturnType_t
TInterface< TLoopManager > SnapshotImpl(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList, const TSnapshotOptions &options)
Implementation of snapshot.
Double_t GetXmin() const
Definition: TAxis.h:133
typename TDFInternal::TMinReturnType< T >::type MinReturnType_t
The aliased type is double if T == TInferType, U if T == container<U>, T otherwise.
TInterface< TLoopManager > Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList, const TSnapshotOptions &options=TSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
void DefineDataSourceColumns(const std::vector< std::string > &columns, TLoopManager &lm, StaticSeq< S... >, TTraits::TypeList< ColumnTypes... >, TDataSource &ds)
Take a list of data-source column names and define the ones that haven&#39;t been defined yet...
static std::string GetNodeTypeName()
Return string containing fully qualified type name of the node pointed by fProxied.
unsigned int GetNSlots() const
Definition: TDFNodes.hxx:368
ColumnNames_t GetColumnNames()
Returns the names of the available columns.
std::string JitBuildAndBook(const ColumnNames_t &bl, const std::string &prevNodeTypename, void *prevNode, const std::type_info &art, const std::type_info &at, const void *r, TTree *tree, const unsigned int nSlots, const std::map< std::string, TmpBranchBasePtr_t > &customColumns, TDataSource *ds, const std::shared_ptr< TActionBase *> *const actionPtrPtr)
TResultProxy<::TH2D > Histo2D(const TH2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
Fill and return a weighted two-dimensional histogram (lazy action)
TResultProxy<::TH1D > Histo1D(const TH1DModel &model={"", "", 128u, 0., 0.})
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action) ...
const ColumnNames_t & GetCustomColumnNames() const
Definition: TDFNodes.hxx:175
std::vector< RealT_t > VTColl_t
typename std::conditional< isAB &&TDFInternal::IsVector_t< COLL >::value, std::vector< VTColl_t >, COLL >::type NewC0_t
bool IsInternalColumn(std::string_view colName)
Definition: TDFUtils.cxx:359
ColumnNames_t GetValidatedColumnNames(TLoopManager &lm, const unsigned int nColumns, const ColumnNames_t &columns, const ColumnNames_t &validCustomColumns, TDataSource *ds)
Given the desired number of columns and the user-provided list of columns:
TInterface< TDFDetail::TFilter< F, Proxied > > Filter(F f, const std::initializer_list< std::string > &columns)
Append a filter to the call graph.
TResultProxy< TDFDetail::MaxReturnType_t< T > > Max(std::string_view columnName="")
Return the maximum of processed column values (lazy action)
typename std::conditional< isAB &&TDFInternal::IsList_t< NewC0_t >::value, std::list< VTColl_t >, NewC0_t >::type NewC1_t
TResultProxy< double > Mean(std::string_view columnName="")
Return the mean of processed column values (lazy action)
RooArgSet S(const RooAbsArg &v1)
Smart pointer for the return type of actions.
#define F(x, y, z)
std::shared_ptr< T > * MakeSharedOnHeap(const std::shared_ptr< T > &shPtr)
std::string printValue(const TDatime *val)
Print a TDatime at the prompt.
Definition: TDatime.cxx:514
TResultProxy< TDFDetail::MinReturnType_t< T > > Min(std::string_view columnName="")
Return the minimum of processed column values (lazy action)
TResultProxy<::TH1D > Histo1D(std::string_view vName, std::string_view wName)
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action) ...
TInterface< TLoopManager > Cache(const ColumnNames_t &columnList)
Save selected columns in memory.
ROOT::R::TRInterface & r
Definition: Object.C:4
TDataSource defines an API that TDataFrame can use to read arbitrary data formats.
Definition: TDataSource.hxx:51
TInterface< TDFDetail::TRange< Proxied > > Range(unsigned int start, unsigned int stop, unsigned int stride=1)
Creates a node that filters entries based on range.
TResultProxy<::TH1D > Histo1D(const TH1DModel &model={"", "", 128u, 0., 0.}, std::string_view vName="")
Fill and return a one-dimensional histogram with the values of a column (lazy action) ...
TResultProxy<::TH3D > Histo3D(const TH3DModel &model)
TInterface< TDFDetail::TFilter< F, Proxied > > Filter(F f, const ColumnNames_t &columns={}, std::string_view name="")
Append a filter to the call graph.
TResultProxy<::TH1D > Histo1D(const TH1DModel &model, std::string_view vName, std::string_view wName)
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action) ...
Double_t Mean(Long64_t n, const T *a, const Double_t *w=0)
Definition: TMath.h:972
TResultProxy<::TProfile2D > Profile2D(const TProfile2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view v3Name, std::string_view wName)
Fill and return a two-dimensional profile (lazy action)
TInterface< Proxied > Define(std::string_view name, F expression, const ColumnNames_t &columns={})
Creates a custom column.
A struct which stores the parameters of a TProfile.
std::shared_ptr< TLoopManager > GetDataFrameChecked()
Get the TLoopManager if reachable. If not, throw.
TInterface< TDFDetail::TFilter< F, Proxied > > Filter(F f, std::string_view name)
Append a filter to the call graph.
TActionBase * BuildAndBook(const ColumnNames_t &bl, const std::shared_ptr< ActionResultType > &h, const unsigned int nSlots, TLoopManager &loopManager, PrevNodeType &prevNode, ActionType *)
TResultProxy<::TH3D > Histo3D(const TH3DModel &model, std::string_view v1Name="", std::string_view v2Name="", std::string_view v3Name="")
Fill and return a three-dimensional histogram (lazy action)
TResultProxy<::TProfile2D > Profile2D(const TProfile2DModel &model)
const std::shared_ptr< Proxied > fProxiedPtr
Smart pointer to the graph node encapsulated by this TInterface.
TResultProxy<::TH1D > Histo1D(std::string_view vName)
TResultProxy<::TProfile2D > Profile2D(const TProfile2DModel &model, std::string_view v1Name="", std::string_view v2Name="", std::string_view v3Name="")
Fill and return a two-dimensional profile (lazy action)
Lightweight storage for a collection of types.
Definition: TypeTraits.hxx:27
TResultProxy< T > Fill(T &&model, const ColumnNames_t &bl)
Return an object of type T on which T::Fill will be called once per event (lazy action) ...
long Long_t
Definition: RtypesCore.h:50
TResultProxy<::TProfile > Profile1D(const TProfile1DModel &model, std::string_view v1Name="", std::string_view v2Name="")
Fill and return a one-dimensional profile (lazy action)
TInterface< TFilterBase > Filter(std::string_view expression, std::string_view name="")
Append a filter to the call graph.
Ssiz_t Index(const TString &str, Ssiz_t *len, Ssiz_t start=0) const
Find the first occurrence of the regexp in string and return the position, or -1 if there is no match...
Definition: TRegexp.cxx:209
TText * text
unsigned long long ULong64_t
Definition: RtypesCore.h:70
Print a TSeq at the prompt:
Definition: TDatime.h:115
void Run(unsigned int slot, Long64_t entry) final
Definition: TDFNodes.hxx:401
typename TTakeRealTypes< T, C >::RealColl_t ColType_t
void Report()
Print filtering statistics on screen.
const std::weak_ptr< TLoopManager > fImplWeakPtr
Weak pointer to the TLoopManager at the root of the graph.
static RooMathCoreReg dummy
ROOT type_traits extensions.
Definition: TypeTraits.hxx:23
static constexpr double s
Extract types from the signature of a callable object. See CallableTraits.
Definition: TypeTraits.hxx:38
TResultProxy<::TH2D > Histo2D(const TH2DModel &model, std::string_view v1Name="", std::string_view v2Name="")
Fill and return a two-dimensional histogram (lazy action)
Binding & operator=(OUT(*fun)(void))
TResultProxy< ActionResultType > CreateAction(const ColumnNames_t &columns, const std::shared_ptr< ActionResultType > &r, const int nColumns=-1)
TResultProxy<::TH3D > Histo3D(const TH3DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view v3Name, std::string_view wName)
Fill and return a three-dimensional histogram (lazy action)
typedef void((*Func_t)())
void CheckSnapshot(unsigned int nTemplateParams, unsigned int nColumnNames)
Definition: TDFUtils.cxx:300
std::string ColumnName2ColumnTypeName(const std::string &colName, TTree *tree, TCustomColumnBase *tmpBranch, TDataSource *ds)
Return a string containing the type of the given branch.
Definition: TDFUtils.cxx:123
std::vector< T ** > GetColumnReaders(std::string_view columnName)
Called at most once per column by TDF.
Definition: TDataSource.hxx:78
Bool_t IsImplicitMTEnabled()
Returns true if the implicit multi-threading in ROOT is enabled.
Definition: TROOT.cxx:584
ROOT&#39;s TDataFrame offers a high level interface for analyses of data stored in TTrees.
Definition: TDataFrame.hxx:39
Short_t Max(Short_t a, Short_t b)
Definition: TMathBase.h:200
A struct which stores the parameters of a TH1D.
const Int_t kError
Definition: TError.h:39
ColumnNames_t fValidCustomColumns
Names of columns Defined for this branch of the functional graph.
you should not use this method at all Int_t Int_t Double_t Double_t Double_t Int_t Double_t Double_t Double_t Double_t b
Definition: TRolke.cxx:630
TInterfaceJittedDefine Define(std::string_view name, std::string_view expression)
Creates a custom column.
void Foreach(F f, const ColumnNames_t &columns={})
Execute a user-defined function on each entry (instant action)
Definition: tree.py:1
typename std::conditional< isAB &&TDFInternal::IsDeque_t< NewC1_t >::value, std::deque< VTColl_t >, NewC1_t >::type NewC2_t
std::shared_ptr< TCustomColumnBase > TmpBranchBasePtr_t
Definition: first.py:1
TInterface< Proxied > DefineImpl(std::string_view, F, const ColumnNames_t &={})
void CallBuildAndBook(PrevNodeType &prevNode, const ColumnNames_t &bl, const unsigned int nSlots, const std::shared_ptr< ActionResultType > *rOnHeap, const std::shared_ptr< TActionBase *> *actionPtrPtrOnHeap)
Convenience function invoked by jitted code to build action nodes at runtime.
Long_t JitTransformation(void *thisPtr, std::string_view methodName, std::string_view interfaceTypeName, std::string_view name, std::string_view expression, const std::map< std::string, std::string > &aliasMap, const ColumnNames_t &branches, const std::vector< std::string > &customColumns, const std::map< std::string, TmpBranchBasePtr_t > &tmpBookedBranches, TTree *tree, std::string_view returnTypeName, TDataSource *ds)
virtual const std::vector< std::string > & GetColumnNames() const =0
Returns a reference to the collection of the dataset&#39;s column names.
value_type * operator()(unsigned int, ULong64_t iEvent)
TInterface< TDFDetail::TRange< Proxied > > Range(unsigned int stop)
Creates a node that filters entries based on range.
Double_t GetXmax() const
Definition: TAxis.h:134
MinReturnType_t< T > SumReturnType_t
ColumnNames_t GetBranchNames(TTree &t)
Get all the branches names, including the ones of the friend trees.
Definition: TDFUtils.cxx:265
char name[80]
Definition: TGX11.cxx:109
The public interface to the TDataFrame federation of classes.
TInterface< TLoopManager > Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList, const TSnapshotOptions &options=TSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
TInterface< Proxied > DefineSlot(std::string_view name, F expression, const ColumnNames_t &columns={})
Creates a custom column with a value dependent on the processing slot.
TAxis * GetXaxis()
Definition: TH1.h:305
TResultProxy<::TH2D > Histo2D(const TH2DModel &model)
TResultProxy< typename TDFDetail::ColType_t< T, COLL > > Take(std::string_view column="")
Return a collection of values of a column (lazy action, returns a std::vector by default) ...
TResultProxy<::TProfile > Profile1D(const TProfile1DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
Fill and return a one-dimensional profile (lazy action)
TInterface(const std::shared_ptr< Proxied > &proxied)
Only enabled when building a TInterface<TLoopManager>