Logo ROOT   6.12/06
Reference Guide
TTreeProcessorMT.hxx
Go to the documentation of this file.
1 // @(#)root/thread:$Id$
2 // Author: Enric Tejedor, CERN 12/09/2016
3 
4 /*************************************************************************
5  * Copyright (C) 1995-2016, Rene Brun and Fons Rademakers. *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 #ifndef ROOT_TTreeProcessorMT
13 #define ROOT_TTreeProcessorMT
14 
15 #include "TKey.h"
16 #include "TTree.h"
17 #include "TFile.h"
18 #include "TChain.h"
19 #include "TTreeReader.h"
20 #include "TError.h"
21 #include "TEntryList.h"
22 #include "TFriendElement.h"
23 #include "ROOT/TThreadedObject.hxx"
24 
25 #include <string.h>
26 #include <functional>
27 #include <vector>
28 
29 
30 /** \class TTreeView
31  \brief A helper class that encapsulates a file and a tree.
32 
33 A helper class that encapsulates a TFile and a TTree, along with their names.
34 It is used together with TTreeProcessorMT and ROOT::TThreadedObject, so that
35 in the TTreeProcessorMT::Process method each thread can work on its own
36 <TFile,TTree> pair.
37 
38 This class can also be used with a collection of file names or a TChain, in case
39 the tree is stored in more than one file. A view will always contain only the
40 current (active) tree and file objects.
41 
42 A copy constructor is defined for TTreeView to work with ROOT::TThreadedObject.
43 The latter makes a copy of a model object every time a new thread accesses
44 the threaded object.
45 */
46 
47 namespace ROOT {
48  namespace Internal {
49 
50  /// A cluster of entries as seen by TTreeView
51  struct TreeViewCluster {
54  };
55 
56  class TTreeView {
57  private:
58  typedef std::pair<std::string, std::string> NameAlias;
59 
60  std::unique_ptr<TChain> fChain; ///< Chain on which to operate
61  std::vector<std::string> fFileNames; ///< Names of the files
62  std::string fTreeName; ///< Name of the tree
63  TEntryList fEntryList; ///< Entry numbers to be processed
64  std::vector<Long64_t> fLoadedEntries; ///<! Per-task loaded entries (for task interleaving)
65  std::vector<NameAlias> fFriendNames; ///< <name,alias> pairs of the friends of the tree/chain
66  std::vector<std::vector<std::string>> fFriendFileNames; ///< Names of the files where friends are stored
67  std::vector<std::unique_ptr<TChain>> fFriends; ///< Friends of the tree/chain
68 
69  ////////////////////////////////////////////////////////////////////////////////
70  /// Initialize TTreeView.
71  void Init()
72  {
73  // If the tree name is empty, look for a tree in the file
74  if (fTreeName.empty()) {
76  std::unique_ptr<TFile> f(TFile::Open(fFileNames[0].c_str()));
77  TIter next(f->GetListOfKeys());
78  while (TKey *key = (TKey*)next()) {
79  const char *className = key->GetClassName();
80  if (strcmp(className, "TTree") == 0) {
81  fTreeName = key->GetName();
82  break;
83  }
84  }
85  if (fTreeName.empty()) {
86  auto msg = "Cannot find any tree in file " + fFileNames[0];
87  throw std::runtime_error(msg);
88  }
89  }
90 
91  fChain.reset(new TChain(fTreeName.c_str()));
92  for (auto &fn : fFileNames) {
93  fChain->Add(fn.c_str());
94  }
95  fChain->ResetBit(TObject::kMustCleanup);
96 
97  auto friendNum = 0u;
98  for (auto &na : fFriendNames) {
99  auto &name = na.first;
100  auto &alias = na.second;
101 
102  // Build a friend chain
103  TChain *frChain = new TChain(name.c_str());
104  auto &fileNames = fFriendFileNames[friendNum];
105  for (auto &fn : fileNames)
106  frChain->Add(fn.c_str());
107 
108  // Make it friends with the main chain
109  fFriends.emplace_back(frChain);
110  fChain->AddFriend(frChain, alias.c_str());
111 
112  ++friendNum;
113  }
114  }
115 
116  ////////////////////////////////////////////////////////////////////////////////
117  /// Get and store the names, aliases and file names of the friends of the tree.
118  void StoreFriends(const TTree &tree, bool isTree)
119  {
120  auto friends = tree.GetListOfFriends();
121  if (!friends)
122  return;
123 
124  for (auto fr : *friends) {
125  auto frTree = static_cast<TFriendElement *>(fr)->GetTree();
126 
127  // Check if friend tree has an alias
128  auto realName = frTree->GetName();
129  auto alias = tree.GetFriendAlias(frTree);
130  if (alias) {
131  fFriendNames.emplace_back(std::make_pair(realName, std::string(alias)));
132  } else {
133  fFriendNames.emplace_back(std::make_pair(realName, ""));
134  }
135 
136  // Store the file names of the friend tree
137  fFriendFileNames.emplace_back();
138  auto &fileNames = fFriendFileNames.back();
139  if (isTree) {
140  auto f = frTree->GetCurrentFile();
141  fileNames.emplace_back(f->GetName());
142  } else {
143  auto frChain = static_cast<TChain *>(frTree);
144  for (auto f : *(frChain->GetListOfFiles())) {
145  fileNames.emplace_back(f->GetTitle());
146  }
147  }
148  }
149  }
150 
151  public:
152  //////////////////////////////////////////////////////////////////////////
153  /// Constructor based on a file name.
154  /// \param[in] fn Name of the file containing the tree to process.
155  /// \param[in] tn Name of the tree to process. If not provided,
156  /// the implementation will automatically search for a
157  /// tree in the file.
159  {
160  fFileNames.emplace_back(fn);
161  Init();
162  }
163 
164  //////////////////////////////////////////////////////////////////////////
165  /// Constructor based on a collection of file names.
166  /// \param[in] fns Collection of file names containing the tree to process.
167  /// \param[in] tn Name of the tree to process. If not provided,
168  /// the implementation will automatically search for a
169  /// tree in the collection of files.
170  TTreeView(const std::vector<std::string_view>& fns, std::string_view tn) : fTreeName(tn)
171  {
172  if (fns.size() > 0) {
173  for (auto& fn : fns)
174  fFileNames.emplace_back(fn);
175  Init();
176  }
177  else {
178  auto msg = "The provided list of file names is empty, cannot process tree " + fTreeName;
179  throw std::runtime_error(msg);
180  }
181  }
182 
183  //////////////////////////////////////////////////////////////////////////
184  /// Constructor based on a TTree.
185  /// \param[in] tree Tree or chain of files containing the tree to process.
187  {
188  static const TClassRef clRefTChain("TChain");
189  if (clRefTChain == tree.IsA()) {
190  TObjArray* filelist = dynamic_cast<TChain&>(tree).GetListOfFiles();
191  if (filelist->GetEntries() > 0) {
192  for (auto f : *filelist)
193  fFileNames.emplace_back(f->GetTitle());
194  StoreFriends(tree, false);
195  Init();
196  }
197  else {
198  auto msg = "The provided chain of files is empty, cannot process tree " + fTreeName;
199  throw std::runtime_error(msg);
200  }
201  }
202  else {
203  TFile *f = tree.GetCurrentFile();
204  if (f) {
205  fFileNames.emplace_back(f->GetName());
206  StoreFriends(tree, true);
207  Init();
208  }
209  else {
210  auto msg = "The specified TTree is not linked to any file, in-memory-only trees are not supported. Cannot process tree " + fTreeName;
211  throw std::runtime_error(msg);
212  }
213  }
214  }
215 
216  //////////////////////////////////////////////////////////////////////////
217  /// Constructor based on a TTree and a TEntryList.
218  /// \param[in] tree Tree or chain of files containing the tree to process.
219  /// \param[in] entries List of entry numbers to process.
221  {
222  Long64_t numEntries = entries.GetN();
223  for (Long64_t i = 0; i < numEntries; ++i) {
224  fEntryList.Enter(entries.GetEntry(i));
225  }
226  }
227 
228  //////////////////////////////////////////////////////////////////////////
229  /// Copy constructor.
230  /// \param[in] view Object to copy.
232  {
233  for (auto& fn : view.fFileNames)
234  fFileNames.emplace_back(fn);
235 
236  for (auto &fn : view.fFriendNames)
237  fFriendNames.emplace_back(fn);
238 
239  for (auto &ffn : view.fFriendFileNames) {
240  fFriendFileNames.emplace_back();
241  auto &fileNames = fFriendFileNames.back();
242  for (auto &name : ffn) {
243  fileNames.emplace_back(name);
244  }
245  }
246 
247  Init();
248  }
249 
250  //////////////////////////////////////////////////////////////////////////
251  /// Get a TTreeReader for the current tree of this view.
252  using TreeReaderEntryListPair = std::pair<std::unique_ptr<TTreeReader>, std::unique_ptr<TEntryList>>;
254  {
255  std::unique_ptr<TTreeReader> reader;
256  std::unique_ptr<TEntryList> elist;
257  if (fEntryList.GetN() > 0) {
258  // TEntryList and SetEntriesRange do not work together (the former has precedence).
259  // We need to construct a TEntryList that contains only those entry numbers
260  // in our desired range.
261  elist.reset(new TEntryList);
262  Long64_t entry = fEntryList.GetEntry(0);
263  do {
264  if (entry >= start && entry < end) // TODO can quit this loop early when entry >= end
265  elist->Enter(entry);
266  } while ((entry = fEntryList.Next()) >= 0);
267 
268  reader.reset(new TTreeReader(fChain.get(), elist.get()));
269  } else {
270  // If no TEntryList is involved we can safely set the range in the reader
271  reader.reset(new TTreeReader(fChain.get()));
272  fChain->LoadTree(start - 1);
273  reader->SetEntriesRange(start, end);
274  }
275 
276  return std::make_pair(std::move(reader), std::move(elist));
277  }
278 
279  //////////////////////////////////////////////////////////////////////////
280  /// Get the filenames for this view.
281  const std::vector<std::string> &GetFileNames() const
282  {
283  return fFileNames;
284  }
285 
286  //////////////////////////////////////////////////////////////////////////
287  /// Get the name of the tree of this view.
288  std::string GetTreeName() const
289  {
290  return fTreeName;
291  }
292 
293  //////////////////////////////////////////////////////////////////////////
294  /// Push a new loaded entry to the stack.
295  void PushLoadedEntry(Long64_t entry) { fLoadedEntries.push_back(entry); }
296 
297  //////////////////////////////////////////////////////////////////////////
298  /// Restore the tree of the previous loaded entry, if any.
300  {
301  fLoadedEntries.pop_back();
302  if (fLoadedEntries.size() > 0) {
303  fChain->LoadTree(fLoadedEntries.back());
304  }
305  }
306  };
307  } // End of namespace Internal
308 
309 
311  private:
313 
314  std::vector<ROOT::Internal::TreeViewCluster> MakeClusters();
315  public:
316  TTreeProcessorMT(std::string_view filename, std::string_view treename = "");
317  TTreeProcessorMT(const std::vector<std::string_view>& filenames, std::string_view treename = "");
320 
321  void Process(std::function<void(TTreeReader&)> func);
322 
323  };
324 
325 } // End of namespace ROOT
326 
327 #endif // defined TTreeProcessorMT
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
std::string GetName(const std::string &scope_name)
Definition: Cppyy.cxx:145
const std::vector< std::string > & GetFileNames() const
Get the filenames for this view.
An array of TObjects.
Definition: TObjArray.h:37
virtual Long64_t Next()
Return the next non-zero entry index (next after fLastIndexQueried) this function is faster than GetE...
Definition: TEntryList.cxx:890
TTreeView(const std::vector< std::string_view > &fns, std::string_view tn)
Constructor based on a collection of file names.
long long Long64_t
Definition: RtypesCore.h:69
virtual Long64_t GetN() const
Definition: TEntryList.h:75
TTreeReader is a simple, robust and fast interface to read values from a TTree, TChain or TNtuple...
Definition: TTreeReader.h:42
basic_string_view< char > string_view
Definition: RStringView.h:35
Namespace for new ROOT classes and functions.
Definition: StringConv.hxx:21
std::pair< std::unique_ptr< TTreeReader >, std::unique_ptr< TEntryList > > TreeReaderEntryListPair
Get a TTreeReader for the current tree of this view.
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format...
Definition: TFile.h:46
A cluster of entries as seen by TTreeView.
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=1, Int_t netopt=0)
Create / open a file.
Definition: TFile.cxx:3950
std::pair< std::string, std::string > NameAlias
std::string GetTreeName() const
Get the name of the tree of this view.
void function(const Char_t *name_, T fun, const Char_t *docstring=0)
Definition: RExports.h:146
Book space in a file, create I/O buffers, to fill them, (un)compress them.
Definition: TKey.h:24
std::string fTreeName
Name of the tree.
void Init()
Initialize TTreeView.
std::vector< std::string > fFileNames
Names of the files.
TTreeView(const TTreeView &view)
Copy constructor.
void StoreFriends(const TTree &tree, bool isTree)
Get and store the names, aliases and file names of the friends of the tree.
TTreeView(TTree &tree, TEntryList &entries)
Constructor based on a TTree and a TEntryList.
void RestoreLoadedEntry()
Restore the tree of the previous loaded entry, if any.
TreeReaderEntryListPair GetTreeReader(Long64_t start, Long64_t end)
virtual Long64_t GetEntry(Int_t index)
Return the number of the entry #index of this TEntryList in the TTree or TChain See also Next()...
Definition: TEntryList.cxx:657
TTreeView(TTree &tree)
Constructor based on a TTree.
if object destructor must call RecursiveRemove()
Definition: TObject.h:60
std::unique_ptr< TChain > fChain
Chain on which to operate.
void Process(std::function< void(TTreeReader &)> func)
Process the entries of a TTree in parallel.
std::vector< ROOT::Internal::TreeViewCluster > MakeClusters()
Divide input data in clusters, i.e. the workloads to distribute to tasks.
ROOT::TThreadedObject< ROOT::Internal::TTreeView > treeView
! Thread-local TreeViews
virtual Bool_t Enter(Long64_t entry, TTree *tree=0)
Add entry #entry to the list.
Definition: TEntryList.cxx:562
std::vector< Long64_t > fLoadedEntries
! Per-task loaded entries (for task interleaving)
void PushLoadedEntry(Long64_t entry)
Push a new loaded entry to the stack.
TClassRef is used to implement a permanent reference to a TClass object.
Definition: TClassRef.h:29
virtual TList * GetListOfKeys() const
A TFriendElement TF describes a TTree object TF in a file.
A chain is a collection of files containing TTree objects.
Definition: TChain.h:33
Int_t GetEntries() const
Return the number of objects in array (i.e.
Definition: TObjArray.cxx:522
Definition: tree.py:1
A TTree object has a header with a name and a title.
Definition: TTree.h:70
#define gDirectory
Definition: TDirectory.h:213
TTreeProcessorMT(std::string_view filename, std::string_view treename="")
Constructor based on a file name.
std::vector< NameAlias > fFriendNames
<name,alias> pairs of the friends of the tree/chain
A List of entry numbers in a TTree or TChain.
Definition: TEntryList.h:25
A class to process the entries of a TTree in parallel.
TEntryList fEntryList
Entry numbers to be processed.
std::vector< std::vector< std::string > > fFriendFileNames
Names of the files where friends are stored.
char name[80]
Definition: TGX11.cxx:109
virtual Int_t Add(TChain *chain)
Add all files referenced by the passed chain to this chain.
Definition: TChain.cxx:221
TTreeView(std::string_view fn, std::string_view tn)
Constructor based on a file name.
std::vector< std::unique_ptr< TChain > > fFriends
Friends of the tree/chain.