MADNESS  version 0.9
lbdeux.h
Go to the documentation of this file.
1 /*
2  This file is part of MADNESS.
3 
4  Copyright (C) 2007,2010 Oak Ridge National Laboratory
5 
6  This program is free software; you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation; either version 2 of the License, or
9  (at your option) any later version.
10 
11  This program is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  GNU General Public License for more details.
15 
16  You should have received a copy of the GNU General Public License
17  along with this program; if not, write to the Free Software
18  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 
20  For more information please contact:
21 
22  Robert J. Harrison
23  Oak Ridge National Laboratory
24  One Bethel Valley Road
25  P.O. Box 2008, MS-6367
26 
27  email: harrisonrj@ornl.gov
28  tel: 865-241-3937
29  fax: 865-572-0680
30 
31  $Id$
32 */
33 #ifndef MADNESS_MRA_IBDEUX_H__INCLUDED
34 #define MADNESS_MRA_IBDEUX_H__INCLUDED
35 
36 #include <madness/madness_config.h>
37 #include <map>
38 #include <queue>
40 #include <madness/world/worlddc.h>
41 
42 #include <madness/mra/key.h>
44 
48 
49 namespace madness {
50 
51  template<typename T, std::size_t NDIM>
52  class FunctionNode;
53 
54  template<typename T, std::size_t NDIM>
55  class Function;
56 
57  template <std::size_t NDIM>
58  class LBDeuxPmap : public WorldDCPmapInterface< Key<NDIM> > {
59  typedef Key<NDIM> keyT;
60  typedef std::pair<keyT,ProcessID> pairT;
61  typedef std::map<keyT,ProcessID> mapT;
62  mapT map;
63  typedef typename mapT::const_iterator iteratorT;
64 
65  public:
66  LBDeuxPmap(const std::vector<pairT>& v) {
67  for (unsigned int i=0; i<v.size(); ++i) {
68  map.insert(v[i]);
69  }
70  }
71 
72  // If level 0 is not entered as a node this will
73  // be an infinite loop.
74  ProcessID owner(const keyT& key) const {
75  while (key.level() >= 0) {
76  iteratorT it = map.find(key);
77  if (it == map.end()) {
78  return owner(key.parent());
79  }
80  else {
81  return it->second;
82  }
83  }
84  madness::print("Mon Dieux!", key);
85  throw "LBDeuxPmap: lookup failed";
86  }
87 
88  void print() const {
89  madness::print("LBDeuxPmap");
90  }
91  };
92 
93 
94 
95  template <std::size_t NDIM>
96  class LBNodeDeux {
97  static const int nchild = (1<<NDIM);
98  typedef Key<NDIM> keyT;
99  typedef LBNodeDeux<NDIM> nodeT;
101  volatile double child_cost[nchild];
102  volatile double my_cost;
103  volatile double total_cost;
104  volatile bool gotkids;
105  AtomicInt nsummed;
106 
108  int index(const keyT& key) {
109  int ind = 0;
110  for (std::size_t d=0; d<NDIM; ++d) ind += ((key.translation()[d])&0x1) << d;
111  return ind;
112  }
113 
114  public:
116  : my_cost(0.0), total_cost(0.0), gotkids(false) {
117  nsummed = 0;
118  for (int i=0; i<nchild; ++i)
119  child_cost[i] = 0.0;
120  }
121 
122  LBNodeDeux(const LBNodeDeux<NDIM>& other) :
123  my_cost(other.my_cost), total_cost(other.total_cost), gotkids(other.gotkids)
124  {
125  nsummed = other.nsummed;
126  for (int i=0; i<nchild; ++i)
127  child_cost[i] = other.child_cost[i];
128  }
129 
131  for (int i=0; i<nchild; ++i)
132  child_cost[i] = other.child_cost[i];
133  my_cost = other.my_cost;
134  total_cost = other.total_cost;
135  gotkids = other.gotkids;
136  nsummed = other.nsummed;
137 
138  return *this;
139  }
140 
141  bool has_children() const {
142  return gotkids;
143  }
144 
145  double get_total_cost() const {
146  return total_cost;
147  }
148 
150  Void add(double cost, bool got_kids) {
151  total_cost = (my_cost += cost);
152  gotkids = gotkids || got_kids;
153  return None;
154  }
155 
157  Void sum(const treeT& tree, const keyT& child, double value) {
158  child_cost[index(child)] = value;
159  ++nsummed;
160  if (nsummed == nchild) {
161  for (int i=0; i<nchild; ++i) total_cost += child_cost[i];
162  if (child.level() > 1) {
163  keyT key = child.parent();
164  keyT parent = key.parent();
165  const_cast<treeT&>(tree).task(parent, &nodeT::sum, tree, key, double(total_cost));
166  }
167  }
168  return None;
169  }
170 
171 
173 
176  Void deleter(const treeT& tree, const keyT& key) {
177  total_cost = my_cost = -1.0;
178  if (has_children()) {
179  for (KeyChildIterator<NDIM> kit(key); kit; ++kit) {
180  const keyT child = kit.key();
181  const_cast<treeT&>(tree).task(child, &nodeT::deleter, tree, child);
182  }
183  }
184  return None;
185  }
186 
188  Void partition(const treeT& tree, const keyT& key, double avg) {
189  if (has_children()) {
190  // Sort children in descending cost order
191  keyT keys[nchild];
192  double vals[nchild];
193  for (KeyChildIterator<NDIM> kit(key); kit; ++kit) {
194  const keyT child = kit.key();
195  int ind = index(child);
196  keys[ind] = child;
197  vals[ind] = child_cost[ind];
198  }
199  for (int i=0; i<nchild; ++i) {
200  for (int j=i+1; j<nchild; ++j) {
201  if (vals[i] < vals[j]) {
202  std::swap(vals[i],vals[j]);
203  std::swap(keys[i],keys[j]);
204  }
205  }
206  }
207 
208  // Split off subtrees in decreasing cost order
209  for (int i=0; i<nchild; ++i) {
210  if (total_cost <= avg) {
211  const_cast<treeT&>(tree).task(keys[i], &nodeT::deleter, tree, keys[i]);
212  }
213  else {
214  total_cost -= vals[i];
215  const_cast<treeT&>(tree).task(keys[i], &nodeT::partition, tree, keys[i], avg);
216  }
217  }
218  }
219  return None;
220  }
221 
222  template <typename Archive>
223  void serialize(Archive& ar) {
224  ar & archive::wrap_opaque(this,1);
225  }
226  };
227 
228 
229  template <std::size_t NDIM>
231  typedef Key<NDIM> keyT;
232  typedef LBNodeDeux<NDIM> nodeT;
234  typedef typename treeT::iterator iteratorT;
235  typedef typename treeT::const_iterator const_iteratorT;
236  World& world;
237  treeT tree;
238 
239 
240  template <typename T, typename costT>
241  struct add_op {
242  LoadBalanceDeux* lb;
243  const costT& costfn;
244  add_op(LoadBalanceDeux* lb, const costT& costfn) : lb(lb), costfn(costfn) {}
245  void operator()(const keyT& key, const FunctionNode<T,NDIM>& node) const {
246  lb->tree.send(key, &nodeT::add, costfn(key,node), node.has_children());
247  }
248  };
249 
251  double sum() {
252  world.gop.fence();
253  const_iteratorT end = tree.end();
254  for (const_iteratorT it=tree.begin(); it!=end; ++it) {
255  const keyT& key = it->first;
256  const nodeT& node = it->second;
257  if (!node.has_children() && key.level() > 0) {
258  tree.task(key.parent(), &nodeT::sum, tree, key, node.get_total_cost());
259  }
260  }
261  world.gop.fence();
262  double total;
263  keyT key0(0);
264  if (world.rank() == tree.owner(key0)) {
265  total = tree.find(key0).get()->second.get_total_cost();
266  }
267  world.gop.broadcast(total, tree.owner(key0));
268  world.gop.fence();
269 
270  return total;
271  }
272 
274  static bool compare(const std::pair<keyT,double>& a, const std::pair<keyT,double>& b) {
275  return a.second < b.second;
276  }
277 
278 
279  public:
281  : world(world)
282  , tree(world, FunctionDefaults<NDIM>::get_pmap()) {
283  world.gop.fence();
284  };
285 
287  template <typename T, typename costT>
288  void add_tree(const Function<T,NDIM>& f, const costT& costfn, bool fence=false) {
289  const_cast<Function<T,NDIM>&>(f).unaryop_node(add_op<T,costT>(this,costfn), fence);
290  }
291 
293  void print_tree(const keyT& key = keyT(0)) {
294  Future<iteratorT> futit = tree.find(key);
295  iteratorT it = futit.get();
296  if (it != tree.end()) {
297  for (int i=0; i<key.level(); ++i) std::cout << " ";
298  print(key, it->second.get_total_cost());
299 
300  if (it->second.has_children()) {
301  for (KeyChildIterator<NDIM> kit(key); kit; ++kit) {
302  print_tree(kit.key());
303  }
304  }
305  }
306  }
307 
308  struct CostPerProc {
309  double cost;
310  int proc;
311  CostPerProc() : cost(0.0), proc(0) {}
312  CostPerProc(double cost, int proc) : cost(cost), proc(proc) {}
313  bool operator<(const CostPerProc& other) const {
314  return cost > other.cost; // Want ascending order
315  }
316  };
317 
319  std::shared_ptr< WorldDCPmapInterface<keyT> > load_balance(double fac = 1.0, bool printstuff=false) {
320  world.gop.fence();
321  // Compute full tree of costs
322  double avg = sum()/(world.size()*fac);
323  //if (world.rank() == 0) print_tree();
324  world.gop.fence();
325 
326  // Create partitioning
327  keyT key0(0);
328  if (world.rank() == tree.owner(key0)) {
329  tree.send(key0, &nodeT::partition, tree, key0, avg*1.1);
330  }
331  world.gop.fence();
332 
333  // Collect entire vector onto node0
334  std::vector< std::pair<keyT,double> > results;
335  const_iteratorT end = tree.end();
336  for (const_iteratorT it=tree.begin(); it!=end; ++it) {
337  if (it->second.get_total_cost() >= 0) {
338  results.push_back(std::make_pair(it->first,it->second.get_total_cost()));
339  }
340  }
341  results = world.gop.concat0(results, 128*1024*1024);
342  world.gop.fence();
343 
344  std::vector< std::pair<keyT,ProcessID> > map;
345 
346  if (world.rank() == 0) {
347 
348  std::sort(results.begin(), results.end(), compare);
349  if (printstuff) {
350  print("THESE ARE THE INITIAL SUBTREES");
351  for (unsigned int i=0; i<results.size(); ++i) print(i,results[i]);
352  }
353 
354  // Now use bin packing to cram the results together
355  map.reserve(results.size());
356 
357  // Shove the first nproc entries directly into the queue
358  unsigned int nproc = world.size();
359  std::priority_queue<CostPerProc> costs;
360  for (unsigned int p=0; p<nproc && !results.empty(); ++p) {
361  const std::pair<keyT,double>& f = results.back();
362  costs.push(CostPerProc(f.second,p));
363  map.push_back(std::make_pair(f.first,p));
364  results.pop_back();
365  }
366 
367  // Process the remainder using the sorting maintained by the priority queue
368  while (!results.empty()) {
369  const std::pair<keyT,double>& f = results.back();
370  CostPerProc top = costs.top();
371  costs.pop();
372  top.cost += f.second;
373  costs.push(top);
374  map.push_back(std::make_pair(f.first,top.proc));
375  results.pop_back();
376  }
377  if (printstuff) {
378  print("THIS IS THE MAP");
379  print(map);
380  print("THESE ARE THE COSTS PER PROCESSOR");
381  while (!costs.empty()) {
382  print(costs.top().proc,costs.top().cost);
383  costs.pop();
384  }
385  }
386  }
387 
388  world.gop.fence();
389  world.gop.broadcast_serializable(map, 0);
390  world.gop.fence();
391 
392  // Return the Procmap
393 
395  }
396  };
397 }
398 
399 
400 #endif // MADNESS_MRA_IBDEUX_H__INCLUDED
401 
WorldGopInterface & gop
Global operations.
Definition: worldfwd.h:462
Definition: shared_ptr_bits.h:38
FunctionDefaults holds default paramaters as static class members.
Definition: funcdefaults.h:175
bool find(accessor &acc, const keyT &key)
Write access to LOCAL value by key. Returns true if found, false otherwise (always false for remote)...
Definition: worlddc.h:740
void print_tree(const keyT &key=keyT(0))
Printing for the curious.
Definition: lbdeux.h:293
bool has_children() const
Returns true if this node has children.
Definition: funcimpl.h:196
const Vector< Translation, NDIM > & translation() const
Definition: key.h:225
LBNodeDeux< NDIM > & operator=(const LBNodeDeux< NDIM > &other)
Definition: lbdeux.h:130
const int NDIM
Definition: tdse1.cc:44
std::shared_ptr< WorldDCPmapInterface< keyT > > load_balance(double fac=1.0, bool printstuff=false)
Actually does the partitioning of the tree.
Definition: lbdeux.h:319
Definition: lbdeux.h:230
Iterator for distributed container wraps the local iterator.
Definition: worlddc.h:159
double get_total_cost() const
Definition: lbdeux.h:145
Void partition(const treeT &tree, const keyT &key, double avg)
Descends tree deleting all except internal nodes and sub-tree parents.
Definition: lbdeux.h:188
ProcessID owner(const keyT &key) const
Maps key to processor.
Definition: lbdeux.h:74
LoadBalanceDeux(World &world)
Definition: lbdeux.h:280
NDIM & f
Definition: mra.h:2179
iterator end()
Returns an iterator past the end of the local data (no communication)
Definition: worlddc.h:835
ProcessID size() const
Returns the number of processes in this world (same as MPI_Comm_size())
Definition: worldfwd.h:533
Provides FunctionDefaults and utilities for coordinate transformation.
iterator begin()
Returns an iterator to the beginning of the local data (no communication)
Definition: worlddc.h:822
LBDeuxPmap(const std::vector< pairT > &v)
Definition: lbdeux.h:66
Void add(double cost, bool got_kids)
Accumulates cost into this node.
Definition: lbdeux.h:150
bool operator<(const CostPerProc &other) const
Definition: lbdeux.h:313
Void deleter(const treeT &tree, const keyT &key)
Logically deletes this node by setting cost to -1.
Definition: lbdeux.h:176
Definition: lbdeux.h:96
CostPerProc(double cost, int proc)
Definition: lbdeux.h:312
CostPerProc()
Definition: lbdeux.h:311
void broadcast_serializable(objT &obj, ProcessID root)
Broadcast a serializable object.
Definition: worldgop.h:707
T & get()
Gets the value, waiting if necessary (error if not a local future)
Definition: worldfut.h:513
Key parent(int generation=1) const
Returns the key of the parent.
Definition: key.h:248
bool has_children() const
Definition: lbdeux.h:141
FLOAT a(int j, FLOAT z)
Definition: y1.cc:86
Level level() const
Definition: key.h:220
LBNodeDeux()
Definition: lbdeux.h:115
Definition: lbdeux.h:58
Void sum(const treeT &tree, const keyT &child, double value)
Accumulates cost up the tree from children.
Definition: lbdeux.h:157
An integer with atomic set, get, read+inc, read+dec, dec+test operations.
Definition: atomicint.h:73
A parallel world with full functionality wrapping an MPI communicator.
Definition: worldfwd.h:416
void fence()
Synchronizes all processes in communicator AND globally ensures no pending AM or tasks.
Definition: worldgop.cc:52
int proc
Definition: lbdeux.h:310
A multiresolution adaptive numerical function.
Definition: derivative.h:61
int ProcessID
Used to clearly identify process number/rank.
Definition: worldtypes.h:37
Future< MEMFUN_RETURNT(memfunT) > send(const keyT &key, memfunT memfun)
Sends message "resultT memfun()" to item (non-blocking comm if remote)
Definition: worlddc.h:919
FunctionNode holds the coefficients, etc., at each node of the 2^NDIM-tree.
Definition: derivative.h:58
ProcessID rank() const
Returns the process rank in this world (same as MPI_Comm_rank()))
Definition: worldfwd.h:526
archive_array< unsigned char > wrap_opaque(const T *, unsigned int)
Factory function to wrap pointer to contiguous data as opaque (uchar) archive_array.
Definition: archive.h:827
ProcessID owner(const keyT &key) const
Returns processor that logically owns key (no communication)
Definition: worlddc.h:786
std::vector< T > concat0(const std::vector< T > &v, size_t bufsz=1024 *1024)
Concatenate an STL vector of serializable stuff onto node 0.
Definition: worldgop.h:846
void serialize(Archive &ar)
Definition: lbdeux.h:223
double cost
Definition: lbdeux.h:309
LBNodeDeux(const LBNodeDeux< NDIM > &other)
Definition: lbdeux.h:122
void broadcast(void *buf, size_t nbyte, ProcessID root, bool dowork=true)
Broadcasts bytes from process root while still processing AM & tasks.
Definition: worldgop.cc:145
Implements WorldContainer.
void print() const
Definition: lbdeux.h:88
A future is a possibly yet unevaluated value.
Definition: ref.h:210
A type you can return when you want to return void ... use "return None".
Definition: typestuff.h:154
Interface to be provided by any process map.
Definition: worlddc.h:64
Future< REMFUTURE(MEMFUN_RETURNT(memfunT)) > task(const keyT &key, memfunT memfun, const TaskAttributes &attr=TaskAttributes())
Adds task "resultT memfun()" in process owning item (non-blocking comm if remote) ...
Definition: worlddc.h:1148
void print(const A &a)
Print a single item to std::cout terminating with new line.
Definition: print.h:122
Multidimension Key for MRA tree and associated iterators.
Holds machinery to set up Functions/FuncImpls using various Factories and Interfaces.
Definition: chem/atomutil.cc:45
FLOAT b(int j, FLOAT z)
Definition: y1.cc:79
void add_tree(const Function< T, NDIM > &f, const costT &costfn, bool fence=false)
Accumulates cost from a function.
Definition: lbdeux.h:288
void swap(mpfr::mpreal &x, mpfr::mpreal &y)
Definition: mpreal.h:3069
Key is the index for a node of the 2^NDIM-tree.
Definition: key.h:69
Iterates in lexical order thru all children of a key.
Definition: key.h:61