MADNESS  version 0.9
distributed_matrix.h
1 #ifndef MADNESS_DISTRIBUTED_MATRIX_H
2 #define MADNESS_DISTRIBUTED_MATRIX_H
3 
4 /*
5  This file is part of MADNESS.
6 
7  Copyright (C) 2007,2010 Oak Ridge National Laboratory
8 
9  This program is free software; you can redistribute it and/or modify
10  it under the terms of the GNU General Public License as published by
11  the Free Software Foundation; either version 2 of the License, or
12  (at your option) any later version.
13 
14  This program is distributed in the hope that it will be useful,
15  but WITHOUT ANY WARRANTY; without even the implied warranty of
16  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  GNU General Public License for more details.
18 
19  You should have received a copy of the GNU General Public License
20  along with this program; if not, write to the Free Software
21  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 
23  For more information please contact:
24 
25  Robert J. Harrison
26  Oak Ridge National Laboratory
27  One Bethel Valley Road
28  P.O. Box 2008, MS-6367
29 
30  email: harrisonrj@ornl.gov
31  tel: 865-241-3937
32  fax: 865-572-0680
33 
34  $Id$
35 */
36 
37 // THE STUFF IN THIS FILE IS IN TRANSITION! THE API AND
38 // IMPLEMENTATION WILL BOTH SHIFT RAPIDLY AS WE TRANSITION FROM
39 // REPLICATED TO DISTRIBUTED MATRIX ALGORITHMS, AND SUBSEQUENTLY
40 // REFINE THE DESIGN AND INTERFACE TO 3RD PARTY PACKAGES.
41 
42 #include <madness/world/world.h>
43 #include <utility>
44 #include <madness/tensor/tensor.h>
45 
46 namespace madness {
47 
48  // If in a fit of misplaced enthusiasm you desire to change
49  // int64_t to either long or std::size_t you should be aware that
50  // some uses below may contain quantities greater than may be
51  // represented in a 32-bit integer and may also be negative.
52  // I.e., a simple global replace will fail, though the existing
53  // test suite may not detect that. Also, large skinny matrices
54  // could easily need more than 32 bit integers to address.
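 // (Illustrative arithmetic, not part of the original comment: a 200000 x 20000
 // "skinny" matrix already holds 4e9 elements, which overflows a signed 32-bit
 // index, so element counts and offsets must remain 64-bit.)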
55 
56 
57  // Forward declarations for friends
58  class DistributedMatrixDistribution;
59  template <typename T> class DistributedMatrix;
60 
61  static inline DistributedMatrixDistribution column_distributed_matrix_distribution(World& world, int64_t n, int64_t m, int64_t coltile=0);
62  static inline DistributedMatrixDistribution row_distributed_matrix_distribution(World& world, int64_t n, int64_t m, int64_t rowtile=0);
63 
64  template <typename T>
65  DistributedMatrix<T> concatenate_rows(const DistributedMatrix<T>& a, const DistributedMatrix<T>& b);
66 
67  template <typename T>
68  DistributedMatrix<T> interleave_rows(const DistributedMatrix<T>& a, const DistributedMatrix<T>& b);
69 
70 class DistributedMatrixDistribution {
71 friend DistributedMatrixDistribution column_distributed_matrix_distribution(World& world, int64_t n, int64_t m, int64_t coltile);
72  friend DistributedMatrixDistribution row_distributed_matrix_distribution(World& world, int64_t n, int64_t m, int64_t rowtile);
73  template <typename T> friend class DistributedMatrix;
74 
75  protected:
76 World* pworld; //< The world
77 int64_t P; //< No. of processors
78  ProcessID rank; //< My processor rank
79  int64_t n; //< Column dimension of A(n,m)
80  int64_t m; //< Row dimension of A(n,m)
81  int64_t tilen; //< Tile size for column
82  int64_t tilem; //< Tile size for row
83  int64_t Pcoldim; //< Column dimension of processor grid
84  int64_t Prowdim; //< Row dimension of processor grid
85  int64_t Pcol; //< Column of processor grid for this processor
86  int64_t Prow; //< Row of processor grid for this processor
87  int64_t ilo,ihi; //< Range of column indices on this processor
88  int64_t jlo,jhi; //< Range of row indices on this processor
89  int64_t idim,jdim; //< Dimension of data on this processor
90 
91 
93 
105  DistributedMatrixDistribution(World& world, int64_t n, int64_t m, int64_t coltile, int64_t rowtile)
106  : pworld(&world)
107  , P(world.size())
108  , rank(world.rank())
109  , n(n)
110  , m(m)
111  , tilen(coltile)
112  , tilem(rowtile)
113  , Pcoldim((n-1)/tilen+1)
114  , Prowdim((m-1)/tilem+1)
115  , Pcol(rank/Prowdim)
116  , Prow(rank - Pcol*Prowdim)
117  , ilo(Pcol*tilen)
118  , ihi(std::min(ilo+tilen-1,n-1))
119  , jlo(Prow*tilem)
120  , jhi(std::min(jlo+tilem-1,m-1))
121  , idim(std::max(ihi-ilo+1,int64_t(0)))
122  , jdim(std::max(jhi-jlo+1,int64_t(0)))
123  {
124  if (ilo > ihi || jlo > jhi) {
125  ilo = jlo = 0;
126  ihi = jhi = -1;
127  }
128  }
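 // Worked example (illustrative, not part of the original source): in a
 // 6-process world with n=10, m=6, coltile=4, rowtile=3 this gives
 //   Pcoldim = (10-1)/4+1 = 3 and Prowdim = (6-1)/3+1 = 2,
 // so rank 0 maps to (Pcol,Prow)=(0,0) owning i in [0,3], j in [0,2]
 // (idim=4, jdim=3), while rank 5 maps to (2,1) owning i in [8,9], j in [3,5].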
129 
130 
131  public:
132 
133 /// Default constructor makes an invalid distribution
134 DistributedMatrixDistribution()
135 : pworld(0)
136  , P(0)
137  , rank(0)
138  , n(0)
139  , m(0)
140  , tilen(0)
141  , tilem(0)
142  , Pcoldim(0)
143  , Prowdim(0)
144  , Pcol(0)
145  , Prow(0)
146  , ilo(0)
147  , ihi(-1)
148  , jlo(0)
149  , jhi(-1)
150  , idim(0)
151  , jdim(0)
152  {}
153 
154 
155 /// Resets state to same as default constructor
156 void clear() {
157  pworld = (World*)(0);
158  P = rank = n = m = tilen = tilem = Pcoldim = Prowdim = Pcol = Prow = ilo = ihi = jlo = jhi = idim = jdim = 0;
159  }
160 
161 bool operator==(const DistributedMatrixDistribution& d) const {
162 return
163  pworld == d.pworld &&
164  P == d.P &&
165  rank == d.rank &&
166  n == d.n &&
167  m == d.m &&
168  tilen == d.tilen &&
169  tilem == d.tilem &&
170  Pcoldim == d.Pcoldim &&
171  Prowdim == d.Prowdim &&
172  Pcol == d.Pcol &&
173  Prow == d.Prow &&
174  ilo == d.ilo &&
175  ihi == d.ihi &&
176  jlo == d.jlo &&
177  jhi == d.jhi &&
178  idim == d.idim &&
179  jdim == d.jdim;
180  }
181 
182 
184 
186  int64_t coldim() const {
187  return n;
188  }
189 
191 
192 
194  int64_t rowdim() const {
195  return m;
196  }
197 
198 
200 
202  int64_t coltile() const {
203  return tilen;
204  }
205 
206 
208 
210  int64_t rowtile() const {
211  return tilem;
212  }
213 
214 
216 
218  int64_t process_coldim() const {return Pcoldim;}
219 
220 
222 
224  int64_t process_rowdim() const {return Prowdim;}
225 
226 
228 
230  int64_t local_size() const {return idim*jdim;}
231 
232 
234 
236  int64_t local_coldim() const {return idim;}
237 
238 
240 
242  int64_t local_rowdim() const {return jdim;}
243 
245 
249  void local_colrange(int64_t& ilow, int64_t& ihigh) const {
250  ilow = ilo;
251  ihigh = ihi;
252  }
253 
254 
256 
260  void local_rowrange(int64_t& jlow, int64_t& jhigh) const {
261  jlow = jlo;
262  jhigh = jhi;
263  }
264 
265 
267  int64_t local_ilow() const {
268  return ilo;
269  }
270 
272  int64_t local_ihigh() const {
273  return ihi;
274  }
275 
276 
278  int64_t local_jlow() const {
279  return jlo;
280  }
281 
282 
284  int64_t local_jhigh() const {
285  return jhi;
286  }
287 
288 
290 
297  void get_range(int p, int64_t& ilow, int64_t& ihigh, int64_t& jlow, int64_t& jhigh) const {
298  int pi = p/Prowdim;
299  int pj = p - pi*Prowdim;
300  if (pi >= process_coldim() || pj >= process_rowdim()) {
301  ilow = jlow = 0;
302  ihigh = jhigh = -1;
303  }
304  else {
305  ilow = pi*tilen;
306  jlow = pj*tilem;
307  ihigh= std::min(ilow+tilen-1,n-1);
308  jhigh= std::min(jlow+tilem-1,m-1);
309  }
310 
311  return;
312  }
313 
314 
316 
321  void get_colrange(int p, int64_t& ilow, int64_t& ihigh) const {
322  int64_t jlow, jhigh;
323  get_range(p, ilow, ihigh, jlow, jhigh);
324 
325  return;
326  }
327 
328 
330 
335  void get_rowrange(int p, int64_t& jlow, int64_t& jhigh) const {
336  int64_t ilow, ihigh;
337  get_range(p, ilow, ihigh, jlow, jhigh);
338 
339  return;
340  }
341 
342 
344 
346  World& get_world() const {return *pworld;}
347 
348 
350 
352  bool is_column_distributed() const {return process_rowdim()==1;}
353 
354 
356 
358  bool is_row_distributed() const {return process_coldim()==1;}
359 
360 
362  const DistributedMatrixDistribution& distribution() const {return *this;}
363 
364 
366  ProcessID owner(int64_t i, int64_t j) const {
367  int pcol = i/coltile();
368  int prow = j/rowtile();
369 
370  return pcol*process_rowdim() + prow;
371  }
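 // (Continuing the illustrative layout above: owner(8,1) computes
 //  pcol = 8/4 = 2 and prow = 1/3 = 0, giving process 2*2 + 0 = 4.)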
372 
373 virtual ~DistributedMatrixDistribution() {}
374 };
375 
376 
378 
389  template <typename T>
390  class DistributedMatrix : public DistributedMatrixDistribution {
391  friend DistributedMatrix<T> interleave_rows<T>(const DistributedMatrix<T>& a, const DistributedMatrix<T>& b);
392  friend DistributedMatrix<T> concatenate_rows<T>(const DistributedMatrix<T>& a, const DistributedMatrix<T>& b);
393 
394  Tensor<T> t; //< The data
395 
396  static T idij(const int64_t i, const int64_t j) {return (i==j) ? T(1) : T(0);}
397 
398  protected:
399 
401 
410  DistributedMatrix(World& world, int64_t n, int64_t m, int64_t coltile, int64_t rowtile)
411  : DistributedMatrixDistribution(world, n, m, coltile, rowtile)
412  {
413  if (idim>0 && jdim>0) t = Tensor<T>(idim,jdim);
414  }
415 
416  public:
417 
418 /// Default constructor makes an empty matrix that cannot be used except as a target for assignment
419 DistributedMatrix()
420 : DistributedMatrixDistribution()
421 , t()
422  {}
423 
424 
425 /// Constructs a distributed matrix with given distribution info
426 DistributedMatrix(const DistributedMatrixDistribution& d)
427 : DistributedMatrixDistribution(d)
428 {
429  if (idim>0 && jdim>0) t = Tensor<T>(idim,jdim);
430  }
431 
432 
434  DistributedMatrix(const DistributedMatrix<T>& A, bool deepcopy=false)
435 : DistributedMatrixDistribution(A)
436 , t(deepcopy ? copy(A.t) : A.t)
437  {}
438 
439 
440 /// Assignment copies dimensions, distribution, and shallow copy of content
441 DistributedMatrix<T>& operator=(const DistributedMatrix<T>& A) {
442 if (this != &A) {
443  DistributedMatrixDistribution::operator=(A);
444  t = A.t;
445  }
446  return *this;
447  }
448 
449  virtual ~DistributedMatrix() {}
450 
451 
453  void clear() {
454 DistributedMatrixDistribution::clear();
455 t.clear();
456  }
457 
458 
460 
462  template <typename funcT>
463  void fill(const funcT& f) {
464  for (int64_t i=ilo; i<=ihi; i++) {
465  for (int64_t j=jlo; j<=jhi; j++) {
466  t(i-ilo,j-jlo) = f(i,j);
467  }
468  }
469  }
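 // Example use (hypothetical, not from the original source):
 //   A.fill([](int64_t i, int64_t j) { return (i == j) ? 1.0 : 0.0; });
 // Only the locally owned block [ilo,ihi] x [jlo,jhi] is evaluated, so the
 // call involves no communication.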
470 
471 
473 
475  void fill(T value) {
476  t.fill(value);
477  }
478 
479 
480  void fill_identity() {
481 fill(idij);
482 }
483 
484 
486 
498  Tensor<T>& data() {return t;}
499 
501 
513  const Tensor<T>& data() const {return t;}
514 
515 
517 
519  void copy_from_replicated(const Tensor<T>& s) {
520  if (local_size() > 0) t(___) = s(Slice(ilo,ihi),Slice(jlo,jhi));
521  }
522 
524 
530  void copy_to_replicated(Tensor<T>& s) const {
531  MADNESS_ASSERT(s.iscontiguous());
532  s = 0.0;
533  if (local_size() > 0) s(Slice(ilo,ihi),Slice(jlo,jhi)) = t(___);
534  get_world().gop.sum(s.ptr(), s.size());
535  }
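 // Usage sketch (illustrative only; assumes a DistributedMatrix<double> A):
 //   Tensor<double> s(A.coldim(), A.rowdim());
 //   A.copy_to_replicated(s);     // collective; s is identical on every process afterwards
 //   A.copy_from_replicated(s);   // local; each process copies back only its own patch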
536 
538 
543  void copy_from_replicated_patch(int64_t ilow, int64_t ihigh, int64_t jlow, int64_t jhigh, const Tensor<T>& s) {
544  int64_t i0 = std::max(ilo,ilow);
545  int64_t j0 = std::max(jlo,jlow);
546  int64_t i1 = std::min(ihi,ihigh);
547  int64_t j1 = std::min(jhi,jhigh);
548  if (i0<=i1 && j0<=j1) {
549  t(Slice(i0-ilo,i1-ilo),Slice(j0-jlo,j1-jlo)) = s(Slice(i0-ilow,i1-ilow),Slice(j0-jlow,j1-jlow));
550  }
551  }
552 
554 
562  void copy_to_replicated_patch(int64_t ilow, int64_t ihigh, int64_t jlow, int64_t jhigh, Tensor<T>& s) const {
563  MADNESS_ASSERT(s.iscontiguous());
564  s = 0;
565  int64_t i0 = std::max(ilo,ilow);
566  int64_t j0 = std::max(jlo,jlow);
567  int64_t i1 = std::min(ihi,ihigh);
568  int64_t j1 = std::min(jhi,jhigh);
569  if (i0<=i1 && j0<=j1) {
570 s(Slice(i0-ilow,i1-ilow),Slice(j0-jlow,j1-jlow)) = t(Slice(i0-ilo,i1-ilo),Slice(j0-jlo,j1-jlo));
571  }
572  get_world().gop.sum(s.ptr(), s.size());
573  }
574 
575  void extract_columns(int64_t jlow, int64_t jhigh, DistributedMatrix<T>& U) const {
576  int newrowdim = jhigh - jlow + 1;
577  MADNESS_ASSERT(jlow >= 0);
578  MADNESS_ASSERT(jhigh < rowdim());
579  MADNESS_ASSERT(newrowdim == U.rowdim());
580  MADNESS_ASSERT(coldim() == U.coldim());
581  MADNESS_ASSERT(is_column_distributed());
582  MADNESS_ASSERT(U.is_column_distributed());
583  MADNESS_ASSERT(coltile() == U.coltile());
584 
585  int64_t i0 = ilo;
586  int64_t j0 = std::max(jlo,jlow);
587  int64_t i1 = ihi;
588  int64_t j1 = std::min(jhi,jhigh);
589  if (i0<=i1 && j0<=j1) {
590  U.data()(___) = t(Slice(i0-ilo,i1-ilo),Slice(j0-jlo,j1-jlo));
591  }
592  }
593 
594  template <typename R>
595 bool has_same_dimension_and_distribution(const DistributedMatrix<R>& A) {
596 return (coldim()==A.coldim() && rowdim()==A.rowdim() && coltile()==A.coltile() && rowtile()==A.rowtile());
597 }
598 
600 
602 /// Inplace addition; dimensions and distribution must be identical
603 DistributedMatrix<T>& operator+=(const DistributedMatrix<T>& A) {
604 MADNESS_ASSERT(has_same_dimension_and_distribution(A));
605  t += A.t;
606  return *this;
607  }
608 
609 
611 
613 /// Out of place addition; dimensions and distribution must be identical
614 DistributedMatrix<T> operator+(const DistributedMatrix<T>& A) const {
615 MADNESS_ASSERT(has_same_dimension_and_distribution(A));
616  return copy(*this)+=A;
617  }
618 
619 
621 
623 /// Inplace scale by a constant
624 DistributedMatrix<T>& operator*=(const T s) {
625 t.scale(s);
626  return *this;
627  }
628 
630  void set(int64_t i, int64_t j, const T x) {
631  MADNESS_ASSERT(i>=ilo && i<=ihi && j>=jlo && j<=jhi);
632  t(i-ilo,j-jlo) = x;
633  }
634 
636  T get(int64_t i, int64_t j) const {
637  MADNESS_ASSERT(i>=ilo && i<=ihi && j>=jlo && j<=jhi);
638  return t(i-ilo,j-jlo);
639  }
640  };
641 
642 
644 
647  template <typename T>
648 DistributedMatrix<T> copy(const DistributedMatrix<T>& A) {
649 return DistributedMatrix<T>(A,true);
650  }
651 
652 
654 
661  static inline DistributedMatrixDistribution
662  column_distributed_matrix_distribution(World& world, int64_t n, int64_t m, int64_t coltile) { // default coltile=0 above
663  if (world.size()*coltile < n) coltile = (n-1)/world.size() + 1;
664  coltile = std::min(coltile,n);
665  if ((coltile&0x1)) ++coltile; // ??? Was before the previous statement
666 
667  return DistributedMatrixDistribution(world, n, m, coltile, m);
668  }
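 // Worked example (illustrative): with 6 processes and n=10, a requested
 // coltile of 0 becomes (10-1)/6+1 = 2 (even, so unchanged), giving Pcoldim=5
 // and leaving one process empty; a requested coltile of 3 is bumped up to 4.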
669 
670 
672 
679  template <typename T>
680  DistributedMatrix<T> column_distributed_matrix(World& world, int64_t n, int64_t m, int64_t coltile=0) {
681  return DistributedMatrix<T>(column_distributed_matrix_distribution(world, n, m, coltile));
682  }
683 
684 
686 
692  static inline DistributedMatrixDistribution
693  row_distributed_matrix_distribution(World& world, int64_t n, int64_t m, int64_t rowtile) { // default rowtile=0 above
694  if (world.size()*rowtile < m) rowtile = (m-1)/world.size() + 1;
695  rowtile = std::min(rowtile,m);
696 
697  return DistributedMatrixDistribution(world, n, m, n, rowtile);
698  }
699 
701 
707  template <typename T>
708  DistributedMatrix<T> row_distributed_matrix(World& world, int64_t n, int64_t m, int64_t rowtile=0) {
709  return DistributedMatrix<T>(row_distributed_matrix_distribution(world, n, m, rowtile));
710  }
711 
712 
714 
724  template <typename T>
725  DistributedMatrix<T> interleave_rows(const DistributedMatrix<T>& a, const DistributedMatrix<T>& b) {
726  MADNESS_ASSERT(a.rowdim()==b.rowdim() && a.coldim()==b.coldim() && a.coltile()==b.coltile() && a.rowtile()==b.rowtile());
727 
728  DistributedMatrix<T> c(a.get_world(), a.coldim()*2, a.rowdim(), a.coltile()*2, a.rowtile());
729  c.data()(Slice(0,-1,2),_) = a.data()(___);
730  c.data()(Slice(1,-1,2),_) = b.data()(___);
731 return c;
732 }
733 
735 
745  template <typename T>
746  DistributedMatrix<T> concatenate_rows(const DistributedMatrix<T>& a, const DistributedMatrix<T>& b) {
747  MADNESS_ASSERT(a.coldim()==b.coldim() && a.coltile()==b.coltile() && a.is_column_distributed() && b.is_column_distributed());
748 
749  int64_t ma = a.rowdim();
750  int64_t mb = b.rowdim();
751 
752  DistributedMatrix<T> c(a.get_world(), a.coldim(), ma+mb, a.coltile(), ma+mb);
753 
754  int64_t ilow, ihigh;
755  a.local_colrange(ilow, ihigh);
756  if (ilow <= ihigh) {
757  c.data()(_,Slice(0,ma-1)) = a.data()(___);
758  c.data()(_,Slice(ma,-1)) = b.data()(___);
759  }
760 
761  return c;
762  }
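 // Shape sketch (illustrative): if a is 8x3 and b is 8x5, both column
 // distributed with coltile=4 over 2 processes, then c is 8x8 with coltile=4
 // and each process places its local 4x3 and 4x5 blocks side by side.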
763 
765 
776  template <typename T>
777  DistributedMatrix<T> concatenate_rows( const DistributedMatrix<T>& a, const DistributedMatrix<T>& b, const DistributedMatrix<T>& c, const DistributedMatrix<T>& d) {
778  MADNESS_ASSERT(a.coldim()==b.coldim() && b.coldim()==c.coldim() && c.coldim()==d.coldim());
779  MADNESS_ASSERT(a.coltile()==b.coltile() && b.coltile()==c.coltile() && c.coltile()==d.coltile());
780  MADNESS_ASSERT(a.is_column_distributed() && b.is_column_distributed() && c.is_column_distributed() && d.is_column_distributed());
781 
782  int64_t ma = a.rowdim();
783  int64_t mb = b.rowdim();
784  int64_t mc = c.rowdim();
785  int64_t md = d.rowdim();
786 
787  DistributedMatrix<T> result(a.get_world(), a.coldim(), ma+mb+mc+md, a.coltile(), ma+mb+mc+md);
788 
789  if(a.local_size() > 0) result.data()( _ , Slice(0,ma-1) ) = a.data()(___);
790  if(b.local_size() > 0) result.data()( _ , Slice(ma, ma+mb-1) ) = b.data()(___);
791  if(c.local_size() > 0) result.data()( _ , Slice(ma+mb, ma+mb+mc-1) ) = c.data()(___);
792  if(d.local_size() > 0) result.data()( _ , Slice(ma+mb+mc, -1) ) = d.data()(___);
793 
794  return result;
795  }
796 
797 
799 
809  template <typename T>
810  DistributedMatrix<T> concatenate_columns(const DistributedMatrix<T>& a, const DistributedMatrix<T>& b) {
811  MADNESS_ASSERT(a.rowdim()==b.rowdim() && a.rowtile()==b.rowtile() && a.is_row_distributed() && b.is_row_distributed());
812 
813  int64_t ma = a.coldim();
814  int64_t mt = ma + b.coldim();
815 
816 DistributedMatrix<T> c(a.get_world(), mt, a.rowdim(), mt, b.rowtile());
817 
818  if(a.local_size() > 0) c.data()( Slice(0,ma-1), _ ) = a.data()(___);
819 if(b.local_size() > 0) c.data()( Slice(ma,-1), _ ) = b.data()(___);
820 
821  return c;
822  }
823 }
824 
825 #endif