MADNESS  version 0.9
elem.h
Go to the documentation of this file.
1 /*
2  This file is part of MADNESS.
3 
4  Copyright (C) 2007,2010 Oak Ridge National Laboratory
5 
6  This program is free software; you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation; either version 2 of the License, or
9  (at your option) any later version.
10 
11  This program is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  GNU General Public License for more details.
15 
16  You should have received a copy of the GNU General Public License
17  along with this program; if not, write to the Free Software
18  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 
20  For more information please contact:
21 
22  Robert J. Harrison
23  Oak Ridge National Laboratory
24  One Bethel Valley Road
25  P.O. Box 2008, MS-6367
26 
27  email: harrisonrj@ornl.gov
28  tel: 865-241-3937
29  fax: 865-572-0680
30 
31 
32  $Id: eigen.h 2615 2011-10-23 13:24:06Z jeff.science@gmail.com $
33 */
34 
35 
36 #include <madness/madness_config.h>
37 
38 #ifdef MADNESS_HAS_ELEMENTAL
39 
40 #include <madness/world/world.h>
41 #include <madness/tensor/tensor.h>
44 #include <madness/world/print.h>
46 
47 #include <ctime>
48 #include <elemental.hpp>
49 
50 namespace madness {
51 
52  namespace detail {
53 
54  template <typename T>
55  struct Value {
56  int i;
57  int j;
58  T t;
59  Value(int i, int j, T t) : i(i), j(j), t(t) {}
60  Value(){} // required for use in STL container
61  template <typename Archive>
62  void serialize(const Archive& ar) {
63  ar & i & j & t;
64  }
65  };
66 
67  template <typename T>
68  class MadToElemDistCopy {
69  elem::DistMatrix<T>& d;
70  public:
71  MadToElemDistCopy(elem::DistMatrix<T>& d) : d(d) {}
72 
73  void operator()(const Value<T>& v) {
74  d.Set(v.i, v.j, v.t);
75  }
76  };
77 
78  template <typename T>
79  class ElemToMadDistCopy {
80  DistributedMatrix<T>& d;
81  public:
82  ElemToMadDistCopy(DistributedMatrix<T>& d) : d(d) {}
83 
84  void operator()(const Value<T>& v) {
85  d.set(v.i, v.j, v.t);
86  }
87  };
88 
89  }
90 
91 
93  template <typename T>
94  ProcessID Owner(const elem::DistMatrix<T>& d, int i, int j) {
95  int RowOwner = (i+d.ColAlignment()) % d.ColStride(); // is Col/Row Align in later versions ... no ment
96  int ColOwner = (j+d.RowAlignment()) % d.RowStride();
97  return RowOwner+ColOwner*d.ColStride();
98  }
99 
100 
102 
104  template <typename T>
105  void copy_to_elemental(const DistributedMatrix<T>& din, elem::DistMatrix<T>& dout) {
106  BinSorter< detail::Value<T> , detail::MadToElemDistCopy<T> > s(din.get_world(), detail::MadToElemDistCopy<T>(dout));
107 
108  int64_t ilo, ihi, jlo, jhi;
109  din.local_colrange(ilo,ihi);
110  din.local_rowrange(jlo,jhi);
111  const Tensor<T>& t = din.data();
112  for (int64_t i=ilo; i<=ihi; i++) {
113  for (int64_t j=jlo; j<=jhi; j++) {
114  ProcessID owner = Owner(dout, i, j);
115  s.insert(owner, detail::Value<T>(i,j,t(i-ilo, j-jlo)));
116  }
117  }
118 
119  s.finish();
120  }
121 
123 
125  template <typename T>
126  void copy_from_elemental(const elem::DistMatrix<T>& din, DistributedMatrix<T>& dout) {
127  BinSorter< detail::Value<T> , detail::ElemToMadDistCopy<T> > s(dout.get_world(), detail::ElemToMadDistCopy<T>(dout));
128 
129  const int64_t colShift = din.ColShift(); // first row we own
130  const int64_t rowShift = din.RowShift(); // first col we own
131  const int64_t colStride = din.ColStride();
132  const int64_t rowStride = din.RowStride();
133  const int64_t localHeight = din.LocalHeight();
134  const int64_t localWidth = din.LocalWidth();
135 
136  for( int64_t jLocal=0; jLocal<localWidth; ++jLocal ) {
137  for( int64_t iLocal=0; iLocal<localHeight; ++iLocal ) {
138  const int64_t i = colShift + iLocal*colStride;
139  const int64_t j = rowShift + jLocal*rowStride;
140 
141  const ProcessID owner = dout.owner(i,j);
142  s.insert(owner, detail::Value<T>(i,j,din.GetLocal(iLocal,jLocal)));
143  }
144  }
145 
146  s.finish();
147  }
148 
163  template <typename T>
164  void sygv(const DistributedMatrix<T>& A,
165  const DistributedMatrix<T>& B,
166  int itype,
167  DistributedMatrix<T>& X,
168  Tensor< typename Tensor<T>::scalar_type >& e)
169  {
170  const int64_t n = A.coldim();
171  const int64_t m = A.rowdim();
172  MADNESS_ASSERT(n==m);
173  MADNESS_ASSERT(n==B.coldim() && m==B.rowdim());
174 
175  const int blocksize = 128;
176  const elem::Grid GG(A.get_world().mpi.comm().Get_mpi_comm() );
177  elem::SetBlocksize(blocksize);
178 
179  elem::DistMatrix<T> EA(n,n,GG);
180  elem::DistMatrix<T> EB(n,n,GG);
181 
182  copy_to_elemental(A,EA);
183  copy_to_elemental(B,EB);
184 
185  elem::HermitianGenDefiniteEigType eigType = elem::AXBX;
186  elem::UpperOrLower uplo = elem::CharToUpperOrLower('U');
187  elem::DistMatrix<T> Xd(n,n,GG);
188  elem::DistMatrix<T,elem::VR,elem::STAR> wd( n, n, GG);
189 
190  // 0.83+ ???
191  // elem::HermitianGenDefiniteEig(eigType, uplo, EA, EB, wd, Xd, elem::SortType::ASCENDING);
192 
193  // 0.79-0.82 ?
194  elem::HermitianGenDefiniteEig(eigType, uplo, EA, EB, wd, Xd);
195  elem::hermitian_eig::Sort(wd, Xd);
196 
197  A.get_world().mpi.Barrier();
198 
199  X = DistributedMatrix<T>(A.distribution());
200  e = Tensor<typename Tensor<T>::scalar_type>(n);
201 
202  copy_from_elemental(Xd, X);
203 
204  const int localHeight1 = wd.LocalHeight();
205  const int colShift1 = wd.ColShift(); // first row we own
206  const int colStride1= wd.ColStride();
207  T * buffer = e.ptr();
208  for( int iLocal=0; iLocal<localHeight1; ++iLocal ) {
209  const int jLocal=0;
210  const int i = colShift1 + iLocal*colStride1;
211  //buffer[i]= wd.Get( iLocal, jLocal);
212  buffer[i]= wd.GetLocal( iLocal, jLocal);
213  }
214 
215  A.get_world().gop.sum(e.ptr(),n);
216  }
217 
232  template <typename T>
233  void sygvp(World& world,
234  const Tensor<T>& a, const Tensor<T>& B, int itype,
235  Tensor<T>& V, Tensor< typename Tensor<T>::scalar_type >& e) {
236  TENSOR_ASSERT(a.ndim() == 2, "sygv requires a matrix",a.ndim(),&a);
237  TENSOR_ASSERT(a.dim(0) == a.dim(1), "sygv requires square matrix",0,&a);
238  TENSOR_ASSERT(B.ndim() == 2, "sygv requires a matrix",B.ndim(),&a);
239  TENSOR_ASSERT(B.dim(0) == B.dim(1), "sygv requires square matrix",0,&a);
240  TENSOR_ASSERT(a.iscontiguous(),"sygvp requires a contiguous matrix (a)",0,&a);
241  TENSOR_ASSERT(B.iscontiguous(),"sygvp requires a contiguous matrix (B)",0,&B);
242 
243  world.gop.broadcast(a.ptr(), a.size(), 0);
244  world.gop.broadcast(B.ptr(), B.size(), 0);
245 
246  const int n = a.dim(1);
247 
248  e = Tensor<typename Tensor<T>::scalar_type>(n);
249  V = Tensor<T>(n,n);
250 
251  if (a.dim(0) <= 4) {
252  // Work around bug in elemental/pmrrr/mkl/something for n=2,3
253  sygv(a, B, itype, V, e);
254  return;
255  }
256 
257  world.gop.fence(); //<<<<<< Essential to quiesce MADNESS threads/comms
258 
259  const int blocksize = 128;
260  try {
261  const elem::Grid GG( world.mpi.comm().Get_mpi_comm() );
262  elem::SetBlocksize(blocksize);
263 
264  elem::DistMatrix<T> gd( n, n, GG );
265 
266  const int colShift = gd.ColShift(); // first row we own
267  const int rowShift = gd.RowShift(); // first col we own
268  const int colStride =gd.ColStride();
269  const int rowStride = gd.RowStride();
270  const int localHeight = gd.LocalHeight();
271  const int localWidth = gd.LocalWidth();
272  {
273  const T * buffer = a.ptr();
274  for( int jLocal=0; jLocal<localWidth; ++jLocal )
275  {
276  for( int iLocal=0; iLocal<localHeight; ++iLocal )
277  {
278  const int i = colShift + iLocal*colStride;
279  const int j = rowShift + jLocal*rowStride;
280  //gd.Set( iLocal, jLocal, buffer[i+j*n] );
281  gd.SetLocal( iLocal, jLocal, buffer[i+j*n] );
282  }
283  }
284  }
285  //gd.Print("gs");
286 
287  elem::DistMatrix<T> hd( n, n, GG );
288  {
289  const T * buffer = B.ptr();
290  for( int jLocal=0; jLocal<localWidth; ++jLocal )
291  {
292  for( int iLocal=0; iLocal<localHeight; ++iLocal )
293  {
294  const int i = colShift + iLocal*colStride;
295  const int j = rowShift + jLocal*rowStride;
296  //hd.Set( iLocal, jLocal, buffer[i+j*(n)] );
297  hd.SetLocal( iLocal, jLocal, buffer[i+j*(n)] );
298  }
299  }
300  }
301  // hd.Print("hd");
302 
303  world.mpi.Barrier();
304 
305  elem::HermitianGenDefiniteEigType eigType = elem::AXBX;
306  //const char* uu="U";
307  elem::UpperOrLower uplo = elem::CharToUpperOrLower('U');
308  elem::DistMatrix<T> Xd( n, n, GG );
309  elem::DistMatrix<T,elem::VR,elem::STAR> wd( n, n, GG);
310 
311 
312  // 0.83+ ???
313  // elem::HermitianGenDefiniteEig( eigType, uplo, gd, hd, wd, Xd,elem::SortType::ASCENDING);
314 
315  // 0.79-0.82 ?
316  elem::HermitianGenDefiniteEig( eigType, uplo, gd, hd, wd, Xd);
317  elem::hermitian_eig::Sort( wd, Xd );
318 
319  world.mpi.Barrier();
320  //Xd.Print("Xs");
321 
322  //retrive eigenvalues
323  {
324  const int localHeight1 = wd.LocalHeight();
325  const int colShift1 = wd.ColShift(); // first row we own
326  const int colStride1 =wd.ColStride();
327  T * buffer = e.ptr();
328  for( int iLocal=0; iLocal<localHeight1; ++iLocal )
329  {
330  const int jLocal=0;
331  const int i = colShift1 + iLocal*colStride1;
332  //buffer[i]= wd.Get( iLocal, jLocal);
333  buffer[i]= wd.GetLocal( iLocal, jLocal);
334  }
335  }
336  //world.gop.broadcast(e.ptr(),e.size(), 0);
337  world.gop.sum(e.ptr(),n);
338  //if(myrank ==0) cout<< e << endl;
339  //retrive eigenvectors
340  {
341  T * buffer = V.ptr();
342  for( int jLocal=0; jLocal<localWidth; ++jLocal )
343  {
344  for( int iLocal=0; iLocal<localHeight; ++iLocal )
345  {
346  const int i = colShift + iLocal*colStride;
347  const int j = rowShift + jLocal*rowStride;
348  //buffer[i+j*n]= Xd.Get( iLocal, jLocal);
349  buffer[i+j*n]= Xd.GetLocal( iLocal, jLocal);
350  }
351  }
352  }
353  world.gop.sum(V.ptr(), n*n);
354  V=madness::transpose(V);
355  //world.gop.broadcast(V.ptr(),V.size(), 0);
356  //if(myrank ==0)cout<< V << endl;
357  }
358  catch (TensorException S) {
359  std::cerr << S << std::endl;
360  }
361 
362  world.gop.fence(); //<<<<<< Essential to quiesce MADNESS threads/comms
363  }
364 
377  template <typename T>
378  void gesvp(World& world, const Tensor<T>& a, const Tensor<T>& b, Tensor<T>& x) {
379 
380  TENSOR_ASSERT(a.ndim() == 2, "gesv requires matrix",a.ndim(),&a);
381 
382  int n = a.dim(0), m = a.dim(1), nrhs = b.dim(1);
383 
384  TENSOR_ASSERT(m == n, "gesv requires square matrix",0,&a);
385  TENSOR_ASSERT(b.ndim() <= 2, "gesv require a vector or matrix for the RHS",b.ndim(),&b);
386  TENSOR_ASSERT(a.dim(0) == b.dim(0), "gesv matrix and RHS must conform",b.ndim(),&b);
387  TENSOR_ASSERT(a.iscontiguous(),"gesvp requires a contiguous matrix (a)",0,&a);
388  TENSOR_ASSERT(b.iscontiguous(),"gesvp requires a contiguous matrix (b)",0,&b);
389  world.gop.broadcast(a.ptr(), a.size(), 0);
390  world.gop.broadcast(b.ptr(), b.size(), 0);
391 
392  Tensor<T> AT = transpose(a);
393 
394  world.gop.fence(); //<<<<<< Essential to quiesce MADNESS threads/comms
395 
396  int blocksize = 128;
397  try {
398  const elem::Grid GG( world.mpi.comm().Get_mpi_comm() );
399  elem::SetBlocksize(blocksize);
400  elem::DistMatrix<T> gd( n, n, GG );
401 
402 
403  {
404  const int colShift = gd.ColShift(); // 1st row local
405  const int rowShift = gd.RowShift(); // 1st col local
406  const int colStride =gd.ColStride();
407  const int rowStride = gd.RowStride();
408  const int localHeight = gd.LocalHeight();
409  const int localWidth = gd.LocalWidth();
410  {
411  const T * buffer = AT.ptr();
412  for( int jLocal=0; jLocal<localWidth; ++jLocal )
413  {
414  for( int iLocal=0; iLocal<localHeight; ++iLocal )
415  {
416  const int i = colShift + iLocal*colStride;
417  const int j = rowShift + jLocal*rowStride;
418  //gd.Set( iLocal, jLocal, buffer[i+j*n] );
419  gd.SetLocal( iLocal, jLocal, buffer[i+j*n] );
420  }
421  }
422  }
423  }
424  Tensor<T> bT;
425  if (nrhs == 1) {
426  x = Tensor<T>(n);
427  bT = Tensor<T>(n);
428  bT = copy(b);
429  x = copy(b); //creating the correct size
430  }
431  else {
432  x = Tensor<T>(n,nrhs);
433  bT = transpose(b);
434  }
435 
436  for(int i=0; i< x.size(); ++i) {
437  T * buffer = x.ptr() ;
438  buffer[i] = 0.0;
439  }
440 
441  //cout << "Caught a tensor exception \n";
442  //cout << bT <<endl;
443  elem::DistMatrix<T> hd( n, nrhs, GG );
444  {
445  const int colShift = hd.ColShift(); // 1st row local
446  const int rowShift = hd.RowShift(); // 1st col local
447  const int colStride =hd.ColStride();
448  const int rowStride = hd.RowStride();
449  const int localHeight = hd.LocalHeight();
450  const int localWidth = hd.LocalWidth();
451  {
452  const T * buffer = bT.ptr();
453  for( int jLocal=0; jLocal<localWidth; ++jLocal )
454  {
455  for( int iLocal=0; iLocal<localHeight; ++iLocal )
456  {
457  const int i = colShift + iLocal*colStride;
458  const int j = rowShift + jLocal*rowStride;
459  //hd.Set( iLocal, jLocal, buffer[i+j*(n)]);
460  hd.SetLocal( iLocal, jLocal, buffer[i+j*(n)]);
461 
462  }
463  }
464  }
465  }
466  world.mpi.Barrier();
467 
468  elem::GaussianElimination(gd, hd);
469 
470  world.mpi.Barrier();
471  {
472  const int colShift = hd.ColShift(); // 1st row local
473  const int rowShift = hd.RowShift(); // 1st col local
474  const int colStride =hd.ColStride();
475  const int rowStride = hd.RowStride();
476  const int localHeight = hd.LocalHeight();
477  const int localWidth = hd.LocalWidth();
478 
479  T * buffer = x.ptr();
480  for( int jLocal=0; jLocal<localWidth; ++jLocal )
481  {
482  for( int iLocal=0; iLocal<localHeight; ++iLocal )
483  {
484  const int i = colShift + iLocal*colStride;
485  const int j = rowShift + jLocal*rowStride;
486  //buffer[j+i*nrhs]= hd.Get( iLocal, jLocal);
487  buffer[j+i*nrhs]= hd.GetLocal( iLocal, jLocal);
488  }
489  }
490  }
491  world.gop.sum(x.ptr(), n*nrhs);
492  //world.gop.broadcast(x.ptr(),x.size(), 0);
493  //if(myrank ==0) cout<< x << endl;
494 
495  }
496  catch (TensorException S) {
497  std::cerr << S << std::endl;
498  }
499  world.gop.fence(); //<<<<<< Essential to quiesce MADNESS threads/comms
500  }
501 }
502 
503 #else
504 
505 namespace madness {
506  // sequential fall back code
507  template <typename T>
508  void sygvp(World& world,
509  const Tensor<T>& a, const Tensor<T>& B, int itype,
510  Tensor<T>& V, Tensor< typename Tensor<T>::scalar_type >& e) {
511  sygv(a, B, itype, V, e);
512  }
513 
514  // sequential fall back code
515  template <typename T>
516  void gesvp(World& world, const Tensor<T>& a, const Tensor<T>& b, Tensor<T>& x) {
517  gesv(a, b, x);
518  }
519 }
520 
521 #endif //MADNESS_HAS_ELEMENTAL
Tensor< double > B
Definition: tdse1d.cc:167
void gesv(const Tensor< T > &a, const Tensor< T > &b, Tensor< T > &x)
Solve Ax = b for general A using the LAPACK *gesv routines.
Definition: lapack.cc:339
#define TENSOR_ASSERT(condition, msg, value, t)
Definition: tensorexcept.h:130
double V(const Vector< double, 3 > &r)
Definition: apps/ii/hatom_energy.cc:46
This header should include pretty much everything needed for the parallel runtime.
Defines and implements most of Tensor.
void sygvp(World &world, const Tensor< T > &a, const Tensor< T > &B, int itype, Tensor< T > &V, Tensor< typename Tensor< T >::scalar_type > &e)
Definition: elem.h:508
const T1 &f1 return GTEST_2_TUPLE_() T(f0, f1)
Function< T, NDIM > copy(const Function< T, NDIM > &f, const std::shared_ptr< WorldDCPmapInterface< Key< NDIM > > > &pmap, bool fence=true)
Create a new copy of the function with different distribution and optional fence. ...
Definition: mra.h:1835
Prototypes for a partial interface from Tensor to LAPACK.
FLOAT a(int j, FLOAT z)
Definition: y1.cc:86
A parallel world with full functionality wrapping an MPI communicator.
Definition: worldfwd.h:416
enable_if_c< is_serializable< T >::value &&is_output_archive< Archive >::value >::type serialize(const Archive &ar, const T *t, unsigned int n)
Definition: archive.h:615
void gesvp(World &world, const Tensor< T > &a, const Tensor< T > &b, Tensor< T > &x)
Definition: elem.h:516
int ProcessID
Used to clearly identify process number/rank.
Definition: worldtypes.h:37
const double m
Definition: gfit.cc:199
void sygv(const Tensor< T > &A, const Tensor< T > &B, int itype, Tensor< T > &V, Tensor< typename Tensor< T >::scalar_type > &e)
Generalized real-symmetric or complex-Hermitian eigenproblem.
Definition: lapack.cc:519
Tensor< T > transpose(const Tensor< T > &t)
Returns a new deep copy of the transpose of the input tensor.
Definition: tensor.h:1945
Holds machinery to set up Functions/FuncImpls using various Factories and Interfaces.
Definition: chem/atomutil.cc:45
FLOAT b(int j, FLOAT z)
Definition: y1.cc:79