MADNESS version 0.9
solvers.h
/*
  This file is part of MADNESS.

  Copyright (C) 2007,2010 Oak Ridge National Laboratory

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

  For more information please contact:

  Robert J. Harrison
  Oak Ridge National Laboratory
  One Bethel Valley Road
  P.O. Box 2008, MS-6367

  email: harrisonrj@ornl.gov
  tel:   865-241-3937
  fax:   865-572-0680

  $Id$
*/
#ifndef MADNESS_LINALG_SOLVERS_H__INCLUDED
#define MADNESS_LINALG_SOLVERS_H__INCLUDED

#include <madness/tensor/tensor.h>
#include <madness/world/print.h>
#include <iostream>
#include <madness/tensor/tensor_lapack.h>  // declares gelss(), used by KAIN() below

namespace madness {

    /// Solves non-linear equation using KAIN (returns coefficients to compute next vector)
    template <typename T>
    Tensor<T> KAIN(const Tensor<T>& Q, double rcond=1e-12) {
        const int nvec = Q.dim(0);
        const int m = nvec-1;

        // With a single vector there is nothing to mix
        if (nvec == 1) {
            Tensor<T> c(1);
            c(0L) = 1.0;
            return c;
        }

        // Reduced m x m system for the first m coefficients; the last
        // coefficient is fixed below by requiring the coefficients to sum to one
        Tensor<T> A(m,m);
        Tensor<T> b(m);
        for (long i=0; i<m; ++i) {
            b(i) = Q(m,m) - Q(i,m);
            for (long j=0; j<m; ++j) {
                A(i,j) = Q(i,j) - Q(m,j) - Q(i,m) + Q(m,m);
            }
        }

        // print("Q");
        // print(Q);
        // print("A");
        // print(A);
        // print("b");
        // print(b);

        // Solve A x = b by SVD-based least squares (rcond controls the singular-value cutoff)
        Tensor<T> x;
        Tensor<double> s, sumsq;
        long rank;
        gelss(A, b, rcond, x, s, rank, sumsq);
        // print("singular values", s);
        // print("rank", rank);
        // print("solution", x);

        Tensor<T> c(nvec);
        T sumC = 0.0;
        for (long i=0; i<m; ++i) sumC += x(i);
        c(Slice(0,m-1)) = x;
        // print("SUMC", nvec, m, sumC);
        c(m) = 1.0 - sumC;

        // print("returned C", c);

        return c;
    }
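
    // Typical use of KAIN (illustrative sketch; the names xlist/rlist, the
    // inner-product convention, and the final update step are assumptions,
    // not part of this header): keep a history of trial vectors x_i and
    // residuals r_i = F(x_i), build Q(i,j) = <x_i | r_j>, and let KAIN return
    // mixing coefficients that sum to one.
    //
    //     long nvec = xlist.size();                   // number of stored trials
    //     Tensor<double> Q(nvec, nvec);
    //     for (long i=0; i<nvec; ++i)
    //         for (long j=0; j<nvec; ++j)
    //             Q(i,j) = xlist[i].trace(rlist[j]);  // inner product <x_i | r_j>
    //     Tensor<double> c = KAIN(Q);                 // c(nvec-1) = 1 - sum of the others
    //
    // The coefficients are then used to combine the stored trial vectors and
    // residuals into the next trial vector.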

    /// The interface to be provided by targets for non-linear equation solver
    struct SolverTargetInterface {
        /// Should return the residual (vector F(x))
        virtual Tensor<double> residual(const Tensor<double>& x) = 0;

        /// Override this to return true if the Jacobian is implemented
        virtual bool provides_jacobian() const {return false;}

        /// Some solvers require the Jacobian or are faster if an analytic form is available
        virtual Tensor<double> jacobian(const Tensor<double>& x) {
            throw "not implemented";
        }

        /// Implement this if advantageous to compute residual and Jacobian simultaneously
        virtual void residual_and_jacobian(const Tensor<double>& x,
                                           Tensor<double>& residual, Tensor<double>& jacobian) {
            residual = this->residual(x);
            jacobian = this->jacobian(x);
        }

        virtual ~SolverTargetInterface() {}
    };
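
    // Minimal SolverTargetInterface implementation (illustrative sketch; the
    // struct name SqrtTwoTarget is hypothetical): the residual of the scalar
    // equation x*x - 2 = 0.
    //
    //     struct SqrtTwoTarget : public SolverTargetInterface {
    //         Tensor<double> residual(const Tensor<double>& x) {
    //             Tensor<double> r(1);
    //             r(0L) = x(0L)*x(0L) - 2.0;   // F(x) = x^2 - 2
    //             return r;
    //         }
    //     };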

    /// The interface to be provided by functions to be optimized
    struct OptimizationTargetInterface {

        /// Should return the value of the objective function
        virtual double value(const Tensor<double>& x) = 0;

        /// Override this to return true if the derivative is implemented
        virtual bool provides_gradient() const {return false;}

        /// Should return the derivative of the function
        virtual Tensor<double> gradient(const Tensor<double>& x) {
            throw "not implemented";
        }

        /// Reimplement if more efficient to evaluate both value and gradient in one call
        virtual void value_and_gradient(const Tensor<double>& x,
                                        double& value,
                                        Tensor<double>& gradient) {
            value = this->value(x);
            gradient = this->gradient(x);
        }

        /// Numerical test of the derivative ... optionally prints to stdout, returns max abs error
        double test_gradient(Tensor<double>& x, double value_precision, bool doprint=true);

        virtual ~OptimizationTargetInterface() {}
    };
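
    // Minimal OptimizationTargetInterface implementation (illustrative sketch;
    // the struct name Quadratic is hypothetical): a shifted quadratic
    // f(x) = sum_i (x_i - 1)^2 with its analytic gradient.
    //
    //     struct Quadratic : public OptimizationTargetInterface {
    //         double value(const Tensor<double>& x) {
    //             double sum = 0.0;
    //             for (long i=0; i<x.dim(0); ++i) sum += (x(i)-1.0)*(x(i)-1.0);
    //             return sum;                           // f(x) = sum_i (x_i-1)^2
    //         }
    //         bool provides_gradient() const {return true;}
    //         Tensor<double> gradient(const Tensor<double>& x) {
    //             Tensor<double> g(x.dim(0));
    //             for (long i=0; i<x.dim(0); ++i) g(i) = 2.0*(x(i)-1.0);
    //             return g;                             // df/dx_i = 2 (x_i - 1)
    //         }
    //     };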

    /// The interface to be provided by solvers ... NOT USED ANYWHERE?
    struct SolverInterface {
        virtual bool solve(Tensor<double>& x) = 0;
        virtual bool converged() const = 0;
        virtual double residual_norm() const = 0;
        virtual ~SolverInterface() {}
    };

    /// The interface to be provided by optimizers
    struct OptimizerInterface {
        virtual bool optimize(Tensor<double>& x) = 0;
        virtual bool converged() const = 0;
        virtual double value() const = 0;
        virtual double gradient_norm() const = 0;
        virtual ~OptimizerInterface() {}
    };


    /// Unconstrained minimization via steepest descent
    class SteepestDescent : public OptimizerInterface {
        std::shared_ptr<OptimizationTargetInterface> target;
        const double tol;
        const double value_precision;    // Numerical precision of value
        const double gradient_precision; // Numerical precision of each element of residual
        double f;
        double gnorm;

    public:
        SteepestDescent(const std::shared_ptr<OptimizationTargetInterface>& tar,
                        double tol = 1e-6,
                        double value_precision = 1e-12,
                        double gradient_precision = 1e-12);

        bool optimize(Tensor<double>& x);

        bool converged() const;

        double gradient_norm() const;

        double value() const;

        virtual ~SteepestDescent() { }
    };


    /// Optimization via quasi-Newton (BFGS or SR1 update)
    class QuasiNewton : public OptimizerInterface {
    private:
        std::string update;              // One of BFGS or SR1
        std::shared_ptr<OptimizationTargetInterface> target;
        const int maxiter;
        const double tol;
        const double value_precision;    // Numerical precision of value
        const double gradient_precision; // Numerical precision of each element of residual
        double f;
        double gnorm;
        Tensor<double> h;
        int n;
        bool printtest;

        double line_search(double a1, double f0, double dxgrad, const Tensor<double>& x, const Tensor<double>& dx);

        void hessian_update_sr1(const Tensor<double>& s, const Tensor<double>& y);

        void hessian_update_bfgs(const Tensor<double>& dx,
                                 const Tensor<double>& dg);

        Tensor<double> new_search_direction(const Tensor<double>& g);

    public:
        QuasiNewton(const std::shared_ptr<OptimizationTargetInterface>& tar,
                    int maxiter = 20,
                    double tol = 1e-6,
                    double value_precision = 1e-12,
                    double gradient_precision = 1e-12);

        /// Choose update method (currently only "BFGS" or "SR1")
        void set_update(const std::string& method);

        /// Choose whether to run (and print) the numerical gradient test before optimizing
        void set_test(const bool& test_level);

        /// Runs the optimizer
        bool optimize(Tensor<double>& x);

        /// After running the optimizer returns true if converged
        bool converged() const;

        /// Value of the objective function
        double value() const;

        /// Resets Hessian to default guess
        void reset_hessian() {h = Tensor<double>();}

        /// Sets Hessian to given matrix
        void set_hessian(const Tensor<double>& matrix) {h = madness::copy(matrix);}

        /// Value of the gradient norm
        double gradient_norm() const;

        virtual ~QuasiNewton() {}
    };
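
    // Illustrative sketch of driving the optimizer; the Quadratic target is the
    // hypothetical example sketched above, everything else uses only this header.
    //
    //     std::shared_ptr<OptimizationTargetInterface> target(new Quadratic());
    //     QuasiNewton qn(target);          // defaults: maxiter=20, tol=1e-6
    //     qn.set_update("BFGS");           // or "SR1"
    //     Tensor<double> x(3);             // starting guess, here x = 0
    //     bool ok = qn.optimize(x);        // x is updated in place
    //     if (ok) print("minimum", qn.value(), "gradient norm", qn.gradient_norm());
    //
    // SteepestDescent follows the same pattern, minus the choice of update.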
}

#endif // MADNESS_LINALG_SOLVERS_H__INCLUDED