MADNESS  version 0.9
mtxmq.h
Go to the documentation of this file.
1 /*
2  This file is part of MADNESS.
3 
4  Copyright (C) 2007,2010 Oak Ridge National Laboratory
5 
6  This program is free software; you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation; either version 2 of the License, or
9  (at your option) any later version.
10 
11  This program is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  GNU General Public License for more details.
15 
16  You should have received a copy of the GNU General Public License
17  along with this program; if not, write to the Free Software
18  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 
20  For more information please contact:
21 
22  Robert J. Harrison
23  Oak Ridge National Laboratory
24  One Bethel Valley Road
25  P.O. Box 2008, MS-6367
26 
27  email: harrisonrj@ornl.gov
28  tel: 865-241-3937
29  fax: 865-572-0680
30 
31  $Id$
32 */
33 #ifndef MADNESS_TENSOR_MTXMQ_H__INCLUDED
34 #define MADNESS_TENSOR_MTXMQ_H__INCLUDED
35 
36 #include <madness/madness_config.h>
37 
38 typedef std::complex<double> double_complex;
39 
40 namespace madness {
49  template <typename aT, typename bT, typename cT>
50  void mTxmq(long dimi, long dimj, long dimk,
51  cT* restrict c, const aT* a, const bT* b) {
52 
53  //std::cout << "IN GENERIC mTxmq " << tensor_type_names[TensorTypeData<aT>::id] << " " << tensor_type_names[TensorTypeData<bT>::id] << " " << tensor_type_names[TensorTypeData<cT>::id] << "\n";
54 
55  for (long i=0; i<dimi; ++i,c+=dimj,++a) {
56  for (long j=0; j<dimj; ++j) c[j] = 0.0;
57  const aT *aik_ptr = a;
58  for (long k=0; k<dimk; ++k,aik_ptr+=dimi) {
59  aT aki = *aik_ptr;
60  for (long j=0; j<dimj; ++j) {
61  c[j] += aki*b[k*dimj+j];
62  }
63  }
64  }
65 
66  }
67 
68  /*
69  * mtxm, but with padded buffers.
70  *
71  * ext_b is the extent of the b array, so shrink() isn't needed.
72  */
73  template <typename aT, typename bT, typename cT>
74  void mTxmq_padding(long dimi, long dimj, long dimk, long ext_b,
75  cT* c, const aT* a, const bT* b) {
76  const int alignment = 4;
77  bool free_b = false;
78  long effj = dimj;
79 
80  /* Setup a buffer for c if needed */
81  cT* c_buf = c;
82  if (dimj%alignment) {
83  effj = (dimj | 3) + 1;
84  c_buf = (cT*)malloc(sizeof(cT)*dimi*effj);
85  }
86 
87  /* Copy b into a buffer if needed */
88  if (ext_b%alignment) {
89  free_b = true;
90  bT* b_buf = (bT*)malloc(sizeof(bT)*dimk*effj);
91 
92  bT* bp = b_buf;
93  for (long k=0; k<dimk; k++, bp += effj, b += ext_b)
94  memcpy(bp, b, sizeof(bT)*dimj);
95 
96  b = b_buf;
97  ext_b = effj;
98  }
99 
100  cT* c_work = c_buf;
101  /* mTxm */
102  for (long i=0; i<dimi; ++i,c_work+=effj,++a) {
103  for (long j=0; j<dimj; ++j) c_work[j] = 0.0;
104  const aT *aik_ptr = a;
105  for (long k=0; k<dimk; ++k,aik_ptr+=dimi) {
106  aT aki = *aik_ptr;
107  for (long j=0; j<dimj; ++j) {
108  c_work[j] += aki*b[k*ext_b+j];
109  }
110  }
111  }
112 
113  /* Copy c out if needed */
114  if (dimj%alignment) {
115  cT* ct = c_buf;
116  for (long i=0; i<dimi; i++, ct += effj, c += dimj)
117  memcpy(c, ct, sizeof(cT)*dimj);
118 
119  free(c_buf);
120  }
121 
122  /* Free the buffer for b */
123  if (free_b) free((bT*)b);
124  }
125 #ifdef HAVE_IBMBGQ
126  extern void bgq_mtxmq_padded(long ni, long nj, long nk, long ej,
127  double* c, const double* a, const double* b);
128  extern void bgq_mtxmq_padded(long ni, long nj, long nk, long ej,
129  __complex__ double* c, const __complex__ double* a, const __complex__ double* b);
130  extern void bgq_mtxmq_padded(long ni, long nj, long nk, long ej,
131  __complex__ double* c, const double* a, const __complex__ double* b);
132  extern void bgq_mtxmq_padded(long ni, long nj, long nk, long ej,
133  __complex__ double* c, const __complex__ double* a, const double* b);
134 
135  template <>
136  inline void mTxmq_padding(long ni, long nj, long nk, long ej,
137  double* c, const double* a, const double* b) {
138  bgq_mtxmq_padded(ni, nj, nk, ej, c, a, b);
139  }
140 
141  template <>
142  inline void mTxmq_padding(long ni, long nj, long nk, long ej,
143  __complex__ double* c, const __complex__ double* a, const __complex__ double* b) {
144  bgq_mtxmq_padded(ni, nj, nk, ej, c, a, b);
145  }
146 
147  template <>
148  inline void mTxmq_padding(long ni, long nj, long nk, long ej,
149  __complex__ double* c, const double* a, const __complex__ double* b) {
150  bgq_mtxmq_padded(ni, nj, nk, ej, c, a, b);
151  }
152 
153  template <>
154  inline void mTxmq_padding(long ni, long nj, long nk, long ej,
155  __complex__ double* c, const __complex__ double* a, const double* b) {
156  bgq_mtxmq_padded(ni, nj, nk, ej, c, a, b);
157  }
158 #elif defined(HAVE_IBMBGP)
159  extern void bgpmTxmq(long ni, long nj, long nk, double* restrict c,
160  const double* a, const double* b);
161  extern void bgpmTxmq(long ni, long nj, long nk, double_complex* restrict c,
162  const double_complex* a, const double_complex* b);
163 
164  template <>
165  inline void mTxmq(long ni, long nj, long nk, double* restrict c, const double* a, const double* b) {
166  bgpmTxmq(ni, nj, nk, c, a, b);
167  }
168 
169  template <>
170  inline void mTxmq(long ni, long nj, long nk, double_complex* restrict c, const double_complex* a, const double_complex* b) {
171  bgpmTxmq(ni, nj, nk, c, a, b);
172  }
173 
174 #elif defined(X86_64) && !defined(DISABLE_SSE3)
175  template <>
176  void mTxmq(long dimi, long dimj, long dimk,
177  double* restrict c, const double* a, const double* b);
178 
179  template <>
180  void mTxmq(long dimi, long dimj, long dimk,
182 
183 #ifndef __INTEL_COMPILER
184  template <>
185  void mTxmq(long dimi, long dimj, long dimk,
186  double_complex* restrict c, const double_complex* a, const double* b);
187 #endif
188 
189 #elif defined(X86_32)
190  template <>
191  void mTxmq(long dimi, long dimj, long dimk,
192  double* restrict c, const double* a, const double* b);
193 #endif
194 
195 }
196 
197 #endif // MADNESS_TENSOR_MTXMQ_H__INCLUDED
std::complex< double > double_complex
Definition: lineplot.cc:16
void bgq_mtxmq_padded(long dimi, long dimj, long dimk, long extb, __complex__ double *c_x, const __complex__ double *a_x, const __complex__ double *b_x)
Definition: bgq_mtxm.cc:10
void mTxmq_padding(long dimi, long dimj, long dimk, long ext_b, cT *c, const aT *a, const bT *b)
Definition: mtxmq.h:74
std::complex< double > double_complex
Definition: mtxmq.h:38
const int k
Definition: dielectric.cc:184
FLOAT a(int j, FLOAT z)
Definition: y1.cc:86
void mTxmq(long dimi, long dimj, long dimk, cT *restrict c, const aT *a, const bT *b)
Definition: mtxmq.h:50
#define restrict
Definition: config.h:403
Holds machinery to set up Functions/FuncImpls using various Factories and Interfaces.
Definition: chem/atomutil.cc:45
const double c
Definition: gfit.cc:200
FLOAT b(int j, FLOAT z)
Definition: y1.cc:79
char * malloc()