madness/mtxmq_8h_source.html

 /*

   This file is part of MADNESS.


   Copyright (C) 2007,2010 Oak Ridge National Laboratory


   This program is free software; you can redistribute it and/or modify

   it under the terms of the GNU General Public License as published by

   the Free Software Foundation; either version 2 of the License, or

   (at your option) any later version.


   This program is distributed in the hope that it will be useful,

   but WITHOUT ANY WARRANTY; without even the implied warranty of

   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

   GNU General Public License for more details.


   You should have received a copy of the GNU General Public License

   along with this program; if not, write to the Free Software

   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA


   For more information please contact:


   Robert J. Harrison

   Oak Ridge National Laboratory

   One Bethel Valley Road

   P.O. Box 2008, MS-6367


   email: harrisonrj@ornl.gov

   tel:   865-241-3937

   fax:   865-572-0680


   $Id$

 */

 #ifndef MADNESS_TENSOR_MTXMQ_H__INCLUDED

 #define MADNESS_TENSOR_MTXMQ_H__INCLUDED


 #include <madness/madness_config.h>

 #include <algorithm>
 #include <complex>
 #include <cstddef>
 #include <vector>


 // Shorthand for std::complex<double>. Declared at global scope (outside
 // namespace madness); used by the bgpmTxmq declarations and the mTxmq
 // specializations further down in this header.
 typedef std::complex<double> double_complex;


 namespace madness {

     template <typename aT, typename bT, typename cT>

     void mTxmq(long dimi, long dimj, long dimk,

                cT* restrict c, const aT* a, const bT* b) {


         //std::cout << "IN GENERIC mTxmq " << tensor_type_names[TensorTypeData<aT>::id] << " " << tensor_type_names[TensorTypeData<bT>::id] << " " << tensor_type_names[TensorTypeData<cT>::id] << "\n";


         for (long i=0; i<dimi; ++i,c+=dimj,++a) {

             for (long j=0; j<dimj; ++j) c[j] = 0.0;

             const aT *aik_ptr = a;

             for (long k=0; k<dimk; ++k,aik_ptr+=dimi) {

                 aT aki = *aik_ptr;

                 for (long j=0; j<dimj; ++j) {

                     c[j] += aki*b[k*dimj+j];

                 }

             }

         }


     }


     /*
      * mtxm, but with padded buffers.
      *
      * Computes, for every 0 <= i < dimi and 0 <= j < dimj,
      *     c[i*dimj + j] = sum over k in [0, dimk) of a[k*dimi + i] * b[k*ext_b + j]
      *
      * ext_b is the extent (row stride) of the b array, so shrink() isn't needed:
      * row k of b starts at b + k*ext_b and only its first dimj entries are read.
      *
      * When dimj is not a multiple of the alignment (4), the result is computed in
      * a scratch buffer whose rows are rounded up to the alignment and then copied
      * into c.  When ext_b is not a multiple of the alignment, b is first copied
      * into a scratch buffer with that same rounded-up row length.
      *
      * dimi, dimj  dimensions of c (rows, columns); nothing is written if either
      *             is <= 0
      * dimk        length of the contracted index; if <= 0, c is set to zero
      * ext_b       row stride of b, in elements
      * c           output, dimi*dimj elements, densely packed
      * a           input, dimk rows of dimi elements
      * b           input, dimk rows of ext_b elements
      *
      * Scratch storage is owned by std::vector, so it is released on every exit
      * path and an allocation failure is reported by an exception from the
      * allocator rather than by dereferencing a null pointer.
      */
     template <typename aT, typename bT, typename cT>
     void mTxmq_padding(long dimi, long dimj, long dimk, long ext_b,
                cT* c, const aT* a, const bT* b) {
         const long alignment = 4;

         /* An empty result needs no work (and no scratch buffers). */
         if (dimi <= 0 || dimj <= 0) return;

         const bool pad_c = (dimj % alignment) != 0;
         const bool pad_b = (ext_b % alignment) != 0;

         /* Row length of the scratch buffers: dimj rounded up to the alignment. */
         const long effj = pad_c ? (dimj/alignment + 1)*alignment : dimj;

         /* Setup a buffer for c if needed */
         std::vector<cT> c_store;
         cT* c_buf = c;
         if (pad_c) {
             c_store.resize(static_cast<std::size_t>(dimi)*static_cast<std::size_t>(effj));
             c_buf = &c_store[0];
         }

         /* Copy b into a buffer if needed */
         std::vector<bT> b_store;
         if (pad_b && dimk > 0) {
             b_store.resize(static_cast<std::size_t>(dimk)*static_cast<std::size_t>(effj));
             bT* const b_buf = &b_store[0];
             for (long k=0; k<dimk; ++k) {
                 const bT* src = b + k*ext_b;
                 std::copy(src, src + dimj, b_buf + k*effj);
             }
             b = b_buf;
             ext_b = effj;
         }

         /* mTxm */
         for (long i=0; i<dimi; ++i) {
             cT* c_row = c_buf + i*effj;
             for (long j=0; j<dimj; ++j) c_row[j] = 0.0;

             for (long k=0; k<dimk; ++k) {
                 const aT aki = a[k*dimi + i];
                 const bT* b_row = b + k*ext_b;
                 for (long j=0; j<dimj; ++j) {
                     c_row[j] += aki*b_row[j];
                 }
             }
         }

         /* Copy c out if needed (drops the padding columns) */
         if (pad_c) {
             for (long i=0; i<dimi; ++i) {
                 const cT* src = c_buf + i*effj;
                 std::copy(src, src + dimj, c + i*dimj);
             }
         }
     }

 #ifdef HAVE_IBMBGQ

     // Declarations of the bgq_mtxmq_padded overloads, visible only when
     // HAVE_IBMBGQ is defined. No bodies are given here; the definitions must
     // come from another translation unit. The parameter list mirrors
     // mTxmq_padding (ni, nj, nk, ej, c, a, b), and the specializations of
     // mTxmq_padding in this branch forward to these functions.
     //
     // Four operand combinations are declared (c, a, b):
     //   real    = real    x real
     //   complex = complex x complex
     //   complex = real    x complex
     //   complex = complex x real
     // Complex operands use the compiler-specific `__complex__ double` type
     // rather than std::complex<double>.
     extern void bgq_mtxmq_padded(long ni, long nj, long nk, long ej,

             double* c, const double* a, const double* b);

     extern void bgq_mtxmq_padded(long ni, long nj, long nk, long ej,

             __complex__ double* c, const __complex__ double* a, const __complex__ double* b);

     extern void bgq_mtxmq_padded(long ni, long nj, long nk, long ej,

             __complex__ double* c, const double* a, const __complex__ double* b);

     extern void bgq_mtxmq_padded(long ni, long nj, long nk, long ej,

             __complex__ double* c, const __complex__ double* a, const double* b);


     // Explicit specialization of mTxmq_padding for all-double operands
     // (HAVE_IBMBGQ builds only). Replaces the generic implementation by
     // forwarding every argument unchanged to bgq_mtxmq_padded.
     template <>

         inline void mTxmq_padding(long ni, long nj, long nk, long ej,

                 double* c, const double* a, const double* b) {

             bgq_mtxmq_padded(ni, nj, nk, ej, c, a, b);

         }


     // Explicit specialization of mTxmq_padding for all-`__complex__ double`
     // operands (HAVE_IBMBGQ builds only). Forwards every argument unchanged
     // to the matching bgq_mtxmq_padded overload.
     template <>

         inline void mTxmq_padding(long ni, long nj, long nk, long ej,

                 __complex__ double* c, const __complex__ double* a, const __complex__ double* b) {

             bgq_mtxmq_padded(ni, nj, nk, ej, c, a, b);

         }


     // Explicit specialization of mTxmq_padding for a real `a`, and
     // `__complex__ double` `b` and `c` (HAVE_IBMBGQ builds only). Forwards
     // every argument unchanged to the matching bgq_mtxmq_padded overload.
     template <>

         inline void mTxmq_padding(long ni, long nj, long nk, long ej,

                 __complex__ double* c, const double* a, const __complex__ double* b) {

             bgq_mtxmq_padded(ni, nj, nk, ej, c, a, b);

         }


     // Explicit specialization of mTxmq_padding for a `__complex__ double` `a`,
     // a real `b`, and a `__complex__ double` `c` (HAVE_IBMBGQ builds only).
     // Forwards every argument unchanged to the matching bgq_mtxmq_padded
     // overload.
     template <>

         inline void mTxmq_padding(long ni, long nj, long nk, long ej,

                 __complex__ double* c, const __complex__ double* a, const double* b) {

             bgq_mtxmq_padded(ni, nj, nk, ej, c, a, b);

         }

 #elif defined(HAVE_IBMBGP)

     // Declarations of the bgpmTxmq overloads, visible only when HAVE_IBMBGP is
     // defined (and HAVE_IBMBGQ is not). No bodies are given here; the
     // definitions must come from another translation unit. The parameter list
     // mirrors mTxmq (ni, nj, nk, c, a, b); one overload takes all-double
     // operands, the other all-double_complex operands.
     extern void bgpmTxmq(long ni, long nj, long nk, double* restrict c,

                          const double* a, const double* b);

     extern void bgpmTxmq(long ni, long nj, long nk, double_complex* restrict c,

                          const double_complex* a, const double_complex* b);


     // Explicit specialization of mTxmq for all-double operands (HAVE_IBMBGP
     // builds only). Replaces the generic loop nest by forwarding every
     // argument unchanged to bgpmTxmq.
     template <>

     inline void mTxmq(long ni, long nj, long nk, double* restrict c, const double* a, const double* b) {

         bgpmTxmq(ni, nj, nk, c, a, b);

     }


     // Explicit specialization of mTxmq for all-double_complex operands
     // (HAVE_IBMBGP builds only). Forwards every argument unchanged to the
     // double_complex overload of bgpmTxmq.
     template <>

     inline void mTxmq(long ni, long nj, long nk, double_complex* restrict c, const double_complex* a, const double_complex* b) {

         bgpmTxmq(ni, nj, nk, c, a, b);

     }


 #elif defined(X86_64) && !defined(DISABLE_SSE3)

     // Declarations (no bodies) of explicit specializations of mTxmq, visible
     // when X86_64 is defined and DISABLE_SSE3 is not. Because only the
     // declarations appear here, the definitions must be provided by another
     // translation unit; declaring them prevents the generic template from
     // being instantiated for these operand types.
     //
     // First: all-double operands.
     template <>

     void mTxmq(long dimi, long dimj, long dimk,

                double* restrict c, const double* a, const double* b);


     // Second: all-double_complex operands.
     template <>

     void mTxmq(long dimi, long dimj, long dimk,

                double_complex* restrict c, const double_complex* a, const double_complex* b);


 #ifndef __INTEL_COMPILER

     // Declaration (no body) of the explicit specialization of mTxmq for a
     // double_complex `a` and `c` with a real `b`. It sits inside
     // `#ifndef __INTEL_COMPILER`, so builds with that macro defined fall back
     // to the generic template for this operand combination.
     template <>

     void mTxmq(long dimi, long dimj, long dimk,

                double_complex* restrict c, const double_complex* a, const double* b);

 #endif


 #elif defined(X86_32)

     // Declaration (no body) of the explicit specialization of mTxmq for
     // all-double operands, visible when X86_32 is defined and none of the
     // earlier branches of this conditional was taken. The definition must be
     // provided by another translation unit.
     template <>

     void mTxmq(long dimi, long dimj, long dimk,

                double* restrict c, const double* a, const double* b);

 #endif


 }


 #endif // MADNESS_TENSOR_MTXMQ_H__INCLUDED

double_complex
std::complex< double > double_complex
Definition: lineplot.cc:16

madness::bgq_mtxmq_padded
void bgq_mtxmq_padded(long dimi, long dimj, long dimk, long extb, __complex__ double *c_x, const __complex__ double *a_x, const __complex__ double *b_x)
Definition: bgq_mtxm.cc:10

madness::mTxmq_padding
void mTxmq_padding(long dimi, long dimj, long dimk, long ext_b, cT *c, const aT *a, const bT *b)
Definition: mtxmq.h:74

double_complex
std::complex< double > double_complex
Definition: mtxmq.h:38

k
const int k
Definition: dielectric.cc:184

a
FLOAT a(int j, FLOAT z)
Definition: y1.cc:86

madness::mTxmq
void mTxmq(long dimi, long dimj, long dimk, cT *restrict c, const aT *a, const bT *b)
Definition: mtxmq.h:50

madness_config.h

restrict
#define restrict
Definition: config.h:403

madness
Holds machinery to set up Functions/FuncImpls using various Factories and Interfaces.
Definition: chem/atomutil.cc:45

c
const double c
Definition: gfit.cc:200

b
FLOAT b(int j, FLOAT z)
Definition: y1.cc:79

malloc
char * malloc()