MADNESS version 0.9 — tensor_macros.h (tensor iterator macro definitions)
1 /*
2  This file is part of MADNESS.
3 
4  Copyright (C) 2007,2010 Oak Ridge National Laboratory
5 
6  This program is free software; you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation; either version 2 of the License, or
9  (at your option) any later version.
10 
11  This program is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  GNU General Public License for more details.
15 
16  You should have received a copy of the GNU General Public License
17  along with this program; if not, write to the Free Software
18  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 
20  For more information please contact:
21 
22  Robert J. Harrison
23  Oak Ridge National Laboratory
24  One Bethel Valley Road
25  P.O. Box 2008, MS-6367
26 
27  email: harrisonrj@ornl.gov
28  tel: 865-241-3937
29  fax: 865-572-0680
30 
31 
32  $Id$
33 */
34 
35 
36 #ifndef MADNESS_TENSOR_TENSOR_MACROS_H__INCLUDED
37 #define MADNESS_TENSOR_TENSOR_MACROS_H__INCLUDED
38 
193 // don't change this without changing the iterator macros
194 #define TENSOR_MAXDIM 6
195 
197 
198 #define IND1 _i
199 #define IND2 _i,_j
200 #define IND3 _i,_j,_k
201 #define IND4 _i,_j,_k,_l
202 #define IND5 _i,_j,_k,_l,_m
203 #define IND6 _i,_j,_k,_l,_m,_n
204 #define IND IND6
205 
206 
// ITERATORn(t,exp): evaluate exp once for every element of the
// n-dimensional tensor t, in row-major (last-index-fastest) order.
// Inside exp the loop indices _i,_j,_k,... and a running element
// counter _index are available.  The dimensions are read once into
// locals, so exp must not resize t.
#define ITERATOR1(t,exp) do { \
  long __xd0=t.dim(0),_index=0; \
  for (long _i=0; _i<__xd0; ++_i) {exp;_index++;} } while (0)

#define ITERATOR2(t,exp) do { \
  long __xd0=t.dim(0), __xd1=t.dim(1), _index=0; \
  for (long _i=0; _i<__xd0; ++_i) { \
    for (long _j=0; _j<__xd1; ++_j) {exp;_index++;} } } while (0)

#define ITERATOR3(t,exp) do { \
  long __xd0=t.dim(0), __xd1=t.dim(1), __xd2=t.dim(2), _index=0; \
  for (long _i=0; _i<__xd0; ++_i) { \
    for (long _j=0; _j<__xd1; ++_j) { \
      for (long _k=0; _k<__xd2; ++_k) {exp;_index++;} } } } while (0)

#define ITERATOR4(t,exp) do { \
  long __xd0=t.dim(0), __xd1=t.dim(1), __xd2=t.dim(2), \
       __xd3=t.dim(3), _index=0; \
  for (long _i=0; _i<__xd0; ++_i) { \
    for (long _j=0; _j<__xd1; ++_j) { \
      for (long _k=0; _k<__xd2; ++_k) { \
        for (long _l=0; _l<__xd3; ++_l) {exp;_index++;} } } } } while (0)

#define ITERATOR5(t,exp) do { \
  long __xd0=t.dim(0), __xd1=t.dim(1), __xd2=t.dim(2), \
       __xd3=t.dim(3), __xd4=t.dim(4), _index=0; \
  for (long _i=0; _i<__xd0; ++_i) { \
    for (long _j=0; _j<__xd1; ++_j) { \
      for (long _k=0; _k<__xd2; ++_k) { \
        for (long _l=0; _l<__xd3; ++_l) { \
          for (long _m=0; _m<__xd4; ++_m) {exp;_index++;} } } } } } while (0)

// (A stray double semicolon after _index=0 in the original was removed.)
#define ITERATOR6(t,exp) do { \
  long __xd0=t.dim(0), __xd1=t.dim(1), __xd2=t.dim(2), \
       __xd3=t.dim(3), __xd4=t.dim(4), __xd5=t.dim(5), _index=0; \
  for (long _i=0; _i<__xd0; ++_i) { \
    for (long _j=0; _j<__xd1; ++_j) { \
      for (long _k=0; _k<__xd2; ++_k) { \
        for (long _l=0; _l<__xd3; ++_l) { \
          for (long _m=0; _m<__xd4; ++_m) { \
            for (long _n=0; _n<__xd5; ++_n) {exp;_index++;} } } } } } } while(0)

// Generic dispatcher: selects ITERATORn from t.ndim() at run time.
// The unused higher indices are pre-declared as zero so that exp may
// reference them regardless of the actual dimensionality.
#define ITERATOR(t,exp) do { \
  long _j=0, _k=0, _l=0, _m=0, _n=0; \
  if (t.ndim() == 1) {ITERATOR1(t,exp);} \
  else if (t.ndim() == 2) {ITERATOR2(t,exp);} \
  else if (t.ndim() == 3) {ITERATOR3(t,exp);} \
  else if (t.ndim() == 4) {ITERATOR4(t,exp);} \
  else if (t.ndim() == 5) {ITERATOR5(t,exp);} \
  else if (t.ndim() == 6) {ITERATOR6(t,exp);} \
  else {TENSOR_ASSERT(t.ndim() <= 6,"ndim confused?",t.ndim(),&t);} \
  } while(0)
259 
// UNARYITERATORn(X,x,exp): evaluate exp once for every element of the
// n-dimensional tensor x with element type X.  Inside exp, _p0 points
// at the current element ("pointer to argument number 0") and the loop
// indices _i, _j, _k, ... are defined.  Strides are honoured, so x
// need not be contiguous.
// NOTE(review): `restrict` is not standard C++; presumably it is mapped
// to a compiler extension (e.g. __restrict) elsewhere in the build --
// confirm against the project's configuration headers.
#define UNARYITERATOR1(X,x,exp) do { \
  long __xd0=x.dim(0); \
  long __xs0=x.stride(0); \
  X* restrict _p0=x.ptr(); \
  for (long _i=0; _i<__xd0; ++_i,_p0+=__xs0) { \
    exp; \
  } } while(0)

#define UNARYITERATOR2(X,x,exp) do { \
  long __xd0=x.dim(0), __xd1=x.dim(1); \
  long __xs0=x.stride(0), __xs1=x.stride(1); \
  X* restrict __xp0=x.ptr(); \
  for (long _i=0; _i<__xd0; ++_i,__xp0+=__xs0) { \
    X* restrict _p0=__xp0; \
    for (long _j=0; _j<__xd1; ++_j, _p0+=__xs1) { \
      exp; \
    } } } while(0)

#define UNARYITERATOR3(X,x,exp) do { \
  long __xd0=x.dim(0), __xd1=x.dim(1), __xd2=x.dim(2); \
  long __xs0=x.stride(0), __xs1=x.stride(1), __xs2=x.stride(2); \
  X* restrict __xp0=x.ptr(); \
  for (long _i=0; _i<__xd0; ++_i,__xp0+=__xs0) { \
    X* restrict __xp1=__xp0; \
    for (long _j=0; _j<__xd1; ++_j, __xp1+=__xs1) { \
      X* restrict _p0=__xp1; \
      for (long _k=0; _k<__xd2; ++_k, _p0+=__xs2) { \
        exp; \
      } } } } while(0)

#define UNARYITERATOR4(X,x,exp) do { \
  long __xd0=x.dim(0), __xd1=x.dim(1), __xd2=x.dim(2), \
       __xd3=x.dim(3); \
  long __xs0=x.stride(0), __xs1=x.stride(1), __xs2=x.stride(2), \
       __xs3=x.stride(3); \
  X* restrict __xp0=x.ptr(); \
  for (long _i=0; _i<__xd0; ++_i,__xp0+=__xs0) { \
    X* restrict __xp1=__xp0; \
    for (long _j=0; _j<__xd1; ++_j, __xp1+=__xs1) { \
      X* restrict __xp2=__xp1; \
      for (long _k=0; _k<__xd2; ++_k, __xp2+=__xs2) { \
        X* restrict _p0=__xp2; \
        for (long _l=0; _l<__xd3; ++_l, _p0+=__xs3) { \
          exp; \
        } } } } } while(0)

#define UNARYITERATOR5(X,x,exp) do { \
  long __xd0=x.dim(0), __xd1=x.dim(1), __xd2=x.dim(2), \
       __xd3=x.dim(3), __xd4=x.dim(4); \
  long __xs0=x.stride(0), __xs1=x.stride(1), __xs2=x.stride(2), \
       __xs3=x.stride(3), __xs4=x.stride(4); \
  X* restrict __xp0=x.ptr(); \
  for (long _i=0; _i<__xd0; ++_i,__xp0+=__xs0) { \
    X* restrict __xp1=__xp0; \
    for (long _j=0; _j<__xd1; ++_j, __xp1+=__xs1) { \
      X* restrict __xp2=__xp1; \
      for (long _k=0; _k<__xd2; ++_k, __xp2+=__xs2) { \
        X* restrict __xp3=__xp2; \
        for (long _l=0; _l<__xd3; ++_l, __xp3+=__xs3) { \
          X* restrict _p0=__xp3; \
          for (long _m=0; _m<__xd4; ++_m, _p0+=__xs4) { \
            exp; \
          } } } } } } while(0)

#define UNARYITERATOR6(X,x,exp) do { \
  long __xd0=x.dim(0), __xd1=x.dim(1), __xd2=x.dim(2), \
       __xd3=x.dim(3), __xd4=x.dim(4), __xd5=x.dim(5); \
  long __xs0=x.stride(0), __xs1=x.stride(1), __xs2=x.stride(2), \
       __xs3=x.stride(3), __xs4=x.stride(4), __xs5=x.stride(5); \
  X* restrict __xp0=x.ptr(); \
  for (long _i=0; _i<__xd0; ++_i,__xp0+=__xs0) { \
    X* restrict __xp1=__xp0; \
    for (long _j=0; _j<__xd1; ++_j, __xp1+=__xs1) { \
      X* restrict __xp2=__xp1; \
      for (long _k=0; _k<__xd2; ++_k, __xp2+=__xs2) { \
        X* restrict __xp3=__xp2; \
        for (long _l=0; _l<__xd3; ++_l, __xp3+=__xs3) { \
          X* restrict __xp4=__xp3; \
          for (long _m=0; _m<__xd4; ++_m, __xp4+=__xs4) { \
            X* restrict _p0=__xp4; \
            for (long _n=0; _n<__xd5; ++_n, _p0+=__xs5) { \
              exp; \
            } } } } } } } while(0)

// Generic dispatcher: selects UNARYITERATORn from x.ndim() at run time.
// Unused higher indices are pre-declared as zero so that exp may
// reference them regardless of the actual dimensionality.
#define UNARYITERATOR(X,x,exp) do { \
  long _j=0, _k=0, _l=0, _m=0, _n=0; \
  if (x.ndim() == 1) UNARYITERATOR1(X,x,exp); \
  else if (x.ndim() == 2) UNARYITERATOR2(X,x,exp); \
  else if (x.ndim() == 3) UNARYITERATOR3(X,x,exp); \
  else if (x.ndim() == 4) UNARYITERATOR4(X,x,exp); \
  else if (x.ndim() == 5) UNARYITERATOR5(X,x,exp); \
  else if (x.ndim() == 6) UNARYITERATOR6(X,x,exp); \
  else {TENSOR_ASSERT(x.ndim() <= 6,"ndim confused?",x.ndim(),&x);} } while(0)
355 
// BINARYITERATORn(X,x,Y,y,exp): evaluate exp once per element position
// of the conforming tensors x (element type X) and y (element type Y).
// Inside exp, _p0 and _p1 point at the current elements of x and y
// respectively, and the indices _i, _j, _k, ... are defined.  The two
// tensors must conform in shape (asserted); their strides may differ.
#define BINARYITERATOR1(X,x,Y,y,exp) do { \
  TENSOR_ASSERT(x.conforms(y),"first and second tensors do not conform",0,&x); \
  long __xd0=x.dim(0); \
  long __xs0=x.stride(0); \
  long __ys0=y.stride(0); \
  X* restrict _p0=x.ptr(); \
  Y* restrict _p1=y.ptr(); \
  for (long _i=0; _i<__xd0; ++_i, _p0+=__xs0, _p1+=__ys0) { \
    exp; \
  } } while(0)

#define BINARYITERATOR2(X,x,Y,y,exp) do { \
  TENSOR_ASSERT(x.conforms(y),"first and second tensors do not conform",0,&x); \
  long __xd0=x.dim(0), __xd1=x.dim(1); \
  long __xs0=x.stride(0), __xs1=x.stride(1); \
  long __ys0=y.stride(0), __ys1=y.stride(1); \
  X* restrict __xp0=x.ptr(); \
  Y* restrict __yp0=y.ptr(); \
  for (long _i=0; _i<__xd0; ++_i, __xp0+=__xs0, __yp0+=__ys0) { \
    X* restrict _p0=__xp0; \
    Y* restrict _p1=__yp0; \
    for (long _j=0; _j<__xd1; ++_j, _p0+=__xs1, _p1+=__ys1) { \
      exp; \
    } } } while(0)

#define BINARYITERATOR3(X,x,Y,y,exp) do { \
  TENSOR_ASSERT(x.conforms(y),"first and second tensors do not conform",0,&x); \
  long __xd0=x.dim(0), __xd1=x.dim(1), __xd2=x.dim(2); \
  long __xs0=x.stride(0), __xs1=x.stride(1), __xs2=x.stride(2); \
  long __ys0=y.stride(0), __ys1=y.stride(1), __ys2=y.stride(2); \
  X* restrict __xp0=x.ptr(); \
  Y* restrict __yp0=y.ptr(); \
  for (long _i=0; _i<__xd0; ++_i, __xp0+=__xs0, __yp0+=__ys0) { \
    X* restrict __xp1=__xp0; \
    Y* restrict __yp1=__yp0; \
    for (long _j=0; _j<__xd1; ++_j, __xp1+=__xs1, __yp1+=__ys1) { \
      X* restrict _p0=__xp1; \
      Y* restrict _p1=__yp1; \
      for (long _k=0; _k<__xd2; ++_k, _p0+=__xs2, _p1+=__ys2) { \
        exp; \
      } } } } while(0)

#define BINARYITERATOR4(X,x,Y,y,exp) do { \
  TENSOR_ASSERT(x.conforms(y),"first and second tensors do not conform",0,&x); \
  long __xd0=x.dim(0), __xd1=x.dim(1), __xd2=x.dim(2), \
       __xd3=x.dim(3); \
  long __xs0=x.stride(0), __xs1=x.stride(1), __xs2=x.stride(2), \
       __xs3=x.stride(3); \
  long __ys0=y.stride(0), __ys1=y.stride(1), __ys2=y.stride(2), \
       __ys3=y.stride(3); \
  X* restrict __xp0=x.ptr(); \
  Y* restrict __yp0=y.ptr(); \
  for (long _i=0; _i<__xd0; ++_i, __xp0+=__xs0, __yp0+=__ys0) { \
    X* restrict __xp1=__xp0; \
    Y* restrict __yp1=__yp0; \
    for (long _j=0; _j<__xd1; ++_j, __xp1+=__xs1, __yp1+=__ys1) { \
      X* restrict __xp2=__xp1; \
      Y* restrict __yp2=__yp1; \
      for (long _k=0; _k<__xd2; ++_k, __xp2+=__xs2, __yp2+=__ys2) { \
        X* restrict _p0=__xp2; \
        Y* restrict _p1=__yp2; \
        for (long _l=0; _l<__xd3; ++_l, _p0+=__xs3, _p1+=__ys3) { \
          exp; \
        } } } } } while(0)

#define BINARYITERATOR5(X,x,Y,y,exp) do { \
  TENSOR_ASSERT(x.conforms(y),"first and second tensors do not conform",0,&x); \
  long __xd0=x.dim(0), __xd1=x.dim(1), __xd2=x.dim(2), \
       __xd3=x.dim(3), __xd4=x.dim(4); \
  long __xs0=x.stride(0), __xs1=x.stride(1), __xs2=x.stride(2), \
       __xs3=x.stride(3), __xs4=x.stride(4); \
  long __ys0=y.stride(0), __ys1=y.stride(1), __ys2=y.stride(2), \
       __ys3=y.stride(3), __ys4=y.stride(4); \
  X* restrict __xp0=x.ptr(); \
  Y* restrict __yp0=y.ptr(); \
  for (long _i=0; _i<__xd0; ++_i, __xp0+=__xs0, __yp0+=__ys0) { \
    X* restrict __xp1=__xp0; \
    Y* restrict __yp1=__yp0; \
    for (long _j=0; _j<__xd1; ++_j, __xp1+=__xs1, __yp1+=__ys1) { \
      X* restrict __xp2=__xp1; \
      Y* restrict __yp2=__yp1; \
      for (long _k=0; _k<__xd2; ++_k, __xp2+=__xs2, __yp2+=__ys2) { \
        X* restrict __xp3=__xp2; \
        Y* restrict __yp3=__yp2; \
        for (long _l=0; _l<__xd3; ++_l, __xp3+=__xs3, __yp3+=__ys3) { \
          X* restrict _p0=__xp3; \
          Y* restrict _p1=__yp3; \
          for (long _m=0; _m<__xd4; ++_m, _p0+=__xs4, _p1+=__ys4) { \
            exp; \
          } } } } } } while(0)

#define BINARYITERATOR6(X,x,Y,y,exp) do { \
  TENSOR_ASSERT(x.conforms(y),"first and second tensors do not conform",0,&x); \
  long __xd0=x.dim(0), __xd1=x.dim(1), __xd2=x.dim(2), \
       __xd3=x.dim(3), __xd4=x.dim(4), __xd5=x.dim(5); \
  long __xs0=x.stride(0), __xs1=x.stride(1), __xs2=x.stride(2), \
       __xs3=x.stride(3), __xs4=x.stride(4), __xs5=x.stride(5); \
  long __ys0=y.stride(0), __ys1=y.stride(1), __ys2=y.stride(2), \
       __ys3=y.stride(3), __ys4=y.stride(4), __ys5=y.stride(5); \
  X* restrict __xp0=x.ptr(); \
  Y* restrict __yp0=y.ptr(); \
  for (long _i=0; _i<__xd0; ++_i, __xp0+=__xs0, __yp0+=__ys0) { \
    X* restrict __xp1=__xp0; \
    Y* restrict __yp1=__yp0; \
    for (long _j=0; _j<__xd1; ++_j, __xp1+=__xs1, __yp1+=__ys1) { \
      X* restrict __xp2=__xp1; \
      Y* restrict __yp2=__yp1; \
      for (long _k=0; _k<__xd2; ++_k, __xp2+=__xs2, __yp2+=__ys2) { \
        X* restrict __xp3=__xp2; \
        Y* restrict __yp3=__yp2; \
        for (long _l=0; _l<__xd3; ++_l, __xp3+=__xs3, __yp3+=__ys3) { \
          X* restrict __xp4=__xp3; \
          Y* restrict __yp4=__yp3; \
          for (long _m=0; _m<__xd4; ++_m, __xp4+=__xs4, __yp4+=__ys4) { \
            X* restrict _p0=__xp4; \
            Y* restrict _p1=__yp4; \
            for (long _n=0; _n<__xd5; ++_n, _p0+=__xs5, _p1+=__ys5) { \
              exp; \
            } } } } } } } while(0)

// Generic dispatcher: selects BINARYITERATORn from x.ndim() at run
// time.  Unused higher indices are pre-declared as zero so that exp
// may reference them regardless of the actual dimensionality.
#define BINARYITERATOR(X,x,Y,y,exp) do { \
  long _j=0, _k=0, _l=0, _m=0, _n=0; \
  if (x.ndim() == 1) BINARYITERATOR1(X,x,Y,y,exp); \
  else if (x.ndim() == 2) BINARYITERATOR2(X,x,Y,y,exp); \
  else if (x.ndim() == 3) BINARYITERATOR3(X,x,Y,y,exp); \
  else if (x.ndim() == 4) BINARYITERATOR4(X,x,Y,y,exp); \
  else if (x.ndim() == 5) BINARYITERATOR5(X,x,Y,y,exp); \
  else if (x.ndim() == 6) BINARYITERATOR6(X,x,Y,y,exp); \
  else {TENSOR_ASSERT(x.ndim() <= 6,"ndim confused?",x.ndim(),&x);} \
} while(0)
488 
// TERNARYITERATORn(X,x,Y,y,Z,z,exp): evaluate exp once per element
// position of the conforming tensors x, y, z (element types X, Y, Z).
// Inside exp, _p0, _p1, _p2 point at the current elements of x, y, z
// respectively, and the indices _i, _j, _k, ... are defined.  All
// three tensors must conform in shape (asserted); strides may differ.
#define TERNARYITERATOR1(X,x,Y,y,Z,z,exp) do { \
  TENSOR_ASSERT(x.conforms(y),"first and second tensors do not conform",0,&x); \
  TENSOR_ASSERT(x.conforms(z),"first and third tensors do not conform",0,&x); \
  long __xd0=x.dim(0); \
  long __xs0=x.stride(0); \
  long __ys0=y.stride(0); \
  long __zs0=z.stride(0); \
  X* restrict _p0=x.ptr(); \
  Y* restrict _p1=y.ptr(); \
  Z* restrict _p2=z.ptr(); \
  for (long _i=0; _i<__xd0; ++_i, _p0+=__xs0, _p1+=__ys0, _p2+=__zs0) { \
    exp; \
  } } while(0)

#define TERNARYITERATOR2(X,x,Y,y,Z,z,exp) do { \
  TENSOR_ASSERT(x.conforms(y),"first and second tensors do not conform",0,&x); \
  TENSOR_ASSERT(x.conforms(z),"first and third tensors do not conform",0,&x); \
  long __xd0=x.dim(0), __xd1=x.dim(1); \
  long __xs0=x.stride(0), __xs1=x.stride(1); \
  long __ys0=y.stride(0), __ys1=y.stride(1); \
  long __zs0=z.stride(0), __zs1=z.stride(1); \
  X* restrict __xp0=x.ptr(); \
  Y* restrict __yp0=y.ptr(); \
  Z* restrict __zp0=z.ptr(); \
  for (long _i=0; _i<__xd0; ++_i, __xp0+=__xs0, __yp0+=__ys0, __zp0+=__zs0) { \
    X* restrict _p0=__xp0; \
    Y* restrict _p1=__yp0; \
    Z* restrict _p2=__zp0; \
    for (long _j=0; _j<__xd1; ++_j, _p0+=__xs1, _p1+=__ys1, _p2+=__zs1) { \
      exp; \
    } } } while(0)

#define TERNARYITERATOR3(X,x,Y,y,Z,z,exp) do { \
  TENSOR_ASSERT(x.conforms(y),"first and second tensors do not conform",0,&x); \
  TENSOR_ASSERT(x.conforms(z),"first and third tensors do not conform",0,&x); \
  long __xd0=x.dim(0), __xd1=x.dim(1), __xd2=x.dim(2); \
  long __xs0=x.stride(0), __xs1=x.stride(1), __xs2=x.stride(2); \
  long __ys0=y.stride(0), __ys1=y.stride(1), __ys2=y.stride(2); \
  long __zs0=z.stride(0), __zs1=z.stride(1), __zs2=z.stride(2); \
  X* restrict __xp0=x.ptr(); \
  Y* restrict __yp0=y.ptr(); \
  Z* restrict __zp0=z.ptr(); \
  for (long _i=0; _i<__xd0; ++_i, __xp0+=__xs0, __yp0+=__ys0, __zp0+=__zs0) { \
    X* restrict __xp1=__xp0; \
    Y* restrict __yp1=__yp0; \
    Z* restrict __zp1=__zp0; \
    for (long _j=0; _j<__xd1; ++_j, __xp1+=__xs1, __yp1+=__ys1, __zp1+=__zs1) { \
      X* restrict _p0=__xp1; \
      Y* restrict _p1=__yp1; \
      Z* restrict _p2=__zp1; \
      for (long _k=0; _k<__xd2; ++_k, _p0+=__xs2, _p1+=__ys2, _p2+=__zs2) { \
        exp; \
      } } } } while(0)

#define TERNARYITERATOR4(X,x,Y,y,Z,z,exp) do { \
  TENSOR_ASSERT(x.conforms(y),"first and second tensors do not conform",0,&x); \
  TENSOR_ASSERT(x.conforms(z),"first and third tensors do not conform",0,&x); \
  long __xd0=x.dim(0), __xd1=x.dim(1), __xd2=x.dim(2), \
       __xd3=x.dim(3); \
  long __xs0=x.stride(0), __xs1=x.stride(1), __xs2=x.stride(2), \
       __xs3=x.stride(3); \
  long __ys0=y.stride(0), __ys1=y.stride(1), __ys2=y.stride(2), \
       __ys3=y.stride(3); \
  long __zs0=z.stride(0), __zs1=z.stride(1), __zs2=z.stride(2), \
       __zs3=z.stride(3); \
  X* restrict __xp0=x.ptr(); \
  Y* restrict __yp0=y.ptr(); \
  Z* restrict __zp0=z.ptr(); \
  for (long _i=0; _i<__xd0; ++_i, __xp0+=__xs0, __yp0+=__ys0, __zp0+=__zs0) { \
    X* restrict __xp1=__xp0; \
    Y* restrict __yp1=__yp0; \
    Z* restrict __zp1=__zp0; \
    for (long _j=0; _j<__xd1; ++_j, __xp1+=__xs1, __yp1+=__ys1, __zp1+=__zs1) { \
      X* restrict __xp2=__xp1; \
      Y* restrict __yp2=__yp1; \
      Z* restrict __zp2=__zp1; \
      for (long _k=0; _k<__xd2; ++_k, __xp2+=__xs2, __yp2+=__ys2, __zp2+=__zs2) { \
        X* restrict _p0=__xp2; \
        Y* restrict _p1=__yp2; \
        Z* restrict _p2=__zp2; \
        for (long _l=0; _l<__xd3; ++_l, _p0+=__xs3, _p1+=__ys3, _p2+=__zs3) { \
          exp; \
        } } } } } while(0)

#define TERNARYITERATOR5(X,x,Y,y,Z,z,exp) do { \
  TENSOR_ASSERT(x.conforms(y),"first and second tensors do not conform",0,&x); \
  TENSOR_ASSERT(x.conforms(z),"first and third tensors do not conform",0,&x); \
  long __xd0=x.dim(0), __xd1=x.dim(1), __xd2=x.dim(2), \
       __xd3=x.dim(3), __xd4=x.dim(4); \
  long __xs0=x.stride(0), __xs1=x.stride(1), __xs2=x.stride(2), \
       __xs3=x.stride(3), __xs4=x.stride(4); \
  long __ys0=y.stride(0), __ys1=y.stride(1), __ys2=y.stride(2), \
       __ys3=y.stride(3), __ys4=y.stride(4); \
  long __zs0=z.stride(0), __zs1=z.stride(1), __zs2=z.stride(2), \
       __zs3=z.stride(3), __zs4=z.stride(4); \
  X* restrict __xp0=x.ptr(); \
  Y* restrict __yp0=y.ptr(); \
  Z* restrict __zp0=z.ptr(); \
  for (long _i=0; _i<__xd0; ++_i, __xp0+=__xs0, __yp0+=__ys0, __zp0+=__zs0) { \
    X* restrict __xp1=__xp0; \
    Y* restrict __yp1=__yp0; \
    Z* restrict __zp1=__zp0; \
    for (long _j=0; _j<__xd1; ++_j, __xp1+=__xs1, __yp1+=__ys1, __zp1+=__zs1) { \
      X* restrict __xp2=__xp1; \
      Y* restrict __yp2=__yp1; \
      Z* restrict __zp2=__zp1; \
      for (long _k=0; _k<__xd2; ++_k, __xp2+=__xs2, __yp2+=__ys2, __zp2+=__zs2) { \
        X* restrict __xp3=__xp2; \
        Y* restrict __yp3=__yp2; \
        Z* restrict __zp3=__zp2; \
        for (long _l=0; _l<__xd3; ++_l, __xp3+=__xs3, __yp3+=__ys3, __zp3+=__zs3) { \
          X* restrict _p0=__xp3; \
          Y* restrict _p1=__yp3; \
          Z* restrict _p2=__zp3; \
          for (long _m=0; _m<__xd4; ++_m, _p0+=__xs4, _p1+=__ys4, _p2+=__zs4) { \
            exp; \
          } } } } } } while(0)

#define TERNARYITERATOR6(X,x,Y,y,Z,z,exp) do { \
  TENSOR_ASSERT(x.conforms(y),"first and second tensors do not conform",0,&x); \
  TENSOR_ASSERT(x.conforms(z),"first and third tensors do not conform",0,&x); \
  long __xd0=x.dim(0), __xd1=x.dim(1), __xd2=x.dim(2), \
       __xd3=x.dim(3), __xd4=x.dim(4), __xd5=x.dim(5); \
  long __xs0=x.stride(0), __xs1=x.stride(1), __xs2=x.stride(2), \
       __xs3=x.stride(3), __xs4=x.stride(4), __xs5=x.stride(5); \
  long __ys0=y.stride(0), __ys1=y.stride(1), __ys2=y.stride(2), \
       __ys3=y.stride(3), __ys4=y.stride(4), __ys5=y.stride(5); \
  long __zs0=z.stride(0), __zs1=z.stride(1), __zs2=z.stride(2), \
       __zs3=z.stride(3), __zs4=z.stride(4), __zs5=z.stride(5); \
  X* restrict __xp0=x.ptr(); \
  Y* restrict __yp0=y.ptr(); \
  Z* restrict __zp0=z.ptr(); \
  for (long _i=0; _i<__xd0; ++_i, __xp0+=__xs0, __yp0+=__ys0, __zp0+=__zs0) { \
    X* restrict __xp1=__xp0; \
    Y* restrict __yp1=__yp0; \
    Z* restrict __zp1=__zp0; \
    for (long _j=0; _j<__xd1; ++_j, __xp1+=__xs1, __yp1+=__ys1, __zp1+=__zs1) { \
      X* restrict __xp2=__xp1; \
      Y* restrict __yp2=__yp1; \
      Z* restrict __zp2=__zp1; \
      for (long _k=0; _k<__xd2; ++_k, __xp2+=__xs2, __yp2+=__ys2, __zp2+=__zs2) { \
        X* restrict __xp3=__xp2; \
        Y* restrict __yp3=__yp2; \
        Z* restrict __zp3=__zp2; \
        for (long _l=0; _l<__xd3; ++_l, __xp3+=__xs3, __yp3+=__ys3, __zp3+=__zs3) { \
          X* restrict __xp4=__xp3; \
          Y* restrict __yp4=__yp3; \
          Z* restrict __zp4=__zp3; \
          for (long _m=0; _m<__xd4; ++_m, __xp4+=__xs4, __yp4+=__ys4, __zp4+=__zs4) { \
            X* restrict _p0=__xp4; \
            Y* restrict _p1=__yp4; \
            Z* restrict _p2=__zp4; \
            for (long _n=0; _n<__xd5; ++_n, _p0+=__xs5, _p1+=__ys5, _p2+=__zs5) { \
              exp; \
            } } } } } } } while(0)

// Generic dispatcher: selects TERNARYITERATORn from x.ndim() at run
// time.  Unused higher indices are pre-declared as zero so that exp
// may reference them regardless of the actual dimensionality.
#define TERNARYITERATOR(X,x,Y,y,Z,z,exp) do { \
  long _j=0, _k=0, _l=0, _m=0, _n=0; \
  if (x.ndim() == 1) TERNARYITERATOR1(X,x,Y,y,Z,z,exp); \
  else if (x.ndim() == 2) TERNARYITERATOR2(X,x,Y,y,Z,z,exp); \
  else if (x.ndim() == 3) TERNARYITERATOR3(X,x,Y,y,Z,z,exp); \
  else if (x.ndim() == 4) TERNARYITERATOR4(X,x,Y,y,Z,z,exp); \
  else if (x.ndim() == 5) TERNARYITERATOR5(X,x,Y,y,Z,z,exp); \
  else if (x.ndim() == 6) TERNARYITERATOR6(X,x,Y,y,Z,z,exp); \
  else {TENSOR_ASSERT(x.ndim() <= 6,"ndim confused?",x.ndim(),&x);} \
} while(0)
657 
// UNARY_OPTIMIZED_ITERATOR(X,x,exp): fast walk over every element of x.
// If x is contiguous, a single flat loop over x.size() is used;
// otherwise a TensorIterator supplies runs of dimj elements starting at
// iter._p0 with stride iter._s0, which are walked by an inner loop.
// Only _p0 (and the innermost counter _j) are defined inside exp --
// unlike UNARYITERATOR, no multi-dimensional indices are available.
// NOTE(review): the argument 1 to unary_iterator presumably requests a
// one-dimension-exposed iteration -- confirm against TensorIterator.
#define UNARY_OPTIMIZED_ITERATOR(X,x,exp) do { \
  if (x.iscontiguous()) { \
    X* restrict _p0 = x.ptr(); \
    for (long _j=0; _j<x.size(); ++_j,++_p0) {exp;} \
  } \
  else { \
    for (TensorIterator<REMCONST(X)> iter=x.unary_iterator(1); iter._p0; ++iter) { \
      long _dimj = iter.dimj; \
      X* restrict _p0 = iter._p0; \
      long _s0 = iter._s0; \
      for (long _j=0; _j<_dimj; ++_j, _p0+=_s0) { \
        exp; \
      } \
    } \
  } \
} while(0)
674 
675 
// Can optimize these by moving definition of stride out of loop.

// UNARY_UNOPTIMIZED_ITERATOR(X,x,exp): like UNARY_OPTIMIZED_ITERATOR
// but always goes through the TensorIterator, with the two extra false
// flags passed to unary_iterator (presumably disabling iteration
// optimizations -- confirm against TensorIterator).  Inside exp, _p0
// points at the current element and _j is the innermost counter.
#define UNARY_UNOPTIMIZED_ITERATOR(X,x,exp) do { \
  for (TensorIterator<REMCONST(X)> iter=x.unary_iterator(1,false,false); iter._p0; ++iter) { \
    long _dimj = iter.dimj; \
    X* restrict _p0 = iter._p0; \
    long _s0 = iter._s0; \
    for (long _j=0; _j<_dimj; ++_j, _p0+=_s0) { \
      exp; \
    } \
  } } while(0)
687 
// Use this inside another iterator macro: all names are suffixed with 2
// (iterator is iter2, counter _j2, stride _s20) and the element pointer
// is _q0 instead of _p0, so it does not clash with the enclosing
// macro's _p*/iter names.
#define UNARY_UNOPTIMIZED_ITERATOR_NESTED(X,x,exp) do { \
  for (TensorIterator<REMCONST(X)> iter2=x.unary_iterator(1,false,false); iter2._p0; ++iter2) { \
    long _dimj2 = iter2.dimj; \
    X* restrict _q0 = iter2._p0; \
    long _s20 = iter2._s0; \
    for (long _j2=0; _j2<_dimj2; ++_j2, _q0+=_s20) { \
      exp; \
    } \
  } } while(0)
700 
// BINARY_OPTIMIZED_ITERATOR(X,x,Y,y,exp): fast paired walk over x and
// y.  If both are contiguous and the same size, a single flat loop is
// used; otherwise a paired TensorIterator (binary_iterator) supplies
// runs walked with per-tensor strides _s0/_s1.  Inside exp, _p0 and
// _p1 point at the current elements of x and y; only the innermost
// counter _j is defined -- no multi-dimensional indices.
#define BINARY_OPTIMIZED_ITERATOR(X,x,Y,y,exp) do { \
  if (x.iscontiguous() && y.iscontiguous() && x.size()==y.size()) { \
    X* restrict _p0 = x.ptr(); \
    Y* restrict _p1 = y.ptr(); \
    for (long _j=0; _j<x.size(); ++_j,++_p0,++_p1) {exp;} \
  } \
  else { \
    for (TensorIterator<REMCONST(X),REMCONST(Y)> iter=x.binary_iterator(y,1); iter._p0; ++iter) { \
      long _dimj = iter.dimj; \
      X* restrict _p0 = iter._p0; \
      Y* restrict _p1 = iter._p1; \
      long _s0 = iter._s0; \
      long _s1 = iter._s1; \
      for (long _j=0; _j<_dimj; ++_j, _p0+=_s0, _p1+=_s1) { \
        exp; \
      } \
    } } } while(0)
718 
719 #define TERNARY_OPTIMIZED_ITERATOR(X,x,Y,y,Z,z,exp) do { \
720  if (x.iscontiguous() && y.iscontiguous() && z.iscontiguous() && x.size()==y.size() && x.size()==z.size()) { \
721  X* restrict _p0 = x.ptr(); \
722  Y* restrict _p1 = y.ptr(); \
723  Z* restrict _p2 = z.ptr(); \
724  for (long _j=0; _j<x.size(); ++_j,++_p0,++_p1,++_p2) {exp;} \
725  } \
726  else { \
727  for (TensorIterator<REMCONST(X),REMCONST(Y),REMCONST(Z)> iter=x.ternary_iterator(y,z,1); iter._p0; ++iter) { \
728  long _dimj = iter.dimj; \
729  X* restrict _p0 = iter._p0; \
730  Y* restrict _p1 = iter._p1; \
731  Z* restrict _p2 = iter._p2; \
732  long _s0 = iter._s0; \
733  long _s1 = iter._s1; \
734  long _s2 = iter._s2; \
735  for (long _j=0; _j<_dimj; ++_j, _p0+=_s0, _p1+=_s1, _p2+=_s2) { \
736  exp; \
737  } \
738  } } } while(0)
739 
740 #endif // MADNESS_TENSOR_TENSOR_MACROS_H__INCLUDED