PSCF v1.4.0
cuda/VecOp.h
1#ifndef PSCF_CUDA_VEC_OP_H
2#define PSCF_CUDA_VEC_OP_H
3
4/*
5* PSCF - Polymer Self-Consistent Field
6*
7* Copyright 2015 - 2025, The Regents of the University of Minnesota
8* Distributed under the terms of the GNU General Public License.
9*/
10
11#include <pscf/cuda/cudaTypes.h>
12#include <pscf/cuda/DeviceArray.h>
13
14namespace Pscf {
15
75
81 namespace VecOp {
82
83 // Assignment operations:
84
/**
* Vector assignment over a slice, real DeviceArray to real DeviceArray.
*
* Declaration only; the definition is not in this header.
*
* NOTE(review): the Array<double> overload of eqV is documented as
* "a[i] = b[i] (real, slice)". This overload presumably does the same for
* n elements, reading b from index beginIdB and writing a from index
* beginIdA -- confirm against the definition.
*
* The inline wrapper that follows this declaration calls
* eqV(a, b, 0, 0, a.capacity()), i.e. zero offsets and n = a.capacity().
*
* \param a  array taken by non-const reference (presumably the destination)
* \param b  array taken by const reference (presumably the source)
* \param beginIdA  presumably the index of the first element of a to use
* \param beginIdB  presumably the index of the first element of b to use
* \param n  presumably the number of elements in the slice
*/
96 void eqV(DeviceArray<cudaReal>& a,
97 DeviceArray<cudaReal> const & b,
98 const int beginIdA, const int beginIdB, const int n);
99
108 inline
110 DeviceArray<cudaReal> const & b)
111 { eqV(a, b, 0, 0, a.capacity()); }
112
/**
* Vector assignment over a slice, real DeviceArray to real Array.
*
* Declaration only; the definition is not in this header.
*
* NOTE(review): a is an Array<cudaReal> (generic array container template)
* while b is a DeviceArray<cudaReal> (array on the GPU device). Whether a is
* expected to hold host or device memory is not visible here -- confirm
* against the definition before relying on it.
*
* The inline wrapper that follows this declaration calls
* eqV(a, b, 0, 0, a.capacity()), i.e. zero offsets and n = a.capacity().
*
* \param a  array taken by non-const reference (presumably the destination)
* \param b  device array taken by const reference (presumably the source)
* \param beginIdA  presumably the index of the first element of a to use
* \param beginIdB  presumably the index of the first element of b to use
* \param n  presumably the number of elements in the slice
*/
124 void eqV(Array<cudaReal>& a,
125 DeviceArray<cudaReal> const & b,
126 const int beginIdA, const int beginIdB, const int n);
127
136 inline
138 DeviceArray<cudaReal> const & b)
139 { eqV(a, b, 0, 0, a.capacity()); }
140
153 Array<cudaReal> const & b,
154 const int beginIdA, const int beginIdB, const int n);
155
164 inline
166 Array<cudaReal> const & b)
167 { eqV(a, b, 0, 0, a.capacity()); }
168
181 DeviceArray<cudaComplex> const & b,
182 const int beginIdA, const int beginIdB, const int n);
183
192 inline
194 DeviceArray<cudaComplex> const & b)
195 { eqV(a, b, 0, 0, a.capacity()); }
196
208 const cudaReal b,
209 const int beginIdA, const int n);
210
219 inline
221 { eqS(a, b, 0, a.capacity()); }
222
234 const cudaComplex b,
235 const int beginIdA, const int n);
236
245 inline
247 { eqS(a, b, 0, a.capacity()); }
248
249 // Addition operations
250
265 DeviceArray<cudaReal> const & b,
266 DeviceArray<cudaReal> const & c,
267 const int beginIdA, const int beginIdB, const int beginIdC,
268 const int n);
269
279 inline
281 DeviceArray<cudaReal> const & b,
282 DeviceArray<cudaReal> const & c)
283 { addVV(a, b, c, 0, 0, 0, a.capacity()); }
284
299 DeviceArray<cudaComplex> const & b,
300 DeviceArray<cudaComplex> const & c,
301 const int beginIdA, const int beginIdB, const int beginIdC,
302 const int n);
303
313 inline
315 DeviceArray<cudaComplex> const & b,
316 DeviceArray<cudaComplex> const & c)
317 { addVV(a, b, c, 0, 0, 0, a.capacity()); }
318
333 DeviceArray<cudaReal> const & b,
334 DeviceArray<cudaComplex> const & c,
335 const int beginIdA, const int beginIdB, const int beginIdC,
336 const int n);
337
347 inline
349 DeviceArray<cudaReal> const & b,
350 DeviceArray<cudaComplex> const & c)
351 { addVV(a, b, c, 0, 0, 0, a.capacity()); }
352
367 DeviceArray<cudaComplex> const & b,
368 DeviceArray<cudaReal> const & c,
369 const int beginIdA, const int beginIdB, const int beginIdC,
370 const int n);
371
381 inline
383 DeviceArray<cudaComplex> const & b,
384 DeviceArray<cudaReal> const & c)
385 { addVV(a, b, c, 0, 0, 0, a.capacity()); }
386
400 DeviceArray<cudaReal> const & b,
401 const cudaReal c,
402 const int beginIdA, const int beginIdB,
403 const int n);
404
414 inline
416 DeviceArray<cudaReal> const & b,
417 const cudaReal c)
418 { addVS(a, b, c, 0, 0, a.capacity()); }
419
433 DeviceArray<cudaComplex> const & b,
434 const cudaComplex c,
435 const int beginIdA, const int beginIdB,
436 const int n);
437
447 inline
449 DeviceArray<cudaComplex> const & b,
450 const cudaComplex c)
451 { addVS(a, b, c, 0, 0, a.capacity()); }
452
466 DeviceArray<cudaReal> const & b,
467 const cudaComplex c,
468 const int beginIdA, const int beginIdB, const int n);
469
479 inline
481 DeviceArray<cudaReal> const & b,
482 const cudaComplex c)
483 { addVS(a, b, c, 0, 0, a.capacity()); }
484
498 DeviceArray<cudaComplex> const & b,
499 const cudaReal c,
500 const int beginIdA, const int beginIdB, const int n);
501
511 inline
513 DeviceArray<cudaComplex> const & b,
514 const cudaReal c)
515 { addVS(a, b, c, 0, 0, a.capacity()); }
516
517
518 // Subtraction operations
519
534 DeviceArray<cudaReal> const & b,
535 DeviceArray<cudaReal> const & c,
536 const int beginIdA, const int beginIdB, const int beginIdC,
537 const int n);
538
548 inline
550 DeviceArray<cudaReal> const & c)
551 { subVV(a, b, c, 0, 0, 0, a.capacity()); }
552
567 DeviceArray<cudaComplex> const & b,
568 DeviceArray<cudaComplex> const & c, const int beginIdA,
569 const int beginIdB, const int beginIdC, const int n);
570
580 inline
582 DeviceArray<cudaComplex> const & b,
583 DeviceArray<cudaComplex> const & c)
584 { subVV(a, b, c, 0, 0, 0, a.capacity()); }
585
600 DeviceArray<cudaReal> const & b,
601 DeviceArray<cudaComplex> const & c,
602 const int beginIdA,
603 const int beginIdB, const int beginIdC,
604 const int n);
605
615 inline
617 DeviceArray<cudaReal> const & b,
618 DeviceArray<cudaComplex> const & c)
619 { subVV(a, b, c, 0, 0, 0, a.capacity()); }
620
635 DeviceArray<cudaComplex> const & b,
636 DeviceArray<cudaReal> const & c,
637 const int beginIdA,
638 const int beginIdB, const int beginIdC, const int n);
639
649 inline
651 DeviceArray<cudaComplex> const & b,
652 DeviceArray<cudaReal> const & c)
653 { subVV(a, b, c, 0, 0, 0, a.capacity()); }
654
668 DeviceArray<cudaReal> const & b,
669 const cudaReal c,
670 const int beginIdA, const int beginIdB,
671 const int n);
672
682 inline
684 DeviceArray<cudaReal> const & b,
685 const cudaReal c)
686 { subVS(a, b, c, 0, 0, a.capacity()); }
687
701 DeviceArray<cudaComplex> const & b,
702 const cudaComplex c,
703 const int beginIdA, const int beginIdB, const int n);
704
714 inline
716 DeviceArray<cudaComplex> const & b,
717 const cudaComplex c)
718 { subVS(a, b, c, 0, 0, a.capacity()); }
719
733 DeviceArray<cudaReal> const & b,
734 const cudaComplex c,
735 const int beginIdA, const int beginIdB, const int n);
736
746 inline
748 DeviceArray<cudaReal> const & b,
749 const cudaComplex c)
750 { subVS(a, b, c, 0, 0, a.capacity()); }
751
765 DeviceArray<cudaComplex> const & b,
766 const cudaReal c,
767 const int beginIdA, const int beginIdB, const int n);
768
778 inline
780 DeviceArray<cudaComplex> const & b,
781 const cudaReal c)
782 { subVS(a, b, c, 0, 0, a.capacity()); }
783
784
785 // Multiplication operations
786
801 DeviceArray<cudaReal> const & b,
802 DeviceArray<cudaReal> const & c,
803 const int beginIdA, const int beginIdB, const int beginIdC,
804 const int n);
805
815 inline
817 DeviceArray<cudaReal> const & b,
818 DeviceArray<cudaReal> const & c)
819 { mulVV(a, b, c, 0, 0, 0, a.capacity()); }
820
835 DeviceArray<cudaComplex> const & b,
836 DeviceArray<cudaComplex> const & c,
837 const int beginIdA, const int beginIdB, const int beginIdC,
838 const int n);
839
849 inline
851 DeviceArray<cudaComplex> const & b,
852 DeviceArray<cudaComplex> const & c)
853 { mulVV(a, b, c, 0, 0, 0, a.capacity()); }
854
869 DeviceArray<cudaReal> const & b,
870 DeviceArray<cudaComplex> const & c, const int beginIdA,
871 const int beginIdB, const int beginIdC, const int n);
872
882 inline
884 DeviceArray<cudaReal> const & b,
885 DeviceArray<cudaComplex> const & c)
886 { mulVV(a, b, c, 0, 0, 0, a.capacity()); }
887
902 DeviceArray<cudaComplex> const & b,
903 DeviceArray<cudaReal> const & c,
904 const int beginIdA, const int beginIdB, const int beginIdC,
905 const int n);
906
916 inline
918 DeviceArray<cudaComplex> const & b,
919 DeviceArray<cudaReal> const & c)
920 { mulVV(a, b, c, 0, 0, 0, a.capacity()); }
921
935 DeviceArray<cudaReal> const & b,
936 const cudaReal c,
937 const int beginIdA, const int beginIdB,
938 const int n);
939
949 inline
951 DeviceArray<cudaReal> const & b,
952 const cudaReal c)
953 { mulVS(a, b, c, 0, 0, a.capacity()); }
954
968 DeviceArray<cudaComplex> const & b,
969 const cudaComplex c,
970 const int beginIdA, const int beginIdB, const int n);
971
981 inline
983 DeviceArray<cudaComplex> const & b,
984 const cudaComplex c)
985 { mulVS(a, b, c, 0, 0, a.capacity()); }
986
1000 DeviceArray<cudaReal> const & b,
1001 const cudaComplex c,
1002 const int beginIdA, const int beginIdB, const int n);
1003
1013 inline
1015 DeviceArray<cudaReal> const & b,
1016 const cudaComplex c)
1017 { mulVS(a, b, c, 0, 0, a.capacity()); }
1018
1032 DeviceArray<cudaComplex> const & b,
1033 const cudaReal c,
1034 const int beginIdA, const int beginIdB, const int n);
1035
1045 inline
1047 DeviceArray<cudaComplex> const & b,
1048 const cudaReal c)
1049 { mulVS(a, b, c, 0, 0, a.capacity()); }
1050
1051
1052 // Division operations
1053
1068 DeviceArray<cudaReal> const & b,
1069 DeviceArray<cudaReal> const & c,
1070 const int beginIdA, const int beginIdB, const int beginIdC,
1071 const int n);
1072
1082 inline
1084 DeviceArray<cudaReal> const & b,
1085 DeviceArray<cudaReal> const & c)
1086 { divVV(a, b, c, 0, 0, 0, a.capacity()); }
1087
1102 DeviceArray<cudaComplex> const & b,
1103 DeviceArray<cudaReal> const & c,
1104 const int beginIdA, const int beginIdB, const int beginIdC,
1105 const int n);
1106
1116 inline
1118 DeviceArray<cudaComplex> const & b,
1119 DeviceArray<cudaReal> const & c)
1120 { divVV(a, b, c, 0, 0, 0, a.capacity()); }
1121
1135 DeviceArray<cudaReal> const & b,
1136 const cudaReal c, const int beginIdA,
1137 const int beginIdB, const int n);
1138
1148 inline
1150 DeviceArray<cudaReal> const & b,
1151 const cudaReal c)
1152 { divVS(a, b, c, 0, 0, a.capacity()); }
1153
1167 DeviceArray<cudaComplex> const & b,
1168 const cudaReal c,
1169 const int beginIdA, const int beginIdB, const int n);
1170
1180 inline
1182 DeviceArray<cudaComplex> const & b,
1183 const cudaReal c)
1184 { divVS(a, b, c, 0, 0, a.capacity()); }
1185
1199 const cudaReal b,
1200 DeviceArray<cudaReal> const & c,
1201 const int beginIdA, const int beginIdC, const int n);
1202
1212 inline
1214 const cudaReal b,
1215 DeviceArray<cudaReal> const & c)
1216 { divSV(a, b, c, 0, 0, a.capacity()); }
1217
1218 // In-place addition
1219
1232 DeviceArray<cudaReal> const & b,
1233 const int beginIdA, const int beginIdB, const int n);
1234
1243 inline
1245 DeviceArray<cudaReal> const & b)
1246 { addEqV(a, b, 0, 0, a.capacity()); }
1247
1260 DeviceArray<cudaComplex> const & b,
1261 const int beginIdA, const int beginIdB, const int n);
1262
1271 inline
1273 DeviceArray<cudaComplex> const & b)
1274 { addEqV(a, b, 0, 0, a.capacity()); }
1275
1288 DeviceArray<cudaReal> const & b,
1289 const int beginIdA, const int beginIdB, const int n);
1290
1299 inline
1301 DeviceArray<cudaReal> const & b)
1302 { addEqV(a, b, 0, 0, a.capacity()); }
1303
1315 const cudaReal b,
1316 const int beginIdA, const int n);
1317
1326 inline
1328 { addEqS(a, b, 0, a.capacity()); }
1329
1341 const cudaComplex b,
1342 const int beginIdA, const int n);
1343
1352 inline
1354 { addEqS(a, b, 0, a.capacity()); }
1355
1367 const cudaReal b,
1368 const int beginIdA, const int n);
1369
1378 inline
1380 { addEqS(a, b, 0, a.capacity()); }
1381
1382
1383 // In-place subtraction
1384
1397 DeviceArray<cudaReal> const & b,
1398 const int beginIdA, const int beginIdB, const int n);
1399
1408 inline
1410 DeviceArray<cudaReal> const & b)
1411 { subEqV(a, b, 0, 0, a.capacity()); }
1412
1425 DeviceArray<cudaComplex> const & b,
1426 const int beginIdA, const int beginIdB, const int n);
1427
1436 inline
1438 DeviceArray<cudaComplex> const & b)
1439 { subEqV(a, b, 0, 0, a.capacity()); }
1440
1453 DeviceArray<cudaReal> const & b,
1454 const int beginIdA, const int beginIdB, const int n);
1455
1464 inline
1466 DeviceArray<cudaReal> const & b)
1467 { subEqV(a, b, 0, 0, a.capacity()); }
1468
/**
* In-place vector-scalar subtraction over a slice, real array and real scalar.
*
* Declaration only; the definition is not in this header.
*
* NOTE(review): the Array<double> overload of subEqS is documented as
* "a[i] -= b (real)". This overload presumably applies the same update to
* n elements of a starting at index beginIdA -- confirm against the
* definition.
*
* The inline wrapper that follows this declaration calls
* subEqS(a, b, 0, a.capacity()), i.e. zero offset and n = a.capacity().
*
* \param a  device array taken by non-const reference (modified in place)
* \param b  real scalar, passed by value
* \param beginIdA  presumably the index of the first element of a to use
* \param n  presumably the number of elements in the slice
*/
1479 void subEqS(DeviceArray<cudaReal>& a, const cudaReal b,
1480 const int beginIdA, const int n);
1481
1490 inline
1492 { subEqS(a, b, 0, a.capacity()); }
1493
1505 const int beginIdA, const int n);
1506
1515 inline
1517 { subEqS(a, b, 0, a.capacity()); }
1518
1530 const cudaReal b,
1531 const int beginIdA, const int n);
1532
1541 inline
1543 { subEqS(a, b, 0, a.capacity()); }
1544
1545
1546 // In-place multiplication
1547
1560 DeviceArray<cudaReal> const & b,
1561 const int beginIdA, const int beginIdB, const int n);
1562
1571 inline
1573 DeviceArray<cudaReal> const & b)
1574 { mulEqV(a, b, 0, 0, a.capacity()); }
1575
1588 DeviceArray<cudaComplex> const & b,
1589 const int beginIdA, const int beginIdB, const int n);
1590
1599 inline
1601 DeviceArray<cudaComplex> const & b)
1602 { mulEqV(a, b, 0, 0, a.capacity()); }
1603
1616 DeviceArray<cudaReal> const & b,
1617 const int beginIdA, const int beginIdB, const int n);
1618
1627 inline
1629 DeviceArray<cudaReal> const & b)
1630 { mulEqV(a, b, 0, 0, a.capacity()); }
1631
/**
* In-place vector-scalar multiplication over a slice, real array and real scalar.
*
* Declaration only; the definition is not in this header.
*
* NOTE(review): the Array<double> overload of mulEqS is documented as
* "a[i] *= b (real)". This overload presumably applies the same update to
* n elements of a starting at index beginIdA -- confirm against the
* definition.
*
* The inline wrapper that follows this declaration calls
* mulEqS(a, b, 0, a.capacity()), i.e. zero offset and n = a.capacity().
*
* \param a  device array taken by non-const reference (modified in place)
* \param b  real scalar, passed by value
* \param beginIdA  presumably the index of the first element of a to use
* \param n  presumably the number of elements in the slice
*/
1642 void mulEqS(DeviceArray<cudaReal>& a, const cudaReal b,
1643 const int beginIdA, const int n);
1644
1653 inline
1655 { mulEqS(a, b, 0, a.capacity()); }
1656
1668 const int beginIdA, const int n);
1669
1678 inline
1680 { mulEqS(a, b, 0, a.capacity()); }
1681
1693 const cudaReal b,
1694 const int beginIdA, const int n);
1695
1704 inline
1706 { mulEqS(a, b, 0, a.capacity()); }
1707
1708
1709 // In-place division
1710
1723 DeviceArray<cudaReal> const & b,
1724 const int beginIdA, const int beginIdB, const int n);
1725
1734 inline
1736 DeviceArray<cudaReal> const & b)
1737 { divEqV(a, b, 0, 0, a.capacity()); }
1738
1751 DeviceArray<cudaReal> const & b,
1752 const int beginIdA, const int beginIdB, const int n);
1753
1762 inline
1764 DeviceArray<cudaReal> const & b)
1765 { divEqV(a, b, 0, 0, a.capacity()); }
1766
1778 const cudaReal b,
1779 const int beginIdA, const int n);
1780
1789 inline
1791 { divEqS(a, b, 0, a.capacity()); }
1792
1804 const cudaReal b,
1805 const int beginIdA, const int n);
1806
1815 inline
1817 { divEqS(a, b, 0, a.capacity()); }
1818
1819 // Exponentiation operations
1820
1833 DeviceArray<cudaReal> const & b,
1834 const int beginIdA, const int beginIdB,
1835 const int n);
1836
1845 inline
1847 DeviceArray<cudaReal> const & b)
1848 { expV(a, b, 0, 0, a.capacity()); }
1849
1862 DeviceArray<cudaComplex> const & b,
1863 const int beginIdA, const int beginIdB,
1864 const int n);
1865
1874 inline
1876 DeviceArray<cudaComplex> const & b)
1877 { expV(a, b, 0, 0, a.capacity()); }
1878
1879 // Vector (element-wise) square
1880
/**
* Element-wise square over a slice, real DeviceArray to real DeviceArray.
*
* Declaration only; the definition is not in this header.
*
* NOTE(review): the Array<double> overload of sqV is documented as
* "a[i] = b[i]*b[i] (real)". This overload presumably does the same for
* n elements, reading b from index beginIdB and writing a from index
* beginIdA -- confirm against the definition.
*
* The inline wrapper that follows this declaration calls
* sqV(a, b, 0, 0, a.capacity()), i.e. zero offsets and n = a.capacity().
*
* \param a  device array taken by non-const reference (presumably the output)
* \param b  device array taken by const reference (presumably the input)
* \param beginIdA  presumably the index of the first element of a to use
* \param beginIdB  presumably the index of the first element of b to use
* \param n  presumably the number of elements in the slice
*/
1892 void sqV(DeviceArray<cudaReal>& a,
1893 DeviceArray<cudaReal> const & b,
1894 const int beginIdA, const int beginIdB,
1895 const int n);
1896
1905 inline
1907 DeviceArray<cudaReal> const & b)
1908 { sqV(a, b, 0, 0, a.capacity()); }
1909
1922 DeviceArray<cudaComplex> const & b,
1923 const int beginIdA, const int beginIdB,
1924 const int n);
1925
1934 inline
1936 DeviceArray<cudaComplex> const & b)
1937 { sqV(a, b, 0, 0, a.capacity()); }
1938
1939 // Absolute magnitude
1940
1953 DeviceArray<cudaReal> const & b,
1954 const int beginIdA, const int beginIdB,
1955 const int n);
1956
1965 inline
1967 DeviceArray<cudaReal> const & b)
1968 { absV(a, b, 0, 0, a.capacity()); }
1969
1982 DeviceArray<cudaComplex> const & b,
1983 const int beginIdA, const int beginIdB,
1984 const int n);
1985
1994 inline
1996 DeviceArray<cudaComplex> const & b)
1997 { sqAbsV(a, b, 0, 0, a.capacity()); }
1998
1999 } // namespace VecOp
2000} // namespace Pscf
2001
2002// Ensure that if VecOp.h is included, so is VecOpMisc.h
2003#include "VecOpMisc.h"
2004#endif
Dynamic array on the GPU device with aligned data.
Definition DeviceArray.h:96
int capacity() const
Return array capacity.
Array container class template.
Definition Array.h:40
int capacity() const
Return allocated size.
Definition Array.h:144
void divEqS(Array< double > &a, double b)
Vector-scalar in-place division, a[i] /= b.
Definition VecOp.cpp:292
void addEqV(Array< double > &a, Array< double > const &b)
Vector-vector in-place addition, a[i] += b[i] (real).
Definition VecOp.cpp:198
void divEqV(Array< double > &a, Array< double > const &b)
Vector-vector in-place division, a[i] /= b[i].
Definition VecOp.cpp:279
void addEqS(Array< double > &a, double b)
Vector-scalar in-place addition, a[i] += b (real).
Definition VecOp.cpp:211
void sqV(Array< double > &a, Array< double > const &b)
Vector element-wise square, a[i] = b[i]*b[i] (real).
Definition VecOp.cpp:334
void mulEqV(Array< double > &a, Array< double > const &b)
Vector-vector in-place multiplication, a[i] *= b[i] (real).
Definition VecOp.cpp:252
void eqV(Array< double > &a, Array< double > const &b, const int beginIdA, const int beginIdB, const int n)
Vector assignment, a[i] = b[i] (real, slice).
Definition VecOp.cpp:21
void sqAbsV(Array< double > &a, Array< fftw_complex > const &b)
Square of absolute magnitude, a[i] = |b[i]|^2 (complex).
Definition VecOpCx.cpp:698
void mulEqS(Array< double > &a, double b)
Vector-scalar in-place multiplication, a[i] *= b (real).
Definition VecOp.cpp:265
void subVV(Array< double > &a, Array< double > const &b, Array< double > const &c)
Vector-vector subtraction, a[i] = b[i] - c[i] (real).
Definition VecOp.cpp:95
void absV(Array< double > &a, Array< double > const &b)
Element-wise absolute magnitude, a[i] = abs(b[i]) (real).
Definition VecOp.cpp:349
void expV(Array< double > &a, Array< double > const &b)
Vector exponentiation, a[i] = exp(b[i]) (real).
Definition VecOp.cpp:306
void divVS(Array< double > &a, Array< double > const &b, double c)
Vector-scalar division, a[i] = b[i] / c (real).
Definition VecOp.cpp:170
void eqS(Array< double > &a, double b)
Vector assignment, a[i] = b (real).
Definition VecOp.cpp:50
void mulVV(Array< double > &a, Array< double > const &b, Array< double > const &c)
Vector-vector multiplication, a[i] = b[i] * c[i] (real).
Definition VecOp.cpp:125
void subVS(Array< double > &a, Array< double > const &b, double c)
Vector-scalar subtraction, a[i] = b[i] - c (real).
Definition VecOp.cpp:110
void subEqV(Array< double > &a, Array< double > const &b)
Vector-vector in-place subtraction, a[i] -= b[i] (real).
Definition VecOp.cpp:225
void addVV(Array< double > &a, Array< double > const &b, Array< double > const &c)
Vector-vector addition, a[i] = b[i] + c[i] (real).
Definition VecOp.cpp:64
void divSV(Array< double > &a, double b, Array< double > const &c)
Vector division, a[i] = b / c[i].
Definition VecOp.cpp:183
void addVS(Array< double > &a, Array< double > const &b, double c)
Vector-scalar addition, a[i] = b[i] + c (real).
Definition VecOp.cpp:79
void divVV(Array< double > &a, Array< double > const &b, Array< double > const &c)
Vector-vector division, a[i] = b[i] / c[i] (real).
Definition VecOp.cpp:155
void subEqS(Array< double > &a, double b)
Vector-scalar subtraction in-place, a[i] -= b (real).
Definition VecOp.cpp:238
void mulVS(Array< double > &a, Array< double > const &b, double c)
Vector-scalar multiplication, a[i] = b[i] * c (real).
Definition VecOp.cpp:140
Vector operations on GPU or CPU.
Definition VecOp.cpp:14
PSCF package top-level namespace.
cufftDoubleComplex cudaComplex
Complex number type (cuFFT double-precision complex) used in CUDA code that uses cuFFT.
Definition cudaTypes.h:22
cufftDoubleReal cudaReal
Real number type (cuFFT double-precision real) used in CUDA code that uses cuFFT.
Definition cudaTypes.h:35