PSCF v1.3
VecOp.h
1#ifndef PRDC_VEC_OP_H
2#define PRDC_VEC_OP_H
3
4/*
5* PSCF - Polymer Self-Consistent Field
6*
7* Copyright 2015 - 2025, The Regents of the University of Minnesota
8* Distributed under the terms of the GNU General Public License.
9*/
10
11#include "types.h"
12#include <pscf/cuda/DeviceArray.h>
13
14namespace Pscf {
15namespace Prdc {
16namespace Cuda {
17
75namespace VecOp {
76
77// Assignment operations:
78// ~~~~~~~~~~~~~~~~~~~~~~
79
89void eqV(DeviceArray<cudaReal>& a, DeviceArray<cudaReal> const & b,
90 const int beginIdA, const int beginIdB, const int n);
91
99{ eqV(a, b, 0, 0, a.capacity()); }
100
111 const int beginIdA, const int beginIdB, const int n);
112
120 DeviceArray<cudaComplex> const & b)
121{ eqV(a, b, 0, 0, a.capacity()); }
122
131void eqS(DeviceArray<cudaReal>& a, const cudaReal b,
132 const int beginIdA, const int n);
133
140inline void eqS(DeviceArray<cudaReal>& a, const cudaReal b)
141{ eqS(a, b, 0, a.capacity()); }
142
151void eqS(DeviceArray<cudaComplex>& a, const cudaComplex b,
152 const int beginIdA, const int n);
153
160inline void eqS(DeviceArray<cudaComplex>& a, const cudaComplex b)
161{ eqS(a, b, 0, a.capacity()); }
162
163
164// Addition operations
165// ~~~~~~~~~~~~~~~~~~~
166
179 DeviceArray<cudaReal> const & c, const int beginIdA,
180 const int beginIdB, const int beginIdC, const int n);
181
190 DeviceArray<cudaReal> const & c)
191{ addVV(a, b, c, 0, 0, 0, a.capacity()); }
192
205 DeviceArray<cudaComplex> const & c, const int beginIdA,
206 const int beginIdB, const int beginIdC, const int n);
207
216 DeviceArray<cudaComplex> const & b,
217 DeviceArray<cudaComplex> const & c)
218{ addVV(a, b, c, 0, 0, 0, a.capacity()); }
219
232 DeviceArray<cudaComplex> const & c, const int beginIdA,
233 const int beginIdB, const int beginIdC, const int n);
234
243 DeviceArray<cudaReal> const & b,
244 DeviceArray<cudaComplex> const & c)
245{ addVV(a, b, c, 0, 0, 0, a.capacity()); }
246
259 DeviceArray<cudaReal> const & c, const int beginIdA,
260 const int beginIdB, const int beginIdC, const int n);
261
270 DeviceArray<cudaComplex> const & b,
271 DeviceArray<cudaReal> const & c)
272{ addVV(a, b, c, 0, 0, 0, a.capacity()); }
273
285 const cudaReal c, const int beginIdA,
286 const int beginIdB, const int n);
287
296 const cudaReal c)
297{ addVS(a, b, c, 0, 0, a.capacity()); }
298
310 const cudaComplex c, const int beginIdA,
311 const int beginIdB, const int n);
312
321 DeviceArray<cudaComplex> const & b, const cudaComplex c)
322{ addVS(a, b, c, 0, 0, a.capacity()); }
323
335 const cudaComplex c, const int beginIdA,
336 const int beginIdB, const int n);
337
346 const cudaComplex c)
347{ addVS(a, b, c, 0, 0, a.capacity()); }
348
360 const cudaReal c, const int beginIdA,
361 const int beginIdB, const int n);
362
371 DeviceArray<cudaComplex> const & b, const cudaReal c)
372{ addVS(a, b, c, 0, 0, a.capacity()); }
373
374
375// Subtraction operations
376// ~~~~~~~~~~~~~~~~~~~~~~
377
390 DeviceArray<cudaReal> const & c, const int beginIdA,
391 const int beginIdB, const int beginIdC, const int n);
392
401 DeviceArray<cudaReal> const & c)
402{ subVV(a, b, c, 0, 0, 0, a.capacity()); }
403
416 DeviceArray<cudaComplex> const & c, const int beginIdA,
417 const int beginIdB, const int beginIdC, const int n);
418
427 DeviceArray<cudaComplex> const & b,
428 DeviceArray<cudaComplex> const & c)
429{ subVV(a, b, c, 0, 0, 0, a.capacity()); }
430
443 DeviceArray<cudaComplex> const & c, const int beginIdA,
444 const int beginIdB, const int beginIdC, const int n);
445
454 DeviceArray<cudaComplex> const & c)
455{ subVV(a, b, c, 0, 0, 0, a.capacity()); }
456
469 DeviceArray<cudaReal> const & c, const int beginIdA,
470 const int beginIdB, const int beginIdC, const int n);
471
480 DeviceArray<cudaComplex> const & b,
481 DeviceArray<cudaReal> const & c)
482{ subVV(a, b, c, 0, 0, 0, a.capacity()); }
483
495 const cudaReal c, const int beginIdA,
496 const int beginIdB, const int n);
497
506 const cudaReal c)
507{ subVS(a, b, c, 0, 0, a.capacity()); }
508
520 const cudaComplex c, const int beginIdA,
521 const int beginIdB, const int n);
522
531 DeviceArray<cudaComplex> const & b, const cudaComplex c)
532{ subVS(a, b, c, 0, 0, a.capacity()); }
533
545 const cudaComplex c, const int beginIdA,
546 const int beginIdB, const int n);
547
556 DeviceArray<cudaReal> const & b,
557 const cudaComplex c)
558{ subVS(a, b, c, 0, 0, a.capacity()); }
559
571 const cudaReal c, const int beginIdA,
572 const int beginIdB, const int n);
573
582 DeviceArray<cudaComplex> const & b, const cudaReal c)
583{ subVS(a, b, c, 0, 0, a.capacity()); }
584
585
586// Multiplication operations
587// ~~~~~~~~~~~~~~~~~~~~~~~~~
588
601 DeviceArray<cudaReal> const & c, const int beginIdA,
602 const int beginIdB, const int beginIdC, const int n);
603
612 DeviceArray<cudaReal> const & c)
613{ mulVV(a, b, c, 0, 0, 0, a.capacity()); }
614
627 DeviceArray<cudaComplex> const & c, const int beginIdA,
628 const int beginIdB, const int beginIdC, const int n);
629
638 DeviceArray<cudaComplex> const & b,
639 DeviceArray<cudaComplex> const & c)
640{ mulVV(a, b, c, 0, 0, 0, a.capacity()); }
641
654 DeviceArray<cudaComplex> const & c, const int beginIdA,
655 const int beginIdB, const int beginIdC, const int n);
656
665 DeviceArray<cudaReal> const & b,
666 DeviceArray<cudaComplex> const & c)
667{ mulVV(a, b, c, 0, 0, 0, a.capacity()); }
668
681 DeviceArray<cudaReal> const & c, const int beginIdA,
682 const int beginIdB, const int beginIdC, const int n);
683
692 DeviceArray<cudaComplex> const & b,
693 DeviceArray<cudaReal> const & c)
694{ mulVV(a, b, c, 0, 0, 0, a.capacity()); }
695
707 const cudaReal c, const int beginIdA,
708 const int beginIdB, const int n);
709
718 const cudaReal c)
719{ mulVS(a, b, c, 0, 0, a.capacity()); }
720
732 const cudaComplex c, const int beginIdA,
733 const int beginIdB, const int n);
734
743 DeviceArray<cudaComplex> const & b, const cudaComplex c)
744{ mulVS(a, b, c, 0, 0, a.capacity()); }
745
757 DeviceArray<cudaReal> const & b,
758 const cudaComplex c,
759 const int beginIdA, const int beginIdB, const int n);
760
769 DeviceArray<cudaReal> const & b, const cudaComplex c)
770{ mulVS(a, b, c, 0, 0, a.capacity()); }
771
783 DeviceArray<cudaComplex> const & b,
784 const cudaReal c,
785 const int beginIdA, const int beginIdB, const int n);
786
795 DeviceArray<cudaComplex> const & b,
796 const cudaReal c)
797{ mulVS(a, b, c, 0, 0, a.capacity()); }
798
799
800// Division operations
801// ~~~~~~~~~~~~~~~~~~~
802
815 DeviceArray<cudaReal> const & c, const int beginIdA,
816 const int beginIdB, const int beginIdC, const int n);
817
826 DeviceArray<cudaReal> const & c)
827{ divVV(a, b, c, 0, 0, 0, a.capacity()); }
828
841 DeviceArray<cudaReal> const & c, const int beginIdA,
842 const int beginIdB, const int beginIdC, const int n);
843
852 DeviceArray<cudaComplex> const & b,
853 DeviceArray<cudaReal> const & c)
854{ divVV(a, b, c, 0, 0, 0, a.capacity()); }
855
867 const cudaReal c, const int beginIdA,
868 const int beginIdB, const int n);
869
878 const cudaReal c)
879{ divVS(a, b, c, 0, 0, a.capacity()); }
880
892 const cudaReal c, const int beginIdA,
893 const int beginIdB, const int n);
894
903 DeviceArray<cudaComplex> const & b, const cudaReal c)
904{ divVS(a, b, c, 0, 0, a.capacity()); }
905
916void divSV(DeviceArray<cudaReal>& a, const cudaReal b,
917 DeviceArray<cudaReal> const & c,
918 const int beginIdA, const int beginIdC, const int n);
919
927inline void divSV(DeviceArray<cudaReal>& a, const cudaReal b,
928 DeviceArray<cudaReal> const & c)
929{ divSV(a, b, c, 0, 0, a.capacity()); }
930
931// Exponentiation operations:
932// ~~~~~~~~~~~~~~~~~~~~~~~~~~
933
944 const int beginIdA, const int beginIdB, const int n);
945
953{ expV(a, b, 0, 0, a.capacity()); }
954
965 const int beginIdA, const int beginIdB, const int n);
966
974 DeviceArray<cudaComplex> const & b)
975{ expV(a, b, 0, 0, a.capacity()); }
976
977
978// Compound operations: addition
979// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
980
991 const int beginIdA, const int beginIdB, const int n);
992
1000{ addEqV(a, b, 0, 0, a.capacity()); }
1001
1012 const int beginIdA, const int beginIdB, const int n);
1013
1021 DeviceArray<cudaComplex> const & b)
1022{ addEqV(a, b, 0, 0, a.capacity()); }
1023
1034 const int beginIdA, const int beginIdB, const int n);
1035
1043 DeviceArray<cudaReal> const & b)
1044{ addEqV(a, b, 0, 0, a.capacity()); }
1045
1054void addEqS(DeviceArray<cudaReal>& a, const cudaReal b,
1055 const int beginIdA, const int n);
1056
1063inline void addEqS(DeviceArray<cudaReal>& a, const cudaReal b)
1064{ addEqS(a, b, 0, a.capacity()); }
1065
1074void addEqS(DeviceArray<cudaComplex>& a, const cudaComplex b,
1075 const int beginIdA, const int n);
1076
1083inline void addEqS(DeviceArray<cudaComplex>& a, const cudaComplex b)
1084{ addEqS(a, b, 0, a.capacity()); }
1085
1094void addEqS(DeviceArray<cudaComplex>& a, const cudaReal b,
1095 const int beginIdA, const int n);
1096
1103inline void addEqS(DeviceArray<cudaComplex>& a, const cudaReal b)
1104{ addEqS(a, b, 0, a.capacity()); }
1105
1106
1107// Compound operations: subtraction
1108// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1109
1120 const int beginIdA, const int beginIdB, const int n);
1121
1129{ subEqV(a, b, 0, 0, a.capacity()); }
1130
1141 const int beginIdA, const int beginIdB, const int n);
1142
1150 DeviceArray<cudaComplex> const & b)
1151{ subEqV(a, b, 0, 0, a.capacity()); }
1152
1163 const int beginIdA, const int beginIdB, const int n);
1164
1172 DeviceArray<cudaReal> const & b)
1173{ subEqV(a, b, 0, 0, a.capacity()); }
1174
1183void subEqS(DeviceArray<cudaReal>& a, const cudaReal b,
1184 const int beginIdA, const int n);
1185
1192inline void subEqS(DeviceArray<cudaReal>& a, const cudaReal b)
1193{ subEqS(a, b, 0, a.capacity()); }
1194
1203void subEqS(DeviceArray<cudaComplex>& a, const cudaComplex b,
1204 const int beginIdA, const int n);
1205
1212inline void subEqS(DeviceArray<cudaComplex>& a, const cudaComplex b)
1213{ subEqS(a, b, 0, a.capacity()); }
1214
1223void subEqS(DeviceArray<cudaComplex>& a, const cudaReal b,
1224 const int beginIdA, const int n);
1225
1232inline void subEqS(DeviceArray<cudaComplex>& a, const cudaReal b)
1233{ subEqS(a, b, 0, a.capacity()); }
1234
1235
1236// Compound operations: multiplication
1237// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1238
1249 const int beginIdA, const int beginIdB, const int n);
1250
1258 DeviceArray<cudaReal> const & b)
1259{ mulEqV(a, b, 0, 0, a.capacity()); }
1260
1271 const int beginIdA, const int beginIdB, const int n);
1272
1280 DeviceArray<cudaComplex> const & b)
1281{ mulEqV(a, b, 0, 0, a.capacity()); }
1282
1293 const int beginIdA, const int beginIdB, const int n);
1294
1302 DeviceArray<cudaReal> const & b)
1303{ mulEqV(a, b, 0, 0, a.capacity()); }
1304
1313void mulEqS(DeviceArray<cudaReal>& a, const cudaReal b,
1314 const int beginIdA, const int n);
1315
1322inline void mulEqS(DeviceArray<cudaReal>& a, const cudaReal b)
1323{ mulEqS(a, b, 0, a.capacity()); }
1324
1333void mulEqS(DeviceArray<cudaComplex>& a, const cudaComplex b,
1334 const int beginIdA, const int n);
1335
1342inline void mulEqS(DeviceArray<cudaComplex>& a, const cudaComplex b)
1343{ mulEqS(a, b, 0, a.capacity()); }
1344
1353void mulEqS(DeviceArray<cudaComplex>& a, const cudaReal b,
1354 const int beginIdA, const int n);
1355
1362inline void mulEqS(DeviceArray<cudaComplex>& a, const cudaReal b)
1363{ mulEqS(a, b, 0, a.capacity()); }
1364
1365
1366// Compound operations: division
1367// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1368
1379 const int beginIdA, const int beginIdB, const int n);
1380
1388{ divEqV(a, b, 0, 0, a.capacity()); }
1389
1400 const int beginIdA, const int beginIdB, const int n);
1401
1409 DeviceArray<cudaReal> const & b)
1410{ divEqV(a, b, 0, 0, a.capacity()); }
1411
1420void divEqS(DeviceArray<cudaReal>& a, const cudaReal b,
1421 const int beginIdA, const int n);
1422
1429inline void divEqS(DeviceArray<cudaReal>& a, const cudaReal b)
1430{ divEqS(a, b, 0, a.capacity()); }
1431
1440void divEqS(DeviceArray<cudaComplex>& a, const cudaReal b,
1441 const int beginIdA, const int n);
1442
1449inline void divEqS(DeviceArray<cudaComplex>& a, const cudaReal b)
1450{ divEqS(a, b, 0, a.capacity()); }
1451
1453
1454} // namespace VecOp
1455} // namespace Cuda
1456} // namespace Prdc
1457} // namespace Pscf
1458
1459#include "VecOpMisc.h" // Ensure that if VecOp is included, so is VecOpMisc
1460
1461#endif
Dynamic array on the GPU device with aligned data.
Definition DeviceArray.h:43
int capacity() const
Return allocated capacity.
Functions that perform element-wise vector operations on the GPU.
Definition VecOp.cu:16
void addEqV(DeviceArray< cudaReal > &a, DeviceArray< cudaReal > const &b, const int beginIdA, const int beginIdB, const int n)
Vector addition in-place, a[i] += b[i], kernel wrapper (cudaReal).
Definition VecOp.cu:1672
void eqV(DeviceArray< cudaReal > &a, DeviceArray< cudaReal > const &b, const int beginIdA, const int beginIdB, const int n)
Vector assignment, a[i] = b[i], kernel wrapper (cudaReal).
Definition VecOp.cu:1039
void addVS(DeviceArray< cudaReal > &a, DeviceArray< cudaReal > const &b, const cudaReal c, const int beginIdA, const int beginIdB, int n)
Vector addition, a[i] = b[i] + c, kernel wrapper (cudaReal).
Definition VecOp.cu:1179
void eqS(DeviceArray< cudaReal > &a, const cudaReal b, const int beginIdA, const int n)
Vector assignment, a[i] = b, kernel wrapper (cudaReal).
Definition VecOp.cu:1073
void addEqS(DeviceArray< cudaReal > &a, const cudaReal b, const int beginIdA, const int n)
Vector addition in-place, a[i] += b, kernel wrapper (cudaReal).
Definition VecOp.cu:1724
void divVS(DeviceArray< cudaReal > &a, DeviceArray< cudaReal > const &b, const cudaReal c, const int beginIdA, const int beginIdB, const int n)
Vector division, a[i] = b[i] / c, kernel wrapper (cudaReal).
Definition VecOp.cu:1582
void divEqV(DeviceArray< cudaReal > &a, DeviceArray< cudaReal > const &b, const int beginIdA, const int beginIdB, const int n)
Vector division in-place, a[i] /= b[i], kernel wrapper (cudaReal).
Definition VecOp.cu:1964
void subEqS(DeviceArray< cudaReal > &a, const cudaReal b, const int beginIdA, const int n)
Vector subtraction in-place, a[i] -= b, kernel wrapper (cudaReal).
Definition VecOp.cu:1822
void divVV(DeviceArray< cudaReal > &a, DeviceArray< cudaReal > const &b, DeviceArray< cudaReal > const &c, const int beginIdA, const int beginIdB, const int beginIdC, const int n)
Vector division, a[i] = b[i] / c[i], kernel wrapper (cudaReal).
Definition VecOp.cu:1544
void addVV(DeviceArray< cudaReal > &a, DeviceArray< cudaReal > const &b, DeviceArray< cudaReal > const &c, const int beginIdA, const int beginIdB, const int beginIdC, const int n)
Vector addition, a[i] = b[i] + c[i], kernel wrapper (cudaReal).
Definition VecOp.cu:1103
void mulVV(DeviceArray< cudaReal > &a, DeviceArray< cudaReal > const &b, DeviceArray< cudaReal > const &c, const int beginIdA, const int beginIdB, const int beginIdC, const int n)
Vector multiplication, a[i] = b[i] * c[i], kernel wrapper (cudaReal).
Definition VecOp.cu:1395
void subVV(DeviceArray< cudaReal > &a, DeviceArray< cudaReal > const &b, DeviceArray< cudaReal > const &c, const int beginIdA, const int beginIdB, const int beginIdC, const int n)
Vector subtraction, a[i] = b[i] - c[i], kernel wrapper (cudaReal).
Definition VecOp.cu:1247
void mulVS(DeviceArray< cudaReal > &a, DeviceArray< cudaReal > const &b, const cudaReal c, const int beginIdA, const int beginIdB, const int n)
Vector multiplication, a[i] = b[i] * c, kernel wrapper (cudaReal).
Definition VecOp.cu:1471
void mulEqS(DeviceArray< cudaReal > &a, const cudaReal b, const int beginIdA, const int n)
Vector multiplication in-place, a[i] *= b, kernel wrapper (cudaReal).
Definition VecOp.cu:1918
void mulEqV(DeviceArray< cudaReal > &a, DeviceArray< cudaReal > const &b, const int beginIdA, const int beginIdB, const int n)
Vector multiplication in-place, a[i] *= b[i], kernel wrapper (cudaReal).
Definition VecOp.cu:1867
void subEqV(DeviceArray< cudaReal > &a, DeviceArray< cudaReal > const &b, const int beginIdA, const int beginIdB, const int n)
Vector subtraction in-place, a[i] -= b[i], kernel wrapper (cudaReal).
Definition VecOp.cu:1769
void divEqS(DeviceArray< cudaReal > &a, const cudaReal b, const int beginIdA, const int n)
Vector division in-place, a[i] /= b, kernel wrapper (cudaReal).
Definition VecOp.cu:1998
void subVS(DeviceArray< cudaReal > &a, DeviceArray< cudaReal > const &b, const cudaReal c, const int beginIdA, const int beginIdB, const int n)
Vector subtraction, a[i] = b[i] - c, kernel wrapper (cudaReal).
Definition VecOp.cu:1323
void expV(DeviceArray< cudaReal > &a, DeviceArray< cudaReal > const &b, const int beginIdA, const int beginIdB, const int n)
Vector exponentiation, a[i] = exp(b[i]), kernel wrapper (cudaReal).
Definition VecOp.cu:1638
void divSV(DeviceArray< cudaReal > &a, const cudaReal b, DeviceArray< cudaReal > const &c, const int beginIdA, const int beginIdC, const int n)
Vector division, a[i] = b / c[i], kernel wrapper (cudaReal).
Definition VecOp.cu:1619
Fields, FFTs, and utilities for periodic boundary conditions (CUDA)
Definition Reduce.cpp:14
Periodic fields and crystallography.
Definition CField.cpp:11
PSCF package top-level namespace.
Definition param_pc.dox:1