PSCF v1.2
VecOp.h
1#ifndef PRDC_VEC_OP_H
2#define PRDC_VEC_OP_H
3
4/*
5* PSCF Package
6*
7* Copyright 2016 - 2022, The Regents of the University of Minnesota
8* Distributed under the terms of the GNU General Public License.
9*/
10
11#include "types.h"
12#include <pscf/cuda/DeviceArray.h>
13
14namespace Pscf {
15namespace Prdc {
16namespace Cuda {
17
75namespace VecOp {
76
77// Assignment operations:
78// ~~~~~~~~~~~~~~~~~~~~~~
79
/**
* Vector assignment, a[i] = b[i], kernel wrapper (cudaReal).
*
* Declaration only; the function body is not in this header.
*
* \param a  array assigned to (non-const reference)
* \param b  array read from (const reference)
* \param beginIdA  presumably the index of the first element of a used (TODO confirm in the definition)
* \param beginIdB  presumably the index of the first element of b used (TODO confirm in the definition)
* \param n  presumably the number of elements processed (TODO confirm in the definition)
*/
89void eqV(DeviceArray<cudaReal>& a, DeviceArray<cudaReal> const & b,
90 const int beginIdA, const int beginIdB, const int n);
91
99{ eqV(a, b, 0, 0, a.capacity()); }
100
111 const int beginIdA, const int beginIdB, const int n);
112
120 DeviceArray<cudaComplex> const & b)
121{ eqV(a, b, 0, 0, a.capacity()); }
122
/**
* Vector assignment, a[i] = b, kernel wrapper (cudaReal).
*
* Declaration only; the function body is not in this header.
*
* \param a  array assigned to (non-const reference)
* \param b  scalar right-hand side
* \param beginIdA  presumably the index of the first element of a used (TODO confirm in the definition)
* \param n  presumably the number of elements processed (TODO confirm in the definition)
*/
131void eqS(DeviceArray<cudaReal>& a, cudaReal const b,
132 const int beginIdA, const int n);
133
/**
* Whole-array convenience overload of eqS (cudaReal).
*
* Forwards to the four-argument eqS overload with beginIdA = 0 and
* n = a.capacity(), i.e. the allocated capacity of a.
*
* \param a  array assigned to (non-const reference)
* \param b  scalar right-hand side
*/
140inline void eqS(DeviceArray<cudaReal>& a, cudaReal const b)
141{ eqS(a, b, 0, a.capacity()); }
142
/**
* Overload of eqS for a cudaComplex array and a cudaComplex scalar.
*
* Declaration only; the function body is not in this header. By analogy
* with the cudaReal overload this presumably performs a[i] = b
* (TODO confirm in the definition).
*
* \param a  cudaComplex array, passed by non-const reference
* \param b  cudaComplex scalar
* \param beginIdA  presumably the index of the first element of a used (TODO confirm)
* \param n  presumably the number of elements processed (TODO confirm)
*/
151void eqS(DeviceArray<cudaComplex>& a, cudaComplex const b,
152 const int beginIdA, const int n);
153
/**
* Whole-array convenience overload of eqS (cudaComplex).
*
* Forwards to the four-argument eqS overload with beginIdA = 0 and
* n = a.capacity(), i.e. the allocated capacity of a.
*
* \param a  cudaComplex array, passed by non-const reference
* \param b  cudaComplex scalar
*/
160inline void eqS(DeviceArray<cudaComplex>& a, cudaComplex const b)
161{ eqS(a, b, 0, a.capacity()); }
162
163
164// Addition operations
165// ~~~~~~~~~~~~~~~~~~~
166
179 DeviceArray<cudaReal> const & c, const int beginIdA,
180 const int beginIdB, const int beginIdC, const int n);
181
190 DeviceArray<cudaReal> const & c)
191{ addVV(a, b, c, 0, 0, 0, a.capacity()); }
192
205 DeviceArray<cudaComplex> const & c, const int beginIdA,
206 const int beginIdB, const int beginIdC, const int n);
207
216 DeviceArray<cudaComplex> const & b,
217 DeviceArray<cudaComplex> const & c)
218{ addVV(a, b, c, 0, 0, 0, a.capacity()); }
219
232 DeviceArray<cudaComplex> const & c, const int beginIdA,
233 const int beginIdB, const int beginIdC, const int n);
234
243 DeviceArray<cudaReal> const & b,
244 DeviceArray<cudaComplex> const & c)
245{ addVV(a, b, c, 0, 0, 0, a.capacity()); }
246
259 DeviceArray<cudaReal> const & c, const int beginIdA,
260 const int beginIdB, const int beginIdC, const int n);
261
270 DeviceArray<cudaComplex> const & b,
271 DeviceArray<cudaReal> const & c)
272{ addVV(a, b, c, 0, 0, 0, a.capacity()); }
273
285 cudaReal const c, const int beginIdA,
286 const int beginIdB, const int n);
287
296 cudaReal const c)
297{ addVS(a, b, c, 0, 0, a.capacity()); }
298
310 cudaComplex const c, const int beginIdA,
311 const int beginIdB, const int n);
312
321 DeviceArray<cudaComplex> const & b, cudaComplex const c)
322{ addVS(a, b, c, 0, 0, a.capacity()); }
323
335 cudaComplex const c, const int beginIdA,
336 const int beginIdB, const int n);
337
346 cudaComplex const c)
347{ addVS(a, b, c, 0, 0, a.capacity()); }
348
360 cudaReal const c, const int beginIdA,
361 const int beginIdB, const int n);
362
371 DeviceArray<cudaComplex> const & b, cudaReal const c)
372{ addVS(a, b, c, 0, 0, a.capacity()); }
373
374
375// Subtraction operations
376// ~~~~~~~~~~~~~~~~~~~~~~
377
390 DeviceArray<cudaReal> const & c, const int beginIdA,
391 const int beginIdB, const int beginIdC, const int n);
392
401 DeviceArray<cudaReal> const & c)
402{ subVV(a, b, c, 0, 0, 0, a.capacity()); }
403
416 DeviceArray<cudaComplex> const & c, const int beginIdA,
417 const int beginIdB, const int beginIdC, const int n);
418
427 DeviceArray<cudaComplex> const & b,
428 DeviceArray<cudaComplex> const & c)
429{ subVV(a, b, c, 0, 0, 0, a.capacity()); }
430
443 DeviceArray<cudaComplex> const & c, const int beginIdA,
444 const int beginIdB, const int beginIdC, const int n);
445
454 DeviceArray<cudaComplex> const & c)
455{ subVV(a, b, c, 0, 0, 0, a.capacity()); }
456
469 DeviceArray<cudaReal> const & c, const int beginIdA,
470 const int beginIdB, const int beginIdC, const int n);
471
480 DeviceArray<cudaComplex> const & b,
481 DeviceArray<cudaReal> const & c)
482{ subVV(a, b, c, 0, 0, 0, a.capacity()); }
483
495 cudaReal const c, const int beginIdA,
496 const int beginIdB, const int n);
497
506 cudaReal const c)
507{ subVS(a, b, c, 0, 0, a.capacity()); }
508
520 cudaComplex const c, const int beginIdA,
521 const int beginIdB, const int n);
522
531 DeviceArray<cudaComplex> const & b, cudaComplex const c)
532{ subVS(a, b, c, 0, 0, a.capacity()); }
533
545 cudaComplex const c, const int beginIdA,
546 const int beginIdB, const int n);
547
556 DeviceArray<cudaReal> const & b,
557 cudaComplex const c)
558{ subVS(a, b, c, 0, 0, a.capacity()); }
559
571 cudaReal const c, const int beginIdA,
572 const int beginIdB, const int n);
573
582 DeviceArray<cudaComplex> const & b, cudaReal const c)
583{ subVS(a, b, c, 0, 0, a.capacity()); }
584
585
586// Multiplication operations
587// ~~~~~~~~~~~~~~~~~~~~~~~~~
588
601 DeviceArray<cudaReal> const & c, const int beginIdA,
602 const int beginIdB, const int beginIdC, const int n);
603
612 DeviceArray<cudaReal> const & c)
613{ mulVV(a, b, c, 0, 0, 0, a.capacity()); }
614
627 DeviceArray<cudaComplex> const & c, const int beginIdA,
628 const int beginIdB, const int beginIdC, const int n);
629
638 DeviceArray<cudaComplex> const & b,
639 DeviceArray<cudaComplex> const & c)
640{ mulVV(a, b, c, 0, 0, 0, a.capacity()); }
641
654 DeviceArray<cudaComplex> const & c, const int beginIdA,
655 const int beginIdB, const int beginIdC, const int n);
656
665 DeviceArray<cudaReal> const & b,
666 DeviceArray<cudaComplex> const & c)
667{ mulVV(a, b, c, 0, 0, 0, a.capacity()); }
668
681 DeviceArray<cudaReal> const & c, const int beginIdA,
682 const int beginIdB, const int beginIdC, const int n);
683
692 DeviceArray<cudaComplex> const & b,
693 DeviceArray<cudaReal> const & c)
694{ mulVV(a, b, c, 0, 0, 0, a.capacity()); }
695
707 cudaReal const c, const int beginIdA,
708 const int beginIdB, const int n);
709
718 cudaReal const c)
719{ mulVS(a, b, c, 0, 0, a.capacity()); }
720
732 cudaComplex const c, const int beginIdA,
733 const int beginIdB, const int n);
734
743 DeviceArray<cudaComplex> const & b, cudaComplex const c)
744{ mulVS(a, b, c, 0, 0, a.capacity()); }
745
757 cudaComplex const c, const int beginIdA,
758 const int beginIdB, const int n);
759
768 DeviceArray<cudaReal> const & b, cudaComplex const c)
769{ mulVS(a, b, c, 0, 0, a.capacity()); }
770
782 cudaReal const c, const int beginIdA,
783 const int beginIdB, const int n);
784
793 DeviceArray<cudaComplex> const & b, cudaReal const c)
794{ mulVS(a, b, c, 0, 0, a.capacity()); }
795
796
797// Division operations
798// ~~~~~~~~~~~~~~~~~~~
799
812 DeviceArray<cudaReal> const & c, const int beginIdA,
813 const int beginIdB, const int beginIdC, const int n);
814
823 DeviceArray<cudaReal> const & c)
824{ divVV(a, b, c, 0, 0, 0, a.capacity()); }
825
838 DeviceArray<cudaReal> const & c, const int beginIdA,
839 const int beginIdB, const int beginIdC, const int n);
840
849 DeviceArray<cudaComplex> const & b,
850 DeviceArray<cudaReal> const & c)
851{ divVV(a, b, c, 0, 0, 0, a.capacity()); }
852
864 cudaReal const c, const int beginIdA,
865 const int beginIdB, const int n);
866
875 cudaReal const c)
876{ divVS(a, b, c, 0, 0, a.capacity()); }
877
889 cudaReal const c, const int beginIdA,
890 const int beginIdB, const int n);
891
900 DeviceArray<cudaComplex> const & b, cudaReal const c)
901{ divVS(a, b, c, 0, 0, a.capacity()); }
902
903
904// Exponentiation operations:
905// ~~~~~~~~~~~~~~~~~~~~~~~~~~
906
917 const int beginIdA, const int beginIdB, const int n);
918
926{ expV(a, b, 0, 0, a.capacity()); }
927
938 const int beginIdA, const int beginIdB, const int n);
939
947 DeviceArray<cudaComplex> const & b)
948{ expV(a, b, 0, 0, a.capacity()); }
949
950
951// Compound operations: addition
952// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
953
964 const int beginIdA, const int beginIdB, const int n);
965
973{ addEqV(a, b, 0, 0, a.capacity()); }
974
985 const int beginIdA, const int beginIdB, const int n);
986
994 DeviceArray<cudaComplex> const & b)
995{ addEqV(a, b, 0, 0, a.capacity()); }
996
1007 const int beginIdA, const int beginIdB, const int n);
1008
1016 DeviceArray<cudaReal> const & b)
1017{ addEqV(a, b, 0, 0, a.capacity()); }
1018
/**
* Vector addition in-place, a[i] += b, kernel wrapper (cudaReal).
*
* Declaration only; the function body is not in this header.
*
* \param a  array updated in place (non-const reference)
* \param b  scalar added to the elements of a
* \param beginIdA  presumably the index of the first element of a used (TODO confirm in the definition)
* \param n  presumably the number of elements processed (TODO confirm in the definition)
*/
1027void addEqS(DeviceArray<cudaReal>& a, cudaReal const b,
1028 const int beginIdA, const int n);
1029
/**
* Whole-array convenience overload of addEqS (cudaReal).
*
* Forwards to the four-argument addEqS overload with beginIdA = 0 and
* n = a.capacity(), i.e. the allocated capacity of a.
*
* \param a  array updated in place (non-const reference)
* \param b  scalar added to the elements of a
*/
1036inline void addEqS(DeviceArray<cudaReal>& a, cudaReal const b)
1037{ addEqS(a, b, 0, a.capacity()); }
1038
/**
* Overload of addEqS for a cudaComplex array and a cudaComplex scalar.
*
* Declaration only; the function body is not in this header. By analogy
* with the cudaReal overload this presumably performs a[i] += b
* (TODO confirm in the definition).
*
* \param a  cudaComplex array, passed by non-const reference
* \param b  cudaComplex scalar
* \param beginIdA  presumably the index of the first element of a used (TODO confirm)
* \param n  presumably the number of elements processed (TODO confirm)
*/
1047void addEqS(DeviceArray<cudaComplex>& a, cudaComplex const b,
1048 const int beginIdA, const int n);
1049
/**
* Whole-array convenience overload of addEqS (cudaComplex).
*
* Forwards to the four-argument addEqS overload with beginIdA = 0 and
* n = a.capacity(), i.e. the allocated capacity of a.
*
* \param a  cudaComplex array, passed by non-const reference
* \param b  cudaComplex scalar
*/
1056inline void addEqS(DeviceArray<cudaComplex>& a, cudaComplex const b)
1057{ addEqS(a, b, 0, a.capacity()); }
1058
/**
* Overload of addEqS for a cudaComplex array and a cudaReal scalar.
*
* Declaration only; the function body is not in this header. By analogy
* with the cudaReal overload this presumably performs a[i] += b
* (TODO confirm in the definition how the real scalar is combined with
* the complex elements).
*
* \param a  cudaComplex array, passed by non-const reference
* \param b  cudaReal scalar
* \param beginIdA  presumably the index of the first element of a used (TODO confirm)
* \param n  presumably the number of elements processed (TODO confirm)
*/
1067void addEqS(DeviceArray<cudaComplex>& a, cudaReal const b,
1068 const int beginIdA, const int n);
1069
/**
* Whole-array convenience overload of addEqS (cudaComplex array, cudaReal scalar).
*
* Forwards to the four-argument addEqS overload with beginIdA = 0 and
* n = a.capacity(), i.e. the allocated capacity of a.
*
* \param a  cudaComplex array, passed by non-const reference
* \param b  cudaReal scalar
*/
1076inline void addEqS(DeviceArray<cudaComplex>& a, cudaReal const b)
1077{ addEqS(a, b, 0, a.capacity()); }
1078
1079
1080// Compound operations: subtraction
1081// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1082
1093 const int beginIdA, const int beginIdB, const int n);
1094
1102{ subEqV(a, b, 0, 0, a.capacity()); }
1103
1114 const int beginIdA, const int beginIdB, const int n);
1115
1123 DeviceArray<cudaComplex> const & b)
1124{ subEqV(a, b, 0, 0, a.capacity()); }
1125
1136 const int beginIdA, const int beginIdB, const int n);
1137
1145 DeviceArray<cudaReal> const & b)
1146{ subEqV(a, b, 0, 0, a.capacity()); }
1147
/**
* Vector subtraction in-place, a[i] -= b, kernel wrapper (cudaReal).
*
* Declaration only; the function body is not in this header.
*
* \param a  array updated in place (non-const reference)
* \param b  scalar subtracted from the elements of a
* \param beginIdA  presumably the index of the first element of a used (TODO confirm in the definition)
* \param n  presumably the number of elements processed (TODO confirm in the definition)
*/
1156void subEqS(DeviceArray<cudaReal>& a, cudaReal const b,
1157 const int beginIdA, const int n);
1158
/**
* Whole-array convenience overload of subEqS (cudaReal).
*
* Forwards to the four-argument subEqS overload with beginIdA = 0 and
* n = a.capacity(), i.e. the allocated capacity of a.
*
* \param a  array updated in place (non-const reference)
* \param b  scalar subtracted from the elements of a
*/
1165inline void subEqS(DeviceArray<cudaReal>& a, cudaReal const b)
1166{ subEqS(a, b, 0, a.capacity()); }
1167
/**
* Overload of subEqS for a cudaComplex array and a cudaComplex scalar.
*
* Declaration only; the function body is not in this header. By analogy
* with the cudaReal overload this presumably performs a[i] -= b
* (TODO confirm in the definition).
*
* \param a  cudaComplex array, passed by non-const reference
* \param b  cudaComplex scalar
* \param beginIdA  presumably the index of the first element of a used (TODO confirm)
* \param n  presumably the number of elements processed (TODO confirm)
*/
1176void subEqS(DeviceArray<cudaComplex>& a, cudaComplex const b,
1177 const int beginIdA, const int n);
1178
/**
* Whole-array convenience overload of subEqS (cudaComplex).
*
* Forwards to the four-argument subEqS overload with beginIdA = 0 and
* n = a.capacity(), i.e. the allocated capacity of a.
*
* \param a  cudaComplex array, passed by non-const reference
* \param b  cudaComplex scalar
*/
1185inline void subEqS(DeviceArray<cudaComplex>& a, cudaComplex const b)
1186{ subEqS(a, b, 0, a.capacity()); }
1187
/**
* Overload of subEqS for a cudaComplex array and a cudaReal scalar.
*
* Declaration only; the function body is not in this header. By analogy
* with the cudaReal overload this presumably performs a[i] -= b
* (TODO confirm in the definition how the real scalar is combined with
* the complex elements).
*
* \param a  cudaComplex array, passed by non-const reference
* \param b  cudaReal scalar
* \param beginIdA  presumably the index of the first element of a used (TODO confirm)
* \param n  presumably the number of elements processed (TODO confirm)
*/
1196void subEqS(DeviceArray<cudaComplex>& a, cudaReal const b,
1197 const int beginIdA, const int n);
1198
/**
* Whole-array convenience overload of subEqS (cudaComplex array, cudaReal scalar).
*
* Forwards to the four-argument subEqS overload with beginIdA = 0 and
* n = a.capacity(), i.e. the allocated capacity of a.
*
* \param a  cudaComplex array, passed by non-const reference
* \param b  cudaReal scalar
*/
1205inline void subEqS(DeviceArray<cudaComplex>& a, cudaReal const b)
1206{ subEqS(a, b, 0, a.capacity()); }
1207
1208
1209// Compound operations: multiplication
1210// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1211
1222 const int beginIdA, const int beginIdB, const int n);
1223
1231 DeviceArray<cudaReal> const & b)
1232{ mulEqV(a, b, 0, 0, a.capacity()); }
1233
1244 const int beginIdA, const int beginIdB, const int n);
1245
1253 DeviceArray<cudaComplex> const & b)
1254{ mulEqV(a, b, 0, 0, a.capacity()); }
1255
1266 const int beginIdA, const int beginIdB, const int n);
1267
1275 DeviceArray<cudaReal> const & b)
1276{ mulEqV(a, b, 0, 0, a.capacity()); }
1277
/**
* Vector multiplication in-place, a[i] *= b, kernel wrapper (cudaReal).
*
* Declaration only; the function body is not in this header.
*
* \param a  array updated in place (non-const reference)
* \param b  scalar factor
* \param beginIdA  presumably the index of the first element of a used (TODO confirm in the definition)
* \param n  presumably the number of elements processed (TODO confirm in the definition)
*/
1286void mulEqS(DeviceArray<cudaReal>& a, cudaReal const b,
1287 const int beginIdA, const int n);
1288
/**
* Whole-array convenience overload of mulEqS (cudaReal).
*
* Forwards to the four-argument mulEqS overload with beginIdA = 0 and
* n = a.capacity(), i.e. the allocated capacity of a.
*
* \param a  array updated in place (non-const reference)
* \param b  scalar factor
*/
1295inline void mulEqS(DeviceArray<cudaReal>& a, cudaReal const b)
1296{ mulEqS(a, b, 0, a.capacity()); }
1297
/**
* Overload of mulEqS for a cudaComplex array and a cudaComplex scalar.
*
* Declaration only; the function body is not in this header. By analogy
* with the cudaReal overload this presumably performs a[i] *= b
* (TODO confirm in the definition).
*
* \param a  cudaComplex array, passed by non-const reference
* \param b  cudaComplex scalar
* \param beginIdA  presumably the index of the first element of a used (TODO confirm)
* \param n  presumably the number of elements processed (TODO confirm)
*/
1306void mulEqS(DeviceArray<cudaComplex>& a, cudaComplex const b,
1307 const int beginIdA, const int n);
1308
/**
* Whole-array convenience overload of mulEqS (cudaComplex).
*
* Forwards to the four-argument mulEqS overload with beginIdA = 0 and
* n = a.capacity(), i.e. the allocated capacity of a.
*
* \param a  cudaComplex array, passed by non-const reference
* \param b  cudaComplex scalar
*/
1315inline void mulEqS(DeviceArray<cudaComplex>& a, cudaComplex const b)
1316{ mulEqS(a, b, 0, a.capacity()); }
1317
/**
* Overload of mulEqS for a cudaComplex array and a cudaReal scalar.
*
* Declaration only; the function body is not in this header. By analogy
* with the cudaReal overload this presumably performs a[i] *= b
* (TODO confirm in the definition).
*
* \param a  cudaComplex array, passed by non-const reference
* \param b  cudaReal scalar
* \param beginIdA  presumably the index of the first element of a used (TODO confirm)
* \param n  presumably the number of elements processed (TODO confirm)
*/
1326void mulEqS(DeviceArray<cudaComplex>& a, cudaReal const b,
1327 const int beginIdA, const int n);
1328
/**
* Whole-array convenience overload of mulEqS (cudaComplex array, cudaReal scalar).
*
* Forwards to the four-argument mulEqS overload with beginIdA = 0 and
* n = a.capacity(), i.e. the allocated capacity of a.
*
* \param a  cudaComplex array, passed by non-const reference
* \param b  cudaReal scalar
*/
1335inline void mulEqS(DeviceArray<cudaComplex>& a, cudaReal const b)
1336{ mulEqS(a, b, 0, a.capacity()); }
1337
1338
1339// Compound operations: division
1340// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1341
1352 const int beginIdA, const int beginIdB, const int n);
1353
1361{ divEqV(a, b, 0, 0, a.capacity()); }
1362
1373 const int beginIdA, const int beginIdB, const int n);
1374
1382 DeviceArray<cudaReal> const & b)
1383{ divEqV(a, b, 0, 0, a.capacity()); }
1384
/**
* Vector division in-place, a[i] /= b, kernel wrapper (cudaReal).
*
* Declaration only; the function body is not in this header.
*
* \param a  array updated in place (non-const reference)
* \param b  scalar divisor
* \param beginIdA  presumably the index of the first element of a used (TODO confirm in the definition)
* \param n  presumably the number of elements processed (TODO confirm in the definition)
*/
1393void divEqS(DeviceArray<cudaReal>& a, cudaReal const b,
1394 const int beginIdA, const int n);
1395
/**
* Whole-array convenience overload of divEqS (cudaReal).
*
* Forwards to the four-argument divEqS overload with beginIdA = 0 and
* n = a.capacity(), i.e. the allocated capacity of a.
*
* \param a  array updated in place (non-const reference)
* \param b  scalar divisor
*/
1402inline void divEqS(DeviceArray<cudaReal>& a, cudaReal const b)
1403{ divEqS(a, b, 0, a.capacity()); }
1404
/**
* Overload of divEqS for a cudaComplex array and a cudaReal scalar.
*
* Declaration only; the function body is not in this header. By analogy
* with the cudaReal overload this presumably performs a[i] /= b
* (TODO confirm in the definition).
*
* \param a  cudaComplex array, passed by non-const reference
* \param b  cudaReal scalar
* \param beginIdA  presumably the index of the first element of a used (TODO confirm)
* \param n  presumably the number of elements processed (TODO confirm)
*/
1413void divEqS(DeviceArray<cudaComplex>& a, cudaReal const b,
1414 const int beginIdA, const int n);
1415
/**
* Whole-array convenience overload of divEqS (cudaComplex array, cudaReal scalar).
*
* Forwards to the four-argument divEqS overload with beginIdA = 0 and
* n = a.capacity(), i.e. the allocated capacity of a.
*
* \param a  cudaComplex array, passed by non-const reference
* \param b  cudaReal scalar
*/
1422inline void divEqS(DeviceArray<cudaComplex>& a, cudaReal const b)
1423{ divEqS(a, b, 0, a.capacity()); }
1424
1427} // namespace VecOp
1428} // namespace Cuda
1429} // namespace Prdc
1430} // namespace Pscf
1431
1432#include "VecOpMisc.h" // Ensure that if VecOp is included, so is VecOpMisc
1433
1434#endif
Dynamic array on the GPU device with aligned data.
Definition rpg/System.h:32
int capacity() const
Return allocated capacity.
void addEqV(DeviceArray< cudaReal > &a, DeviceArray< cudaReal > const &b, const int beginIdA, const int beginIdB, const int n)
Vector addition in-place, a[i] += b[i], kernel wrapper (cudaReal).
Definition VecOp.cu:1632
void eqV(DeviceArray< cudaReal > &a, DeviceArray< cudaReal > const &b, const int beginIdA, const int beginIdB, const int n)
Vector assignment, a[i] = b[i], kernel wrapper (cudaReal).
Definition VecOp.cu:1020
void subEqS(DeviceArray< cudaReal > &a, cudaReal const b, const int beginIdA, const int n)
Vector subtraction in-place, a[i] -= b, kernel wrapper (cudaReal).
Definition VecOp.cu:1779
void addEqS(DeviceArray< cudaReal > &a, cudaReal const b, const int beginIdA, const int n)
Vector addition in-place, a[i] += b, kernel wrapper (cudaReal).
Definition VecOp.cu:1683
void subVS(DeviceArray< cudaReal > &a, DeviceArray< cudaReal > const &b, cudaReal const c, const int beginIdA, const int beginIdB, const int n)
Vector subtraction, a[i] = b[i] - c, kernel wrapper (cudaReal).
Definition VecOp.cu:1304
void divEqV(DeviceArray< cudaReal > &a, DeviceArray< cudaReal > const &b, const int beginIdA, const int beginIdB, const int n)
Vector division in-place, a[i] /= b[i], kernel wrapper (cudaReal).
Definition VecOp.cu:1920
void divVV(DeviceArray< cudaReal > &a, DeviceArray< cudaReal > const &b, DeviceArray< cudaReal > const &c, const int beginIdA, const int beginIdB, const int beginIdC, const int n)
Vector division, a[i] = b[i] / c[i], kernel wrapper (cudaReal).
Definition VecOp.cu:1524
void addVV(DeviceArray< cudaReal > &a, DeviceArray< cudaReal > const &b, DeviceArray< cudaReal > const &c, const int beginIdA, const int beginIdB, const int beginIdC, const int n)
Vector addition, a[i] = b[i] + c[i], kernel wrapper (cudaReal).
Definition VecOp.cu:1084
void mulVV(DeviceArray< cudaReal > &a, DeviceArray< cudaReal > const &b, DeviceArray< cudaReal > const &c, const int beginIdA, const int beginIdB, const int beginIdC, const int n)
Vector multiplication, a[i] = b[i] * c[i], kernel wrapper (cudaReal).
Definition VecOp.cu:1376
void subVV(DeviceArray< cudaReal > &a, DeviceArray< cudaReal > const &b, DeviceArray< cudaReal > const &c, const int beginIdA, const int beginIdB, const int beginIdC, const int n)
Vector subtraction, a[i] = b[i] - c[i], kernel wrapper (cudaReal).
Definition VecOp.cu:1228
void addVS(DeviceArray< cudaReal > &a, DeviceArray< cudaReal > const &b, cudaReal const c, const int beginIdA, const int beginIdB, int n)
Vector addition, a[i] = b[i] + c, kernel wrapper (cudaReal).
Definition VecOp.cu:1160
void mulEqS(DeviceArray< cudaReal > &a, cudaReal const b, const int beginIdA, const int n)
Vector multiplication in-place, a[i] *= b, kernel wrapper (cudaReal).
Definition VecOp.cu:1875
void mulEqV(DeviceArray< cudaReal > &a, DeviceArray< cudaReal > const &b, const int beginIdA, const int beginIdB, const int n)
Vector multiplication in-place, a[i] *= b[i], kernel wrapper (cudaReal).
Definition VecOp.cu:1824
void subEqV(DeviceArray< cudaReal > &a, DeviceArray< cudaReal > const &b, const int beginIdA, const int beginIdB, const int n)
Vector subtraction in-place, a[i] -= b[i], kernel wrapper (cudaReal).
Definition VecOp.cu:1728
void eqS(DeviceArray< cudaReal > &a, cudaReal const b, const int beginIdA, const int n)
Vector assignment, a[i] = b, kernel wrapper (cudaReal).
Definition VecOp.cu:1054
void mulVS(DeviceArray< cudaReal > &a, DeviceArray< cudaReal > const &b, cudaReal const c, const int beginIdA, const int beginIdB, const int n)
Vector multiplication, a[i] = b[i] * c, kernel wrapper (cudaReal).
Definition VecOp.cu:1452
void divVS(DeviceArray< cudaReal > &a, DeviceArray< cudaReal > const &b, cudaReal const c, const int beginIdA, const int beginIdB, const int n)
Vector division, a[i] = b[i] / c, kernel wrapper (cudaReal).
Definition VecOp.cu:1562
void expV(DeviceArray< cudaReal > &a, DeviceArray< cudaReal > const &b, const int beginIdA, const int beginIdB, const int n)
Vector exponentiation, a[i] = exp(b[i]), kernel wrapper (cudaReal).
Definition VecOp.cu:1598
void divEqS(DeviceArray< cudaReal > &a, cudaReal const b, const int beginIdA, const int n)
Vector division in-place, a[i] /= b, kernel wrapper (cudaReal).
Definition VecOp.cu:1954
PSCF package top-level namespace.
Definition param_pc.dox:1