PSCF v1.2
VecOpMisc.h
1#ifndef PRDC_VEC_OP_MISC_H
2#define PRDC_VEC_OP_MISC_H
3
4/*
5* PSCF Package
6*
7* Copyright 2016 - 2022, The Regents of the University of Minnesota
8* Distributed under the terms of the GNU General Public License.
9*/
10
11#include "types.h"
12#include <pscf/cuda/DeviceArray.h>
13#include <util/containers/DArray.h>
14
15namespace Pscf {
16namespace Prdc {
17namespace Cuda {
18namespace VecOp {
19
20/*
21* Miscellaneous element-wise vector operations performed on the GPU.
22*
23* Note: this file is included at the end of VecOp.h, so any file that
24* includes VecOp.h will also include this file.
25*
26* The functions defined in this file are wrappers for CUDA kernels that
27* perform the actual vector operations. The kernels themselves are only
28* intended to be called through their wrappers, so they are defined in
29* an anonymous namespace in VecOpMisc.cu.
30*
31* The functions defined in this file combine 2 or more element-wise
32* vector operations into a single kernel launch, which performs the
33* combined operation faster than calling several of the functions in
34* VecOp.h consecutively. These functions are not intended to be comprehensive.
35* Rather, they are written and included as needed during the development
36* of other code.
37*
38* The names of these functions follow the same conventions as those in
39* VecOp, using add, sub, mul, div, exp, eq, and combinations thereof to
40* indicate the operation(s) being performed. V denotes a vector, S
41* denotes a scalar, and Vc denotes a vector that is multiplied by a
42* scalar coefficient and then used in another operation. For example,
43* addEqVc(a, b, c) performs a[i] += b[i] * c for all i.
44*
45* Another set of functions defined in this file contain the word Pair,
46* indicating that these functions perform the same operation for a pair
47* of output arrays. For example, eqVPair performs a1[i] = s[i] and
48* a2[i] = s[i] for all i. Performing these operations in pairs is
49* faster because the array s only needs to be loaded from global memory
50* once.
51*
52* A third set of functions defined in this file contain the word "Many",
53* indicating that an arbitrary number of vectors (>2) are involved in an
54* operation. For example, addVMany adds >2 vectors together by taking
55* an array of vectors, rather than a fixed list of separate vector arguments.
56*/
57
58// Functions that combine multiple VecOp operations
59// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
60
/**
* Vector addition w/ coefficient, a[i] = (b[i]*c) + (d[i]*e), kernel wrapper.
*
* \param a  output array (LHS)
* \param b  first input array (RHS)
* \param c  scalar coefficient multiplying b (RHS)
* \param d  second input array (RHS)
* \param e  scalar coefficient multiplying d (RHS)
*/
70void addVcVc(DeviceArray<cudaReal>& a,
71 DeviceArray<cudaReal> const & b, cudaReal const c,
72 DeviceArray<cudaReal> const & d, cudaReal const e);
73
/**
* 3-vec addition w/ coeff, a[i] = (b[i]*c) + (d[i]*e) + (f[i]*g), kernel wrapper.
*
* \param a  output array (LHS)
* \param b  first input array (RHS)
* \param c  scalar coefficient multiplying b (RHS)
* \param d  second input array (RHS)
* \param e  scalar coefficient multiplying d (RHS)
* \param f  third input array (RHS)
* \param g  scalar coefficient multiplying f (RHS)
*/
85void addVcVcVc(DeviceArray<cudaReal>& a,
86 DeviceArray<cudaReal> const & b, cudaReal const c,
87 DeviceArray<cudaReal> const & d, cudaReal const e,
88 DeviceArray<cudaReal> const & f, cudaReal const g);
89
/**
* Vector addition in-place w/ coefficient, a[i] += b[i] * c, kernel wrapper.
*
* \param a  array that is updated in place (LHS)
* \param b  input array (RHS)
* \param c  scalar coefficient multiplying b (RHS)
*/
97void addEqVc(DeviceArray<cudaReal>& a, DeviceArray<cudaReal> const & b,
98 cudaReal const c);
99
/**
* Vector subtraction, a[i] = b[i] - c[i] - d, kernel wrapper.
*
* \param a  output array (LHS)
* \param b  input array, minuend (RHS)
* \param c  input array subtracted from b (RHS)
* \param d  scalar subtracted from every element (RHS)
*/
108void subVVS(DeviceArray<cudaReal>& a, DeviceArray<cudaReal> const & b,
109 DeviceArray<cudaReal> const & c, cudaReal const d);
110
/**
* Vector division in-place w/ coeff., a[i] /= (b[i] * c), kernel wrapper.
*
* Note that a holds complex values while b and c are real.
*
* \param a  complex array that is updated in place (LHS)
* \param b  real input array (RHS)
* \param c  real scalar coefficient multiplying b (RHS)
*/
118void divEqVc(DeviceArray<cudaComplex>& a, DeviceArray<cudaReal> const & b,
119 cudaReal const c);
120
/**
* Vector exponentiation w/ coefficient, a[i] = exp(b[i]*c), kernel wrapper.
*
* \param a  output array (LHS)
* \param b  input array (RHS)
* \param c  scalar coefficient multiplying b inside the exponential (RHS)
*/
128void expVc(DeviceArray<cudaReal>& a, DeviceArray<cudaReal> const & b,
129 cudaReal const c);
130
131
132// Pair functions
133// ~~~~~~~~~~~~~~
134
/**
* Vector assignment in pairs, ax[i] = s[i], kernel wrapper.
*
* Performs a1[i] = s[i] and a2[i] = s[i] for all i.
*
* \param a1  first output array (LHS)
* \param a2  second output array (LHS)
* \param s  shared input array assigned to both outputs (RHS)
*/
142void eqVPair(DeviceArray<cudaReal>& a1, DeviceArray<cudaReal>& a2,
143 DeviceArray<cudaReal> const & s);
144
/**
* Vector multiplication in pairs, ax[i] = bx[i] * s[i], kernel wrapper.
*
* Performs a1[i] = b1[i] * s[i] and a2[i] = b2[i] * s[i] for all i.
*
* \param a1  first output array (LHS)
* \param a2  second output array (LHS)
* \param b1  input array multiplied into a1 (RHS)
* \param b2  input array multiplied into a2 (RHS)
* \param s  shared input array used in both products (RHS)
*/
154void mulVVPair(DeviceArray<cudaReal>& a1, DeviceArray<cudaReal>& a2,
155 DeviceArray<cudaReal> const & b1,
156 DeviceArray<cudaReal> const & b2,
157 DeviceArray<cudaReal> const & s);
158
/**
* In-place vector multiplication in pairs, ax[i] *= s[i], kernel wrapper.
*
* Performs a1[i] *= s[i] and a2[i] *= s[i] for all i.
*
* \param a1  first array, updated in place (LHS)
* \param a2  second array, updated in place (LHS)
* \param s  shared input array multiplying both a1 and a2 (RHS)
*/
166void mulEqVPair(DeviceArray<cudaReal>& a1, DeviceArray<cudaReal>& a2,
167 DeviceArray<cudaReal> const & s);
168
169
170// Functions of "many" vectors
171// ~~~~~~~~~~~~~~~~~~~~~~~~~~~
172
/**
* Add an arbitrary number of vectors pointwise, kernel wrapper.
*
* The input vectors are passed as a single array of vectors rather than
* as separate arguments.
*
* \param a  output array (LHS)
* \param vecs  array of DeviceArray input vectors to be added (RHS)
*/
183void addVMany(DeviceArray<cudaReal>& a,
184 DArray<DeviceArray<cudaReal> > const & vecs);
185
/**
* Overload of addVMany that takes an array of pointers to const
* DeviceArray<cudaReal> objects, rather than an array of DeviceArray
* objects.
*
* NOTE(review): presumably performs the same pointwise sum as the other
* addVMany overload, with the inputs given by pointer - confirm against
* the definition in VecOpMisc.cu.
*
* \param a  output array (LHS)
* \param vecs  array of pointers to the input DeviceArrays (RHS)
*/
201void addVMany(DeviceArray<cudaReal>& a,
202 DArray<DeviceArray<cudaReal> const *> const & vecs);
203
/**
* Multiply an arbitrary number of vectors pointwise, kernel wrapper.
*
* The input vectors are passed as a single array of vectors rather than
* as separate arguments.
*
* \param a  output array (LHS)
* \param vecs  array of DeviceArray input vectors to be multiplied (RHS)
*/
214void mulVMany(DeviceArray<cudaReal>& a,
215 DArray<DeviceArray<cudaReal> > const & vecs);
216
/**
* Overload of mulVMany that takes an array of pointers to const
* DeviceArray<cudaReal> objects, rather than an array of DeviceArray
* objects.
*
* NOTE(review): presumably performs the same pointwise product as the
* other mulVMany overload, with the inputs given by pointer - confirm
* against the definition in VecOpMisc.cu.
*
* \param a  output array (LHS)
* \param vecs  array of pointers to the input DeviceArrays (RHS)
*/
232void mulVMany(DeviceArray<cudaReal>& a,
233 DArray<DeviceArray<cudaReal> const *> const & vecs);
234
235
236// Other useful functions
237// ~~~~~~~~~~~~~~~~~~~~~~
238
/**
* Squared norm of complex number, a[i] = norm(b[i])^2, kernel wrapper.
*
* \param a  real output array (LHS)
* \param b  complex input array (RHS)
*/
245void sqNormV(DeviceArray<cudaReal>& a, DeviceArray<cudaComplex> const & b);
246
/**
* Norm of complex number to the 4th power, a[i] = norm(b[i])^4, kernel wrapper.
*
* \param a  real output array (LHS)
* \param b  complex input array (RHS)
*/
253void sqSqNormV(DeviceArray<cudaReal>& a, DeviceArray<cudaComplex> const & b);
254
255} // namespace VecOp
256} // namespace Cuda
257} // namespace Prdc
258} // namespace Pscf
259#endif
void addVMany(DeviceArray< cudaReal > &a, DArray< DeviceArray< cudaReal > > const &vecs)
Add an undefined number of vectors pointwise, kernel wrapper.
Definition VecOpMisc.cu:465
void addVcVcVc(DeviceArray< cudaReal > &a, DeviceArray< cudaReal > const &b, cudaReal const c, DeviceArray< cudaReal > const &d, cudaReal const e, DeviceArray< cudaReal > const &f, cudaReal const g)
3-vec addition w coeff, a[i] = (b[i]*c) + (d[i]*e) + (f[i]*g), kernel wrapper.
Definition VecOpMisc.cu:322
void expVc(DeviceArray< cudaReal > &a, DeviceArray< cudaReal > const &b, cudaReal const c)
Vector exponentiation w/ coefficient, a[i] = exp(b[i]*c), kernel wrapper.
Definition VecOpMisc.cu:393
void sqSqNormV(DeviceArray< cudaReal > &a, DeviceArray< cudaComplex > const &b)
Norm of complex number to the 4th power, a[i] = norm(b[i])^4, kernel wrapper.
Definition VecOpMisc.cu:600
void mulVVPair(DeviceArray< cudaReal > &a1, DeviceArray< cudaReal > &a2, DeviceArray< cudaReal > const &b1, DeviceArray< cudaReal > const &b2, DeviceArray< cudaReal > const &s)
Vector multiplication in pairs, ax[i] = bx[i] * s[i], kernel wrapper.
Definition VecOpMisc.cu:426
void addVcVc(DeviceArray< cudaReal > &a, DeviceArray< cudaReal > const &b, cudaReal const c, DeviceArray< cudaReal > const &d, cudaReal const e)
Vector addition w/ coefficient, a[i] = (b[i]*c) + (d[i]*e), kernel wrapper.
Definition VecOpMisc.cu:304
void mulVMany(DeviceArray< cudaReal > &a, DArray< DeviceArray< cudaReal > > const &vecs)
Multiply an undefined number of vectors pointwise, kernel wrapper.
Definition VecOpMisc.cu:525
void subVVS(DeviceArray< cudaReal > &a, DeviceArray< cudaReal > const &b, DeviceArray< cudaReal > const &c, cudaReal const d)
Vector subtraction, a[i] = b[i] - c[i] - d, kernel wrapper.
Definition VecOpMisc.cu:359
void eqVPair(DeviceArray< cudaReal > &a1, DeviceArray< cudaReal > &a2, DeviceArray< cudaReal > const &s)
Vector assignment in pairs, ax[i] = s[i], kernel wrapper.
Definition VecOpMisc.cu:409
void sqNormV(DeviceArray< cudaReal > &a, DeviceArray< cudaComplex > const &b)
Squared norm of complex number, a[i] = norm(b[i])^2, kernel wrapper.
Definition VecOpMisc.cu:585
void divEqVc(DeviceArray< cudaComplex > &a, DeviceArray< cudaReal > const &b, cudaReal const c)
Vector division in-place w/ coeff., a[i] /= (b[i] * c), kernel wrapper.
Definition VecOpMisc.cu:377
void mulEqVPair(DeviceArray< cudaReal > &a1, DeviceArray< cudaReal > &a2, DeviceArray< cudaReal > const &s)
In-place vector multiplication in pairs, ax[i] *= s[i], kernel wrapper.
Definition VecOpMisc.cu:448
void addEqVc(DeviceArray< cudaReal > &a, DeviceArray< cudaReal > const &b, cudaReal const c)
Vector addition in-place w/ coefficient, a[i] += b[i] * c, kernel wrapper.
Definition VecOpMisc.cu:343
PSCF package top-level namespace.
Definition param_pc.dox:1