PSCF v1.4.0
VecOpMisc.h
1#ifndef PSCF_CUDA_VEC_OP_MISC_H
2#define PSCF_CUDA_VEC_OP_MISC_H
3
4/*
5* PSCF - Polymer Self-Consistent Field
6*
7* Copyright 2015 - 2025, The Regents of the University of Minnesota
8* Distributed under the terms of the GNU General Public License.
9*/
10
11#include <pscf/cuda/cudaTypes.h>
12#include <pscf/cuda/DeviceArray.h>
13#include <util/containers/DArray.h>
14
15namespace Pscf {
16namespace VecOp {
17
18 /*
19 * Miscellaneous element-wise vector operations performed on the GPU.
20 *
21 * This collection of functions is not intended to be comprehensive.
22 * Rather, they are written and included as needed during the development
23 * of other code.
24 *
25 * Note: this file is included at the end of VecOp.h, so any file that
26 * includes VecOp.h will also include this file.
27 *
28 * Combined operations:
29 *
30 * The first set of functions defined in this file combine 2 or more
31 * element-wise vector operations. Several of these functions perform
32 * linear combinations of vectors multiplied (or scaled) by coefficients,
33 * thus combining vector-scalar multiplication with vector addition. On
34 * a GPU, these operations are performed by launching a single kernel,
35 * which will generally be faster than consecutively calling multiple
36 * simpler operations.
37 *
38 * The names of these functions follow conventions similar to those used
39 * in VecOp.h, using eq, add, sub, mul, div, exp and combinations
40 * thereof to indicate the operation(s) being performed. V denotes a
41 * vector input, S denotes a scalar, and Vc denotes a vector that
42 * is multiplied by a scalar coefficient and then used in another
43 * operation. For example, addEqVc(a, b, c) performs a[i] += b[i] * c
44 * for all i.
45 *
46 * Pair operations:
47 *
48 * A second set of functions defined in this file contain the word Pair,
49 * indicating that these functions perform the same operation for a
50 * pair of real output arrays, using a shared input array. For example,
51 * eqVPair performs a1[i] = s[i] and a2[i] = s[i] for all i. On a GPU,
52 * performing these operations in pairs is faster because the shared
53 * input array only needs to be loaded from global memory once.
54 *
55 * "Many" operations:
56 *
57 * A third set of functions defined in this file contain the word
58 * "Many", indicating that an undefined number of input vectors (>2)
59 * are involved in an operation. For example, addVMany adds >2 vectors
60 * together by passing an array of vectors, rather than a discrete set
61 * of vectors.
62 *
63 * The functions declared in this file are wrappers for CUDA kernels
64 * that perform the actual vector operations. The underlying kernels
65 * are only intended to be called through their wrappers, and so are
66 * defined in an anonymous namespace in the file VecOpMisc.cu that
67 * is inaccessible outside that file.
68 */
69
70 // Combined operations
71
/*
* Linear combination of two real vectors (wrapper for a CUDA kernel).
*
* NOTE(review): going by this file's naming convention (Vc = a vector
* multiplied by a scalar coefficient) and the description of the
* same-named Array<double> overload, this presumably computes
* a[i] = b1[i]*c1 + b2[i]*c2 for all i -- confirm against VecOpMisc.cu.
*
* a:  output array (the only non-const argument)
* b1: first input array,  c1: scalar coefficient applied to b1
* b2: second input array, c2: scalar coefficient applied to b2
*/
83 void addVcVc(DeviceArray<cudaReal>& a,
84 DeviceArray<cudaReal> const & b1, cudaReal const c1,
85 DeviceArray<cudaReal> const & b2, cudaReal const c2);
86
/*
* Scaled real vector plus a scalar (wrapper for a CUDA kernel).
*
* NOTE(review): going by this file's naming convention (Vc = scaled
* vector, S = scalar) and the description of the same-named
* Array<double> overload, this presumably computes
* a[i] = b[i]*c + s for all i -- confirm against VecOpMisc.cu.
*
* a: output array (the only non-const argument)
* b: input array
* c: scalar coefficient applied to b
* s: scalar added to every element
*/
97 void addVcS(DeviceArray<cudaReal>& a,
98 DeviceArray<cudaReal> const & b, cudaReal const c,
99 cudaReal const s);
100
/*
* In-place addition of a scaled real vector (wrapper for a CUDA kernel).
*
* Performs a[i] += b[i] * c for all i.
*
* a: array updated in place
* b: input array
* c: scalar coefficient applied to b
*/
110 void addEqVc(DeviceArray<cudaReal>& a,
111 DeviceArray<cudaReal> const & b,
112 cudaReal const c);
113
/*
* Linear combination of three real vectors (wrapper for a CUDA kernel).
*
* NOTE(review): going by this file's naming convention (Vc = a vector
* multiplied by a scalar coefficient) and the description of the
* same-named Array<double> overload, this presumably computes
* a[i] = b1[i]*c1 + b2[i]*c2 + b3[i]*c3 for all i -- confirm against
* VecOpMisc.cu.
*
* a:  output array (the only non-const argument)
* b1: first input array,  c1: scalar coefficient applied to b1
* b2: second input array, c2: scalar coefficient applied to b2
* b3: third input array,  c3: scalar coefficient applied to b3
*/
127 void addVcVcVc(DeviceArray<cudaReal>& a,
128 DeviceArray<cudaReal> const & b1, cudaReal const c1,
129 DeviceArray<cudaReal> const & b2, cudaReal const c2,
130 DeviceArray<cudaReal> const & b3, cudaReal const c3);
131
/*
* Linear combination of two real vectors plus a scalar (wrapper for a
* CUDA kernel).
*
* NOTE(review): going by this file's naming convention (Vc = scaled
* vector, S = scalar) and the description of the same-named
* Array<double> overload, this presumably computes
* a[i] = b1[i]*c1 + b2[i]*c2 + s for all i -- confirm against
* VecOpMisc.cu.
*
* a:  output array (the only non-const argument)
* b1: first input array,  c1: scalar coefficient applied to b1
* b2: second input array, c2: scalar coefficient applied to b2
* s:  scalar added to every element
*/
144 void addVcVcS(DeviceArray<cudaReal>& a,
145 DeviceArray<cudaReal> const & b1, cudaReal const c1,
146 DeviceArray<cudaReal> const & b2, cudaReal const c2,
147 cudaReal const s);
148
/*
* In-place division of a complex vector by a scaled real vector
* (wrapper for a CUDA kernel).
*
* NOTE(review): going by the description of the same-named
* Array<fftw_complex> overload, this presumably computes
* a[i] /= (b[i] * c) for all i -- confirm against VecOpMisc.cu.
*
* a: complex array updated in place
* b: real input array
* c: real scalar coefficient applied to b
*/
158 void divEqVc(DeviceArray<cudaComplex>& a,
159 DeviceArray<cudaReal> const & b,
160 cudaReal const c);
161
/*
* Exponentiation of a scaled real vector (wrapper for a CUDA kernel).
*
* NOTE(review): going by the description of the same-named
* Array<double> overload, this presumably computes
* a[i] = exp(b[i]*c) for all i -- confirm against VecOpMisc.cu.
*
* a: output array (the only non-const argument)
* b: input array
* c: scalar coefficient applied to b before exponentiation
*/
171 void expVc(DeviceArray<cudaReal>& a,
172 DeviceArray<cudaReal> const & b,
173 cudaReal const c);
174
175
176 // Pair operations (two output arrays and a shared input)
177
/*
* Vector assignment in pairs (wrapper for a CUDA kernel).
*
* Performs a1[i] = s[i] and a2[i] = s[i] for all i, so that the shared
* input s is used for both outputs in a single operation.
*
* a1: first output array
* a2: second output array
* s:  shared input array
*/
187 void eqVPair(DeviceArray<cudaReal>& a1,
188 DeviceArray<cudaReal>& a2,
189 DeviceArray<cudaReal> const & s);
190
/*
* Vector multiplication in pairs with a shared factor (wrapper for a
* CUDA kernel).
*
* NOTE(review): going by the description of the same-named
* Array<double> overload, this presumably computes
* a1[i] = b1[i] * s[i] and a2[i] = b2[i] * s[i] for all i -- confirm
* against VecOpMisc.cu.
*
* a1: first output array
* a2: second output array
* b1: input array paired with a1
* b2: input array paired with a2
* s:  shared input array
*/
202 void mulVVPair(DeviceArray<cudaReal>& a1, DeviceArray<cudaReal>& a2,
203 DeviceArray<cudaReal> const & b1,
204 DeviceArray<cudaReal> const & b2,
205 DeviceArray<cudaReal> const & s);
206
/*
* In-place vector multiplication in pairs (wrapper for a CUDA kernel).
*
* NOTE(review): going by the description of the same-named
* Array<double> overload, this presumably computes
* a1[i] *= s[i] and a2[i] *= s[i] for all i -- confirm against
* VecOpMisc.cu.
*
* a1: first array updated in place
* a2: second array updated in place
* s:  shared input array
*/
216 void mulEqVPair(DeviceArray<cudaReal>& a1,
217 DeviceArray<cudaReal>& a2,
218 DeviceArray<cudaReal> const & s);
219
220 // Functions of "many" vectors
221
/*
* Add an arbitrary number of real vectors pointwise (wrapper for a
* CUDA kernel).
*
* a:    output array (the only non-const argument)
* vecs: array of the input vectors to be added together
*/
234 void addVMany(DeviceArray<cudaReal>& a,
235 DArray<DeviceArray<cudaReal> > const & vecs);
236
/*
* Overload of addVMany taking an array of pointers to the input
* vectors, rather than an array of the vectors themselves.
*
* NOTE(review): presumably performs the same pointwise sum of all
* input vectors as the overload that takes
* DArray<DeviceArray<cudaReal> > -- confirm against VecOpMisc.cu.
*
* a:    output array (the only non-const argument)
* vecs: array of pointers to const input vectors
*/
254 void addVMany(DeviceArray<cudaReal>& a,
255 DArray<DeviceArray<cudaReal> const *> const & vecs);
256
/*
* Multiply an undefined number of real vectors pointwise (wrapper for
* a CUDA kernel).
*
* a:    output array (the only non-const argument)
* vecs: array of the input vectors to be multiplied together
*/
269 void mulVMany(DeviceArray<cudaReal>& a,
270 DArray<DeviceArray<cudaReal> > const & vecs);
271
/*
* Overload of mulVMany taking an array of pointers to the input
* vectors, rather than an array of the vectors themselves.
*
* NOTE(review): presumably performs the same pointwise product of all
* input vectors as the overload that takes
* DArray<DeviceArray<cudaReal> > -- confirm against VecOpMisc.cu.
*
* a:    output array (the only non-const argument)
* vecs: array of pointers to const input vectors
*/
289 void mulVMany(DeviceArray<cudaReal>& a,
290 DArray<DeviceArray<cudaReal> const *> const & vecs);
291
292 // Other useful functions
293
/*
* Fourth power of the absolute magnitude of a complex vector (wrapper
* for a CUDA kernel).
*
* NOTE(review): going by the description of the same-named
* Array<fftw_complex> overload, this presumably computes
* a[i] = |b[i]|^4 for all i -- confirm against VecOpMisc.cu.
*
* a: real output array
* b: complex input array
*/
302 void sqSqAbsV(DeviceArray<cudaReal>& a,
303 DeviceArray<cudaComplex> const & b);
304
305} // namespace VecOp
306} // namespace Pscf
307#endif
Dynamically allocatable contiguous array template.
Definition DArray.h:32
void sqSqAbsV(Array< double > &a, Array< fftw_complex > const &b)
Fourth power of absolute magnitude, a[i] = |b[i]|^4 (complex).
Definition VecOpCx.cpp:714
void expVc(Array< double > &a, Array< double > const &b, const double c)
Exponentiation of a scaled vector, a[i] = exp(b[i]*c) (real).
Definition VecOp.cpp:319
void divEqVc(Array< fftw_complex > &a, Array< double > const &b, double c)
Vector division in-place w/ coeff., a[i] /= (b[i] * c).
Definition VecOpCx.cpp:730
void addEqVc(Array< double > &a, Array< double > const &b, const double c)
Add scaled vector in-place, a[i] += b[i]*c (real).
Definition VecOp.cpp:395
void mulVVPair(Array< double > &a1, Array< double > &a2, Array< double > const &b1, Array< double > const &b2, Array< double > const &c)
Vector multiplication in pairs, ax[i] = bx[i] * s[i], x=1,2.
Definition VecOp.cpp:463
void eqVPair(Array< double > &a1, Array< double > &a2, Array< double > const &b)
Vector assignment in pairs, ax[i] = b[i], x = 1, 2.
Definition VecOp.cpp:446
void addVMany(DeviceArray< cudaReal > &a, DArray< DeviceArray< cudaReal > > const &vecs)
Add an arbitrary number of vectors pointwise (real).
Definition VecOpMisc.cu:577
void mulVMany(DeviceArray< cudaReal > &a, DArray< DeviceArray< cudaReal > > const &vecs)
Multiply an undefined number of vectors pointwise (real).
Definition VecOpMisc.cu:643
void mulEqVPair(Array< double > &a1, Array< double > &a2, Array< double > const &b)
In-place vector multiplication in pairs, ax[i] *= b[i], x=1,2.
Definition VecOp.cpp:484
Vector operations on GPU or CPU.
Definition VecOp.cpp:14
void addVcVcS(Array< double > &a, Array< double > const &b1, const double c1, Array< double > const &b2, const double c2, const double s)
Add scaled vectors + scalar, a[i] = b1[i]*c1 + b2[i]*c2 + s (real).
Definition VecOp.cpp:409
void addVcS(Array< double > &a, Array< double > const &b, const double c, const double s)
Add a scaled vector and a scalar, a[i] = b[i]*c + s (real).
Definition VecOp.cpp:380
void addVcVc(Array< double > &a, Array< double > const &b1, const double c1, Array< double > const &b2, const double c2)
Add two scaled vectors, a[i] = b1[i]*c1 + b2[i]*c2 (real).
Definition VecOp.cpp:364
void addVcVcVc(Array< double > &a, Array< double > const &b1, const double c1, Array< double > const &b2, const double c2, Array< double > const &b3, const double c3)
Add scaled vectors, a[i] = b1[i]*c1 + b2[i]*c2 + b3[i]*c3 (real).
Definition VecOp.cpp:426
PSCF package top-level namespace.
cufftDoubleReal cudaReal
Real number type used in GPU code that uses cuFFT.
Definition cudaTypes.h:35