PSCF v1.2
rpg/solvers/Block.tpp
1#ifndef RPG_BLOCK_TPP
2#define RPG_BLOCK_TPP
3
4/*
5* PSCF - Polymer Self-Consistent Field Theory
6*
7* Copyright 2016 - 2022, The Regents of the University of Minnesota
8* Distributed under the terms of the GNU General Public License.
9*/
10
11#include "Block.h"
12#include <prdc/cuda/resources.h>
13#include <pscf/mesh/Mesh.h>
14#include <pscf/mesh/MeshIterator.h>
15
16namespace Pscf {
17namespace Rpg {
18
19 using namespace Util;
20 using namespace Pscf::Prdc;
21
22 // CUDA kernels:
23 // (defined in anonymous namespace, used only in this file)
24
25 namespace {
26
27 /*
28 * Element-wise calculation of a = real(b * conj(c) * d), CUDA kernel
29 */
30 __global__ void _realMulVConjVV(cudaReal* a, cudaComplex const * b,
31 cudaComplex const * c,
32 cudaReal const * d, const int n)
33 {
34 int nThreads = blockDim.x * gridDim.x;
35 int startID = blockIdx.x * blockDim.x + threadIdx.x;
36 cudaComplex bt, ct;
37 for (int i = startID; i < n; i += nThreads) {
38 // Load complex numbers from global memory to local
39 // (this way the accessed memory is contiguous, rather than
40 // accessing the x or y elements individually, which are not
41 // contiguous)
42 bt = b[i];
43 ct = c[i];
44
45 // Perform calculation
46 a[i] = ((bt.x * ct.x) + (bt.y * ct.y)) * d[i];
47 }
48 }
49
50 /*
51 * Performs qNew = (4 * (qr2 * expW2) - qr) / 3 elementwise, CUDA kernel
52 */
53 __global__ void _richardsonEx(cudaReal* qNew, cudaReal const * qr,
54 cudaReal const * qr2,
55 cudaReal const * expW2, const int n)
56 {
57 int nThreads = blockDim.x * gridDim.x;
58 int startID = blockIdx.x * blockDim.x + threadIdx.x;
59 cudaReal q2;
60 for (int i = startID; i < n; i += nThreads) {
61 q2 = qr2[i] * expW2[i];
62 qNew[i] = (4.0 * q2 - qr[i]) / 3.0;
63 }
64 }
65
66 /*
67 * Performs a[i] += b[i] * c[i] * d. CUDA kernel
68 */
69 __global__ void _addEqMulVVc(cudaReal* a, cudaReal const * b,
70 cudaReal const * c, cudaReal const d,
71 const int n)
72 {
73 int nThreads = blockDim.x * gridDim.x;
74 int startID = blockIdx.x * blockDim.x + threadIdx.x;
75 for(int i = startID; i < n; i += nThreads) {
76 a[i] += b[i] * c[i] * d;
77 }
78 }
79
80 }
81
82 // CUDA kernel wrappers:
83
84 /*
85 * Element-wise calculation of a = real(b * conj(c) * d), kernel wrapper
86 */
90 DeviceArray<cudaReal> const & d)
91 {
92 int n = a.capacity();
93 UTIL_CHECK(b.capacity() >= n);
94 UTIL_CHECK(c.capacity() >= n);
95 UTIL_CHECK(d.capacity() >= n);
96
97 // GPU resources
98 int nBlocks, nThreads;
99 ThreadArray::setThreadsLogical(n, nBlocks, nThreads);
100
101 // Launch kernel
102 _realMulVConjVV<<<nBlocks, nThreads>>>(a.cArray(), b.cArray(),
103 c.cArray(), d.cArray(), n);
104 }
105
106 /*
107 * Performs qNew = (4 * (qr2 * expW2) - qr) / 3 elementwise, kernel wrapper
108 */
110 DeviceArray<cudaReal> const & qr,
111 DeviceArray<cudaReal> const & qr2,
112 DeviceArray<cudaReal> const & expW2)
113 {
114 int n = qNew.capacity();
115 UTIL_CHECK(qr.capacity() == n);
116 UTIL_CHECK(qr2.capacity() == n);
117 UTIL_CHECK(expW2.capacity() == n);
118
119 // GPU resources
120 int nBlocks, nThreads;
121 ThreadArray::setThreadsLogical(n, nBlocks, nThreads);
122
123 // Launch kernel
124 _richardsonEx<<<nBlocks, nThreads>>>(qNew.cArray(), qr.cArray(),
125 qr2.cArray(), expW2.cArray(), n);
126 }
127
128 /*
129 * Performs a[i] += b[i] * c[i] * d, kernel wrapper
130 */
132 DeviceArray<cudaReal> const & c, cudaReal const d)
133 {
134 int n = a.capacity();
135 UTIL_CHECK(b.capacity() >= n);
136 UTIL_CHECK(c.capacity() >= n);
137
138 // GPU resources
139 int nBlocks, nThreads;
140 ThreadArray::setThreadsLogical(n, nBlocks, nThreads);
141
142 // Launch kernel
143 _addEqMulVVc<<<nBlocks, nThreads>>>(a.cArray(), b.cArray(),
144 c.cArray(), d, n);
145 }
146
147 // Block<D> member functions:
148
149 /*
150 * Constructor.
151 */
152 template <int D>
154 : meshPtr_(nullptr),
155 fftPtr_(nullptr),
156 unitCellPtr_(nullptr),
157 waveListPtr_(nullptr),
158 kMeshDimensions_(0),
159 kSize_(0),
160 ds_(0.0),
161 dsTarget_(0.0),
162 ns_(0),
163 isAllocated_(false),
164 hasExpKsq_(false),
165 useBatchedFFT_(true),
166 nParams_(0)
167 {
168 propagator(0).setBlock(*this);
169 propagator(1).setBlock(*this);
170 }
171
172 /*
173 * Destructor.
174 */
175 template <int D>
178
179 template <int D>
180 void Block<D>::associate(Mesh<D> const & mesh, FFT<D> const & fft,
181 UnitCell<D> const & cell, WaveList<D>& wavelist)
182 {
183 UTIL_CHECK(!isAllocated_);
184 UTIL_CHECK(mesh.size() > 1);
185 UTIL_CHECK(fft.isSetup());
186 UTIL_CHECK(mesh.dimensions() == fft.meshDimensions());
187
188 nParams_ = cell.nParameter();
189 UTIL_CHECK(nParams_ > 0);
190
191 // store pointers to associated objects
192 meshPtr_ = &mesh;
193 fftPtr_ = &fft;
194 unitCellPtr_ = &cell;
195 waveListPtr_ = &wavelist;
196
197 // Compute Fourier space kMeshDimensions_ and kSize_
198 kSize_ = 1;
199 for (int i = 0; i < D; ++i) {
200 if (i < D - 1) {
201 kMeshDimensions_[i] = mesh.dimensions()[i];
202 } else {
203 kMeshDimensions_[i] = mesh.dimensions()[i]/2 + 1;
204 }
205 kSize_ *= kMeshDimensions_[i];
206 }
207
208 hasExpKsq_ = false;
209 }
210
211 template <int D>
212 void Block<D>::allocate(double ds, bool useBatchedFFT)
213 {
214 UTIL_CHECK(meshPtr_);
215 UTIL_CHECK(unitCellPtr_);
216 UTIL_CHECK(ds > 0.0);
217 UTIL_CHECK(!isAllocated_);
218
219 // Store useBatchedFFT
220 useBatchedFFT_ = useBatchedFFT;
221
222 // Set contour length discretization for this block
223 dsTarget_ = ds;
224 int tempNs;
225 tempNs = floor(length() / (2.0 * ds) + 0.5);
226 if (tempNs == 0) {
227 tempNs = 1; // ensure at least 3 contour steps per chain
228 }
229 ns_ = 2*tempNs + 1;
230
231 ds_ = length()/double(ns_ - 1);
232
233 // Setup fftBatched objects
234 UTIL_CHECK(!fftBatchedPair_.isSetup());
235 fftBatchedPair_.setup(mesh().dimensions(), 2);
236 if (useBatchedFFT_) {
237 UTIL_CHECK(!fftBatchedAll_.isSetup());
238 fftBatchedAll_.setup(mesh().dimensions(), ns_);
239 }
240
241 // Allocate work arrays
242 expKsq_.allocate(kMeshDimensions_);
243 expKsq2_.allocate(kMeshDimensions_);
244 expW_.allocate(mesh().dimensions());
245 expW2_.allocate(mesh().dimensions());
246 qrPair_.allocate(2 * mesh().size());
247 qkPair_.allocate(2 * kSize_);
248 q1_.allocate(mesh().dimensions());
249 q2_.allocate(mesh().dimensions());
250
251 propagator(0).allocate(ns_, mesh());
252 propagator(1).allocate(ns_, mesh());
253
254 cField().allocate(mesh().dimensions());
255
256 if (useBatchedFFT_) {
257 qkBatched_.allocate(ns_ * kSize_);
258 qk2Batched_.allocate(ns_ * kSize_);
259 }
260
261 expKsq_h_.allocate(kSize_);
262 expKsq2_h_.allocate(kSize_);
263
264 isAllocated_ = true;
265 hasExpKsq_ = false;
266 }
267
268 /*
269 * Clear all internal data that depends on lattice parameters.
270 */
271 template <int D>
273 {
274 UTIL_CHECK(unitCellPtr_);
275 UTIL_CHECK(nParams_ == unitCell().nParameter());
276 hasExpKsq_ = false;
277 }
278
279 /*
280 * Set or reset the the block length.
281 */
282 template <int D>
283 void Block<D>::setLength(double newLength)
284 {
286
287 if (isAllocated_) { // if allocate() has already been called
288 // Reset contour length discretization
289 UTIL_CHECK(dsTarget_ > 0);
290 int oldNs = ns_;
291 int tempNs;
292 tempNs = floor(length() / (2.0 * dsTarget_) + 0.5);
293 if (tempNs == 0) {
294 tempNs = 1; // ensure at least 3 contour steps per chain
295 }
296 ns_ = 2*tempNs + 1;
297 ds_ = length()/double(ns_-1);
298
299 if (oldNs != ns_) {
300 // If propagators are already allocated and ns_ has changed,
301 // reallocate memory for solutions to MDE
302 propagator(0).reallocate(ns_);
303 propagator(1).reallocate(ns_);
304
305 // If using batched FFTs, resize arrays and change batch size
306 if (useBatchedFFT_) {
307 UTIL_CHECK(fftBatchedAll_.isSetup());
308 qkBatched_.deallocate();
309 qk2Batched_.deallocate();
310 qkBatched_.allocate(ns_ * kSize_);
311 qk2Batched_.allocate(ns_ * kSize_);
312 fftBatchedAll_.resetBatchSize(ns_);
313 }
314 }
315 }
316
317 hasExpKsq_ = false;
318 }
319
320 /*
321 * Set or reset monomer statistical segment length.
322 */
323 template <int D>
324 void Block<D>::setKuhn(double kuhn)
325 {
326 BlockTmpl< Propagator<D> >::setKuhn(kuhn);
327 hasExpKsq_ = false;
328 }
329
330 template <int D>
332 {
333 UTIL_CHECK(isAllocated_);
334 UTIL_CHECK(unitCellPtr_);
335 UTIL_CHECK(unitCellPtr_->isInitialized());
336
337 // Calculate kSq if necessary
338 if (!waveListPtr_->hasKSq()) {
339 waveListPtr_->computeKSq();
340 }
341
342 double factor = -1.0*kuhn()*kuhn()*ds_/6.0;
343
344 // Calculate expKsq values on device
345 VecOp::expVc(expKsq_, waveListPtr_->kSq(), factor);
346 VecOp::expVc(expKsq2_, waveListPtr_->kSq(), factor / 2.0);
347
348 hasExpKsq_ = true;
349 }
350
351 /*
352 * Setup the contour length step algorithm.
353 */
354 template <int D>
355 void
357 {
358 // Preconditions
359 int nx = mesh().size();
360 UTIL_CHECK(nx > 0);
361 UTIL_CHECK(isAllocated_);
362
363 // Populate expW_
364 VecOp::expVc(expW_, w, -0.5 * ds_);
365 VecOp::expVc(expW2_, w, -0.25 * ds_);
366
367 // Compute expKsq arrays if necessary
368 if (!hasExpKsq_) {
369 computeExpKsq();
370 }
371
372 }
373
374 /*
375 * Integrate to calculate monomer concentration for this block
376 */
377 template <int D>
378 void Block<D>::computeConcentration(double prefactor)
379 {
380 // Preconditions
381 int nx = mesh().size();
382 UTIL_CHECK(nx > 0);
383 UTIL_CHECK(ns_ > 0);
384 UTIL_CHECK(ds_ > 0);
385 UTIL_CHECK(propagator(0).isSolved());
386 UTIL_CHECK(propagator(1).isSolved());
387 UTIL_CHECK(cField().capacity() == nx);
388
389 // Initialize cField to zero at all points
390 VecOp::eqS(cField(), 0.0);
391
392 Pscf::Rpg::Propagator<D> const & p0 = propagator(0);
393 Pscf::Rpg::Propagator<D> const & p1 = propagator(1);
394
395 addEqMulVVc(cField(), p0.q(0), p1.q(ns_ - 1), 1.0);
396 addEqMulVVc(cField(), p0.q(ns_ - 1), p1.q(0), 1.0);
397
398 for (int j = 1; j < ns_ - 1; j += 2) {
399 // Odd indices
400 addEqMulVVc(cField(), p0.q(j), p1.q(ns_ - 1 - j), 4.0);
401 }
402 for (int j = 2; j < ns_ - 2; j += 2) {
403 // Even indices
404 addEqMulVVc(cField(), p0.q(j), p1.q(ns_ - 1 - j), 2.0);
405 }
406
407 VecOp::mulEqS(cField(), (prefactor * ds_/3.0));
408 }
409
410 /*
411 * Propagate solution by one step.
412 */
413 template <int D>
414 void Block<D>::step(RField<D> const & q, RField<D>& qNew)
415 {
416 // Preconditions
417 UTIL_CHECK(isAllocated_);
418 UTIL_CHECK(hasExpKsq_);
419
420 // Check real-space mesh sizes
421 int nx = mesh().size();
422 UTIL_CHECK(nx > 0);
423 UTIL_CHECK(fft().isSetup());
424 UTIL_CHECK(fft().meshDimensions() == mesh().dimensions());
425 UTIL_CHECK(qrPair_.capacity() == nx * 2);
426 UTIL_CHECK(qkPair_.capacity() == kSize_ * 2);
427 UTIL_CHECK(expW_.capacity() == nx);
428 UTIL_CHECK(expKsq_.capacity() == kSize_);
429 UTIL_CHECK(fftBatchedPair_.isSetup());
430
431 // Set up associated workspace fields
432 RField<D> qr, qr2;
433 RFieldDft<D> qk, qk2;
434 qr.associate(qrPair_, 0, mesh().dimensions());
435 qr2.associate(qrPair_, nx, mesh().dimensions());
436 qk.associate(qkPair_, 0, mesh().dimensions());
437 qk2.associate(qkPair_, kSize_, mesh().dimensions());
438
439 // Apply pseudo-spectral algorithm
440 VecOp::mulVVPair(qr, qr2, expW_, expW2_, q); // qr = expW*q, qr2 = expW2*q
441 fftBatchedPair_.forwardTransform(qrPair_, qkPair_); // to Fourier space
442 VecOp::mulEqV(qk, expKsq_); // qk *= expKsq
443 VecOp::mulEqV(qk2, expKsq2_); // qk2 *= expKsq2
444 fftBatchedPair_.inverseTransformUnsafe(qkPair_, qrPair_); // to real space
445 VecOp::mulEqVPair(qr, qr2, expW_); // qr *= expW, qr2 *= expW
446 fft().forwardTransform(qr2, qk2); // to Fourier space, only qr2
447 VecOp::mulEqV(qk2, expKsq2_); // qk2 *= expKsq2
448 fft().inverseTransformUnsafe(qk2, qr2); // to real space, only qr2
449 richardsonEx(qNew, qr, qr2, expW2_); // qNew=(4*(qr2*expW2)-qr)/3
450 }
451
452 /*
453 * Compute stress contribution from this block.
454 */
455 template <int D>
456 void Block<D>::computeStress(double prefactor)
457 {
458 int nx = mesh().size();
459
460 // Preconditions
461 UTIL_CHECK(isAllocated_);
462 UTIL_CHECK(nx > 0);
463 UTIL_CHECK(kSize_ > 0);
464 UTIL_CHECK(ns_ > 0);
465 UTIL_CHECK(ds_ > 0);
466 UTIL_CHECK(nParams_ > 0);
467 UTIL_CHECK(mesh().dimensions() == fft().meshDimensions());
468 UTIL_CHECK(propagator(0).isSolved());
469 UTIL_CHECK(propagator(1).isSolved());
470
471 // Calculate dKSq if necessary
472 if (!waveListPtr_->hasdKSq()) {
473 waveListPtr_->computedKSq();
474 }
475
476 // Workspace variables
477 double dels, normal, increment;
478 normal = 3.0*6.0;
479 Pscf::Rpg::Propagator<D>& p0 = propagator(0);
480 Pscf::Rpg::Propagator<D>& p1 = propagator(1);
482 int i, j, n;
483 RField<D> rTmp(kMeshDimensions_); // array of real values on kgrid
484 RFieldDft<D> qk, qk2;
485
486 // Initialize dQ and stress to 0
487 stress_.clear();
488 for (i = 0; i < nParams_; ++i) {
489 dQ.append(0.0);
490 stress_.append(0.0);
491 }
492
493 if (useBatchedFFT_) {
494 // Get q at all contour points in Fourier space for both propagators
495 UTIL_CHECK(fftBatchedAll_.isSetup());
496 UTIL_CHECK(mesh().dimensions() == fftBatchedAll_.meshDimensions());
497 fftBatchedAll_.forwardTransform(p0.qAll(), qkBatched_);
498 fftBatchedAll_.forwardTransform(p1.qAll(), qk2Batched_);
499 } else {
500 // Allocate qk and qk2 to store results of individual FFTs
501 qk.allocate(mesh().dimensions());
502 qk2.allocate(mesh().dimensions());
503 }
504
505 // Main loop over contour points
506 for (j = 0; j < ns_ ; ++j) {
507
508 if (useBatchedFFT_) { // FFTs have already been calculated
509 // Associate qk and qk2 with sections of qkBatched_ and qk2Batched_
510 qk.associate(qkBatched_, j * kSize_, mesh().dimensions());
511 qk2.associate(qk2Batched_, (ns_-1-j) * kSize_, mesh().dimensions());
512 } else {
513 // Get q at contour point j in Fourier space for both propagators
514 UTIL_CHECK(fft().isSetup());
515 fft().forwardTransform(p0.q(j), qk);
516 fft().forwardTransform(p1.q(ns_-1-j), qk2);
517 }
518
519 dels = ds_;
520 if (j != 0 && j != ns_ - 1) {
521 if (j % 2 == 0) {
522 dels = dels*2.0;
523 } else {
524 dels = dels*4.0;
525 }
526 }
527
528 for (n = 0; n < nParams_ ; ++n) {
529 // Launch kernel to evaluate dQ for each basis function
530 realMulVConjVV(rTmp, qk, qk2, waveListPtr_->dKSq(n));
531
532 // Get the sum of all elements
533 increment = Reduce::sum(rTmp);
534 increment *= kuhn() * kuhn() * dels / normal;
535 dQ[n] -= increment;
536 }
537
538 if (useBatchedFFT_) {
539 qk.dissociate();
540 qk2.dissociate();
541 }
542 }
543
544 // Normalize
545 for (i = 0; i < nParams_; ++i) {
546 stress_[i] -= (dQ[i] * prefactor);
547 }
548 }
549
550}
551}
552#endif
virtual void setLength(double length)
Set the length of this block.
Class template for a block in a block copolymer.
Definition BlockTmpl.h:106
Propagator< D > & propagator(int directionId)
Definition BlockTmpl.h:191
Dynamic array on the GPU device with aligned data.
Definition rpg/System.h:32
int capacity() const
Return allocated capacity.
Data * cArray()
Return pointer to underlying C array.
Description of a regular grid of points in a periodic domain.
IntVec< D > dimensions() const
Get an IntVec<D> of the grid dimensions.
Definition Mesh.h:217
int size() const
Get total number of grid points.
Definition Mesh.h:229
Fourier transform wrapper.
bool isSetup() const
Has this FFT object been setup?
Definition cpu/FFT.h:210
IntVec< D > const & meshDimensions() const
Return the dimensions of the grid for which this was setup.
Definition cpu/FFT.h:217
Fourier transform of a real field on an FFT mesh.
void allocate(IntVec< D > const &meshDimensions)
Allocate the underlying C array and set mesh dimensions.
Field of real double precision values on an FFT mesh.
int nParameter() const
Get the number of parameters in the unit cell.
Base template for UnitCell<D> classes, D=1, 2 or 3.
Definition rpg/System.h:34
Block within a branched polymer.
void setLength(double newLength)
Set or reset block length.
void computeConcentration(double prefactor)
Compute unnormalized concentration for block by integration.
void allocate(double ds, bool useBatchedFFT=true)
Allocate internal data containers.
void setKuhn(double kuhn)
Set or reset monomer statistical segment length.
void associate(Mesh< D > const &mesh, FFT< D > const &fft, UnitCell< D > const &cell, WaveList< D > &wavelist)
Associate this object with a mesh, FFT, UnitCell, and WaveList object.
void computeStress(double prefactor)
Compute derivatives of free energy w/ respect to cell parameters.
void setupSolver(RField< D > const &w)
Set solver for this block.
void step(RField< D > const &q, RField< D > &qNew)
Compute step of integration loop, from i to i+1.
void clearUnitCellData()
Clear all internal data that depends on lattice parameters.
MDE solver for one-direction of one block.
DeviceArray< cudaReal > const & qAll()
Return the full array of q-fields (after propagator is solved).
RField< D > const & q(int i) const
Return const q-field at specified step by reference (after solving).
Class to calculate and store properties of wavevectors.
Definition WaveList.h:39
A fixed capacity (static) contiguous array with a variable logical size.
Definition rpg/System.h:28
void append(Data const &data)
Append data to the end of the array.
Definition FSArray.h:258
#define UTIL_CHECK(condition)
Assertion macro suitable for serial or parallel production code.
Definition global.h:68
void setThreadsLogical(int nThreadsLogical)
Given total number of threads, set 1D execution configuration.
int nThreads()
Get the number of threads per block for execution.
cudaReal sum(DeviceArray< cudaReal > const &in)
Compute sum of array elements (GPU kernel wrapper).
Definition Reduce.cu:480
void expVc(DeviceArray< cudaReal > &a, DeviceArray< cudaReal > const &b, cudaReal const c)
Vector exponentiation w/ coefficient, a[i] = exp(b[i]*c), kernel wrapper.
Definition VecOpMisc.cu:393
void mulVVPair(DeviceArray< cudaReal > &a1, DeviceArray< cudaReal > &a2, DeviceArray< cudaReal > const &b1, DeviceArray< cudaReal > const &b2, DeviceArray< cudaReal > const &s)
Vector multiplication in pairs, ax[i] = bx[i] * s[i], kernel wrapper.
Definition VecOpMisc.cu:426
void mulEqS(DeviceArray< cudaReal > &a, cudaReal const b, const int beginIdA, const int n)
Vector multiplication in-place, a[i] *= b, kernel wrapper (cudaReal).
Definition VecOp.cu:1875
void mulEqV(DeviceArray< cudaReal > &a, DeviceArray< cudaReal > const &b, const int beginIdA, const int beginIdB, const int n)
Vector multiplication in-place, a[i] *= b[i], kernel wrapper (cudaReal).
Definition VecOp.cu:1824
void eqS(DeviceArray< cudaReal > &a, cudaReal const b, const int beginIdA, const int n)
Vector assignment, a[i] = b, kernel wrapper (cudaReal).
Definition VecOp.cu:1054
void mulEqVPair(DeviceArray< cudaReal > &a1, DeviceArray< cudaReal > &a2, DeviceArray< cudaReal > const &s)
In-place vector multiplication in pairs, ax[i] *= s[i], kernel wrapper.
Definition VecOpMisc.cu:448
Periodic fields and crystallography.
Definition CField.cpp:11
void richardsonEx(DeviceArray< cudaReal > &qNew, DeviceArray< cudaReal > const &qr, DeviceArray< cudaReal > const &qr2, DeviceArray< cudaReal > const &expW2)
Performs qNew = (4 * (qr2 * expW2) - qr) / 3 elementwise, kernel wrapper.
void realMulVConjVV(DeviceArray< cudaReal > &a, DeviceArray< cudaComplex > const &b, DeviceArray< cudaComplex > const &c, DeviceArray< cudaReal > const &d)
Element-wise calculation of a = real(b * conj(c) * d), kernel wrapper.
void addEqMulVVc(DeviceArray< cudaReal > &a, DeviceArray< cudaReal > const &b, DeviceArray< cudaReal > const &c, cudaReal const d)
Performs a[i] += b[i] * c[i] * d, kernel wrapper.
PSCF package top-level namespace.
Definition param_pc.dox:1
Utility classes for scientific computation.