PSCF v1.4.0
rpg/solvers/Block.tpp
1#ifndef RPG_BLOCK_TPP
2#define RPG_BLOCK_TPP
3
4/*
5* PSCF - Polymer Self-Consistent Field
6*
7* Copyright 2015 - 2025, The Regents of the University of Minnesota
8* Distributed under the terms of the GNU General Public License.
9*/
10
11#include "Block.h"
12#include "Propagator.h"
13
14#include <prdc/cuda/WaveList.h>
15#include <prdc/cuda/FFT.h>
16#include <prdc/cuda/resources.h>
17#include <prdc/crystal/UnitCell.h>
18
19#include <pscf/mesh/Mesh.h>
20#include <pscf/mesh/MeshIterator.h>
21#include <pscf/solvers/BlockTmpl.tpp>
22
23namespace Pscf {
24namespace Rpg {
25
26 using namespace Util;
27 using namespace Pscf::Prdc;
28 using namespace Pscf::Prdc::Cuda;
29
30 // CUDA kernels (anonymous namespace - only used in this file)
31 namespace {
32
33 /*
34 * Element-wise calculation of a = real(b * conj(c) * d), CUDA kernel
35 */
36 __global__ void _realMulVConjVV(cudaReal* a,
37 cudaComplex const * b,
38 cudaComplex const * c,
39 cudaReal const * d, const int n)
40 {
41 int nThreads = blockDim.x * gridDim.x;
42 int startID = blockIdx.x * blockDim.x + threadIdx.x;
43 cudaComplex bt, ct;
44 for (int i = startID; i < n; i += nThreads) {
45 // Load complex numbers from global memory to local
46 // (this way the accessed memory is contiguous, rather than
47 // accessing the x or y elements individually, which are not
48 // contiguous)
49 bt = b[i];
50 ct = c[i];
51
52 // Perform calculation
53 a[i] = ((bt.x * ct.x) + (bt.y * ct.y)) * d[i];
54 }
55 }
56
57 /*
58 * Performs qNew = (4*(qr2 * expW2)-qr)/3 elementwise, CUDA kernel
59 */
60 __global__ void _richardsonEx(cudaReal* qNew,
61 cudaReal const * qr,
62 cudaReal const * qr2,
63 cudaReal const * expW2, const int n)
64 {
65 int nThreads = blockDim.x * gridDim.x;
66 int startID = blockIdx.x * blockDim.x + threadIdx.x;
67 cudaReal q2;
68 for (int i = startID; i < n; i += nThreads) {
69 q2 = qr2[i] * expW2[i];
70 qNew[i] = (4.0 * q2 - qr[i]) / 3.0;
71 }
72 }
73
74 /*
75 * Performs a[i] += b[i] * c[i] * d. CUDA kernel
76 */
77 __global__ void _addEqMulVVc(cudaReal* a,
78 cudaReal const * b,
79 cudaReal const * c,
80 const cudaReal d,
81 const int n)
82 {
83 int nThreads = blockDim.x * gridDim.x;
84 int startID = blockIdx.x * blockDim.x + threadIdx.x;
85 for (int i = startID; i < n; i += nThreads) {
86 a[i] += b[i] * c[i] * d;
87 }
88 }
89
90 /*
91 * Element-wise calculation of a[i] += b[i]*c[i]* d[i], CUDA kernel
92 */
93 __global__ void _addEqMulVVV(cudaReal* a,
94 cudaReal const * b,
95 cudaReal const * c,
96 cudaReal const * d,
97 const int n)
98 {
99 int nThreads = blockDim.x * gridDim.x;
100 int startID = blockIdx.x * blockDim.x + threadIdx.x;
101 for (int i = startID; i < n; i += nThreads) {
102 a[i] += b[i]*c[i]*d[i];
103 }
104 }
105
106 }
107
108 // CUDA kernel wrappers:
109
110 /*
111 * Element-wise calculation of a = real(b * conj(c) * d), kernel wrapper
112 *
113 * \param a output array (real)
114 * \param b input array 1 (complex)
115 * \param c input array 2 (complex)
116 * \param d input array 3 (real)
117 */
118 void realMulVConjVV(DeviceArray<cudaReal>& a,
119 DeviceArray<cudaComplex> const & b,
120 DeviceArray<cudaComplex> const & c,
121 DeviceArray<cudaReal> const & d)
122 {
123 int n = a.capacity();
124 UTIL_CHECK(b.capacity() >= n);
125 UTIL_CHECK(c.capacity() >= n);
126 UTIL_CHECK(d.capacity() >= n);
127
128 // GPU resources
129 int nBlocks, nThreads;
130 ThreadArray::setThreadsLogical(n, nBlocks, nThreads);
131
132 // Launch kernel
133 _realMulVConjVV<<<nBlocks, nThreads>>>(a.cArray(), b.cArray(),
134 c.cArray(), d.cArray(), n);
135 }
136
137 /*
138 * Performs qNew = (4 * (qr2 * expW2) - qr) / 3 elementwise, kernel wrapper
139 *
140 * \param qNew output array (a propagator slice)
141 * \param qr input array 1 (a propagator slice)
142 * \param qr2 input array 2 (a propagator slice)
143 * \param expW2 input array 3 (exp(-W[i]*ds/4) array)
144 */
145 void richardsonEx(DeviceArray<cudaReal>& qNew,
146 DeviceArray<cudaReal> const & qr,
147 DeviceArray<cudaReal> const & qr2,
148 DeviceArray<cudaReal> const & expW2)
149 {
150 int n = qNew.capacity();
151 UTIL_CHECK(qr.capacity() == n);
152 UTIL_CHECK(qr2.capacity() == n);
153 UTIL_CHECK(expW2.capacity() == n);
154
155 // GPU resources
156 int nBlocks, nThreads;
157 ThreadArray::setThreadsLogical(n, nBlocks, nThreads);
158
159 // Launch kernel
160 _richardsonEx<<<nBlocks, nThreads>>>(qNew.cArray(), qr.cArray(),
161 qr2.cArray(), expW2.cArray(),
162 n);
163 }
164
165 /*
166 * Performs a[i] += b[i] * c[i] * d, kernel wrapper
167 *
168 * \param a output array
169 * \param b input array 1
170 * \param c input array 2
171 * \param d input scalar
172 */
173 void addEqMulVVc(DeviceArray<cudaReal>& a,
174 DeviceArray<cudaReal> const & b,
175 DeviceArray<cudaReal> const & c,
176 cudaReal const d)
177 {
178 int n = a.capacity();
179 UTIL_CHECK(b.capacity() >= n);
180 UTIL_CHECK(c.capacity() >= n);
181
182 // GPU resources
183 int nBlocks, nThreads;
184 ThreadArray::setThreadsLogical(n, nBlocks, nThreads);
185
186 // Launch kernel
187 _addEqMulVVc<<<nBlocks, nThreads>>>(a.cArray(), b.cArray(),
188 c.cArray(), d, n);
189 }
190
191 /*
192 * Element-wise calculation of a[i] = b[i]*c[i]*d[i], kernel wrapper
193 */
194 void addEqMulVVV(DeviceArray<cudaReal>& a,
195 DeviceArray<cudaReal> const & b,
196 DeviceArray<cudaReal> const & c,
197 DeviceArray<cudaReal> const & d)
198 {
199 int n = a.capacity();
200 UTIL_CHECK(b.capacity() >= n);
201 UTIL_CHECK(c.capacity() >= n);
202 UTIL_CHECK(d.capacity() >= n);
203
204 // GPU resources
205 int nBlocks, nThreads;
206 ThreadArray::setThreadsLogical(n, nBlocks, nThreads);
207
208 // Launch kernel
209 _addEqMulVVV<<<nBlocks, nThreads>>>(a.cArray(), b.cArray(),
210 c.cArray(), d.cArray(), n);
211 }
212
213 // Block<D> member functions:
214
215 /*
216 * Constructor.
217 */
218 template <int D>
220 : meshPtr_(nullptr),
221 fftPtr_(nullptr),
222 unitCellPtr_(nullptr),
223 waveListPtr_(nullptr),
224 kMeshDimensions_(0),
225 kSize_(0),
226 ds_(0.0),
227 dsTarget_(0.0),
228 ns_(0),
229 nParams_(0),
230 isAllocated_(false),
231 hasExpKsq_(false),
232 useBatchedFFT_(true)
233 {
234 propagator(0).setBlock(*this);
235 propagator(1).setBlock(*this);
236 }
237
238 /*
239 * Destructor.
240 */
241 template <int D>
244
245 template <int D>
246 void Block<D>::associate(Mesh<D> const & mesh,
247 FFT<D> const & fft,
248 UnitCell<D> const & cell,
249 WaveList<D>& waveList)
250 {
251 UTIL_CHECK(!isAllocated_);
252 UTIL_CHECK(mesh.size() > 1);
253 UTIL_CHECK(fft.isSetup());
254 UTIL_CHECK(mesh.dimensions() == fft.meshDimensions());
255
256 nParams_ = cell.nParameter();
257 UTIL_CHECK(nParams_ > 0);
258
259 // Store pointers to associated objects
260 meshPtr_ = &mesh;
261 fftPtr_ = &fft;
262 unitCellPtr_ = &cell;
263 waveListPtr_ = &waveList;
264
265 hasExpKsq_ = false;
266 }
267
268 template <int D>
269 void Block<D>::allocate(double ds, bool useBatchedFFT)
270 {
271 UTIL_CHECK(meshPtr_);
272 UTIL_CHECK(unitCellPtr_);
273 UTIL_CHECK(ds > 0.0);
274 UTIL_CHECK(!isAllocated_);
275
276 // Store useBatchedFFT
277 useBatchedFFT_ = useBatchedFFT;
278
279 // Compute k-space grid dimensions (kMeshDimensions_) and size_
280 FFT<D>::computeKMesh(mesh().dimensions(), kMeshDimensions_, kSize_);
281
282 // Allocate work arrays
283 expW_.allocate(mesh().dimensions());
284 expKsq_.allocate(kMeshDimensions_);
285 expKsq2_.allocate(kMeshDimensions_);
287 expW2_.allocate(mesh().dimensions());
288 qrPair_.allocate(2 * mesh().size());
289 qkPair_.allocate(2 * kSize_);
290 } else
291 if (PolymerModel::isBead()) {
292 expWInv_.allocate(mesh().dimensions());
293 qk_.allocate(mesh().dimensions());
294 }
295
296 // Allocate space for block monomer concentration
297 cField().allocate(mesh().dimensions());
298
299 // Compute ns_
300 dsTarget_ = ds;
302
303 // Set contour length discretization for this block
304 const double length = Edge::length();
305 UTIL_CHECK(length > 0.0);
306 int tempNs;
307 tempNs = floor(length / (2.0 * ds) + 0.5);
308 if (tempNs == 0) {
309 tempNs = 1; // ensure ns_ >= 3
310 }
311 ns_ = 2*tempNs + 1;
312 ds_ = length/double(ns_ - 1);
313
314 } else
315 if (PolymerModel::isBead()) {
316
317 ds_ = ds;
318 ns_ = Edge::nBead() + 2;
319
320 }
321
322 // Allocate memory for solutions of MDE (requires ns_)
323 propagator(0).allocate(ns_, mesh());
324 propagator(1).allocate(ns_, mesh());
325
326 // Setup fftBatchedPair_
327 UTIL_CHECK(!fftBatchedPair_.isSetup());
328 fftBatchedPair_.setup(mesh().dimensions(), 2);
329
330 // Setup batched data used for stress calculation
331 if (useBatchedFFT_) {
332 UTIL_CHECK(!fftBatchedAll_.isSetup());
333 fftBatchedAll_.setup(mesh().dimensions(), ns_);
334 q0kBatched_.allocate(ns_ * kSize_);
335 q1kBatched_.allocate(ns_ * kSize_);
336 }
337
338 isAllocated_ = true;
339 hasExpKsq_ = false;
340 }
341
342 /*
343 * Set or reset the the block length.
344 */
345 template <int D>
346 void Block<D>::setLength(double newLength)
347 {
348 // Precondition
350
351 Edge::setLength(newLength);
352 const double length = Edge::length();
353
354 if (isAllocated_) { // if allocate() has already been called
355 // Reset contour length discretization
356 UTIL_CHECK(dsTarget_ > 0);
357 int oldNs = ns_;
358 int tempNs;
359 tempNs = floor(length / (2.0 * dsTarget_) + 0.5);
360 if (tempNs == 0) {
361 tempNs = 1; // ensure at least 3 contour steps per chain
362 }
363 ns_ = 2*tempNs + 1;
364 ds_ = length/double(ns_-1);
365
366 if (oldNs != ns_) {
367 // If propagators are already allocated and ns_ has changed,
368 // reallocate memory for solutions to MDE
369 propagator(0).reallocate(ns_);
370 propagator(1).reallocate(ns_);
371
372 // If using batched FFTs, resize arrays and change batch size
373 if (useBatchedFFT_) {
374 UTIL_CHECK(fftBatchedAll_.isSetup());
375 q0kBatched_.deallocate();
376 q1kBatched_.deallocate();
377 q0kBatched_.allocate(ns_ * kSize_);
378 q1kBatched_.allocate(ns_ * kSize_);
379 fftBatchedAll_.resetBatchSize(ns_);
380 }
381 }
382 }
383
384 hasExpKsq_ = false;
385 }
386
387 /*
388 * Set or reset monomer statistical segment length.
389 */
390 template <int D>
392 {
394 hasExpKsq_ = false;
395 }
396
397 /*
398 * Clear all internal data that depends on lattice parameters.
399 */
400 template <int D>
402 {
403 UTIL_CHECK(unitCellPtr_);
404 UTIL_CHECK(nParams_ == unitCell().nParameter());
405 hasExpKsq_ = false;
406 stress_.clear();
407 }
408
409 /*
410 * Compute all elements of expKsq_ and expKsq2_ arrays
411 */
412 template <int D>
413 void Block<D>::computeExpKsq()
414 {
415 UTIL_CHECK(isAllocated_);
416 UTIL_CHECK(waveListPtr_);
417
418 // Calculate kSq if necessary
419 if (!waveList().hasKSq()) {
420 waveList().computeKSq();
421 }
422
423 // Compute bSqFactor
424 const double b = BlockTmplT::kuhn();
425 double bSqFactor = -1.0 * b * b / 6.0;
427 bSqFactor *= ds_;
428 }
429
430 // Calculate expKsq values on device
431 VecOp::expVc(expKsq_, waveList().kSq(), bSqFactor);
432 VecOp::expVc(expKsq2_, waveList().kSq(), bSqFactor / 2.0);
433
434 hasExpKsq_ = true;
435 }
436
437 /*
438 * Setup the contour length step algorithm.
439 */
440 template <int D>
441 void
443 {
444 // Preconditions
445 int nx = mesh().size();
446 UTIL_CHECK(nx > 0);
447 UTIL_CHECK(isAllocated_);
448
449 // Populate expW_
451 VecOp::expVc(expW_, w, -0.5 * ds_);
452 VecOp::expVc(expW2_, w, -0.25 * ds_);
453 } else {
454 VecOp::expVc(expW_, w, -1.0);
455 VecOp::divSV(expWInv_, 1.0, expW_);
456 }
457
458 // Compute expKsq arrays if necessary
459 if (!hasExpKsq_) {
460 computeExpKsq();
461 }
462
463 }
464
465 /*
466 * Propagate solution by one step.
467 */
468 template <int D>
469 void Block<D>::stepThread(RField<D> const & qin, RField<D>& qout) const
470 {
471 // Preconditions
472 UTIL_CHECK(isAllocated_);
473 UTIL_CHECK(hasExpKsq_);
474 UTIL_CHECK(meshPtr_);
475 int nx = mesh().size();
476 UTIL_CHECK(nx > 0);
477 UTIL_CHECK(fft().isSetup());
478 UTIL_CHECK(fft().meshDimensions() == mesh().dimensions());
479 UTIL_CHECK(qrPair_.capacity() == nx * 2);
480 UTIL_CHECK(qkPair_.capacity() == kSize_ * 2);
481 UTIL_CHECK(expW_.capacity() == nx);
482 UTIL_CHECK(expKsq_.capacity() == kSize_);
483 UTIL_CHECK(fftBatchedPair_.isSetup());
484
485 // Set up associated workspace fields slices
486 RField<D> qr, qr2;
487 RFieldDft<D> qk, qk2;
488 qr.associate(qrPair_, 0, mesh().dimensions());
489 qr2.associate(qrPair_, nx, mesh().dimensions());
490 qk.associate(qkPair_, 0, mesh().dimensions());
491 qk2.associate(qkPair_, kSize_, mesh().dimensions());
492
493 // Apply pseudo-spectral algorithm
494 // qr = expW*q, qr2 = expW2*q
495 VecOp::mulVVPair(qr, qr2, expW_, expW2_, qin);
496 fftBatchedPair_.forwardTransform(qrPair_, qkPair_);
497 VecOp::mulEqV(qk, expKsq_);
498 VecOp::mulEqV(qk2, expKsq2_);
499 fftBatchedPair_.inverseTransformUnsafe(qkPair_, qrPair_);
500 VecOp::mulEqVPair(qr, qr2, expW_); // qr *= expW, qr2 *= expW
501 fft().forwardTransform(qr2, qk2);
502 VecOp::mulEqV(qk2, expKsq2_); // qk2 *= expKsq2
503 fft().inverseTransformUnsafe(qk2, qr2);
504 richardsonEx(qout, qr, qr2, expW2_); // qout=(4*(qr2*expW2)-qr)/3
505
506 // Explcitly dissociate workspace field slices
507 qr.dissociate();
508 qr2.dissociate();
509 qk.dissociate();
510 qk2.dissociate();
511 }
512
513 /*
514 * Apply one step of the MDE solution for the bead model.
515 */
516 template <int D>
517 void Block<D>::stepBead(RField<D> const & qin, RField<D>& qout) const
518 {
519 stepBondBead(qin, qout);
520 stepFieldBead(qout);
521 }
522
523 /*
524 * Apply the field operator for the bead model.
525 */
526 template <int D>
528 {
529 // Preconditions
530 int nx = mesh().size();
531 UTIL_CHECK(expW_.capacity() == nx);
532 UTIL_CHECK(q.capacity() == nx);
533
534 VecOp::mulEqV(q, expW_); // q *= expW
535 }
536
537 /*
538 * Apply the bond operator for the bead model.
539 */
540 template <int D>
541 void Block<D>::stepBondBead(RField<D> const & qin, RField<D>& qout) const
542 {
543 // Preconditions
544 UTIL_CHECK(isAllocated_);
545 UTIL_CHECK(hasExpKsq_);
546 UTIL_CHECK(meshPtr_);
547 int nx = mesh().size();
548 UTIL_CHECK(fft().isSetup());
549 UTIL_CHECK(fft().meshDimensions() == mesh().dimensions());
550 UTIL_CHECK(nx > 0);
551 UTIL_CHECK(qin.capacity() == nx);
552 UTIL_CHECK(qout.capacity() == nx);
553 UTIL_CHECK(qk_.isAllocated());
554 UTIL_CHECK(qk_.capacity() == kSize_);
555 UTIL_CHECK(expKsq_.capacity() == kSize_);
556
557 // Apply full bond operator
558 fft().forwardTransform(qin, qk_);
559 VecOp::mulEqV(qk_, expKsq_); // qk *= expKsq
560 fft().inverseTransformUnsafe(qk_, qout); // overwrites qk_
561 }
562
563 /*
564 * Apply the half-bond operator for the bead model.
565 */
566 template <int D>
567 void Block<D>::stepHalfBondBead(RField<D> const & qin, RField<D>& qout) const
568 {
569 // Preconditions
570 UTIL_CHECK(isAllocated_);
571 UTIL_CHECK(hasExpKsq_);
572 int nx = mesh().size();
573 UTIL_CHECK(nx > 0);
574 UTIL_CHECK(qin.capacity() == nx);
575 UTIL_CHECK(qout.capacity() == nx);
576 UTIL_CHECK(qk_.isAllocated());
577 UTIL_CHECK(qk_.capacity() == kSize_);
578 UTIL_CHECK(expKsq_.capacity() == kSize_);
579
580 // Apply half-bond operator
581 fft().forwardTransform(qin, qk_);
582 VecOp::mulEqV(qk_, expKsq2_); // qk *= expKsq2
583 fft().inverseTransformUnsafe(qk_, qout); // overwrites qk_
584 }
585
586 /*
587 * Integrate to calculate monomer concentration for this block
588 */
589 template <int D>
591 {
592 // Preconditions
593 int nx = mesh().size();
594 UTIL_CHECK(nx > 0);
595 UTIL_CHECK(ns_ > 0);
596 UTIL_CHECK(propagator(0).isSolved());
597 UTIL_CHECK(propagator(1).isSolved());
598 UTIL_CHECK(cField().capacity() == nx);
599
600 // Initialize cField to zero at all points
601 VecOp::eqS(cField(), 0.0);
602
603 // References to forward and reverse propagators
604 Pscf::Rpg::Propagator<D> const & p0 = propagator(0);
605 Pscf::Rpg::Propagator<D> const & p1 = propagator(1);
606
607 addEqMulVVc(cField(), p0.q(0), p1.q(ns_ - 1), 1.0);
608 addEqMulVVc(cField(), p0.q(ns_ - 1), p1.q(0), 1.0);
609
610 for (int j = 1; j < ns_ - 1; j += 2) {
611 // Odd indices
612 addEqMulVVc(cField(), p0.q(j), p1.q(ns_ - 1 - j), 4.0);
613 }
614 for (int j = 2; j < ns_ - 2; j += 2) {
615 // Even indices
616 addEqMulVVc(cField(), p0.q(j), p1.q(ns_ - 1 - j), 2.0);
617 }
618
619 VecOp::mulEqS(cField(), (prefactor * ds_/3.0));
620 }
621
622 /*
623 * Integrate to calculate monomer concentration for this block
624 */
625 template <int D>
627 {
628 // Preconditions
629 int nx = mesh().size();
630 UTIL_CHECK(nx > 0);
631 UTIL_CHECK(ns_ > 0);
632 UTIL_CHECK(propagator(0).isSolved());
633 UTIL_CHECK(propagator(1).isSolved());
634 UTIL_CHECK(cField().capacity() == nx);
635
636 // Initialize cField to zero at all points
637 VecOp::eqS(cField(), 0.0);
638
639 // References to forward and reverse propagators
640 Pscf::Rpg::Propagator<D> const & p0 = propagator(0);
641 Pscf::Rpg::Propagator<D> const & p1 = propagator(1);
642
643 // Internal beads (j = 1, ..., nbead, with nbead = ns_ -2)
644 for (int j = 1; j < ns_ - 1; ++j) {
645 addEqMulVVV(cField(), p0.q(j), p1.q(ns_ - 1 - j), expWInv_);
646 }
647
648 // Scale cField() by prefactor
649 VecOp::mulEqS(cField(), prefactor);
650 }
651
652 /*
653 * Compute stress contribution from this block.
654 */
655 template <int D>
656 void Block<D>::computeStressThread(double prefactor)
657 {
659 UTIL_CHECK(meshPtr_);
660 int nx = mesh().size();
661
662 // Preconditions
663 UTIL_CHECK(isAllocated_);
664 UTIL_CHECK(nx > 0);
665 UTIL_CHECK(kSize_ > 0);
666 UTIL_CHECK(ns_ > 0);
667 UTIL_CHECK(ds_ > 0);
668 UTIL_CHECK(nParams_ > 0);
669 UTIL_CHECK(mesh().dimensions() == fft().meshDimensions());
670
671 // References to forward and reverse propagators
674 UTIL_CHECK(p0.isSolved());
675 UTIL_CHECK(p1.isSolved());
676
677 // Calculate dKSq if necessary
678 if (!waveList().hasdKSq()) {
679 waveList().computedKSq();
680 }
681
682 // Workspace variables
683 double dels, normal, increment;
684 normal = 3.0*6.0;
686 int i, j, n;
687 RField<D> rTmp(kMeshDimensions_); // array of real values on kgrid
688
689 // Initialize dQ and stress to 0
690 stress_.clear();
691 for (i = 0; i < nParams_; ++i) {
692 dQ.append(0.0);
693 stress_.append(0.0);
694 }
695
696 if (useBatchedFFT_) {
697
698 UTIL_CHECK(fftBatchedAll_.isSetup());
699 UTIL_CHECK(mesh().dimensions() == fftBatchedAll_.meshDimensions());
700 UTIL_CHECK(!q0k_.isAllocated());
701 UTIL_CHECK(!q1k_.isAllocated());
702 // In this case, containers q0k_ and q1k_ will be associated
703 // with slices of q0kBatched_, q1kBatched_
704
705 // Computed batched FFT for propagators in both directions
706 fftBatchedAll_.forwardTransform(p0.qAll(), q0kBatched_);
707 fftBatchedAll_.forwardTransform(p1.qAll(), q1kBatched_);
708
709 } else {
710
711 if (!q0k_.isAllocated()) {
712 q0k_.allocate(mesh().dimensions());
713 q1k_.allocate(mesh().dimensions());
714 }
715
716 }
717
718 // Main loop over contour points
719 for (j = 0; j < ns_ ; ++j) {
720
721 if (useBatchedFFT_) {
722 // Batched FFTs have already been computed
723 // Associate q0k_, q1k_ with slices of q0kBatched_, q1kBatched_
724 q0k_.associate(q0kBatched_, j * kSize_, mesh().dimensions());
725 q1k_.associate(q1kBatched_, (ns_-1-j) * kSize_,
726 mesh().dimensions());
727 } else {
728 // Compute Fourier transforms at contour grid point j
729 UTIL_CHECK(fft().isSetup());
730 fft().forwardTransform(p0.q(j), q0k_);
731 fft().forwardTransform(p1.q(ns_-1-j), q1k_);
732 }
733
734 // Compute prefactor for Simpson's rule
735 dels = ds_;
736 if (j != 0 && j != ns_ - 1) {
737 if (j % 2 == 0) {
738 dels = dels*2.0;
739 } else {
740 dels = dels*4.0;
741 }
742 }
743
744 // Increment stress contributions for all unit cell parameters
745 const double b = BlockTmplT::kuhn();
746 for (n = 0; n < nParams_ ; ++n) {
747
748 // Launch kernel to evaluate dQ at all wavevectors
749 realMulVConjVV(rTmp, q0k_, q1k_, waveList().dKSq(n));
750
751 // Get the sum of all elements
752 increment = Reduce::sum(rTmp);
753 increment *= b * b * dels / normal;
754 dQ[n] -= increment;
755 }
756
757 if (useBatchedFFT_) {
758 q0k_.dissociate();
759 q1k_.dissociate();
760 }
761
762 } // end loop over contour points
763
764 // Normalize total stress values
765 for (i = 0; i < nParams_; ++i) {
766 stress_[i] -= (dQ[i] * prefactor);
767 }
768
769 }
770
771 /*
772 * Compute stress contribution from this block, in bead model.
773 */
774 template <int D>
775 void Block<D>::computeStressBead(double prefactor)
776 {
777 // Preconditions
779 UTIL_CHECK(isAllocated_);
780 int nx = mesh().size();
781 IntVec<D> const & meshDim = mesh().dimensions();
782 UTIL_CHECK(nx > 0);
783 UTIL_CHECK(kSize_ > 0);
784 UTIL_CHECK(nParams_ > 0);
785 UTIL_CHECK(meshDim == fft().meshDimensions());
786 UTIL_CHECK(ns_ > 0);
787
788 // References to forward and reverse propagators
791 UTIL_CHECK(p0.isSolved());
792 UTIL_CHECK(p1.isSolved());
793
794 // Compute batched FFTs, or check allocation of work space
795 if (useBatchedFFT_) {
796 UTIL_CHECK(fftBatchedAll_.isSetup());
797 UTIL_CHECK(meshDim == fftBatchedAll_.meshDimensions());
798 UTIL_CHECK(!q0k_.isAllocated());
799 UTIL_CHECK(!q1k_.isAllocated());
800 // In this case, containers q0k_ and q1k_ will be associated
801 // with slices of q0kBatched_, q1kBatched_
802
803 // Computed batched FFT for propagators in both directions
804 fftBatchedAll_.forwardTransform(p0.qAll(), q0kBatched_);
805 fftBatchedAll_.forwardTransform(p1.qAll(), q1kBatched_);
806 } else {
807 if (!q0k_.isAllocated()) {
808 q0k_.allocate(meshDim);
809 q1k_.allocate(meshDim);
810 }
811 }
812
813 // Calculate dKSq if necessary
814 if (!waveList().hasdKSq()) {
815 waveList().computedKSq();
816 }
817
818 // Initialize dQ to zero
820 dQ.clear();
821 for (int i = 0; i < nParams_; ++i) {
822 dQ.append(0.0);
823 }
824
825 RField<D> rTmp(kMeshDimensions_); // k-space work space
826 const double b = BlockTmplT::kuhn();
827 const double bSq = b * b / 6.0;
828 double increment;
829
830 // Half bond from head junction to first bead, if not a chain end
831 if (!p0.isHeadEnd()) {
832 if (useBatchedFFT_) {
833 // Batched FFTs have already been computed
834 // Associate q0k_, q1k_ with slices of q0kBatched_, q1kBatched_
835 q0k_.associate(q0kBatched_, 0, meshDim);
836 q1k_.associate(q1kBatched_, (ns_- 2) * kSize_, meshDim);
837 } else {
838 // Compute Fourier transforms at contour grid point j
839 UTIL_CHECK(fft().isSetup());
840 fft().forwardTransform(p0.q(0), q0k_);
841 fft().forwardTransform(p1.q(ns_ - 2), q1k_);
842 }
843 for (int n = 0; n < nParams_ ; ++n) {
844 realMulVConjVV(rTmp, q0k_, q1k_, waveList().dKSq(n));
845 VecOp::mulEqV(rTmp, expKsq2_);
846 increment = Reduce::sum(rTmp);
847 increment *= 0.5*bSq;
848 dQ[n] -= increment;
849 }
850 if (useBatchedFFT_) {
851 q0k_.dissociate();
852 q1k_.dissociate();
853 }
854 }
855
856 // Loop over all nBead - 1 full bonds within this block
857 for (int j = 1; j < ns_ - 2; ++j) {
858
859 // Retrieve or compute FFTs at two neighboring slices
860 if (useBatchedFFT_) {
861 // Batched FFTs have already been computed
862 // Associate q0k_, q1k_ with slices of q0kBatched_, q1kBatched_
863 q0k_.associate(q0kBatched_, j * kSize_, meshDim);
864 q1k_.associate(q1kBatched_, (ns_- 2 -j) * kSize_, meshDim);
865 } else {
866 // Compute Fourier transforms of p0, p1 at neighboring beads
867 UTIL_CHECK(fft().isSetup());
868 fft().forwardTransform(p0.q(j), q0k_);
869 fft().forwardTransform(p1.q(ns_ - 2 - j), q1k_);
870 }
871
872 // Loop over unit cell parameters
873 for (int n = 0; n < nParams_ ; ++n) {
874
875 // Compute contributions to dQ at all wavevectors, in rTmp
876 realMulVConjVV(rTmp, q0k_, q1k_, waveList().dKSq(n));
877 VecOp::mulEqV(rTmp, expKsq_);
878
879 // Evaluate sum of over wavevectors
880 increment = Reduce::sum(rTmp);
881 increment *= bSq;
882 dQ[n] -= increment;
883
884 }
885
886 if (useBatchedFFT_) {
887 // Dissociate to allow re-association
888 q0k_.dissociate();
889 q1k_.dissociate();
890 }
891
892 } // end loop over contour points
893
894 // Half bond from last bead to tail, if not a chain end
895 if (!p0.isTailEnd()) {
896 if (useBatchedFFT_) {
897 q0k_.associate(q0kBatched_, (ns_-2)*kSize_, meshDim);
898 q1k_.associate(q1kBatched_, 0, meshDim);
899 } else {
900 UTIL_CHECK(fft().isSetup());
901 fft().forwardTransform(p0.q(ns_-2), q0k_);
902 fft().forwardTransform(p1.q(0), q1k_);
903 }
904 for (int n = 0; n < nParams_ ; ++n) {
905 realMulVConjVV(rTmp, q0k_, q1k_, waveList().dKSq(n));
906 VecOp::mulEqV(rTmp, expKsq2_);
907 increment = Reduce::sum(rTmp);
908 increment *= 0.5*bSq;
909 dQ[n] -= increment;
910 }
911 if (useBatchedFFT_) {
912 q0k_.dissociate();
913 q1k_.dissociate();
914 }
915 }
916
917 // Normalize total stress values
918 stress_.clear();
919 for (int i = 0; i < nParams_; ++i) {
920 stress_.append(-1.0*prefactor*dQ[i]);
921 }
922
923 }
924
925}
926}
927#endif
Dynamic array on the GPU device with aligned data.
Definition DeviceArray.h:96
void dissociate()
Dissociate this object from an externally owned memory block.
int capacity() const
Return array capacity.
Data * cArray()
Return pointer to underlying C array.
int nBead() const
Get the number of beads in this block, in the bead model.
Definition Edge.h:302
virtual void setLength(double length)
Set the length of this block (only valid for thread model).
Definition Edge.cpp:62
double length() const
Get the length of this block, in the thread model.
Definition Edge.h:311
An IntVec<D, T> is a D-component vector of elements of integer type T.
Definition IntVec.h:27
Description of a regular grid of points in a periodic domain.
Definition Mesh.h:61
Fourier transform wrapper for real or complex data.
Definition cuda/FFT.h:38
static void computeKMesh(IntVec< D > const &rMeshDimensions, IntVec< D > &kMeshDimensions, int &kSize)
Compute dimensions and size of k-space mesh for DFT of real data.
Definition cuda/FFT.tpp:278
Discrete Fourier Transform (DFT) of a real field, allocated on a GPU.
void associate(DeviceArray< cudaComplex > &arr, int beginId, IntVec< D > const &meshDimensions)
Associate this object with a slice of another DeviceArray.
Field of real values on a regular mesh, allocated on a GPU device.
Definition cuda/RField.h:33
void associate(DeviceArray< cudaReal > &arr, int beginId, IntVec< D > const &meshDimensions)
Associate this object with a slice of another DeviceArray.
Class to compute and store properties associated with wavevectors.
int nParameter() const
Get the number of parameters in the unit cell.
Base template for UnitCell<D> classes, D=1, 2 or 3.
Definition UnitCell.h:56
bool isHeadEnd() const
Is the head vertex a chain end?
bool isSolved() const
Has the modified diffusion equation been solved?
bool isTailEnd() const
Is the tail vertex a chain end?
const T::RField & q(int i) const
Return q-field at a specified step.
void setLength(double newLength)
Set or reset block length (only used in thread model).
double ds() const
Contour length step size.
void allocate(double ds, bool useBatchedFFT=true)
Allocate memory and set contour step size for thread model.
void setKuhn(double kuhn)
Set or reset monomer statistical segment length.
Propagator< D > & propagator(int directionId)
Get a Propagator for a specified direction.
void stepBondBead(RField< D > const &qin, RField< D > &qout) const
Compute a bond operator for the bead model.
void computeStressBead(double prefactor)
Compute stress contribution for this block, using bead model.
void stepFieldBead(RField< D > &q) const
Apply the exponential field operator for the bead model.
void stepThread(RField< D > const &qin, RField< D > &qout) const
Compute step of integration loop, from i to i+1.
void setupSolver(RField< D > const &w)
Set solver for this block.
void associate(Mesh< D > const &mesh, FFT< D > const &fft, UnitCell< D > const &cell, WaveList< D > &waveList)
Create permanent associations with related objects.
void computeConcentrationBead(double prefactor)
Compute the concentration for this block, using the bead model.
void computeConcentrationThread(double prefactor)
Compute unnormalized concentration for block by integration.
void clearUnitCellData()
Clear all internal data that depends on lattice parameters.
RField< D > & cField()
Get the associated monomer concentration field.
void stepHalfBondBead(RField< D > const &qin, RField< D > &qout) const
Compute a half-bond operator for the bead model.
void computeStressThread(double prefactor)
Compute stress contribution for this block, using thread model.
void stepBead(RField< D > const &qin, RField< D > &qout) const
Compute one step of solution of MDE for the bead model.
MDE solver for one direction of one block.
DeviceArray< cudaReal > const & qAll()
Return the full array of q-fields as an unrolled 1D array.
A fixed capacity (static) contiguous array with a variable logical size.
Definition FSArray.h:38
void clear()
Set logical size to zero.
Definition FSArray.h:271
void append(Data const &data)
Append data to the end of the array.
Definition FSArray.h:258
#define UTIL_CHECK(condition)
Assertion macro suitable for serial or parallel production code.
Definition global.h:68
double sum(Array< double > const &in)
Compute sum of array elements (real).
Definition Reduce.cpp:20
void mulEqV(Array< double > &a, Array< double > const &b)
Vector-vector in-place multiplication, a[i] *= b[i] (real).
Definition VecOp.cpp:252
void mulEqS(Array< double > &a, double b)
Vector-scalar in-place multiplication, a[i] *= b (real).
Definition VecOp.cpp:265
void expVc(Array< double > &a, Array< double > const &b, const double c)
Exponentiation a scaled vector, a[i] = exp(b[i]*c) (real).
Definition VecOp.cpp:319
void eqS(Array< double > &a, double b)
Vector assignment, a[i] = b (real).
Definition VecOp.cpp:50
void divSV(Array< double > &a, double b, Array< double > const &c)
Vector division, a[i] = b / c[i].
Definition VecOp.cpp:183
void setThreadsLogical(int nThreadsLogical)
Given total number of threads, set 1D execution configuration.
int nThreads()
Get the number of threads per block for execution.
int nBlocks()
Get the current number of blocks for execution.
void mulVVPair(Array< double > &a1, Array< double > &a2, Array< double > const &b1, Array< double > const &b2, Array< double > const &c)
Vector multiplication in pairs, ax[i] = bx[i] * s[i], x=1,2.
Definition VecOp.cpp:463
void mulEqVPair(Array< double > &a1, Array< double > &a2, Array< double > const &b)
In-place vector multiplication in pairs, ax[i] *= b[i], x=1,2.
Definition VecOp.cpp:484
bool isThread()
Is the thread model in use ?
bool isBead()
Is the bead model in use ?
Fields, FFTs, and utilities for periodic boundary conditions (CUDA).
Definition CField.cu:12
Periodic fields and crystallography.
Definition complex.cpp:11
SCFT and PS-FTS with real periodic fields (GPU)
PSCF package top-level namespace.
cufftDoubleComplex cudaComplex
Complex number type used in CPU code that uses FFTW.
Definition cudaTypes.h:22
cufftDoubleReal cudaReal
Real number type used in CPU code that uses FFTW.
Definition cudaTypes.h:35