PSCF v1.3
rpg/solvers/Block.tpp
1#ifndef RPG_BLOCK_TPP
2#define RPG_BLOCK_TPP
3
4/*
5* PSCF - Polymer Self-Consistent Field
6*
7* Copyright 2015 - 2025, The Regents of the University of Minnesota
8* Distributed under the terms of the GNU General Public License.
9*/
10
11#include "Block.h"
12
13#include <prdc/cuda/WaveList.h>
14#include <prdc/cuda/FFT.h>
15#include <prdc/cuda/resources.h>
16#include <prdc/crystal/UnitCell.h>
17
18#include <pscf/mesh/Mesh.h>
19#include <pscf/mesh/MeshIterator.h>
20
21namespace Pscf {
22namespace Rpg {
23
24 using namespace Util;
25 using namespace Pscf::Prdc;
26 using namespace Pscf::Prdc::Cuda;
27
28 // CUDA kernels:
29 // (defined in anonymous namespace, used only in this file)
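   //
   // Each kernel below uses a grid-stride loop: the total number of
   // threads launched (blockDim.x * gridDim.x) may be smaller than the
   // array size n, so each thread strides through the array and
   // processes every nThreads-th element.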
30
31 namespace {
32
33 /*
34 * Element-wise calculation of a = real(b * conj(c) * d), CUDA kernel
35 */
36 __global__ void _realMulVConjVV(cudaReal* a, cudaComplex const * b,
37 cudaComplex const * c,
38 cudaReal const * d, const int n)
39 {
40 int nThreads = blockDim.x * gridDim.x;
41 int startID = blockIdx.x * blockDim.x + threadIdx.x;
42 cudaComplex bt, ct;
43 for (int i = startID; i < n; i += nThreads) {
44 // Load complex numbers from global memory to local
45 // (this way the accessed memory is contiguous, rather than
46 // accessing the x or y elements individually, which are not
47 // contiguous)
48 bt = b[i];
49 ct = c[i];
50
51 // Perform calculation
52 a[i] = ((bt.x * ct.x) + (bt.y * ct.y)) * d[i];
53 }
54 }
55
56 /*
57 * Performs qNew = (4*(qr2 * expW2)-qr)/3 elementwise, CUDA kernel
58 */
59 __global__ void _richardsonEx(cudaReal* qNew,
60 cudaReal const * qr,
61 cudaReal const * qr2,
62 cudaReal const * expW2, const int n)
63 {
64 int nThreads = blockDim.x * gridDim.x;
65 int startID = blockIdx.x * blockDim.x + threadIdx.x;
66 cudaReal q2;
67 for (int i = startID; i < n; i += nThreads) {
68 q2 = qr2[i] * expW2[i];
69 qNew[i] = (4.0 * q2 - qr[i]) / 3.0;
70 }
71 }
72
73 /*
74 * Performs a[i] += b[i] * c[i] * d. CUDA kernel
75 */
76 __global__ void _addEqMulVVc(cudaReal* a,
77 cudaReal const * b,
78 cudaReal const * c,
79 const cudaReal d,
80 const int n)
81 {
82 int nThreads = blockDim.x * gridDim.x;
83 int startID = blockIdx.x * blockDim.x + threadIdx.x;
84 for (int i = startID; i < n; i += nThreads) {
85 a[i] += b[i] * c[i] * d;
86 }
87 }
88
89 /*
90 * Element-wise calculation of a[i] += b[i]*c[i]* d[i], CUDA kernel
91 */
92 __global__ void _addEqMulVVV(cudaReal* a,
93 cudaReal const * b,
94 cudaReal const * c,
95 cudaReal const * d,
96 const int n)
97 {
98 int nThreads = blockDim.x * gridDim.x;
99 int startID = blockIdx.x * blockDim.x + threadIdx.x;
100 for (int i = startID; i < n; i += nThreads) {
101 a[i] += b[i]*c[i]*d[i];
102 }
103 }
104
105 }
106
107 // CUDA kernel wrappers:
108
109 /*
110 * Element-wise calculation of a = real(b * conj(c) * d), kernel wrapper
111 *
112 * \param a output array (real)
113 * \param b input array 1 (complex)
114 * \param c input array 2 (complex)
115 * \param d input array 3 (real)
116 */
117 void realMulVConjVV(DeviceArray<cudaReal>& a,
118 DeviceArray<cudaComplex> const & b,
119 DeviceArray<cudaComplex> const & c,
120 DeviceArray<cudaReal> const & d)
121 {
122 int n = a.capacity();
123 UTIL_CHECK(b.capacity() >= n);
124 UTIL_CHECK(c.capacity() >= n);
125 UTIL_CHECK(d.capacity() >= n);
126
127 // GPU resources
128 int nBlocks, nThreads;
129 ThreadArray::setThreadsLogical(n, nBlocks, nThreads);
130
131 // Launch kernel
132 _realMulVConjVV<<<nBlocks, nThreads>>>(a.cArray(), b.cArray(),
133 c.cArray(), d.cArray(), n);
134 }
135
136 /*
137 * Performs qNew = (4 * (qr2 * expW2) - qr) / 3 elementwise, kernel wrapper
138 *
139 * \param qNew output array (a propagator slice)
140 * \param qr input array 1 (a propagator slice)
141 * \param qr2 input array 2 (a propagator slice)
142 * \param expW2 input array 3 (exp(-W[i]*ds/4) array)
143 */
144 void richardsonEx(DeviceArray<cudaReal>& qNew,
145 DeviceArray<cudaReal> const & qr,
146 DeviceArray<cudaReal> const & qr2,
147 DeviceArray<cudaReal> const & expW2)
148 {
149 int n = qNew.capacity();
150 UTIL_CHECK(qr.capacity() == n);
151 UTIL_CHECK(qr2.capacity() == n);
152 UTIL_CHECK(expW2.capacity() == n);
153
154 // GPU resources
155 int nBlocks, nThreads;
156 ThreadArray::setThreadsLogical(n, nBlocks, nThreads);
157
158 // Launch kernel
159 _richardsonEx<<<nBlocks, nThreads>>>(qNew.cArray(), qr.cArray(),
160 qr2.cArray(), expW2.cArray(), n);
161 }
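   // Note on the formula above: qr holds the result of one full
   // pseudo-spectral step of size ds, while qr2*expW2 completes the
   // same step taken as two half steps of size ds/2. The combination
   // (4*(qr2*expW2) - qr)/3 is a Richardson extrapolation, which
   // cancels the leading error term of the single full step.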
162
163 /*
164 * Performs a[i] += b[i] * c[i] * d, kernel wrapper
165 *
166 * \param a output array
167 * \param b input array 1
168 * \param c input array 2
169 * \param d input scalar
170 */
171 void addEqMulVVc(DeviceArray<cudaReal>& a, DeviceArray<cudaReal> const & b,
172 DeviceArray<cudaReal> const & c, cudaReal const d)
173 {
174 int n = a.capacity();
175 UTIL_CHECK(b.capacity() >= n);
176 UTIL_CHECK(c.capacity() >= n);
177
178 // GPU resources
179 int nBlocks, nThreads;
180 ThreadArray::setThreadsLogical(n, nBlocks, nThreads);
181
182 // Launch kernel
183 _addEqMulVVc<<<nBlocks, nThreads>>>(a.cArray(), b.cArray(),
184 c.cArray(), d, n);
185 }
186
187 /*
188 * Element-wise calculation of a[i] += b[i]*c[i]*d[i], kernel wrapper
189 */
190 void addEqMulVVV(DeviceArray<cudaReal>& a,
191 DeviceArray<cudaReal> const & b,
192 DeviceArray<cudaReal> const & c,
193 DeviceArray<cudaReal> const & d)
194 {
195 int n = a.capacity();
196 UTIL_CHECK(b.capacity() >= n);
197 UTIL_CHECK(c.capacity() >= n);
198 UTIL_CHECK(d.capacity() >= n);
199
200 // GPU resources
201 int nBlocks, nThreads;
202 ThreadArray::setThreadsLogical(n, nBlocks, nThreads);
203
204 // Launch kernel
205 _addEqMulVVV<<<nBlocks, nThreads>>>(a.cArray(), b.cArray(),
206 c.cArray(), d.cArray(), n);
207 }
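   // Illustrative usage of the wrappers above (a minimal sketch; the
   // array names and size are hypothetical, not taken from this file):
   //
   //    int n = 1024;
   //    DeviceArray<cudaReal> a, b, c, d;
   //    a.allocate(n); b.allocate(n); c.allocate(n); d.allocate(n);
   //    // ... fill b, c, and d on the device ...
   //    addEqMulVVV(a, b, c, d);     // a[i] += b[i]*c[i]*d[i]
   //    addEqMulVVc(a, b, c, 0.5);   // a[i] += 0.5*b[i]*c[i]
   //
   // Each wrapper checks capacities, chooses an execution configuration
   // via ThreadArray::setThreadsLogical, and launches its kernel.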
208
209 // Block<D> member functions:
210
211 /*
212 * Constructor.
213 */
214 template <int D>
215 Block<D>::Block()
216 : meshPtr_(nullptr),
217 fftPtr_(nullptr),
218 unitCellPtr_(nullptr),
219 waveListPtr_(nullptr),
220 kMeshDimensions_(0),
221 kSize_(0),
222 ds_(0.0),
223 dsTarget_(0.0),
224 ns_(0),
225 isAllocated_(false),
226 hasExpKsq_(false),
227 useBatchedFFT_(true),
228 nParams_(0)
229 {
230 propagator(0).setBlock(*this);
231 propagator(1).setBlock(*this);
232 }
233
234 /*
235 * Destructor.
236 */
237 template <int D>
238 Block<D>::~Block()
239 {}
240
241 template <int D>
242 void Block<D>::associate(Mesh<D> const & mesh, FFT<D> const & fft,
243 UnitCell<D> const & cell, WaveList<D>& wavelist)
244 {
245 UTIL_CHECK(!isAllocated_);
246 UTIL_CHECK(mesh.size() > 1);
247 UTIL_CHECK(fft.isSetup());
248 UTIL_CHECK(mesh.dimensions() == fft.meshDimensions());
249
250 nParams_ = cell.nParameter();
251 UTIL_CHECK(nParams_ > 0);
252
253 // Store pointers to associated objects
254 meshPtr_ = &mesh;
255 fftPtr_ = &fft;
256 unitCellPtr_ = &cell;
257 waveListPtr_ = &wavelist;
258
259 hasExpKsq_ = false;
260 }
261
262 template <int D>
263 void Block<D>::allocate(double ds, bool useBatchedFFT)
264 {
265 UTIL_CHECK(meshPtr_);
266 UTIL_CHECK(unitCellPtr_);
267 UTIL_CHECK(ds > 0.0);
268 UTIL_CHECK(!isAllocated_);
269
270 // Store useBatchedFFT
271 useBatchedFFT_ = useBatchedFFT;
272
273 // Compute k-space grid dimensions (kMeshDimensions_) and size (kSize_)
274 FFT<D>::computeKMesh(mesh().dimensions(), kMeshDimensions_, kSize_);
275
276 // Allocate work arrays
277 expW_.allocate(mesh().dimensions());
278 expKsq_.allocate(kMeshDimensions_);
279 expKsq2_.allocate(kMeshDimensions_);
280 if (PolymerModel::isThread()) {
281 expW2_.allocate(mesh().dimensions());
282 qrPair_.allocate(2 * mesh().size());
283 qkPair_.allocate(2 * kSize_);
284 } else
285 if (PolymerModel::isBead()) {
286 expWInv_.allocate(mesh().dimensions());
287 qk_.allocate(mesh().dimensions());
288 }
289
290 // Allocate space for block monomer concentration
291 cField().allocate(mesh().dimensions());
292
293 // Compute ns_
294 dsTarget_ = ds;
295 if (PolymerModel::isThread()) {
296
297 // Set contour length discretization for this block
298 UTIL_CHECK(length() > 0.0);
299 int tempNs;
300 tempNs = floor(length() / (2.0 * ds) + 0.5);
301 if (tempNs == 0) {
302 tempNs = 1; // ensure ns_ >= 3
303 }
304 ns_ = 2*tempNs + 1;
305 ds_ = length()/double(ns_ - 1);
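   // Note: ns_ = 2*tempNs + 1 is always odd, so the contour integral in
   // computeConcentrationThread spans an even number of intervals, as
   // required by Simpson's rule. The step ds_ is adjusted from the
   // target value so that (ns_ - 1)*ds_ equals the block length exactly.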
306
307 } else
308 if (PolymerModel::isBead()) {
309
310 ds_ = ds;
311 ns_ = nBead() + 2;
312
313 }
314
315 // Allocate memory for solutions of MDE (requires ns_)
316 propagator(0).allocate(ns_, mesh());
317 propagator(1).allocate(ns_, mesh());
318
319 // Setup fftBatchedPair_
320 UTIL_CHECK(!fftBatchedPair_.isSetup());
321 fftBatchedPair_.setup(mesh().dimensions(), 2);
322
323 // Setup batched data used for stress calculation
324 if (useBatchedFFT_) {
325 UTIL_CHECK(!fftBatchedAll_.isSetup());
326 fftBatchedAll_.setup(mesh().dimensions(), ns_);
327 q0kBatched_.allocate(ns_ * kSize_);
328 q1kBatched_.allocate(ns_ * kSize_);
329 }
330
331 isAllocated_ = true;
332 hasExpKsq_ = false;
333 }
334
335 /*
336 * Set or reset the block length.
337 */
338 template <int D>
339 void Block<D>::setLength(double newLength)
340 {
341 // Precondition
343
344 Edge::setLength(newLength);
345
346 if (isAllocated_) { // if allocate() has already been called
347 // Reset contour length discretization
348 UTIL_CHECK(dsTarget_ > 0);
349 int oldNs = ns_;
350 int tempNs;
351 tempNs = floor(length() / (2.0 * dsTarget_) + 0.5);
352 if (tempNs == 0) {
353 tempNs = 1; // ensure ns_ >= 3 (at least 3 contour points per block)
354 }
355 ns_ = 2*tempNs + 1;
356 ds_ = length()/double(ns_-1);
357
358 if (oldNs != ns_) {
359 // If propagators are already allocated and ns_ has changed,
360 // reallocate memory for solutions to MDE
361 propagator(0).reallocate(ns_);
362 propagator(1).reallocate(ns_);
363
364 // If using batched FFTs, resize arrays and change batch size
365 if (useBatchedFFT_) {
366 UTIL_CHECK(fftBatchedAll_.isSetup());
367 q0kBatched_.deallocate();
368 q1kBatched_.deallocate();
369 q0kBatched_.allocate(ns_ * kSize_);
370 q1kBatched_.allocate(ns_ * kSize_);
371 fftBatchedAll_.resetBatchSize(ns_);
372 }
373 }
374 }
375
376 hasExpKsq_ = false;
377 }
378
379 /*
380 * Set or reset monomer statistical segment length.
381 */
382 template <int D>
383 void Block<D>::setKuhn(double kuhn)
384 {
385 BlockTmpl< Propagator<D> >::setKuhn(kuhn);
386 hasExpKsq_ = false;
387 }
388
389 /*
390 * Clear all internal data that depends on lattice parameters.
391 */
392 template <int D>
393 void Block<D>::clearUnitCellData()
394 {
395 UTIL_CHECK(unitCellPtr_);
396 UTIL_CHECK(nParams_ == unitCell().nParameter());
397 hasExpKsq_ = false;
398 stress_.clear();
399 }
400
401 /*
402 * Compute all elements of expKsq_ and expKsq2_ arrays
403 */
404 template <int D>
405 void Block<D>::computeExpKsq()
406 {
407 UTIL_CHECK(isAllocated_);
408 UTIL_CHECK(waveListPtr_);
409
410 // Calculate kSq if necessary
411 if (!waveListPtr_->hasKSq()) {
412 waveListPtr_->computeKSq();
413 }
414
415 // Compute bSqFactor
416 double bSqFactor = -1.0 * kuhn() * kuhn() / 6.0;
417 if (PolymerModel::isThread()) {
418 bSqFactor *= ds_;
419 }
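   // With this convention, expKsq_[i] = exp(bSqFactor*kSq[i]) is the
   // k-space factor exp(-b^2 k^2 ds/6) for a full contour step in the
   // thread model, or exp(-b^2 k^2 / 6) for a full bond in the bead
   // model; expKsq2_ holds the corresponding half-step (or half-bond)
   // factor, obtained by halving the exponent.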
420
421 // Calculate expKsq values on device
422 VecOp::expVc(expKsq_, waveListPtr_->kSq(), bSqFactor);
423 VecOp::expVc(expKsq2_, waveListPtr_->kSq(), bSqFactor / 2.0);
424
425 hasExpKsq_ = true;
426 }
427
428 /*
429 * Setup the contour length step algorithm.
430 */
431 template <int D>
432 void
433 Block<D>::setupSolver(RField<D> const & w)
434 {
435 // Preconditions
436 int nx = mesh().size();
437 UTIL_CHECK(nx > 0);
438 UTIL_CHECK(isAllocated_);
439
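   // In the thread model, expW_ = exp(-w*ds/2) and expW2_ = exp(-w*ds/4)
   // are the field factors for full and half contour steps. In the bead
   // model, expW_ = exp(-w) is the Boltzmann weight applied once per
   // bead, and expWInv_ = 1/expW_ is saved for use in
   // computeConcentrationBead.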
440 // Populate expW_
441 if (PolymerModel::isThread()) {
442 VecOp::expVc(expW_, w, -0.5 * ds_);
443 VecOp::expVc(expW2_, w, -0.25 * ds_);
444 } else {
445 VecOp::expVc(expW_, w, -1.0);
446 VecOp::divSV(expWInv_, 1.0, expW_);
447 }
448
449 // Compute expKsq arrays if necessary
450 if (!hasExpKsq_) {
451 computeExpKsq();
452 }
453
454 }
455
456 /*
457 * Propagate solution by one step.
458 */
459 template <int D>
460 void Block<D>::stepThread(RField<D> const & qin, RField<D>& qout)
461 {
462 // Preconditions
463 UTIL_CHECK(isAllocated_);
464 UTIL_CHECK(hasExpKsq_);
465 UTIL_CHECK(meshPtr_);
466 int nx = mesh().size();
467 UTIL_CHECK(nx > 0);
468 UTIL_CHECK(fft().isSetup());
469 UTIL_CHECK(fft().meshDimensions() == mesh().dimensions());
470 UTIL_CHECK(qrPair_.capacity() == nx * 2);
471 UTIL_CHECK(qkPair_.capacity() == kSize_ * 2);
472 UTIL_CHECK(expW_.capacity() == nx);
473 UTIL_CHECK(expKsq_.capacity() == kSize_);
474 UTIL_CHECK(fftBatchedPair_.isSetup());
475
476 // Set up associated workspace field slices
477 RField<D> qr, qr2;
478 RFieldDft<D> qk, qk2;
479 qr.associate(qrPair_, 0, mesh().dimensions());
480 qr2.associate(qrPair_, nx, mesh().dimensions());
481 qk.associate(qkPair_, 0, mesh().dimensions());
482 qk2.associate(qkPair_, kSize_, mesh().dimensions());
483
484 // Apply pseudo-spectral algorithm
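   // The sequence below evaluates, schematically,
   //    q(s+ds) ~ exp(-w*ds/2) IFT[ exp(-b^2 k^2 ds/6) FT[ exp(-w*ds/2) q(s) ] ]
   // once with a full step of size ds (qr, qk) and once as two half
   // steps of size ds/2 (qr2, qk2), and then combines the two results
   // by Richardson extrapolation in richardsonEx.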
485 VecOp::mulVVPair(qr, qr2, expW_, expW2_, qin); // qr = expW*q, qr2 = expW2*q
486 fftBatchedPair_.forwardTransform(qrPair_, qkPair_);
487 VecOp::mulEqV(qk, expKsq_); // qk *= expKsq
488 VecOp::mulEqV(qk2, expKsq2_); // qk2 *= expKsq2
489 fftBatchedPair_.inverseTransformUnsafe(qkPair_, qrPair_);
490 VecOp::mulEqVPair(qr, qr2, expW_); // qr *= expW, qr2 *= expW
491 fft().forwardTransform(qr2, qk2); // to Fourier space, only qr2
492 VecOp::mulEqV(qk2, expKsq2_); // qk2 *= expKsq2
493 fft().inverseTransformUnsafe(qk2, qr2); // to real space, only qr2
494 richardsonEx(qout, qr, qr2, expW2_); // qout=(4*(qr2*expW2)-qr)/3
495 }
496
497 /*
498 * Apply one step of the MDE solution for the bead model.
499 */
500 template <int D>
501 void Block<D>::stepBead(RField<D> const & qin, RField<D>& qout)
502 {
503 stepBondBead(qin, qout);
504 stepFieldBead(qout);
505 }
506
507 /*
508 * Apply the field operator for the bead model.
509 */
510 template <int D>
511 void Block<D>::stepFieldBead(RField<D>& q)
512 {
513 // Preconditions
514 int nx = mesh().size();
515 UTIL_CHECK(expW_.capacity() == nx);
516 UTIL_CHECK(q.capacity() == nx);
517
518 VecOp::mulEqV(q, expW_); // q *= expW
519 }
520
521 /*
522 * Apply the bond operator for the bead model.
523 */
524 template <int D>
525 void Block<D>::stepBondBead(RField<D> const & qin, RField<D>& qout)
526 {
527 // Preconditions
528 UTIL_CHECK(isAllocated_);
529 UTIL_CHECK(hasExpKsq_);
530 UTIL_CHECK(meshPtr_);
531 int nx = mesh().size();
532 UTIL_CHECK(fft().isSetup());
533 UTIL_CHECK(fft().meshDimensions() == mesh().dimensions());
534 UTIL_CHECK(nx > 0);
535 UTIL_CHECK(qin.capacity() == nx);
536 UTIL_CHECK(qout.capacity() == nx);
537 UTIL_CHECK(qk_.isAllocated());
538 UTIL_CHECK(qk_.capacity() == kSize_);
539 UTIL_CHECK(expKsq_.capacity() == kSize_);
540
541 // Apply full bond operator
542 fft().forwardTransform(qin, qk_);
543 VecOp::mulEqV(qk_, expKsq_); // qk *= expKsq
544 fft().inverseTransformUnsafe(qk_, qout); // overwrites qk_
545 }
546
547 /*
548 * Apply the half-bond operator for the bead model.
549 */
550 template <int D>
551 void Block<D>::stepHalfBondBead(RField<D> const & qin, RField<D>& qout)
552 {
553 // Preconditions
554 UTIL_CHECK(isAllocated_);
555 UTIL_CHECK(hasExpKsq_);
556 int nx = mesh().size();
557 UTIL_CHECK(nx > 0);
558 UTIL_CHECK(qin.capacity() == nx);
559 UTIL_CHECK(qout.capacity() == nx);
560 UTIL_CHECK(qk_.isAllocated());
561 UTIL_CHECK(qk_.capacity() == kSize_);
562 UTIL_CHECK(expKsq_.capacity() == kSize_);
563
564 // Apply half-bond operator
565 fft().forwardTransform(qin, qk_);
566 VecOp::mulEqV(qk_, expKsq2_); // qk *= expKsq2
567 fft().inverseTransformUnsafe(qk_, qout); // overwrites qk_
568 }
569
570 /*
571 * Integrate to calculate monomer concentration for this block
572 */
573 template <int D>
574 void Block<D>::computeConcentrationThread(double prefactor)
575 {
576 // Preconditions
577 int nx = mesh().size();
578 UTIL_CHECK(nx > 0);
579 UTIL_CHECK(ns_ > 0);
580 UTIL_CHECK(propagator(0).isSolved());
581 UTIL_CHECK(propagator(1).isSolved());
582 UTIL_CHECK(cField().capacity() == nx);
583
584 // Initialize cField to zero at all points
585 VecOp::eqS(cField(), 0.0);
586
587 // References to forward and reverse propagators
588 Pscf::Rpg::Propagator<D> const & p0 = propagator(0);
589 Pscf::Rpg::Propagator<D> const & p1 = propagator(1);
590
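   // The sum below applies Simpson's rule to the contour integral of
   // q0(r,s)*q1(r,L-s): endpoint slices receive weight 1, odd interior
   // slices weight 4, even interior slices weight 2, and the total is
   // scaled by prefactor*ds_/3 at the end.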
591 addEqMulVVc(cField(), p0.q(0), p1.q(ns_ - 1), 1.0);
592 addEqMulVVc(cField(), p0.q(ns_ - 1), p1.q(0), 1.0);
593
594 for (int j = 1; j < ns_ - 1; j += 2) {
595 // Odd indices
596 addEqMulVVc(cField(), p0.q(j), p1.q(ns_ - 1 - j), 4.0);
597 }
598 for (int j = 2; j < ns_ - 2; j += 2) {
599 // Even indices
600 addEqMulVVc(cField(), p0.q(j), p1.q(ns_ - 1 - j), 2.0);
601 }
602
603 VecOp::mulEqS(cField(), (prefactor * ds_/3.0));
604 }
605
606 /*
607 * Sum over beads to calculate monomer concentration for this block
608 */
609 template <int D>
610 void Block<D>::computeConcentrationBead(double prefactor)
611 {
612 // Preconditions
613 int nx = mesh().size();
614 UTIL_CHECK(nx > 0);
615 UTIL_CHECK(ns_ > 0);
616 UTIL_CHECK(propagator(0).isSolved());
617 UTIL_CHECK(propagator(1).isSolved());
618 UTIL_CHECK(cField().capacity() == nx);
619
620 // Initialize cField to zero at all points
621 VecOp::eqS(cField(), 0.0);
622
623 // References to forward and reverse propagators
624 Pscf::Rpg::Propagator<D> const & p0 = propagator(0);
625 Pscf::Rpg::Propagator<D> const & p1 = propagator(1);
626
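   // For the bead model, each interior bead j contributes
   // q0(r,j)*q1(r,ns_-1-j)*expWInv_(r). Both propagators include the
   // Boltzmann weight exp(-w) for bead j, so multiplying by
   // expWInv_ = exp(+w) removes the factor that would otherwise be
   // counted twice.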
627 // Internal beads (j = 1, ..., nbead, with nbead = ns_ -2)
628 for (int j = 1; j < ns_ - 1; ++j) {
629 addEqMulVVV(cField(), p0.q(j), p1.q(ns_ - 1 - j), expWInv_);
630 }
631
632 // Scale cField() by prefactor
633 VecOp::mulEqS(cField(), prefactor);
634 }
635
636 /*
637 * Compute stress contribution from this block.
638 */
639 template <int D>
640 void Block<D>::computeStressThread(double prefactor)
641 {
643 UTIL_CHECK(meshPtr_);
644 int nx = mesh().size();
645
646 // Preconditions
647 UTIL_CHECK(isAllocated_);
648 UTIL_CHECK(nx > 0);
649 UTIL_CHECK(kSize_ > 0);
650 UTIL_CHECK(ns_ > 0);
651 UTIL_CHECK(ds_ > 0);
652 UTIL_CHECK(nParams_ > 0);
653 UTIL_CHECK(mesh().dimensions() == fft().meshDimensions());
654
655 // References to forward and reverse propagators
656 Pscf::Rpg::Propagator<D> const & p0 = propagator(0);
657 Pscf::Rpg::Propagator<D> const & p1 = propagator(1);
658 UTIL_CHECK(p0.isSolved());
659 UTIL_CHECK(p1.isSolved());
660
661 // Calculate dKSq if necessary
662 if (!waveListPtr_->hasdKSq()) {
663 waveListPtr_->computedKSq();
664 }
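   // In the loop below, dQ[n] accumulates (up to normalization) this
   // block's contribution to the derivative of the partition function
   // with respect to unit cell parameter n: for each contour slice,
   // realMulVConjVV forms Re[q0(k) conj(q1(k))] * dKSq(n) at every
   // wavevector, Reduce::sum adds these over k, and the result is
   // weighted by a Simpson's-rule factor (dels) and by b^2, with the
   // 1/3 of Simpson's rule and the 1/6 of b^2/6 combined in the
   // constant normal = 3*6.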
665
666 // Workspace variables
667 double dels, normal, increment;
668 normal = 3.0*6.0;
669 FSArray<double, 6> dQ;
670 int i, j, n;
671 RField<D> rTmp(kMeshDimensions_); // array of real values on kgrid
672
673 // Initialize dQ and stress to 0
674 stress_.clear();
675 for (i = 0; i < nParams_; ++i) {
676 dQ.append(0.0);
677 stress_.append(0.0);
678 }
679
680 if (useBatchedFFT_) {
681
682 UTIL_CHECK(fftBatchedAll_.isSetup());
683 UTIL_CHECK(mesh().dimensions() == fftBatchedAll_.meshDimensions());
684 UTIL_CHECK(!q0k_.isAllocated());
685 UTIL_CHECK(!q1k_.isAllocated());
686 // In this case, containers q0k_ and q1k_ will be associated
687 // with slices of q0kBatched_, q1kBatched_
688
689 // Compute batched FFTs for propagators in both directions
690 fftBatchedAll_.forwardTransform(p0.qAll(), q0kBatched_);
691 fftBatchedAll_.forwardTransform(p1.qAll(), q1kBatched_);
692
693 } else {
694
695 if (!q0k_.isAllocated()) {
696 q0k_.allocate(mesh().dimensions());
697 q1k_.allocate(mesh().dimensions());
698 }
699
700 }
701
702 // Main loop over contour points
703 for (j = 0; j < ns_ ; ++j) {
704
705 if (useBatchedFFT_) {
706 // Batched FFTs have already been computed
707 // Associate q0k_, q1k_ with slices of q0kBatched_, q1kBatched_
708 q0k_.associate(q0kBatched_, j * kSize_, mesh().dimensions());
709 q1k_.associate(q1kBatched_, (ns_-1-j) * kSize_,
710 mesh().dimensions());
711 } else {
712 // Compute Fourier transforms at contour grid point j
713 UTIL_CHECK(fft().isSetup());
714 fft().forwardTransform(p0.q(j), q0k_);
715 fft().forwardTransform(p1.q(ns_-1-j), q1k_);
716 }
717
718 // Compute prefactor for Simpson's rule
719 dels = ds_;
720 if (j != 0 && j != ns_ - 1) {
721 if (j % 2 == 0) {
722 dels = dels*2.0;
723 } else {
724 dels = dels*4.0;
725 }
726 }
727
728 // Increment stress contributions for all unit cell parameters
729 for (n = 0; n < nParams_ ; ++n) {
730
731 // Launch kernel to evaluate dQ at all wavevectors
732 realMulVConjVV(rTmp, q0k_, q1k_, waveListPtr_->dKSq(n));
733
734 // Get the sum of all elements
735 increment = Reduce::sum(rTmp);
736 increment *= kuhn() * kuhn() * dels / normal;
737 dQ[n] -= increment;
738 }
739
740 if (useBatchedFFT_) {
741 q0k_.dissociate();
742 q1k_.dissociate();
743 }
744
745 } // end loop over contour points
746
747 // Normalize total stress values
748 for (i = 0; i < nParams_; ++i) {
749 stress_[i] -= (dQ[i] * prefactor);
750 }
751
752 }
753
754 /*
755 * Compute stress contribution from this block, in bead model.
756 */
757 template <int D>
758 void Block<D>::computeStressBead(double prefactor)
759 {
760 // Preconditions
762 UTIL_CHECK(isAllocated_);
763 int nx = mesh().size();
764 IntVec<D> const & meshDim = mesh().dimensions();
765 UTIL_CHECK(nx > 0);
766 UTIL_CHECK(kSize_ > 0);
767 UTIL_CHECK(nParams_ > 0);
768 UTIL_CHECK(meshDim == fft().meshDimensions());
769 UTIL_CHECK(ns_ > 0);
770
771 // References to forward and reverse propagators
772 Pscf::Rpg::Propagator<D> const & p0 = propagator(0);
773 Pscf::Rpg::Propagator<D> const & p1 = propagator(1);
774 UTIL_CHECK(p0.isSolved());
775 UTIL_CHECK(p1.isSolved());
776
777 // Compute batched FFTs, or check allocation of work space
778 if (useBatchedFFT_) {
779 UTIL_CHECK(fftBatchedAll_.isSetup());
780 UTIL_CHECK(meshDim == fftBatchedAll_.meshDimensions());
781 UTIL_CHECK(!q0k_.isAllocated());
782 UTIL_CHECK(!q1k_.isAllocated());
783 // In this case, containers q0k_ and q1k_ will be associated
784 // with slices of q0kBatched_, q1kBatched_
785
786 // Compute batched FFTs for propagators in both directions
787 fftBatchedAll_.forwardTransform(p0.qAll(), q0kBatched_);
788 fftBatchedAll_.forwardTransform(p1.qAll(), q1kBatched_);
789 } else {
790 if (!q0k_.isAllocated()) {
791 q0k_.allocate(meshDim);
792 q1k_.allocate(meshDim);
793 }
794 }
795
796 // Calculate dKSq if necessary
797 if (!waveListPtr_->hasdKSq()) {
798 waveListPtr_->computedKSq();
799 }
800
801 // Initialize dQ to zero
802 FSArray<double, 6> dQ;
803 dQ.clear();
804 for (int i = 0; i < nParams_; ++i) {
805 dQ.append(0.0);
806 }
807
808 RField<D> rTmp(kMeshDimensions_); // k-space work space
809 double bSq = kuhn()*kuhn()/6.0;
810 double increment;
811
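   // In the bead model the stress sum runs over discrete bonds rather
   // than contour slices: the two half bonds that attach the block to
   // its junctions (handled below, and skipped at free chain ends) use
   // expKsq2_ with weight 0.5*bSq, while each of the nBead - 1 interior
   // bonds uses expKsq_ with weight bSq.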
812 // Half bond from head junction to first bead, if not a chain end
813 if (!p0.isHeadEnd()) {
814 if (useBatchedFFT_) {
815 // Batched FFTs have already been computed
816 // Associate q0k_, q1k_ with slices of q0kBatched_, q1kBatched_
817 q0k_.associate(q0kBatched_, 0, meshDim);
818 q1k_.associate(q1kBatched_, (ns_- 2) * kSize_, meshDim);
819 } else {
820 // Compute Fourier transforms of slices on either side of this half bond
821 UTIL_CHECK(fft().isSetup());
822 fft().forwardTransform(p0.q(0), q0k_);
823 fft().forwardTransform(p1.q(ns_ - 2), q1k_);
824 }
825 for (int n = 0; n < nParams_ ; ++n) {
826 realMulVConjVV(rTmp, q0k_, q1k_, waveListPtr_->dKSq(n));
827 VecOp::mulEqV(rTmp, expKsq2_);
828 increment = Reduce::sum(rTmp);
829 increment *= 0.5*bSq;
830 dQ[n] -= increment;
831 }
832 if (useBatchedFFT_) {
833 q0k_.dissociate();
834 q1k_.dissociate();
835 }
836 }
837
838 // Loop over all nBead - 1 full bonds within this block
839 for (int j = 1; j < ns_ - 2; ++j) {
840
841 // Retrieve or compute FFTs at two neighboring slices
842 if (useBatchedFFT_) {
843 // Batched FFTs have already been computed
844 // Associate q0k_, q1k_ with slices of q0kBatched_, q1kBatched_
845 q0k_.associate(q0kBatched_, j * kSize_, meshDim);
846 q1k_.associate(q1kBatched_, (ns_- 2 -j) * kSize_, meshDim);
847 } else {
848 // Compute Fourier transforms of p0, p1 at neighboring beads
849 UTIL_CHECK(fft().isSetup());
850 fft().forwardTransform(p0.q(j), q0k_);
851 fft().forwardTransform(p1.q(ns_ - 2 - j), q1k_);
852 }
853
854 // Loop over unit cell parameters
855 for (int n = 0; n < nParams_ ; ++n) {
856
857 // Compute contributions to dQ at all wavevectors, in rTmp
858 realMulVConjVV(rTmp, q0k_, q1k_, waveListPtr_->dKSq(n));
859 VecOp::mulEqV(rTmp, expKsq_);
860
861 // Evaluate sum over wavevectors
862 increment = Reduce::sum(rTmp);
863 increment *= bSq;
864 dQ[n] -= increment;
865
866 }
867
868 if (useBatchedFFT_) {
869 // Dissociate to allow re-association
870 q0k_.dissociate();
871 q1k_.dissociate();
872 }
873
874 } // end loop over interior bonds
875
876 // Half bond from last bead to tail, if not a chain end
877 if (!p0.isTailEnd()) {
878 if (useBatchedFFT_) {
879 q0k_.associate(q0kBatched_, (ns_-2)*kSize_, meshDim);
880 q1k_.associate(q1kBatched_, 0, meshDim);
881 } else {
882 UTIL_CHECK(fft().isSetup());
883 fft().forwardTransform(p0.q(ns_-2), q0k_);
884 fft().forwardTransform(p1.q(0), q1k_);
885 }
886 for (int n = 0; n < nParams_ ; ++n) {
887 realMulVConjVV(rTmp, q0k_, q1k_, waveListPtr_->dKSq(n));
888 VecOp::mulEqV(rTmp, expKsq2_);
889 increment = Reduce::sum(rTmp);
890 increment *= 0.5*bSq;
891 dQ[n] -= increment;
892 }
893 if (useBatchedFFT_) {
894 q0k_.dissociate();
895 q1k_.dissociate();
896 }
897 }
898
899 // Normalize total stress values
900 stress_.clear();
901 for (int i = 0; i < nParams_; ++i) {
902 stress_.append(-1.0*prefactor*dQ[i]);
903 }
904
905 }
906
907}
908}
909#endif