12#include <prdc/cuda/resources.h>
13#include <pscf/mesh/Mesh.h>
14#include <pscf/mesh/MeshIterator.h>
30 __global__
void _realMulVConjVV(cudaReal* a, cudaComplex
const * b,
31 cudaComplex
const * c,
32 cudaReal
const * d,
const int n)
34 int nThreads = blockDim.x * gridDim.x;
35 int startID = blockIdx.x * blockDim.x + threadIdx.x;
37 for (
int i = startID; i < n; i +=
nThreads) {
46 a[i] = ((bt.x * ct.x) + (bt.y * ct.y)) * d[i];
53 __global__
void _richardsonEx(cudaReal* qNew, cudaReal
const * qr,
55 cudaReal
const * expW2,
const int n)
57 int nThreads = blockDim.x * gridDim.x;
58 int startID = blockIdx.x * blockDim.x + threadIdx.x;
60 for (
int i = startID; i < n; i +=
nThreads) {
61 q2 = qr2[i] * expW2[i];
62 qNew[i] = (4.0 * q2 - qr[i]) / 3.0;
69 __global__
void _addEqMulVVc(cudaReal* a, cudaReal
const * b,
70 cudaReal
const * c, cudaReal
const d,
73 int nThreads = blockDim.x * gridDim.x;
74 int startID = blockIdx.x * blockDim.x + threadIdx.x;
75 for(
int i = startID; i < n; i +=
nThreads) {
76 a[i] += b[i] * c[i] * d;
98 int nBlocks, nThreads;
102 _realMulVConjVV<<<nBlocks, nThreads>>>(a.
cArray(), b.
cArray(),
120 int nBlocks, nThreads;
124 _richardsonEx<<<nBlocks, nThreads>>>(qNew.
cArray(), qr.
cArray(),
139 int nBlocks, nThreads;
143 _addEqMulVVc<<<nBlocks, nThreads>>>(a.
cArray(), b.
cArray(),
156 unitCellPtr_(nullptr),
157 waveListPtr_(nullptr),
165 useBatchedFFT_(true),
194 unitCellPtr_ = &cell;
195 waveListPtr_ = &wavelist;
199 for (
int i = 0; i < D; ++i) {
203 kMeshDimensions_[i] = mesh.
dimensions()[i]/2 + 1;
205 kSize_ *= kMeshDimensions_[i];
220 useBatchedFFT_ = useBatchedFFT;
225 tempNs = floor(length() / (2.0 * ds) + 0.5);
231 ds_ = length()/double(ns_ - 1);
235 fftBatchedPair_.setup(mesh().dimensions(), 2);
236 if (useBatchedFFT_) {
238 fftBatchedAll_.setup(mesh().dimensions(), ns_);
242 expKsq_.allocate(kMeshDimensions_);
243 expKsq2_.allocate(kMeshDimensions_);
244 expW_.allocate(mesh().dimensions());
245 expW2_.allocate(mesh().dimensions());
246 qrPair_.allocate(2 * mesh().size());
247 qkPair_.allocate(2 * kSize_);
248 q1_.allocate(mesh().dimensions());
249 q2_.allocate(mesh().dimensions());
251 propagator(0).allocate(ns_, mesh());
252 propagator(1).allocate(ns_, mesh());
254 cField().allocate(mesh().dimensions());
256 if (useBatchedFFT_) {
257 qkBatched_.allocate(ns_ * kSize_);
258 qk2Batched_.allocate(ns_ * kSize_);
261 expKsq_h_.allocate(kSize_);
262 expKsq2_h_.allocate(kSize_);
275 UTIL_CHECK(nParams_ == unitCell().nParameter());
292 tempNs = floor(length() / (2.0 * dsTarget_) + 0.5);
297 ds_ = length()/double(ns_-1);
302 propagator(0).reallocate(ns_);
303 propagator(1).reallocate(ns_);
306 if (useBatchedFFT_) {
308 qkBatched_.deallocate();
309 qk2Batched_.deallocate();
310 qkBatched_.allocate(ns_ * kSize_);
311 qk2Batched_.allocate(ns_ * kSize_);
312 fftBatchedAll_.resetBatchSize(ns_);
338 if (!waveListPtr_->hasKSq()) {
339 waveListPtr_->computeKSq();
342 double factor = -1.0*kuhn()*kuhn()*ds_/6.0;
346 VecOp::expVc(expKsq2_, waveListPtr_->kSq(), factor / 2.0);
359 int nx = mesh().size();
381 int nx = mesh().size();
398 for (
int j = 1; j < ns_ - 1; j += 2) {
402 for (
int j = 2; j < ns_ - 2; j += 2) {
421 int nx = mesh().size();
424 UTIL_CHECK(fft().meshDimensions() == mesh().dimensions());
434 qr.associate(qrPair_, 0, mesh().dimensions());
435 qr2.associate(qrPair_, nx, mesh().dimensions());
436 qk.associate(qkPair_, 0, mesh().dimensions());
437 qk2.associate(qkPair_, kSize_, mesh().dimensions());
441 fftBatchedPair_.forwardTransform(qrPair_, qkPair_);
444 fftBatchedPair_.inverseTransformUnsafe(qkPair_, qrPair_);
446 fft().forwardTransform(qr2, qk2);
448 fft().inverseTransformUnsafe(qk2, qr2);
458 int nx = mesh().size();
467 UTIL_CHECK(mesh().dimensions() == fft().meshDimensions());
472 if (!waveListPtr_->hasdKSq()) {
473 waveListPtr_->computedKSq();
477 double dels, normal, increment;
488 for (i = 0; i < nParams_; ++i) {
493 if (useBatchedFFT_) {
496 UTIL_CHECK(mesh().dimensions() == fftBatchedAll_.meshDimensions());
497 fftBatchedAll_.forwardTransform(p0.
qAll(), qkBatched_);
498 fftBatchedAll_.forwardTransform(p1.
qAll(), qk2Batched_);
506 for (j = 0; j < ns_ ; ++j) {
508 if (useBatchedFFT_) {
510 qk.associate(qkBatched_, j * kSize_, mesh().dimensions());
511 qk2.associate(qk2Batched_, (ns_-1-j) * kSize_, mesh().dimensions());
515 fft().forwardTransform(p0.
q(j), qk);
516 fft().forwardTransform(p1.
q(ns_-1-j), qk2);
520 if (j != 0 && j != ns_ - 1) {
528 for (n = 0; n < nParams_ ; ++n) {
534 increment *= kuhn() * kuhn() * dels / normal;
538 if (useBatchedFFT_) {
545 for (i = 0; i < nParams_; ++i) {
546 stress_[i] -= (dQ[i] * prefactor);
virtual void setLength(double length)
Set the length of this block.
Class template for a block in a block copolymer.
Propagator< D > & propagator(int directionId)
Dynamic array on the GPU device with aligned data.
int capacity() const
Return allocated capacity.
Data * cArray()
Return pointer to underlying C array.
Description of a regular grid of points in a periodic domain.
IntVec< D > dimensions() const
Get an IntVec<D> of the grid dimensions.
int size() const
Get total number of grid points.
Fourier transform wrapper.
bool isSetup() const
Has this FFT object been setup?
IntVec< D > const & meshDimensions() const
Return the dimensions of the grid for which this was setup.
Fourier transform of a real field on an FFT mesh.
void allocate(IntVec< D > const &meshDimensions)
Allocate the underlying C array and set mesh dimensions.
Field of real double precision values on an FFT mesh.
int nParameter() const
Get the number of parameters in the unit cell.
Base template for UnitCell<D> classes, D=1, 2 or 3.
Block within a branched polymer.
void setLength(double newLength)
Set or reset block length.
void computeConcentration(double prefactor)
Compute unnormalized concentration for block by integration.
void allocate(double ds, bool useBatchedFFT=true)
Allocate internal data containers.
void setKuhn(double kuhn)
Set or reset monomer statistical segment length.
void associate(Mesh< D > const &mesh, FFT< D > const &fft, UnitCell< D > const &cell, WaveList< D > &wavelist)
Associate this object with a mesh, FFT, UnitCell, and WaveList object.
void computeStress(double prefactor)
Compute derivatives of free energy w/ respect to cell parameters.
void setupSolver(RField< D > const &w)
Set solver for this block.
void step(RField< D > const &q, RField< D > &qNew)
Compute step of integration loop, from i to i+1.
void clearUnitCellData()
Clear all internal data that depends on lattice parameters.
MDE solver for one-direction of one block.
DeviceArray< cudaReal > const & qAll()
Return the full array of q-fields (after propagator is solved).
RField< D > const & q(int i) const
Return const q-field at specified step by reference (after solving).
Class to calculate and store properties of wavevectors.
A fixed capacity (static) contiguous array with a variable logical size.
void append(Data const &data)
Append data to the end of the array.
#define UTIL_CHECK(condition)
Assertion macro suitable for serial or parallel production code.
void setThreadsLogical(int nThreadsLogical)
Given total number of threads, set 1D execution configuration.
int nThreads()
Get the number of threads per block for execution.
cudaReal sum(DeviceArray< cudaReal > const &in)
Compute sum of array elements (GPU kernel wrapper).
void expVc(DeviceArray< cudaReal > &a, DeviceArray< cudaReal > const &b, cudaReal const c)
Vector exponentiation w/ coefficient, a[i] = exp(b[i]*c), kernel wrapper.
void mulVVPair(DeviceArray< cudaReal > &a1, DeviceArray< cudaReal > &a2, DeviceArray< cudaReal > const &b1, DeviceArray< cudaReal > const &b2, DeviceArray< cudaReal > const &s)
Vector multiplication in pairs, ax[i] = bx[i] * s[i], kernel wrapper.
void mulEqS(DeviceArray< cudaReal > &a, cudaReal const b, const int beginIdA, const int n)
Vector multiplication in-place, a[i] *= b, kernel wrapper (cudaReal).
void mulEqV(DeviceArray< cudaReal > &a, DeviceArray< cudaReal > const &b, const int beginIdA, const int beginIdB, const int n)
Vector multiplication in-place, a[i] *= b[i], kernel wrapper (cudaReal).
void eqS(DeviceArray< cudaReal > &a, cudaReal const b, const int beginIdA, const int n)
Vector assignment, a[i] = b, kernel wrapper (cudaReal).
void mulEqVPair(DeviceArray< cudaReal > &a1, DeviceArray< cudaReal > &a2, DeviceArray< cudaReal > const &s)
In-place vector multiplication in pairs, ax[i] *= s[i], kernel wrapper.
Periodic fields and crystallography.
void richardsonEx(DeviceArray< cudaReal > &qNew, DeviceArray< cudaReal > const &qr, DeviceArray< cudaReal > const &qr2, DeviceArray< cudaReal > const &expW2)
Performs qNew = (4 * (qr2 * expW2) - qr) / 3 elementwise, kernel wrapper.
void realMulVConjVV(DeviceArray< cudaReal > &a, DeviceArray< cudaComplex > const &b, DeviceArray< cudaComplex > const &c, DeviceArray< cudaReal > const &d)
Element-wise calculation of a = real(b * conj(c) * d), kernel wrapper.
void addEqMulVVc(DeviceArray< cudaReal > &a, DeviceArray< cudaReal > const &b, DeviceArray< cudaReal > const &c, cudaReal const d)
Performs a[i] += b[i] * c[i] * d, kernel wrapper.
PSCF package top-level namespace.
Utility classes for scientific computation.