12#include <pspg/math/GpuResources.h>
13#include <pscf/mesh/Mesh.h>
14#include <pscf/mesh/MeshIterator.h>
15#include <pscf/crystal/shiftToMinimum.h>
16#include <util/containers/FMatrix.h>
17#include <util/containers/DArray.h>
18#include <util/containers/FArray.h>
29 void mulDelKsq(cudaReal* result,
const cudaComplex* q1,
30 const cudaComplex* q2,
const cudaReal* delKsq,
31 int paramN,
int kSize,
int rSize)
33 int nThreads = blockDim.x * gridDim.x;
34 int startID = blockIdx.x * blockDim.x + threadIdx.x;
35 for (
int i = startID; i < kSize; i +=
nThreads) {
36 #ifdef SINGLE_PRECISION
37 result[i] = cuCmulf( q1[i],
38 cuConjf(q2[i])).x * delKsq[paramN * rSize + i];
40 result[i] = cuCmul( q1[i],
41 cuConj(q2[i])).x * delKsq[paramN * rSize + i];
47 void pointwiseMulSameStart(
const cudaReal* a,
const cudaReal* expW,
48 const cudaReal* expW2,
49 cudaReal* q1, cudaReal* q2,
52 int nThreads = blockDim.x * gridDim.x;
53 int startID = blockIdx.x * blockDim.x + threadIdx.x;
55 for (
int i = startID; i < size; i +=
nThreads) {
57 q1[i] = expW[i] * input;
58 q2[i] = expW2[i] * input;
63 void pointwiseMulTwinned(
const cudaReal* qr1,
66 cudaReal* q1, cudaReal* q2,
int size)
68 int nThreads = blockDim.x * gridDim.x;
69 int startID = blockIdx.x * blockDim.x + threadIdx.x;
71 for (
int i = startID; i < size; i +=
nThreads) {
73 q1[i] = qr1[i] * scale;
74 q2[i] = qr2[i] * scale;
79 void scaleComplexTwinned(cudaComplex* qk1, cudaComplex* qk2,
80 const cudaReal* expksq1,
81 const cudaReal* expksq2,
84 int nThreads = blockDim.x * gridDim.x;
85 int startID = blockIdx.x * blockDim.x + threadIdx.x;
86 for (
int i = startID; i < size; i +=
nThreads) {
87 qk1[i].x *= expksq1[i];
88 qk1[i].y *= expksq1[i];
89 qk2[i].x *= expksq2[i];
90 qk2[i].y *= expksq2[i];
95 void scaleComplex(cudaComplex* a, cudaReal* scale,
int size)
97 int nThreads = blockDim.x * gridDim.x;
98 int startID = blockIdx.x * blockDim.x + threadIdx.x;
99 for(
int i = startID; i < size; i +=
nThreads) {
106 void richardsonExpTwinned(cudaReal* qNew,
const cudaReal* q1,
107 const cudaReal* qr,
const cudaReal* expW2,
int size)
109 int nThreads = blockDim.x * gridDim.x;
110 int startID = blockIdx.x * blockDim.x + threadIdx.x;
112 for (
int i = startID; i < size; i +=
nThreads) {
113 q2 = qr[i] * expW2[i];
114 qNew[i] = (4.0 * q2 - q1[i]) / 3.0;
119 void multiplyScaleQQ(cudaReal* result,
122 double scale,
int size)
125 int nThreads = blockDim.x * gridDim.x;
126 int startID = blockIdx.x * blockDim.x + threadIdx.x;
128 for(
int i = startID; i < size; i +=
nThreads) {
129 result[i] += scale * p1[i] * p2[i];
167 delete[] expKsq_host;
168 delete[] expKsq2_host;
187 tempNs = floor( length()/(2.0 *ds) + 0.5 );
193 ds_ = length()/double(ns_ - 1);
196 for (
int i = 0; i < D; ++i) {
200 kMeshDimensions_[i] = mesh.
dimensions()[i]/2 + 1;
205 for(
int i = 0; i < D; ++i) {
206 kSize_ *= kMeshDimensions_[i];
210 expKsq_.allocate(kMeshDimensions_);
211 expKsq2_.allocate(kMeshDimensions_);
221 propagator(0).allocate(ns_, mesh);
222 propagator(1).allocate(ns_, mesh);
223 gpuErrchk( cudaMalloc((
void**)&qkBatched_,
224 ns_ * kSize_ *
sizeof(cudaComplex))
226 gpuErrchk( cudaMalloc((
void**)&qk2Batched_,
227 ns_ * kSize_ *
sizeof(cudaComplex))
231 gpuErrchk(cudaMalloc((
void**)&d_temp_, nBlocks_ *
sizeof(cudaReal)));
232 temp_ =
new cudaReal[nBlocks_];
234 expKsq_host =
new cudaReal[kSize_];
235 expKsq2_host =
new cudaReal[kSize_];
251 ds_ = length/double(ns_ - 1);
276 unitCellPtr_ = &unitCell;
277 waveListPtr_ = &wavelist;
291 double factor = -1.0*kuhn()*kuhn()*ds_/6.0;
295 for(
int i = 0; i < D; ++i) {
296 kSize *= kMeshDimensions_[i];
302 Gsq = unitCell().ksq(wavelist().minImage(iter.
rank()));
303 expKsq_host[i] = exp(Gsq*factor);
304 expKsq2_host[i] = exp(Gsq*factor / 2);
307 cudaMemcpy(expKsq_.cDField(), expKsq_host,
308 kSize *
sizeof(cudaReal), cudaMemcpyHostToDevice);
309 cudaMemcpy(expKsq2_.cDField(), expKsq2_host,
310 kSize *
sizeof(cudaReal), cudaMemcpyHostToDevice);
323 int nx = mesh().size();
328 assignExp<<<nBlocks_, nThreads_>>>(expW_.cDField(), w.
cDField(),
329 (double)0.5* ds_, nx);
330 assignExp<<<nBlocks_, nThreads_>>>(expW2_.cDField(), w.
cDField(),
331 (double)0.25 * ds_, nx);
347 int nx = mesh().size();
356 assignUniformReal<<<nBlocks_, nThreads_>>>
357 (cField().cDField(), 0.0, nx);
362 multiplyScaleQQ<<<nBlocks_, nThreads_>>>
363 (cField().cDField(), p0.q(0), p1.
q(ns_ - 1), 1.0, nx);
364 multiplyScaleQQ<<<nBlocks_, nThreads_>>>
365 (cField().cDField(), p0.q(ns_-1), p1.
q(0), 1.0, nx);
366 for (
int j = 1; j < ns_ - 1; j += 2) {
368 multiplyScaleQQ<<<nBlocks_, nThreads_>>>
369 (cField().cDField(), p0.q(j), p1.
q(ns_ - 1 - j), 4.0, nx);
371 for (
int j = 2; j < ns_ - 2; j += 2) {
373 multiplyScaleQQ<<<nBlocks_, nThreads_>>>
374 (cField().cDField(), p0.q(j), p1.
q(ns_ - 1 - j), 2.0, nx);
377 scaleReal<<<nBlocks_, nThreads_>>>
378 (cField().cDField(), (prefactor * ds_/3.0), nx);
384 if (!fft_.isSetup()) {
385 fft_.setup(qr_, qk_);
386 fftBatched_.setup(mesh().dimensions(), kMeshDimensions_, ns_);
401 int nx = mesh().size();
407 int nk = qk_.capacity();
412 pointwiseMulSameStart<<<nBlocks_, nThreads_>>>
413 (q, expW_.cDField(), expW2_.cDField(),
414 qr_.cDField(), qr2_.cDField(), nx);
415 fft_.forwardTransform(qr_, qk_);
416 fft_.forwardTransform(qr2_, qk2_);
417 scaleComplexTwinned<<<nBlocks_, nThreads_>>>
418 (qk_.cDField(), qk2_.cDField(),
419 expKsq_.cDField(), expKsq2_.cDField(), nk);
420 fft_.inverseTransform(qk_, qr_);
421 fft_.inverseTransform(qk2_, q2_);
422 pointwiseMulTwinned<<<nBlocks_, nThreads_>>>
423 (qr_.cDField(), q2_.cDField(), expW_.cDField(),
424 q1_.cDField(), qr_.cDField(), nx);
425 fft_.forwardTransform(qr_, qk_);
426 scaleComplex<<<nBlocks_, nThreads_>>>(qk_.cDField(), expKsq2_.cDField(), nk);
427 fft_.inverseTransform(qk_, qr_);
428 richardsonExpTwinned<<<nBlocks_, nThreads_>>>(qNew, q1_.cDField(),
429 qr_.cDField(), expW2_.cDField(), nx);
443 int nx = mesh().size();
452 double dels, normal, increment;
457 for (i = 0; i < 6; ++i) {
465 fftBatched_.forwardTransform(p0.
head(), qkBatched_, ns_);
466 fftBatched_.forwardTransform(p1.
head(), qk2Batched_, ns_);
467 cudaMemset(qr2_.cDField(), 0, mesh().size() *
sizeof(cudaReal));
469 for (
int j = 0; j < ns_ ; ++j) {
472 if (j != 0 && j != ns_ - 1) {
480 for (
int n = 0; n < nParams_ ; ++n) {
481 mulDelKsq<<<nBlocks_, nThreads_ >>>
483 qkBatched_ + (j*kSize_),
484 qk2Batched_ + (kSize_ * (ns_ -1 -j)),
485 wavelist.
dkSq(), n , kSize_, nx);
487 increment = gpuSum(qr2_.cDField(), mesh().size());
488 increment = (increment * kuhn() * kuhn() * dels)/normal;
489 dQ [n] = dQ[n]-increment;
495 for (i = 0; i < nParams_; ++i) {
496 stress_[i] = stress_[i] - (dQ[i] * prefactor);
virtual void setLength(double length)
Set the length of this block.
Class template for a block in a block copolymer.
Propagator< D > & propagator(int directionId)
Get a Propagator for a specified direction.
An IntVec<D, T> is a D-component vector of elements of integer type T.
Iterator over points in a Mesh<D>.
int rank() const
Get the rank of current element.
void begin()
Set iterator to the first point in the mesh.
bool atEnd() const
Is this the end (i.e., one past the last point)?
void setDimensions(const IntVec< D > &dimensions)
Set the grid dimensions in all directions.
Description of a regular grid of points in a periodic domain.
IntVec< D > dimensions() const
Get an IntVec<D> of the grid dimensions.
int size() const
Get total number of grid points.
Block within a branched polymer.
void computeConcentration(double prefactor)
Compute unnormalized concentration for block by integration.
void setKuhn(double kuhn)
Set or reset monomer statistical segment length.
void computeStress(WaveList< D > const &waveList, double prefactor)
Compute derivatives of free energy w/ respect to cell parameters.
void setupSolver(RDField< D > const &w)
Set solver for this block.
void setDiscretization(double ds, const Mesh< D > &mesh)
Initialize discretization and allocate required memory.
void setupUnitCell(UnitCell< D > const &unitCell, WaveList< D > const &waveList)
Setup parameters that depend on the unit cell.
void setupFFT()
Initialize FFT and batch FFT classes.
void setLength(double length)
Set or reset block length.
void step(cudaReal const *q, cudaReal *qNew)
Compute step of integration loop, from i to i+1.
Data * cDField()
Return pointer to underlying C array.
MDE solver for one-direction of one block.
cudaReal * head() const
Return q-field at beginning of block (initial condition).
const cudaReal * q(int i) const
Return q-field at specified step.
Field of real single precision values on an FFT mesh on a device.
Container for wavevector data.
cudaReal * dkSq() const
Get a pointer to the dkSq array on device.
int nParameter() const
Get the number of parameters in the unit cell.
Base template for UnitCell<D> classes, D=1, 2 or 3.
A fixed size (static) contiguous array template.
#define UTIL_CHECK(condition)
Assertion macro suitable for serial or parallel production code.
int nThreads()
Get the number of threads per block for execution.
void setThreadsLogical(int nThreadsLogical)
Set the total number of threads required for execution.
C++ namespace for polymer self-consistent field theory (PSCF).
Utility classes for scientific computation.