12#include "Propagator.h"
14#include <prdc/cuda/WaveList.h>
15#include <prdc/cuda/FFT.h>
16#include <prdc/cuda/resources.h>
17#include <prdc/crystal/UnitCell.h>
19#include <pscf/mesh/Mesh.h>
20#include <pscf/mesh/MeshIterator.h>
21#include <pscf/solvers/BlockTmpl.tpp>
36 __global__
void _realMulVConjVV(
cudaReal* a,
41 int nThreads = blockDim.x * gridDim.x;
42 int startID = blockIdx.x * blockDim.x + threadIdx.x;
44 for (
int i = startID; i < n; i +=
nThreads) {
53 a[i] = ((bt.x * ct.x) + (bt.y * ct.y)) * d[i];
60 __global__
void _richardsonEx(
cudaReal* qNew,
65 int nThreads = blockDim.x * gridDim.x;
66 int startID = blockIdx.x * blockDim.x + threadIdx.x;
68 for (
int i = startID; i < n; i +=
nThreads) {
69 q2 = qr2[i] * expW2[i];
70 qNew[i] = (4.0 * q2 - qr[i]) / 3.0;
77 __global__
void _addEqMulVVc(
cudaReal* a,
83 int nThreads = blockDim.x * gridDim.x;
84 int startID = blockIdx.x * blockDim.x + threadIdx.x;
85 for (
int i = startID; i < n; i +=
nThreads) {
86 a[i] += b[i] * c[i] * d;
93 __global__
void _addEqMulVVV(
cudaReal* a,
99 int nThreads = blockDim.x * gridDim.x;
100 int startID = blockIdx.x * blockDim.x + threadIdx.x;
101 for (
int i = startID; i < n; i +=
nThreads) {
102 a[i] += b[i]*c[i]*d[i];
133 _realMulVConjVV<<<nBlocks, nThreads>>>(a.
cArray(), b.
cArray(),
160 _richardsonEx<<<nBlocks, nThreads>>>(qNew.
cArray(), qr.
cArray(),
187 _addEqMulVVc<<<nBlocks, nThreads>>>(a.
cArray(), b.
cArray(),
209 _addEqMulVVV<<<nBlocks, nThreads>>>(a.
cArray(), b.
cArray(),
222 unitCellPtr_(nullptr),
223 waveListPtr_(nullptr),
254 UTIL_CHECK(mesh.dimensions() == fft.meshDimensions());
262 unitCellPtr_ = &cell;
263 waveListPtr_ = &waveList;
277 useBatchedFFT_ = useBatchedFFT;
283 expW_.allocate(mesh().dimensions());
284 expKsq_.allocate(kMeshDimensions_);
285 expKsq2_.allocate(kMeshDimensions_);
287 expW2_.allocate(mesh().dimensions());
288 qrPair_.allocate(2 * mesh().size());
289 qkPair_.allocate(2 * kSize_);
292 expWInv_.allocate(mesh().dimensions());
293 qk_.allocate(mesh().dimensions());
297 cField().allocate(mesh().dimensions());
307 tempNs = floor(
length / (2.0 *
ds) + 0.5);
312 ds_ =
length/double(ns_ - 1);
328 fftBatchedPair_.setup(mesh().dimensions(), 2);
331 if (useBatchedFFT_) {
333 fftBatchedAll_.setup(mesh().dimensions(), ns_);
334 q0kBatched_.allocate(ns_ * kSize_);
335 q1kBatched_.allocate(ns_ * kSize_);
359 tempNs = floor(
length / (2.0 * dsTarget_) + 0.5);
364 ds_ =
length/double(ns_-1);
373 if (useBatchedFFT_) {
375 q0kBatched_.deallocate();
376 q1kBatched_.deallocate();
377 q0kBatched_.allocate(ns_ * kSize_);
378 q1kBatched_.allocate(ns_ * kSize_);
379 fftBatchedAll_.resetBatchSize(ns_);
404 UTIL_CHECK(nParams_ == unitCell().nParameter());
413 void Block<D>::computeExpKsq()
419 if (!waveList().hasKSq()) {
420 waveList().computeKSq();
424 const double b = BlockTmplT::kuhn();
425 double bSqFactor = -1.0 * b * b / 6.0;
432 VecOp::expVc(expKsq2_, waveList().kSq(), bSqFactor / 2.0);
445 int nx = mesh().size();
475 int nx = mesh().size();
478 UTIL_CHECK(fft().meshDimensions() == mesh().dimensions());
488 qr.
associate(qrPair_, 0, mesh().dimensions());
489 qr2.
associate(qrPair_, nx, mesh().dimensions());
490 qk.
associate(qkPair_, 0, mesh().dimensions());
491 qk2.
associate(qkPair_, kSize_, mesh().dimensions());
496 fftBatchedPair_.forwardTransform(qrPair_, qkPair_);
499 fftBatchedPair_.inverseTransformUnsafe(qkPair_, qrPair_);
501 fft().forwardTransform(qr2, qk2);
503 fft().inverseTransformUnsafe(qk2, qr2);
504 richardsonEx(qout, qr, qr2, expW2_);
530 int nx = mesh().size();
547 int nx = mesh().size();
549 UTIL_CHECK(fft().meshDimensions() == mesh().dimensions());
558 fft().forwardTransform(qin, qk_);
560 fft().inverseTransformUnsafe(qk_, qout);
572 int nx = mesh().size();
581 fft().forwardTransform(qin, qk_);
583 fft().inverseTransformUnsafe(qk_, qout);
593 int nx = mesh().size();
607 addEqMulVVc(
cField(), p0.
q(0), p1.
q(ns_ - 1), 1.0);
608 addEqMulVVc(
cField(), p0.
q(ns_ - 1), p1.
q(0), 1.0);
610 for (
int j = 1; j < ns_ - 1; j += 2) {
612 addEqMulVVc(
cField(), p0.
q(j), p1.
q(ns_ - 1 - j), 4.0);
614 for (
int j = 2; j < ns_ - 2; j += 2) {
616 addEqMulVVc(
cField(), p0.
q(j), p1.
q(ns_ - 1 - j), 2.0);
629 int nx = mesh().size();
644 for (
int j = 1; j < ns_ - 1; ++j) {
645 addEqMulVVV(
cField(), p0.
q(j), p1.
q(ns_ - 1 - j), expWInv_);
660 int nx = mesh().size();
669 UTIL_CHECK(mesh().dimensions() == fft().meshDimensions());
678 if (!waveList().hasdKSq()) {
679 waveList().computedKSq();
683 double dels, normal, increment;
691 for (i = 0; i < nParams_; ++i) {
696 if (useBatchedFFT_) {
699 UTIL_CHECK(mesh().dimensions() == fftBatchedAll_.meshDimensions());
706 fftBatchedAll_.forwardTransform(p0.
qAll(), q0kBatched_);
707 fftBatchedAll_.forwardTransform(p1.
qAll(), q1kBatched_);
711 if (!q0k_.isAllocated()) {
712 q0k_.allocate(mesh().dimensions());
713 q1k_.allocate(mesh().dimensions());
719 for (j = 0; j < ns_ ; ++j) {
721 if (useBatchedFFT_) {
724 q0k_.associate(q0kBatched_, j * kSize_, mesh().dimensions());
725 q1k_.associate(q1kBatched_, (ns_-1-j) * kSize_,
726 mesh().dimensions());
730 fft().forwardTransform(p0.
q(j), q0k_);
731 fft().forwardTransform(p1.
q(ns_-1-j), q1k_);
736 if (j != 0 && j != ns_ - 1) {
746 for (n = 0; n < nParams_ ; ++n) {
749 realMulVConjVV(rTmp, q0k_, q1k_, waveList().dKSq(n));
753 increment *= b * b * dels / normal;
757 if (useBatchedFFT_) {
765 for (i = 0; i < nParams_; ++i) {
766 stress_[i] -= (dQ[i] * prefactor);
780 int nx = mesh().size();
781 IntVec<D> const & meshDim = mesh().dimensions();
785 UTIL_CHECK(meshDim == fft().meshDimensions());
795 if (useBatchedFFT_) {
797 UTIL_CHECK(meshDim == fftBatchedAll_.meshDimensions());
804 fftBatchedAll_.forwardTransform(p0.
qAll(), q0kBatched_);
805 fftBatchedAll_.forwardTransform(p1.
qAll(), q1kBatched_);
807 if (!q0k_.isAllocated()) {
808 q0k_.allocate(meshDim);
809 q1k_.allocate(meshDim);
814 if (!waveList().hasdKSq()) {
815 waveList().computedKSq();
821 for (
int i = 0; i < nParams_; ++i) {
827 const double bSq = b * b / 6.0;
832 if (useBatchedFFT_) {
835 q0k_.associate(q0kBatched_, 0, meshDim);
836 q1k_.associate(q1kBatched_, (ns_- 2) * kSize_, meshDim);
840 fft().forwardTransform(p0.
q(0), q0k_);
841 fft().forwardTransform(p1.
q(ns_ - 2), q1k_);
843 for (
int n = 0; n < nParams_ ; ++n) {
844 realMulVConjVV(rTmp, q0k_, q1k_, waveList().dKSq(n));
847 increment *= 0.5*bSq;
850 if (useBatchedFFT_) {
857 for (
int j = 1; j < ns_ - 2; ++j) {
860 if (useBatchedFFT_) {
863 q0k_.associate(q0kBatched_, j * kSize_, meshDim);
864 q1k_.associate(q1kBatched_, (ns_- 2 -j) * kSize_, meshDim);
868 fft().forwardTransform(p0.
q(j), q0k_);
869 fft().forwardTransform(p1.
q(ns_ - 2 - j), q1k_);
873 for (
int n = 0; n < nParams_ ; ++n) {
876 realMulVConjVV(rTmp, q0k_, q1k_, waveList().dKSq(n));
886 if (useBatchedFFT_) {
896 if (useBatchedFFT_) {
897 q0k_.associate(q0kBatched_, (ns_-2)*kSize_, meshDim);
898 q1k_.associate(q1kBatched_, 0, meshDim);
901 fft().forwardTransform(p0.
q(ns_-2), q0k_);
902 fft().forwardTransform(p1.
q(0), q1k_);
904 for (
int n = 0; n < nParams_ ; ++n) {
905 realMulVConjVV(rTmp, q0k_, q1k_, waveList().dKSq(n));
908 increment *= 0.5*bSq;
911 if (useBatchedFFT_) {
919 for (
int i = 0; i < nParams_; ++i) {
920 stress_.append(-1.0*prefactor*dQ[i]);
virtual void setKuhn(double kuhn)
Dynamic array on the GPU device with aligned data.
void dissociate()
Dissociate this object from an externally owned memory block.
int capacity() const
Return array capacity.
Data * cArray()
Return pointer to underlying C array.
int nBead() const
Get the number of beads in this block, in the bead model.
virtual void setLength(double length)
Set the length of this block (only valid for thread model).
double length() const
Get the length of this block, in the thread model.
An IntVec<D, T> is a D-component vector of elements of integer type T.
Description of a regular grid of points in a periodic domain.
Fourier transform wrapper for real or complex data.
static void computeKMesh(IntVec< D > const &rMeshDimensions, IntVec< D > &kMeshDimensions, int &kSize)
Compute dimensions and size of k-space mesh for DFT of real data.
Discrete Fourier Transform (DFT) of a real field, allocated on a GPU.
void associate(DeviceArray< cudaComplex > &arr, int beginId, IntVec< D > const &meshDimensions)
Associate this object with a slice of another DeviceArray.
Field of real values on a regular mesh, allocated on a GPU device.
void associate(DeviceArray< cudaReal > &arr, int beginId, IntVec< D > const &meshDimensions)
Associate this object with a slice of another DeviceArray.
Class to compute and store properties associated with wavevectors.
int nParameter() const
Get the number of parameters in the unit cell.
Base template for UnitCell<D> classes, D=1, 2 or 3.
bool isHeadEnd() const
Is the head vertex a chain end?
bool isSolved() const
Has the modified diffusion equation been solved?
bool isTailEnd() const
Is the tail vertex a chain end?
const T::RField & q(int i) const
Return q-field at a specified step.
void setLength(double newLength)
Set or reset block length (only used in thread model).
double ds() const
Contour length step size.
void allocate(double ds, bool useBatchedFFT=true)
Allocate memory and set contour step size for thread model.
void setKuhn(double kuhn)
Set or reset monomer statistical segment length.
Propagator< D > & propagator(int directionId)
Get a Propagator for a specified direction.
void stepBondBead(RField< D > const &qin, RField< D > &qout) const
Compute a bond operator for the bead model.
void computeStressBead(double prefactor)
Compute stress contribution for this block, using bead model.
void stepFieldBead(RField< D > &q) const
Apply the exponential field operator for the bead model.
void stepThread(RField< D > const &qin, RField< D > &qout) const
Compute step of integration loop, from i to i+1.
void setupSolver(RField< D > const &w)
Set solver for this block.
void associate(Mesh< D > const &mesh, FFT< D > const &fft, UnitCell< D > const &cell, WaveList< D > &waveList)
Create permanent associations with related objects.
void computeConcentrationBead(double prefactor)
Compute the concentration for this block, using the bead model.
void computeConcentrationThread(double prefactor)
Compute unnormalized concentration for block by integration.
void clearUnitCellData()
Clear all internal data that depends on lattice parameters.
RField< D > & cField()
Get the associated monomer concentration field.
void stepHalfBondBead(RField< D > const &qin, RField< D > &qout) const
Compute a half-bond operator for the bead model.
void computeStressThread(double prefactor)
Compute stress contribution for this block, using thread model.
void stepBead(RField< D > const &qin, RField< D > &qout) const
Compute one step of solution of MDE for the bead model.
MDE solver for one direction of one block.
DeviceArray< cudaReal > const & qAll()
Return the full array of q-fields as an unrolled 1D array.
A fixed capacity (static) contiguous array with a variable logical size.
void clear()
Set logical size to zero.
void append(Data const &data)
Append data to the end of the array.
#define UTIL_CHECK(condition)
Assertion macro suitable for serial or parallel production code.
double sum(Array< double > const &in)
Compute sum of array elements (real).
void mulEqV(Array< double > &a, Array< double > const &b)
Vector-vector in-place multiplication, a[i] *= b[i] (real).
void mulEqS(Array< double > &a, double b)
Vector-scalar in-place multiplication, a[i] *= b (real).
void expVc(Array< double > &a, Array< double > const &b, const double c)
Exponentiation a scaled vector, a[i] = exp(b[i]*c) (real).
void eqS(Array< double > &a, double b)
Vector assignment, a[i] = b (real).
void divSV(Array< double > &a, double b, Array< double > const &c)
Vector division, a[i] = b / c[i].
void setThreadsLogical(int nThreadsLogical)
Given total number of threads, set 1D execution configuration.
int nThreads()
Get the number of threads per block for execution.
int nBlocks()
Get the current number of blocks for execution.
void mulVVPair(Array< double > &a1, Array< double > &a2, Array< double > const &b1, Array< double > const &b2, Array< double > const &c)
Vector multiplication in pairs, ax[i] = bx[i] * s[i], x=1,2.
void mulEqVPair(Array< double > &a1, Array< double > &a2, Array< double > const &b)
In-place vector multiplication in pairs, ax[i] *= b[i], x=1,2.
bool isThread()
Is the thread model in use ?
bool isBead()
Is the bead model in use ?
Fields, FFTs, and utilities for periodic boundary conditions (CUDA).
Periodic fields and crystallography.
SCFT and PS-FTS with real periodic fields (GPU)
PSCF package top-level namespace.
cufftDoubleComplex cudaComplex
Complex number type used in CPU code that uses FFTW.
cufftDoubleReal cudaReal
Real number type used in CPU code that uses FFTW.