9#include <pscf/cuda/ThreadArray.h>
15 using namespace Prdc::Cuda;
/*
* Rescale array a from [0,1] to [-b, b], GPU kernel.
*
* Each element is overwritten in place with a[i] * 2 * b - b.
*
* The kernel uses a grid-stride loop: every thread starts at its global
* index and advances by the total number of launched threads, so any 1D
* launch configuration covers all n elements.
*
* \param a  device array, modified in place
* \param b  half-width of the target interval
* \param n  number of elements of a to process
*/
__global__
void _mcftsScale(cudaReal* a, cudaReal const b, const int n)
{
   // Total number of threads in the grid (loop stride)
   int nThreads = blockDim.x * gridDim.x;
   // Global index of this thread (first element it handles)
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i] = a[i] * 2 * b - b;
   }
}
/*
* Add array b to real part of a and array c to imaginary part of a,
* GPU kernel.
*
* The kernel uses a grid-stride loop: every thread starts at its global
* index and advances by the total number of launched threads.
*
* \param a  device array of complex values, modified in place
* \param b  device array added to the real parts of a
* \param c  device array added to the imaginary parts of a
* \param n  number of elements to process
*/
__global__
void _fourierMove(cudaComplex* a, cudaReal const * b,
                  cudaReal const * c, const int n) {
   // Total number of threads in the grid (loop stride)
   int nThreads = blockDim.x * gridDim.x;
   // Global index of this thread (first element it handles)
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      // NOTE(review): loop body restored from the documented contract of
      // fourierMove; assumes cudaComplex exposes the real part as .x and
      // the imaginary part as .y (cuFFT complex layout) -- confirm.
      a[i].x += b[i];
      a[i].y += c[i];
   }
}
/*
* Compute d field (functional derivative of H[w]), GPU kernel.
*
* For each element: d[i] = a * (b * (Wc[i] - s) + Cc[i]).
*
* The kernel uses a grid-stride loop: every thread starts at its global
* index and advances by the total number of launched threads.
*
* \param d   device array that receives the result
* \param Wc  device input array (read only)
* \param Cc  device input array (read only)
* \param a   overall scalar prefactor
* \param b   scalar multiplying the shifted Wc term
* \param s   scalar subtracted from each Wc element
* \param n   number of elements to process
*/
__global__
void _computeDField(cudaReal* d, cudaReal const * Wc,
                    cudaReal const * Cc, cudaReal const a,
                    cudaReal const b, cudaReal const s,
                    const int n)
{
   // Total number of threads in the grid (loop stride)
   int nThreads = blockDim.x * gridDim.x;
   // Global index of this thread (first element it handles)
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      d[i] = a * (b * (Wc[i] - s) + Cc[i]);
   }
}
/*
* Compute force bias, GPU kernel.
*
* For each element:
*
*    result[i] = 0.5 * (di[i] + df[i])
*                * (dwc[i] + mobility * (0.5 * (di[i] - df[i])))
*
* The kernel uses a grid-stride loop: every thread starts at its global
* index and advances by the total number of launched threads.
*
* NOTE(review): the df and dwc parameters are read by the body; their
* position in the parameter list follows the argument order of the
* computeForceBias wrapper (di, df, dwc) -- confirm against the launch.
*
* \param result    device array that receives the result
* \param di        device input array (read only)
* \param df        device input array (read only)
* \param dwc       device input array (read only)
* \param mobility  scalar multiplying the half-difference of di and df
* \param n         number of elements to process
*/
__global__
void _computeForceBias(cudaReal* result, cudaReal const * di,
                       cudaReal const * df,
                       cudaReal const * dwc,
                       cudaReal mobility, const int n)
{
   // Total number of threads in the grid (loop stride)
   int nThreads = blockDim.x * gridDim.x;
   // Global index of this thread (first element it handles)
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      // The literal 0.5 has type double, so this expression is evaluated
      // in double precision regardless of the width of cudaReal.
      result[i] = 0.5 * (di[i] + df[i]) *
                  (dwc[i] + mobility * (0.5 * (di[i] - df[i])));
   }
}
82 int nBlocks, nThreads;
86 _mcftsScale<<<nBlocks, nThreads>>>(a.
cArray(), b, n);
101 int nBlocks, nThreads;
105 _fourierMove<<<nBlocks, nThreads>>>(a.
cArray(), b.
cArray(),
115 cudaReal
const a, cudaReal
const b, cudaReal
const s)
122 int nBlocks, nThreads;
126 _computeDField<<<nBlocks, nThreads>>>(d.
cArray(), Wc.
cArray(),
144 int nBlocks, nThreads;
148 _computeForceBias<<<nBlocks, nThreads>>>(result.
cArray(), di.
cArray(),
Dynamic array on the GPU device with aligned data.
int capacity() const
Return allocated capacity.
Data * cArray()
Return pointer to underlying C array.
#define UTIL_CHECK(condition)
Assertion macro suitable for serial or parallel production code.
void setThreadsLogical(int nThreadsLogical)
Given total number of threads, set 1D execution configuration.
void computeDField(DeviceArray< cudaReal > &d, DeviceArray< cudaReal > const &Wc, DeviceArray< cudaReal > const &Cc, cudaReal const a, cudaReal const b, cudaReal const s)
Compute d field (functional derivative of H[w])
void fourierMove(DeviceArray< cudaComplex > &a, DeviceArray< cudaReal > const &b, DeviceArray< cudaReal > const &c)
Add array b to real part of a and array c to imaginary part of a.
void mcftsScale(DeviceArray< cudaReal > &a, cudaReal const b)
Rescale array a from [0,1] to [-b, b], GPU kernel wrapper.
void computeForceBias(DeviceArray< cudaReal > &result, DeviceArray< cudaReal > const &di, DeviceArray< cudaReal > const &df, DeviceArray< cudaReal > const &dwc, cudaReal mobility)
Compute force bias.
PSCF package top-level namespace.