9#include <pscf/cuda/ThreadArray.h>
// GPU kernel: overwrite each element of a with a[i] * 2 * b - b.
// An input value of 0 maps to -b and an input value of 1 maps to +b, so
// inputs in [0, 1] end up in [-b, b].
//
// a : array that is read and overwritten element by element
// b : scalar used as both the scale (2*b) and the offset (-b)
// n : exclusive upper bound on the element index i
//
// NOTE(review): this listing is an extract; the function's opening brace and
// the closing braces of the loop and function are not visible here.
23 __global__
void _mcftsScale(cudaReal* a, cudaReal
const b,
const int n)
// Total thread count along x for this launch (threads per block * blocks).
25 int nThreads = blockDim.x * gridDim.x;
// Index of this thread along x across the whole launch.
26 int startID = blockIdx.x * blockDim.x + threadIdx.x;
// Each thread handles i = startID, startID + nThreads, ... while i < n.
27 for (
int i = startID; i < n; i += nThreads) {
28 a[i] = a[i] * 2 * b - b;
// GPU kernel taking a complex array a, two read-only real arrays b and c,
// and an element count n.
//
// NOTE(review): the loop body is not visible in this listing. Presumably it
// adds b[i] to the real part and c[i] to the imaginary part of a[i] -- confirm
// against the full source before relying on this.
33 __global__
void _fourierMove(cudaComplex* a, cudaReal
const * b,
34 cudaReal
const * c,
const int n) {
// Total thread count along x for this launch (threads per block * blocks).
35 int nThreads = blockDim.x * gridDim.x;
// Index of this thread along x across the whole launch.
36 int startID = blockIdx.x * blockDim.x + threadIdx.x;
// Each thread visits i = startID, startID + nThreads, ... while i < n.
37 for (
int i = startID; i < n; i += nThreads) {
// GPU kernel: for each index i below n, store
//     d[i] = a * (b * (Wc[i] - s) + Cc[i]).
//
// d      : output array, written at d[i]
// Wc, Cc : read-only input arrays, read at index i
// a, b, s: scalars used in the expression above
//
// NOTE(review): the loop bound n is used below, but its declaration (and the
// end of the parameter list) falls on lines not visible in this listing.
44 __global__
void _computeDField(cudaReal* d, cudaReal
const * Wc,
45 cudaReal
const * Cc, cudaReal
const a,
46 cudaReal
const b, cudaReal
const s,
// Total thread count along x for this launch (threads per block * blocks).
49 int nThreads = blockDim.x * gridDim.x;
// Index of this thread along x across the whole launch.
50 int startID = blockIdx.x * blockDim.x + threadIdx.x;
// Each thread handles i = startID, startID + nThreads, ... while i < n.
51 for (
int i = startID; i < n; i += nThreads) {
52 d[i] = a * (b * (Wc[i] - s) + Cc[i]);
// GPU kernel: for each index i below n, store
//     result[i] = 0.5 * (di[i] + df[i])
//                 * (dwc[i] + mobility * (0.5 * (di[i] - df[i]))).
//
// result   : output array, written at result[i]
// di       : read-only input array
// mobility : scalar multiplying the half-difference term
// n        : exclusive upper bound on the element index i
//
// NOTE(review): df and dwc are read in the loop body, but their declarations
// fall on lines not visible in this listing.
// NOTE(review): 0.5 is a double literal; if cudaReal is a single-precision
// type the arithmetic is promoted to double -- confirm this is intended.
57 __global__
void _computeForceBias(cudaReal* result, cudaReal
const * di,
60 cudaReal mobility,
const int n)
// Total thread count along x for this launch (threads per block * blocks).
62 int nThreads = blockDim.x * gridDim.x;
// Index of this thread along x across the whole launch.
63 int startID = blockIdx.x * blockDim.x + threadIdx.x;
// Each thread handles i = startID, startID + nThreads, ... while i < n.
64 for (
int i = startID; i < n; i += nThreads) {
65 result[i] = 0.5 * (di[i] + df[i]) *
66 (dwc[i] + mobility * (0.5 * (di[i] - df[i])));
82 int nBlocks, nThreads;
86 _mcftsScale<<<nBlocks, nThreads>>>(a.
cArray(), b, n);
101 int nBlocks, nThreads;
105 _fourierMove<<<nBlocks, nThreads>>>(a.
cArray(), b.
cArray(),
115 cudaReal
const a, cudaReal
const b, cudaReal
const s)
122 int nBlocks, nThreads;
126 _computeDField<<<nBlocks, nThreads>>>(d.
cArray(), Wc.
cArray(),
144 int nBlocks, nThreads;
148 _computeForceBias<<<nBlocks, nThreads>>>(result.
cArray(), di.
cArray(),
Dynamic array on the GPU device with aligned data.
int capacity() const
Return allocated capacity.
Data * cArray()
Return pointer to underlying C array.
#define UTIL_CHECK(condition)
Assertion macro suitable for serial or parallel production code.
void setThreadsLogical(int nThreadsLogical)
Given total number of threads, set 1D execution configuration.
Fields, FFTs, and utilities for periodic boundary conditions (CUDA)
Element-wise vector operations performed on the GPU for FTS classes.
void computeDField(DeviceArray< cudaReal > &d, DeviceArray< cudaReal > const &Wc, DeviceArray< cudaReal > const &Cc, cudaReal const a, cudaReal const b, cudaReal const s)
Compute d field (functional derivative of H[w])
void fourierMove(DeviceArray< cudaComplex > &a, DeviceArray< cudaReal > const &b, DeviceArray< cudaReal > const &c)
Add array b to the real part of a and array c to the imaginary part of a.
void mcftsScale(DeviceArray< cudaReal > &a, cudaReal const b)
Rescale array a from [0,1] to [-b, b], GPU kernel wrapper.
void computeForceBias(DeviceArray< cudaReal > &result, DeviceArray< cudaReal > const &di, DeviceArray< cudaReal > const &df, DeviceArray< cudaReal > const &dwc, cudaReal mobility)
Compute force bias.
SCFT and PS-FTS with real periodic fields (GPU)
PSCF package top-level namespace.