Loading [MathJax]/extensions/TeX/AMSsymbols.js
PSCF v1.2
VecOpFts.cu
1/*
2* PSCF Package
3*
4* Copyright 2016 - 2022, The Regents of the University of Minnesota
5* Distributed under the terms of the GNU General Public License.
6*/
7
8#include "VecOpFts.h"
9#include <pscf/cuda/ThreadArray.h>
10
11namespace Pscf {
12namespace Rpg {
13namespace VecOpFts {
14
15 using namespace Prdc::Cuda;
16
17 // CUDA kernels:
18 // (defined in anonymous namespace, used only in this file)
19
20 namespace {
21
22 // Rescale array a from [0,1] to [-b, b]
23 __global__ void _mcftsScale(cudaReal* a, cudaReal const b, const int n)
24 {
25 int nThreads = blockDim.x * gridDim.x;
26 int startID = blockIdx.x * blockDim.x + threadIdx.x;
27 for (int i = startID; i < n; i += nThreads) {
28 a[i] = a[i] * 2 * b - b;
29 }
30 }
31
32 // Add array b to real part of a and array c to imaginary part of a
33 __global__ void _fourierMove(cudaComplex* a, cudaReal const * b,
34 cudaReal const * c, const int n) {
35 int nThreads = blockDim.x * gridDim.x;
36 int startID = blockIdx.x * blockDim.x + threadIdx.x;
37 for (int i = startID; i < n; i += nThreads) {
38 a[i].x += b[i];
39 a[i].y += c[i];
40 }
41 }
42
43 // Compute d field (functional derivative of H[w])
44 __global__ void _computeDField(cudaReal* d, cudaReal const * Wc,
45 cudaReal const * Cc, cudaReal const a,
46 cudaReal const b, cudaReal const s,
47 const int n)
48 {
49 int nThreads = blockDim.x * gridDim.x;
50 int startID = blockIdx.x * blockDim.x + threadIdx.x;
51 for (int i = startID; i < n; i += nThreads) {
52 d[i] = a * (b * (Wc[i] - s) + Cc[i]);
53 }
54 }
55
56 // Compute force bias
57 __global__ void _computeForceBias(cudaReal* result, cudaReal const * di,
58 cudaReal const * df,
59 cudaReal const * dwc,
60 cudaReal mobility, const int n)
61 {
62 int nThreads = blockDim.x * gridDim.x;
63 int startID = blockIdx.x * blockDim.x + threadIdx.x;
64 for (int i = startID; i < n; i += nThreads) {
65 result[i] = 0.5 * (di[i] + df[i]) *
66 (dwc[i] + mobility * (0.5 * (di[i] - df[i])));
67 }
68 }
69
70 }
71
72 // Kernel wrappers:
73
74 /*
75 * Rescale array a from [0,1] to [-b, b], GPU kernel wrapper.
76 */
77 void mcftsScale(DeviceArray<cudaReal>& a, cudaReal const b)
78 {
79 const int n = a.capacity();
80
81 // GPU resources
82 int nBlocks, nThreads;
83 ThreadArray::setThreadsLogical(n, nBlocks, nThreads);
84
85 // Launch kernel
86 _mcftsScale<<<nBlocks, nThreads>>>(a.cArray(), b, n);
87 }
88
89 /*
90 * Add array b to real part of a and array c to imaginary part of a
91 */
93 DeviceArray<cudaReal> const & b,
94 DeviceArray<cudaReal> const & c)
95 {
96 const int n = a.capacity();
97 UTIL_CHECK(b.capacity() >= n);
98 UTIL_CHECK(c.capacity() >= n);
99
100 // GPU resources
101 int nBlocks, nThreads;
102 ThreadArray::setThreadsLogical(n, nBlocks, nThreads);
103
104 // Launch kernel
105 _fourierMove<<<nBlocks, nThreads>>>(a.cArray(), b.cArray(),
106 c.cArray(), n);
107 }
108
109 /*
110 * Compute d field (functional derivative of H[w])
111 */
113 DeviceArray<cudaReal> const & Wc,
114 DeviceArray<cudaReal> const & Cc,
115 cudaReal const a, cudaReal const b, cudaReal const s)
116 {
117 const int n = d.capacity();
118 UTIL_CHECK(Wc.capacity() == n);
119 UTIL_CHECK(Cc.capacity() == n);
120
121 // GPU resources
122 int nBlocks, nThreads;
123 ThreadArray::setThreadsLogical(n, nBlocks, nThreads);
124
125 // Launch kernel
126 _computeDField<<<nBlocks, nThreads>>>(d.cArray(), Wc.cArray(),
127 Cc.cArray(), a, b, s, n);
128 }
129
130 /*
131 * Compute force bias
132 */
134 DeviceArray<cudaReal> const & di,
135 DeviceArray<cudaReal> const & df,
136 DeviceArray<cudaReal> const & dwc, cudaReal mobility)
137 {
138 const int n = result.capacity();
139 UTIL_CHECK(di.capacity() == n);
140 UTIL_CHECK(df.capacity() == n);
141 UTIL_CHECK(dwc.capacity() == n);
142
143 // GPU resources
144 int nBlocks, nThreads;
145 ThreadArray::setThreadsLogical(n, nBlocks, nThreads);
146
147 // Launch kernel
148 _computeForceBias<<<nBlocks, nThreads>>>(result.cArray(), di.cArray(),
149 df.cArray(), dwc.cArray(),
150 mobility, n);
151 }
152
153}
154}
155}
Dynamic array on the GPU device with aligned data.
Definition rpg/System.h:32
int capacity() const
Return allocated capacity.
Data * cArray()
Return pointer to underlying C array.
#define UTIL_CHECK(condition)
Assertion macro suitable for serial or parallel production code.
Definition global.h:68
void setThreadsLogical(int nThreadsLogical)
Given total number of threads, set 1D execution configuration.
void computeDField(DeviceArray< cudaReal > &d, DeviceArray< cudaReal > const &Wc, DeviceArray< cudaReal > const &Cc, cudaReal const a, cudaReal const b, cudaReal const s)
Compute d field (functional derivative of H[w])
Definition VecOpFts.cu:112
void fourierMove(DeviceArray< cudaComplex > &a, DeviceArray< cudaReal > const &b, DeviceArray< cudaReal > const &c)
Add array b to real part of a and array c to imaginary part of a.
Definition VecOpFts.cu:92
void mcftsScale(DeviceArray< cudaReal > &a, cudaReal const b)
Rescale array a from [0,1] to [-b, b], GPU kernel wrapper.
Definition VecOpFts.cu:77
void computeForceBias(DeviceArray< cudaReal > &result, DeviceArray< cudaReal > const &di, DeviceArray< cudaReal > const &df, DeviceArray< cudaReal > const &dwc, cudaReal mobility)
Compute force bias.
Definition VecOpFts.cu:133
PSCF package top-level namespace.
Definition param_pc.dox:1