PSCF v1.1
FFTBatched.tpp
1#ifndef PSPG_FFT_BATCHED_TPP
2#define PSPG_FFT_BATCHED_TPP
3
4/*
5* PSCF Package
6*
7* Copyright 2016 - 2022, The Regents of the University of Minnesota
8* Distributed under the terms of the GNU General Public License.
9*/
10
11#include "FFTBatched.h"
12#include <pspg/math/GpuResources.h>
13
14namespace Pscf {
15namespace Pspg
16{
17
18static __global__ void scaleComplexData(cudaComplex* data, cudaReal scale, int size) {
19
20 //write code that will scale
21 int nThreads = blockDim.x * gridDim.x;
22 int startId = blockIdx.x * blockDim.x + threadIdx.x;
23 for(int i = startId; i < size; i += nThreads ) {
24 data[i].x *= scale;
25 data[i].y *= scale;
26 }
27
28}
29
30static __global__ void scaleRealData(cudaReal* data, cudaReal scale, int size) {
31
32 int nThreads = blockDim.x * gridDim.x;
33 int startId = blockIdx.x * blockDim.x + threadIdx.x;
34 for(int i = startId; i < size; i += nThreads ) {
35 data[i] *= scale;
36 }
37
38}
39
40 using namespace Util;
41
42 /*
43 * Default constructor.
44 */
45 template <int D>
47 : meshDimensions_(0),
48 rSize_(0),
49 kSize_(0),
50 fPlan_(0),
51 iPlan_(0),
52 isSetup_(false)
53 {}
54
55 /*
56 * Destructor.
57 */
58 template <int D>
60 {
61 if (fPlan_) {
62 cufftDestroy(fPlan_);
63 }
64 if (iPlan_) {
65 cufftDestroy(iPlan_);
66 }
67 }
68
69 /*
70 * Check and (if necessary) setup mesh dimensions.
71 */
72 template <int D>
74 {
75 // Preconditions
76 UTIL_CHECK(!isSetup_);
77 IntVec<D> rDimensions = rField.meshDimensions();
78 IntVec<D> kDimensions = kField.meshDimensions();
79 UTIL_CHECK(rDimensions == kDimensions);
80
81 // Set Mesh dimensions
82 rSize_ = 1;
83 kSize_ = 1;
84 for (int i = 0; i < D; ++i) {
85 UTIL_CHECK(rDimensions[i] > 0);
86 meshDimensions_[i] = rDimensions[i];
87 rSize_ *= rDimensions[i];
88 if (i < D - 1) {
89 kSize_ *= rDimensions[i];
90 } else {
91 kSize_ *= (rDimensions[i]/2 + 1);
92 }
93 }
94
95 //turned off to allow batch solution of MDE
96 //UTIL_CHECK(rField.capacity() == rSize_);
97 //UTIL_CHECK(kField.capacity() == kSize_);
98
99
100 // Make FFTW plans (explicit specializations)
101 makePlans(rField, kField);
102
103 isSetup_ = true;
104 }
105
106 template <int D>
107 void FFTBatched<D>::setup(const IntVec<D>& rDim, const IntVec<D>& kDim, int batchSize)
108 {
109 // Preconditions
110 UTIL_CHECK(!isSetup_);
111
112 // Set Mesh dimensions
113 rSize_ = 1;
114 kSize_ = 1;
115 for (int i = 0; i < D; ++i) {
116 UTIL_CHECK(rDim[i] > 0);
117 meshDimensions_[i] = rDim[i];
118 rSize_ *= rDim[i];
119 if (i < D - 1) {
120 kSize_ *= rDim[i];
121 } else {
122 kSize_ *= (rDim[i]/2 + 1);
123 }
124 }
125
126 //turned off to allow batch solution of MDE
127 //UTIL_CHECK(rField.capacity() == rSize_);
128 //UTIL_CHECK(kField.capacity() == kSize_);
129
130
131 // Make FFTW plans (explicit specializations)
132 makePlans(rDim, kDim, batchSize);
133
134 isSetup_ = true;
135 }
136
137 /*
138 * Make plans for batch size of 2
139 */
140 template <int D>
141 void FFTBatched<D>::makePlans(RDField<D>& rField, RDFieldDft<D>& kField)
142 {
143 int n[D];
144 int rdist = 1;
145 int kdist = 1;
146 for(int i = 0; i < D; i++) {
147 n[i] = rField.meshDimensions()[i];
148 if( i == D - 1 ) {
149 kdist *= n[i]/2 + 1;
150 } else {
151 kdist *= n[i];
152 }
153 rdist *= n[i];
154
155 }
156
157 #ifdef SINGLE_PRECISION
158 if(cufftPlanMany(&fPlan_, D, n, //plan, rank, n
159 NULL, 1, rdist, //inembed, istride, idist
160 NULL, 1, kdist, //onembed, ostride, odist
161 CUFFT_R2C, 2) != CUFFT_SUCCESS) {
162 std::cout<<"plan creation failed "<<std::endl;
163 exit(1);
164 }
165 if(cufftPlanMany(&iPlan_, D, n, //plan, rank, n
166 NULL, 1, rdist, //inembed, istride, idist
167 NULL, 1, kdist, //onembed, ostride, odist
168 CUFFT_C2R, 2) != CUFFT_SUCCESS) {
169 std::cout<<"plan creation failed "<<std::endl;
170 exit(1);
171 }
172 #else
173 if(cufftPlanMany(&fPlan_, D, n, //plan, rank, n
174 NULL, 1, rdist, //inembed, istride, idist
175 NULL, 1, kdist, //onembed, ostride, odist
176 CUFFT_D2Z, 2) != CUFFT_SUCCESS) {
177 std::cout<<"plan creation failed "<<std::endl;
178 exit(1);
179 }
180 if(cufftPlanMany(&iPlan_, D, n, //plan, rank, n
181 NULL, 1, rdist, //inembed, istride, idist
182 NULL, 1, kdist, //onembed, ostride, odist
183 CUFFT_Z2D, 2) != CUFFT_SUCCESS) {
184 std::cout<<"plan creation failed "<<std::endl;
185 exit(1);
186 }
187 #endif
188 }
189
190 /*
191 * Make plans for variable batch size
192 */
193
194 template <int D>
195 void FFTBatched<D>::makePlans(const IntVec<D>& rDim, const IntVec<D>& kDim, int batchSize)
196 {
197 int n[D];
198 int rdist = 1;
199 int kdist = 1;
200 for(int i = 0; i < D; i++) {
201 n[i] = rDim[i];
202 if( i == D - 1 ) {
203 kdist *= n[i]/2 + 1;
204 } else {
205 kdist *= n[i];
206 }
207 rdist *= n[i];
208
209 }
210
211 #ifdef SINGLE_PRECISION
212 if(cufftPlanMany(&fPlan_, D, n, //plan, rank, n
213 NULL, 1, rdist, //inembed, istride, idist
214 NULL, 1, kdist, //onembed, ostride, odist
215 CUFFT_R2C, batchSize) != CUFFT_SUCCESS) {
216 std::cout<<"plan creation failed "<<std::endl;
217 exit(1);
218 }
219 if(cufftPlanMany(&iPlan_, D, n, //plan, rank, n
220 NULL, 1, kdist, //inembed, istride, idist
221 NULL, 1, rdist, //onembed, ostride, odist
222 CUFFT_C2R, batchSize) != CUFFT_SUCCESS) {
223 std::cout<<"plan creation failed "<<std::endl;
224 exit(1);
225 }
226 #else
227 if(cufftPlanMany(&fPlan_, D, n, //plan, rank, n
228 NULL, 1, rdist, //inembed, istride, idist
229 NULL, 1, kdist, //onembed, ostride, odist
230 CUFFT_D2Z, batchSize) != CUFFT_SUCCESS) {
231 std::cout<<"plan creation failed "<<std::endl;
232 exit(1);
233 }
234 if(cufftPlanMany(&iPlan_, D, n, //plan, rank, n
235 NULL, 1, kdist, //inembed, istride, idist
236 NULL, 1, rdist, //onembed, ostride, odist
237 CUFFT_Z2D, batchSize) != CUFFT_SUCCESS) {
238 std::cout<<"plan creation failed "<<std::endl;
239 exit(1);
240 }
241 #endif
242 }
243
244 /*
245 * Execute forward transform.
246 */
247 template <int D>
249 {
250 // GPU resources
251 int nBlocks, nThreads;
252 ThreadGrid::setThreadsLogical(rSize_*2, nBlocks, nThreads);
253
254 // Check dimensions or setup
255 if (isSetup_) {
256 UTIL_CHECK(rField.capacity() == 2*rSize_);
257 UTIL_CHECK(kField.capacity() == 2*kSize_);
258 } else {
259 //UTIL_CHECK(0);
260 //should never reach here in a parallel block. breaks instantly
261 setup(rField, kField);
262 }
263
264 // Copy rescaled input data prior to work array
265 cudaReal scale = 1.0/cudaReal(rSize_);
266 //scale for every batch
267 scaleRealData<<<nBlocks, nThreads>>>(rField.cDField(), scale, rSize_ * 2);
268
269 //perform fft
270 #ifdef SINGLE_PRECISION
271 if(cufftExecR2C(fPlan_, rField.cDField(), kField.cDField()) != CUFFT_SUCCESS) {
272 std::cout<<"CUFFT error: forward"<<std::endl;
273 return;
274 }
275 #else
276 if(cufftExecD2Z(fPlan_, rField.cDField(), kField.cDField()) != CUFFT_SUCCESS) {
277 std::cout<<"CUFFT error: forward"<<std::endl;
278 return;
279 }
280 #endif
281
282 }
283
284 /*
285 * Execute forward transform.
286 */
287 template <int D>
289 (cudaReal* rField, cudaComplex* kField, int batchSize)
290 {
291 // GPU resources
292 int nBlocks, nThreads;
293 ThreadGrid::setThreadsLogical(rSize_*batchSize, nBlocks, nThreads);
294
295 // Check dimensions or setup
296 if (isSetup_) {
297 } else {
298 std::cerr<<"Need to setup before running transform"<<std::endl;
299 exit(1);
300 }
301
302 // Copy rescaled input data prior to work array
303 cudaReal scale = 1.0/cudaReal(rSize_);
304 //scale for every batch
305 scaleRealData<<<nBlocks, nThreads>>>(rField, scale, rSize_ * batchSize);
306
307 //perform fft
308 #ifdef SINGLE_PRECISION
309 if(cufftExecR2C(fPlan_, rField, kField) != CUFFT_SUCCESS) {
310 std::cout<<"CUFFT error: forward"<<std::endl;
311 return;
312 }
313 #else
314 if(cufftExecD2Z(fPlan_, rField, kField) != CUFFT_SUCCESS) {
315 std::cout<<"CUFFT error: forward"<<std::endl;
316 return;
317 }
318 #endif
319
320 }
321
322 /*
323 * Execute inverse (complex-to-real) transform.
324 */
325 template <int D>
327 {
328 if (!isSetup_) {
329 //UTIL_CHECK(0);
330 setup(rField, kField);
331 }
332
333 #ifdef SINGLE_PRECISION
334 if(cufftExecC2R(iPlan_, kField.cDField(), rField.cDField()) != CUFFT_SUCCESS) {
335 std::cout<<"CUFFT error: inverse"<<std::endl;
336 return;
337 }
338 #else
339 if(cufftExecZ2D(iPlan_, kField.cDField(), rField.cDField()) != CUFFT_SUCCESS) {
340 std::cout<<"CUFFT error: inverse"<<std::endl;
341 return;
342 }
343 #endif
344
345 }
346
347
348 /*
349 * Execute inverse (complex-to-real) transform.
350 */
351 template <int D>
353 (cudaComplex* kField, cudaReal* rField, int batchSize)
354 {
355 if (!isSetup_) {
356 std::cerr<<"Need to setup before running FFTbatchec"<<std::endl;
357 }
358
359 #ifdef SINGLE_PRECISION
360 if(cufftExecC2R(iPlan_, kField, rField) != CUFFT_SUCCESS) {
361 std::cout<<"CUFFT error: inverse"<<std::endl;
362 return;
363 }
364 #else
365 if(cufftExecZ2D(iPlan_, kField, rField) != CUFFT_SUCCESS) {
366 std::cout<<"CUFFT error: inverse"<<std::endl;
367 return;
368 }
369 #endif
370
371 }
372
373}
374}
375#endif
An IntVec<D, T> is a D-component vector of elements of integer type T.
Definition: IntVec.h:27
Data * cDField()
Return pointer to underlying C array.
Definition: DField.h:119
int capacity() const
Return allocated size.
Definition: DField.h:112
FFTBatched()
Default constructor.
Definition: FFTBatched.tpp:46
void setup(RDField< D > &rDField, RDFieldDft< D > &kDField)
Check and setup grid dimensions if necessary.
Definition: FFTBatched.tpp:73
void inverseTransform(RDFieldDft< D > &in, RDField< D > &out)
Compute inverse (complex-to-real) Fourier transform.
Definition: FFTBatched.tpp:326
virtual ~FFTBatched()
Destructor.
Definition: FFTBatched.tpp:59
void forwardTransform(RDField< D > &in, RDFieldDft< D > &out)
Compute forward (real-to-complex) Fourier transform.
Definition: FFTBatched.tpp:248
Discrete Fourier Transform (DFT) of a real field on an FFT mesh.
Definition: RDFieldDft.h:35
const IntVec< D > & meshDimensions() const
Return vector of mesh dimensions by constant reference.
Definition: RDFieldDft.h:142
Field of real single precision values on an FFT mesh on a device.
Definition: RDField.h:34
const IntVec< D > & meshDimensions() const
Return mesh dimensions by constant reference.
Definition: RDField.h:123
#define UTIL_CHECK(condition)
Assertion macro suitable for serial or parallel production code.
Definition: global.h:68
int nThreads()
Get the number of threads per block for execution.
Definition: ThreadGrid.cu:173
void setThreadsLogical(int nThreadsLogical)
Set the total number of threads required for execution.
Definition: ThreadGrid.cu:80
C++ namespace for polymer self-consistent field theory (PSCF).
Utility classes for scientific computation.
Definition: accumulators.mod:1