1#ifndef PSPG_FFT_BATCHED_TPP
2#define PSPG_FFT_BATCHED_TPP
11#include "FFTBatched.h"
12#include <pspg/math/GpuResources.h>
18static __global__
void scaleComplexData(cudaComplex* data, cudaReal scale,
int size) {
21 int nThreads = blockDim.x * gridDim.x;
22 int startId = blockIdx.x * blockDim.x + threadIdx.x;
23 for(
int i = startId; i < size; i +=
nThreads ) {
30static __global__
void scaleRealData(cudaReal* data, cudaReal scale,
int size) {
32 int nThreads = blockDim.x * gridDim.x;
33 int startId = blockIdx.x * blockDim.x + threadIdx.x;
34 for(
int i = startId; i < size; i +=
nThreads ) {
84 for (
int i = 0; i < D; ++i) {
86 meshDimensions_[i] = rDimensions[i];
87 rSize_ *= rDimensions[i];
89 kSize_ *= rDimensions[i];
91 kSize_ *= (rDimensions[i]/2 + 1);
101 makePlans(rField, kField);
115 for (
int i = 0; i < D; ++i) {
117 meshDimensions_[i] = rDim[i];
122 kSize_ *= (rDim[i]/2 + 1);
132 makePlans(rDim, kDim, batchSize);
141 void FFTBatched<D>::makePlans(RDField<D>& rField, RDFieldDft<D>& kField)
146 for(
int i = 0; i < D; i++) {
147 n[i] = rField.meshDimensions()[i];
157 #ifdef SINGLE_PRECISION
158 if(cufftPlanMany(&fPlan_, D, n,
161 CUFFT_R2C, 2) != CUFFT_SUCCESS) {
162 std::cout<<
"plan creation failed "<<std::endl;
165 if(cufftPlanMany(&iPlan_, D, n,
168 CUFFT_C2R, 2) != CUFFT_SUCCESS) {
169 std::cout<<
"plan creation failed "<<std::endl;
173 if(cufftPlanMany(&fPlan_, D, n,
176 CUFFT_D2Z, 2) != CUFFT_SUCCESS) {
177 std::cout<<
"plan creation failed "<<std::endl;
180 if(cufftPlanMany(&iPlan_, D, n,
183 CUFFT_Z2D, 2) != CUFFT_SUCCESS) {
184 std::cout<<
"plan creation failed "<<std::endl;
195 void FFTBatched<D>::makePlans(
const IntVec<D>& rDim,
const IntVec<D>& kDim,
int batchSize)
200 for(
int i = 0; i < D; i++) {
211 #ifdef SINGLE_PRECISION
212 if(cufftPlanMany(&fPlan_, D, n,
215 CUFFT_R2C, batchSize) != CUFFT_SUCCESS) {
216 std::cout<<
"plan creation failed "<<std::endl;
219 if(cufftPlanMany(&iPlan_, D, n,
222 CUFFT_C2R, batchSize) != CUFFT_SUCCESS) {
223 std::cout<<
"plan creation failed "<<std::endl;
227 if(cufftPlanMany(&fPlan_, D, n,
230 CUFFT_D2Z, batchSize) != CUFFT_SUCCESS) {
231 std::cout<<
"plan creation failed "<<std::endl;
234 if(cufftPlanMany(&iPlan_, D, n,
237 CUFFT_Z2D, batchSize) != CUFFT_SUCCESS) {
238 std::cout<<
"plan creation failed "<<std::endl;
251 int nBlocks, nThreads;
261 setup(rField, kField);
265 cudaReal scale = 1.0/cudaReal(rSize_);
267 scaleRealData<<<nBlocks, nThreads>>>(rField.
cDField(), scale, rSize_ * 2);
270 #ifdef SINGLE_PRECISION
271 if(cufftExecR2C(fPlan_, rField.
cDField(), kField.
cDField()) != CUFFT_SUCCESS) {
272 std::cout<<
"CUFFT error: forward"<<std::endl;
276 if(cufftExecD2Z(fPlan_, rField.
cDField(), kField.
cDField()) != CUFFT_SUCCESS) {
277 std::cout<<
"CUFFT error: forward"<<std::endl;
289 (cudaReal* rField, cudaComplex* kField,
int batchSize)
292 int nBlocks, nThreads;
298 std::cerr<<
"Need to setup before running transform"<<std::endl;
303 cudaReal scale = 1.0/cudaReal(rSize_);
305 scaleRealData<<<nBlocks, nThreads>>>(rField, scale, rSize_ * batchSize);
308 #ifdef SINGLE_PRECISION
309 if(cufftExecR2C(fPlan_, rField, kField) != CUFFT_SUCCESS) {
310 std::cout<<
"CUFFT error: forward"<<std::endl;
314 if(cufftExecD2Z(fPlan_, rField, kField) != CUFFT_SUCCESS) {
315 std::cout<<
"CUFFT error: forward"<<std::endl;
330 setup(rField, kField);
333 #ifdef SINGLE_PRECISION
334 if(cufftExecC2R(iPlan_, kField.
cDField(), rField.
cDField()) != CUFFT_SUCCESS) {
335 std::cout<<
"CUFFT error: inverse"<<std::endl;
339 if(cufftExecZ2D(iPlan_, kField.
cDField(), rField.
cDField()) != CUFFT_SUCCESS) {
340 std::cout<<
"CUFFT error: inverse"<<std::endl;
353 (cudaComplex* kField, cudaReal* rField,
int batchSize)
356 std::cerr<<
"Need to setup before running FFTbatchec"<<std::endl;
359 #ifdef SINGLE_PRECISION
360 if(cufftExecC2R(iPlan_, kField, rField) != CUFFT_SUCCESS) {
361 std::cout<<
"CUFFT error: inverse"<<std::endl;
365 if(cufftExecZ2D(iPlan_, kField, rField) != CUFFT_SUCCESS) {
366 std::cout<<
"CUFFT error: inverse"<<std::endl;
An IntVec<D, T> is a D-component vector of elements of integer type T.
Data * cDField()
Return pointer to underlying C array.
int capacity() const
Return allocated size.
FFTBatched()
Default constructor.
void setup(RDField< D > &rDField, RDFieldDft< D > &kDField)
Check and setup grid dimensions if necessary.
void inverseTransform(RDFieldDft< D > &in, RDField< D > &out)
Compute inverse (complex-to-real) Fourier transform.
virtual ~FFTBatched()
Destructor.
void forwardTransform(RDField< D > &in, RDFieldDft< D > &out)
Compute forward (real-to-complex) Fourier transform.
Discrete Fourier Transform (DFT) of a real field on an FFT mesh.
const IntVec< D > & meshDimensions() const
Return vector of mesh dimensions by constant reference.
Field of real single precision values on an FFT mesh on a device.
const IntVec< D > & meshDimensions() const
Return mesh dimensions by constant reference.
#define UTIL_CHECK(condition)
Assertion macro suitable for serial or parallel production code.
int nThreads()
Get the number of threads per block for execution.
void setThreadsLogical(int nThreadsLogical)
Set the total number of threads required for execution.
C++ namespace for polymer self-consistent field theory (PSCF).
Utility classes for scientific computation.