1#ifndef PRDC_CUDA_FFT_BATCHED_TPP
2#define PRDC_CUDA_FFT_BATCHED_TPP
11#include "FFTBatched.h"
102 for (
int i = 0; i < D; ++i) {
104 meshDimensions_[i] = meshDimensions[i];
106 kMeshDimensions_[i] = meshDimensions[i];
108 kMeshDimensions_[i] = (meshDimensions[i]/2 + 1);
110 rSize_ *= meshDimensions_[i];
111 kSize_ *= kMeshDimensions_[i];
115 makePlans(batchSize);
128 if (batchSize == batchSize_) {
133 makePlans(batchSize);
143 batchSize_ = batchSize;
149 for(
int i = 0; i < D; i++) {
151 n[i] = meshDimensions_[i];
154 #ifdef SINGLE_PRECISION
155 if (cufftPlanMany(&fPlan_, D, n,
158 CUFFT_R2C, batchSize_) != CUFFT_SUCCESS) {
159 std::cout<<
"FFTBatched: plan creation failed "<<std::endl;
162 if (cufftPlanMany(&iPlan_, D, n,
165 CUFFT_C2R, batchSize_) != CUFFT_SUCCESS) {
166 std::cout<<
"FFTBatched: plan creation failed "<<std::endl;
170 if (cufftPlanMany(&fPlan_, D, n,
173 CUFFT_D2Z, batchSize_) != CUFFT_SUCCESS) {
174 std::cout<<
"FFTBatched: plan creation failed "<<std::endl;
177 if (cufftPlanMany(&iPlan_, D, n,
180 CUFFT_Z2D, batchSize_) != CUFFT_SUCCESS) {
181 std::cout<<
"FFTBatched: plan creation failed "<<std::endl;
203 #ifdef SINGLE_PRECISION
204 result = cufftExecR2C(fPlan_,
const_cast<cudaReal*
>(rFields.
cArray()),
207 result = cufftExecD2Z(fPlan_,
const_cast<cudaReal*
>(rFields.
cArray()),
211 if (result != CUFFT_SUCCESS) {
212 UTIL_THROW(
"Failure in cufft real-to-complex forward transform.");
216 cudaReal scale = 1.0/cudaReal(rSize_);
236 #ifdef SINGLE_PRECISION
237 result = cufftExecC2R(iPlan_, kFields.
cArray(), rFields.
cArray());
239 result = cufftExecZ2D(iPlan_, kFields.
cArray(), rFields.
cArray());
242 if (result != CUFFT_SUCCESS) {
243 UTIL_THROW(
"Failure in cufft complex-to-real inverse transform.");
Dynamic array on the GPU device with aligned data.
int capacity() const
Return allocated capacity.
Data * cArray()
Return pointer to underlying C array.
An IntVec<D, T> is a D-component vector of elements of integer type T.
Batched Fourier transform wrapper for real data.
virtual ~FFTBatched()
Destructor.
void forwardTransform(DeviceArray< cudaReal > const &rFields, DeviceArray< cudaComplex > &kFields) const
Compute batched forward (real-to-complex) DFTs.
void inverseTransformUnsafe(DeviceArray< cudaComplex > &kFields, DeviceArray< cudaReal > &rFields) const
Compute inverse (complex-to-real) Fourier transform.
void setup(IntVec< D > const &meshDimensions, int batchSize)
Set up FFT calculation (get grid dimensions and make FFT plan)
FFTBatched()
Default constructor.
void resetBatchSize(int batchSize)
Set the batch size to a new value.
#define UTIL_CHECK(condition)
Assertion macro suitable for serial or parallel production code.
#define UTIL_THROW(msg)
Macro for throwing an Exception, reporting function, file and line number.
void mulEqS(DeviceArray< cudaReal > &a, cudaReal const b, const int beginIdA, const int n)
Vector multiplication in-place, a[i] *= b, kernel wrapper (cudaReal).
PSCF package top-level namespace.
Utility classes for scientific computation.