1#ifndef PSPG_THREADGRID_CU
2#define PSPG_THREADGRID_CU
13 int MAX_THREADS_PER_BLOCK = -1;
16 int THREADS_PER_BLOCK = -1;
22 int THREADS_LOGICAL = -1;
39 cudaGetDeviceCount(&count);
// Query the properties of device 0 (the device index is hard-coded here).
// NOTE(review): the cudaError_t returned by this call is discarded on this
// line, so a failed query would go unnoticed before dprop is read below.
52 cudaGetDeviceProperties(&dprop, 0);
// Per-multiprocessor thread limit reported by the device.
53 int maxThPerSM = dprop.maxThreadsPerMultiProcessor;
// For a positive x, x & ~(x - 1) isolates the lowest set bit of x, i.e. the
// largest power of two that divides maxThPerSM evenly.
59 int threadsPerBlock = (maxThPerSM & (~(maxThPerSM - 1)));
// Compare the candidate against the device's per-block thread limit.
63 if (threadsPerBlock > dprop.maxThreadsPerBlock)
74 MAX_THREADS_PER_BLOCK = nThreadsPerBlock;
86 if (MAX_THREADS_PER_BLOCK == -1)
100 THREADS_PER_BLOCK = MAX_THREADS_PER_BLOCK;
104 UNUSED_THREADS = (BLOCKS*THREADS_PER_BLOCK > THREADS_LOGICAL);
// Read the warp size and the per-multiprocessor thread limit of device 0
// (the device index is hard-coded).
// NOTE(review): the cudaError_t returned by cudaGetDeviceProperties is
// discarded here, so a failed query would go unnoticed.
127 cudaDeviceProp dprop;
128 cudaGetDeviceProperties(&dprop, 0);
129 int warpSize = dprop.warpSize;
130 int maxThreadsPerMultiProcessor = dprop.maxThreadsPerMultiProcessor;
// n & (n - 1) clears the lowest set bit of n, so a nonzero result means
// more than one bit is set and n is not a power of two.
134 if ((MAX_THREADS_PER_BLOCK & (MAX_THREADS_PER_BLOCK - 1)) != 0) {
135 UTIL_THROW(
"Set number of threads per block must be a power of two.");
// Require the block size to be a whole number of warps.
140 if (MAX_THREADS_PER_BLOCK%warpSize != 0)
144 "Number of threads per block must be a multiple of warp size %d.\n",
// NOTE(review): `%` associates left to right, so the condition below tests
// (maxThreadsPerMultiProcessor % MAX_THREADS_PER_BLOCK) % warpSize.
// A nonzero remainder that is itself a multiple of warpSize does not reach
// the warning (e.g. 1536 % 1024 = 512, and 512 % 32 = 0), although the
// message describes a plain "even divisor" test. Confirm whether the
// trailing `% warpSize` is intended.
155 if (maxThreadsPerMultiProcessor % MAX_THREADS_PER_BLOCK%warpSize != 0) {
157 <<
"WARNING: The number of threads per block ("
158 << MAX_THREADS_PER_BLOCK
159 <<
") is not an even divisor of the maximum number"
160 <<
" of threads per streaming multiprocessor ("
161 << maxThreadsPerMultiProcessor
162 <<
"). Performance will be suboptimal."
174 {
return THREADS_PER_BLOCK; }
177 {
return THREADS_LOGICAL; }
180 {
return UNUSED_THREADS; }
#define UTIL_THROW(msg)
Macro for throwing an Exception, reporting function, file and line number.
#define UTIL_ASSERT(condition)
Assertion macro suitable for debugging serial or parallel code.
bool hasUnusedThreads()
Indicates whether there will be unused threads.
int nThreadsLogical()
Return previously requested total number of threads.
int nThreads()
Get the number of threads per block for execution.
int nBlocks()
Get the current number of blocks for execution.
void setThreadsPerBlock()
Set the number of threads per block to a default value.
void checkExecutionConfig()
Check the execution configuration (threads and block counts).
void setThreadsLogical(int nThreadsLogical)
Set the total number of threads required for execution.
void init()
Initialize static variables in Pspg::ThreadGrid namespace.
C++ namespace for polymer self-consistent field theory (PSCF).
Utility classes for scientific computation.