1#ifndef PSCF_THREAD_ARRAY_CU
2#define PSCF_THREAD_ARRAY_CU
4#include "ThreadArray.h"
5#include <cuda_runtime.h>
14 int MAX_THREADS_PER_BLOCK = -1;
18 int THREADS_PER_BLOCK = -1;
25 int THREADS_LOGICAL = -1;
36namespace ThreadArray {
44 cudaGetDeviceCount(&count);
48 }
else if (count > 1) {
49 Log::file() <<
"\nWarning: multiple GPUs detected.\n"
50 <<
"This program is not compatible with multiple devices.\n"
51 <<
"Only the first device will be used." << std::endl;
62 cudaGetDeviceProperties(&dprop, 0);
63 int maxThPerSM = dprop.maxThreadsPerMultiProcessor;
69 int threadsPerBlock = (maxThPerSM & (~(maxThPerSM - 1)));
72 while (threadsPerBlock > dprop.maxThreadsPerBlock) {
81 MAX_THREADS_PER_BLOCK = nThreadsPerBlock;
93 if (MAX_THREADS_PER_BLOCK == -1)
107 THREADS_PER_BLOCK = MAX_THREADS_PER_BLOCK;
111 UNUSED_THREADS = (BLOCKS*THREADS_PER_BLOCK > THREADS_LOGICAL);
134 cudaDeviceProp dprop;
135 cudaGetDeviceProperties(&dprop, 0);
136 WARP_SIZE = dprop.warpSize;
137 int maxThreadsPerMultiProcessor = dprop.maxThreadsPerMultiProcessor;
141 if ((MAX_THREADS_PER_BLOCK & (MAX_THREADS_PER_BLOCK - 1)) != 0) {
142 UTIL_THROW(
"Threads per block must be a power of two.");
147 if (MAX_THREADS_PER_BLOCK % WARP_SIZE != 0)
151 "Threads per block must be a multiple of warp size %d.\n",
162 if (maxThreadsPerMultiProcessor % MAX_THREADS_PER_BLOCK != 0)
165 <<
"WARNING: The number of threads per block ("
166 << MAX_THREADS_PER_BLOCK
167 <<
") is not an even divisor of the maximum number"
168 <<
" of threads per streaming multiprocessor ("
169 << maxThreadsPerMultiProcessor
170 <<
"). Performance will be suboptimal."
182 {
return THREADS_PER_BLOCK; }
185 {
return THREADS_LOGICAL; }
188 {
return WARP_SIZE; }
191 {
return UNUSED_THREADS; }
static std::ostream & file()
Get log ostream by reference.
#define UTIL_THROW(msg)
Macro for throwing an Exception, reporting function, file and line number.
#define UTIL_ASSERT(condition)
Assertion macro suitable for debugging serial or parallel code.
void checkExecutionConfig()
Check the execution configuration (threads and block counts).
int warpSize()
Get the warp size.
void init()
Initialize static variables in Pscf::ThreadArray namespace.
void setThreadsLogical(int nThreadsLogical)
Given total number of threads, set 1D execution configuration.
int nThreads()
Get the number of threads per block for execution.
int nThreadsLogical()
Return previously requested total number of threads.
bool hasUnusedThreads()
Indicates whether there will be unused threads.
void setThreadsPerBlock()
Set the number of threads per block to a default value.
int nBlocks()
Get the current number of blocks for execution.
PSCF package top-level namespace.
Utility classes for scientific computation.