1#ifndef PSPG_WAVE_LIST_TPP
2#define PSPG_WAVE_LIST_TPP
13#include <pspg/math/GpuResources.h>
23 void makeDksqHelperWave(cudaReal* dksq,
const int* waveBz,
24 const cudaReal* dkkBasis,
28 int nParams,
int kSize,
34 int nThreads = blockDim.x * gridDim.x;
35 int startID = blockIdx.x * blockDim.x + threadIdx.x;
39 for (
int param = 0; param < nParams; ++param) {
40 for (
int i = startID; i < size; i +=
nThreads) {
41 for (
int j = 0; j < dim; ++j) {
42 for (
int k = 0; k < dim; ++k) {
45 dksq[(param * size) + i]
46 += waveBz[selfId[i] * dim + j]
47 * waveBz[ selfId[i] * dim + k]
48 * dkkBasis[k + (j * dim) + (param * dim * dim)];
51 dksq[(param * size) + i] += waveBz[selfId[pId] * dim + j]
52 * waveBz[selfId[pId] * dim + k]
53 * dkkBasis[k + (j * dim) + (param * dim * dim)];
62 void makeDksqReduction(cudaReal* dksq,
const int* partnerId,
63 int nParams,
int kSize,
int rSize)
65 int nThreads = blockDim.x * gridDim.x;
66 int startID = blockIdx.x * blockDim.x + threadIdx.x;
70 for(
int param = 0; param < nParams; ++param) {
71 for (
int i = startID + kSize; i < rSize; i +=
nThreads) {
73 dksq[(param * rSize) + pId] += dksq[(param * rSize) + i];
80 : minImage_d(nullptr),
82 partnerIdTable(nullptr),
83 partnerIdTable_d(nullptr),
88 hasMinimumImages_(false)
93 partnerIdTable =
nullptr;
94 partnerIdTable_d =
nullptr;
105 cudaFree(minImage_d);
107 cudaFree(partnerIdTable_d);
108 cudaFree(selfIdTable_d);
109 cudaFree(implicit_d);
110 cudaFree(dkkBasis_d);
124 rSize_ = mesh.
size();
130 for(
int i = 0; i < D; ++i) {
138 minImage_.allocate(kSize_);
140 cudaMalloc((
void**) &minImage_d,
sizeof(
int) * rSize_ * D)
143 kSq_ =
new cudaReal[rSize_];
145 cudaMalloc((
void**) &dkSq_,
sizeof(cudaReal) * rSize_ * nParams_)
148 partnerIdTable =
new int[mesh.
size()];
150 cudaMalloc((
void**) &partnerIdTable_d,
sizeof(
int) * mesh.
size())
153 selfIdTable =
new int[mesh.
size()];
155 cudaMalloc((
void**) &selfIdTable_d,
sizeof(
int) * mesh.
size())
158 implicit =
new bool[mesh.
size()];
160 cudaMalloc((
void**) &implicit_d,
sizeof(
bool) * mesh.
size())
163 dkkBasis =
new cudaReal[6 * D * D];
165 cudaMalloc((
void**) &dkkBasis_d,
sizeof(cudaReal) * 6 * D * D)
191 int implicitRank = kSize_;
193 int* invertedIdTable =
new int[rSize_];
198 implicit[kDimRank] =
false;
199 selfIdTable[kDimRank] = itr.
rank();
200 invertedIdTable[itr.
rank()] = kDimRank;
203 implicit[implicitRank] =
true;
204 selfIdTable[implicitRank] = itr.
rank();
205 invertedIdTable[itr.
rank()] = implicitRank;
210 int* tempMinImage =
new int[rSize_ * D];
217 minImage_ + (itr.
rank() * D));
224 tempIntVec = shiftToMinimum(waveId, mesh.
dimensions(), unitCell);
225 for(
int i = 0; i < D; i++) {
226 (tempMinImage + (itr.
rank() * D))[i] = tempIntVec[i];
230 minImage_[invertedIdTable[itr.
rank()]] = tempIntVec;
233 for(
int j = 0; j < D; ++j) {
237 partnerId = mesh.
rank(G2);
238 partnerIdTable[invertedIdTable[itr.
rank()]] = invertedIdTable[partnerId];
255 cudaMemcpy(minImage_d, tempMinImage,
256 sizeof(
int) * rSize_ * D, cudaMemcpyHostToDevice)
261 cudaMemcpy(partnerIdTable_d, partnerIdTable,
262 sizeof(
int) * mesh.
size(), cudaMemcpyHostToDevice)
266 cudaMemcpy(selfIdTable_d, selfIdTable,
267 sizeof(
int) * mesh.
size(), cudaMemcpyHostToDevice)
271 cudaMemcpy(implicit_d, implicit,
272 sizeof(
bool) * mesh.
size(), cudaMemcpyHostToDevice)
275 delete[] tempMinImage;
276 hasMinimumImages_ =
true;
298 int nBlocks, nThreads;
302 for(
int i = 0 ; i < unitCell.
nParameter(); ++i) {
303 for(
int j = 0; j < D; ++j) {
304 for(
int k = 0; k < D; ++k) {
305 idx = k + (j * D) + (i * D * D);
306 dkkBasis[idx] = unitCell.
dkkBasis(i, j, k);
311 cudaMemcpy(dkkBasis_d, dkkBasis,
312 sizeof(cudaReal) * unitCell.
nParameter() * D * D,
313 cudaMemcpyHostToDevice);
315 cudaMemset(dkSq_, 0, unitCell.
nParameter() * rSize_ *
sizeof(cudaReal));
316 makeDksqHelperWave<<<nBlocks, nThreads>>>
317 (dkSq_, minImage_d, dkkBasis_d, partnerIdTable_d,
318 selfIdTable_d, implicit_d, unitCell.
nParameter(),
321 makeDksqReduction<<<nBlocks, nThreads>>>
322 (dkSq_, partnerIdTable_d, unitCell.
nParameter(),
An IntVec<D, T> is a D-component vector of elements of integer type T.
Iterator over points in a Mesh<D>.
int rank() const
Get the rank of current element.
void begin()
Set iterator to the first point in the mesh.
bool atEnd() const
Is this the end (i.e., one past the last point)?
IntVec< D > position() const
Get current position in the grid, as integer vector.
Description of a regular grid of points in a periodic domain.
IntVec< D > dimensions() const
Get an IntVec<D> of the grid dimensions.
int size() const
Get total number of grid points.
int rank(IntVec< D > const &position) const
Get the rank of a grid point with specified position.
int shift(int &coordinate, int i) const
Shift a periodic coordinate into range.
int dimension(int i) const
Get grid dimension along Cartesian direction i.
void computeMinimumImages(Mesh< D > const &mesh, UnitCell< D > const &unitCell)
Compute minimum images of wavevectors.
void computeKSq(UnitCell< D > const &unitCell)
Compute square norm |k|^2 for all wavevectors.
void computedKSq(UnitCell< D > const &unitCell)
Compute derivatives of |k|^2 w/ respect to unit cell parameters.
void allocate(Mesh< D > const &mesh, UnitCell< D > const &unitCell)
Allocate memory for all arrays.
int nParameter() const
Get the number of parameters in the unit cell.
virtual double ksq(IntVec< D > const &k) const
Compute square magnitude of reciprocal lattice vector.
bool isInitialized() const
Has this unit cell been initialized?
double dkkBasis(int k, int i, int j) const
Get derivative of dot product bi.bj with respect to parameter k.
Base template for UnitCell<D> classes, D=1, 2 or 3.
#define UTIL_CHECK(condition)
Assertion macro suitable for serial or parallel production code.
int nThreads()
Get the number of threads per block for execution.
void setThreadsLogical(int nThreadsLogical)
Set the total number of threads required for execution.
C++ namespace for polymer self-consistent field theory (PSCF).