PSCF v1.1
WaveList.tpp
1#ifndef PSPG_WAVE_LIST_TPP
2#define PSPG_WAVE_LIST_TPP
3
4/*
5* PSCF - Polymer Self-Consistent Field Theory
6*
7* Copyright 2016 - 2022, The Regents of the University of Minnesota
8* Distributed under the terms of the GNU General Public License.
9*/
10
11#include "WaveList.h"
12#include "cuComplex.h"
13#include <pspg/math/GpuResources.h>
14
15namespace Pscf {
16namespace Pspg
17{
18
19 // Need a reference table that maps index to a pair wavevector
20 // Ideally we can have a group of thread dealing with only
21 // the non-implicit part and the implicit part
22 static __global__
23 void makeDksqHelperWave(cudaReal* dksq, const int* waveBz,
24 const cudaReal* dkkBasis,
25 const int* partnerId,
26 const int* selfId,
27 const bool* implicit,
28 int nParams, int kSize,
29 int size, int dim)
30 {
31 // Actual size is nStar*nParams
32 // Each thread does nParams calculation
33 // Big performance hit if thread >= 0.5dimension(n-1)
34 int nThreads = blockDim.x * gridDim.x;
35 int startID = blockIdx.x * blockDim.x + threadIdx.x;
36
37 //loop through the entire array
38 int pId;
39 for (int param = 0; param < nParams; ++param) {
40 for (int i = startID; i < size; i += nThreads) {
41 for (int j = 0; j < dim; ++j) {
42 for (int k = 0; k < dim; ++k) {
43 if (!implicit[i]) {
44 // Not = need += so still need memset
45 dksq[(param * size) + i]
46 += waveBz[selfId[i] * dim + j]
47 * waveBz[ selfId[i] * dim + k]
48 * dkkBasis[k + (j * dim) + (param * dim * dim)];
49 } else {
50 pId = partnerId[i];
51 dksq[(param * size) + i] += waveBz[selfId[pId] * dim + j]
52 * waveBz[selfId[pId] * dim + k]
53 * dkkBasis[k + (j * dim) + (param * dim * dim)];
54 }
55 } //dim
56 } //dim
57 } //size
58 } //nParams
59 }
60
61 static __global__
62 void makeDksqReduction(cudaReal* dksq, const int* partnerId,
63 int nParams, int kSize, int rSize)
64 {
65 int nThreads = blockDim.x * gridDim.x;
66 int startID = blockIdx.x * blockDim.x + threadIdx.x;
67
68 // Add i in the implicit part into their partner's result
69 int pId;
70 for(int param = 0; param < nParams; ++param) {
71 for (int i = startID + kSize; i < rSize; i += nThreads) {
72 pId = partnerId[i];
73 dksq[(param * rSize) + pId] += dksq[(param * rSize) + i];
74 }
75 }
76 }
77
78 template <int D>
80 : minImage_d(nullptr),
81 dkSq_(nullptr),
82 partnerIdTable(nullptr),
83 partnerIdTable_d(nullptr),
84 kSize_(0),
85 rSize_(0),
86 nParams_(0),
87 isAllocated_(false),
88 hasMinimumImages_(false)
89 {
90 #if 0
91 minImage_d = nullptr;
92 dkSq_ = nullptr;
93 partnerIdTable = nullptr;
94 partnerIdTable_d = nullptr;
95 kSize_ = 0;
96 rSize_ = 0;
97 nParams_ = 0;
98 isAllocated_ = false;
99 #endif
100 }
101
102 template <int D>
104 if (isAllocated_) {
105 cudaFree(minImage_d);
106 cudaFree(dkSq_);
107 cudaFree(partnerIdTable_d);
108 cudaFree(selfIdTable_d);
109 cudaFree(implicit_d);
110 cudaFree(dkkBasis_d);
111 delete[] kSq_;
112 delete implicit;
113 }
114 }
115
116 template <int D>
117 void WaveList<D>::allocate(Mesh<D> const & mesh,
118 UnitCell<D> const & unitCell)
119 {
120 UTIL_CHECK(mesh.size() > 0);
121 UTIL_CHECK(unitCell.nParameter() > 0);
122 UTIL_CHECK(!isAllocated_);
123
124 rSize_ = mesh.size();
125 dimensions_ = mesh.dimensions();
126 nParams_ = unitCell.nParameter();
127
128 // Compute DFT mesh size kSize_
129 kSize_ = 1;
130 for(int i = 0; i < D; ++i) {
131 if (i < D - 1) {
132 kSize_ *= mesh.dimension(i);
133 } else {
134 kSize_ *= (mesh.dimension(i)/ 2 + 1);
135 }
136 }
137
138 minImage_.allocate(kSize_);
139 gpuErrchk(
140 cudaMalloc((void**) &minImage_d, sizeof(int) * rSize_ * D)
141 );
142
143 kSq_ = new cudaReal[rSize_];
144 gpuErrchk(
145 cudaMalloc((void**) &dkSq_, sizeof(cudaReal) * rSize_ * nParams_)
146 );
147
148 partnerIdTable = new int[mesh.size()];
149 gpuErrchk(
150 cudaMalloc((void**) &partnerIdTable_d, sizeof(int) * mesh.size())
151 );
152
153 selfIdTable = new int[mesh.size()];
154 gpuErrchk(
155 cudaMalloc((void**) &selfIdTable_d, sizeof(int) * mesh.size())
156 );
157
158 implicit = new bool[mesh.size()];
159 gpuErrchk(
160 cudaMalloc((void**) &implicit_d, sizeof(bool) * mesh.size())
161 );
162
163 dkkBasis = new cudaReal[6 * D * D];
164 gpuErrchk(
165 cudaMalloc((void**) &dkkBasis_d, sizeof(cudaReal) * 6 * D * D)
166 );
167
168 isAllocated_ = true;
169 }
170
171 template <int D>
173 UnitCell<D> const & unitCell) {
174 // Precondition
175 UTIL_CHECK (isAllocated_);
176 UTIL_CHECK (mesh.size() > 0);
177 UTIL_CHECK (unitCell.nParameter() > 0);
178 UTIL_CHECK (unitCell.lattice() != UnitCell<D>::Null);
179 UTIL_CHECK (unitCell.isInitialized());
180
181 MeshIterator<D> itr(mesh.dimensions());
182 IntVec<D> waveId;
183 IntVec<D> G2;
184 IntVec<D> tempIntVec;
185 int partnerId;
186
187 //min image needs mesh size of them
188 //partner only need kSize of them
189 //does setting iterator over kdim solves thing?
190 int kDimRank = 0;
191 int implicitRank = kSize_;
192 //kDimRank + implicitRank = rSize
193 int* invertedIdTable = new int[rSize_];
194
195 for (itr.begin(); !itr.atEnd(); ++itr) {
196 // If not implicit
197 if (itr.position(D - 1) < mesh.dimension(D-1)/2 + 1) {
198 implicit[kDimRank] = false;
199 selfIdTable[kDimRank] = itr.rank();
200 invertedIdTable[itr.rank()] = kDimRank;
201 kDimRank++;
202 } else {
203 implicit[implicitRank] = true;
204 selfIdTable[implicitRank] = itr.rank();
205 invertedIdTable[itr.rank()] = implicitRank;
206 implicitRank++;
207 }
208 }
209
210 int* tempMinImage = new int[rSize_ * D];
211 for (itr.begin(); !itr.atEnd(); ++itr) {
212 kSq_[itr.rank()] = unitCell.ksq(itr.position());
213
214 #if 0
215 //we get position but set mesh dim to be larger, should be okay
216 shiftToMinimum(itr.position(), mesh.dimensions(),
217 minImage_ + (itr.rank() * D));
218 #endif
219
220 // We get position but set mesh dim to be larger, should be okay
221 // not the most elegant code with repeated copying but reduces
222 // repeated code from pscf
223 waveId = itr.position();
224 tempIntVec = shiftToMinimum(waveId, mesh.dimensions(), unitCell);
225 for(int i = 0; i < D; i++) {
226 (tempMinImage + (itr.rank() * D))[i] = tempIntVec[i];
227 }
228
229 if(itr.position(D - 1) < mesh.dimension(D-1)/2 + 1) {
230 minImage_[invertedIdTable[itr.rank()]] = tempIntVec;
231 }
232
233 for(int j = 0; j < D; ++j) {
234 G2[j] = -waveId[j];
235 }
236 mesh.shift(G2);
237 partnerId = mesh.rank(G2);
238 partnerIdTable[invertedIdTable[itr.rank()]] = invertedIdTable[partnerId];
239 }
240
241 #if 0
242 /*
243 std::cout<< "Sum kDimRank implicitRank: "
244 << kDimRank + (implicitRank-kSize_)<<std::endl;
245 std::cout<< "This is kDimRank sanity check "<<kDimRank<<std::endl;
246 for (int i = 0; i < rSize_; i++) {
247 std::cout << i << ' '
248 << selfIdTable[i]<< ' '<<partnerIdTable[i]<<' '
249 << implicit[i] << std::endl;
250 }
251 */
252 #endif
253
254 gpuErrchk(
255 cudaMemcpy(minImage_d, tempMinImage,
256 sizeof(int) * rSize_ * D, cudaMemcpyHostToDevice)
257 );
258
259 // Partner is much smaller but we keep this for now
260 gpuErrchk(
261 cudaMemcpy(partnerIdTable_d, partnerIdTable,
262 sizeof(int) * mesh.size(), cudaMemcpyHostToDevice)
263 );
264
265 gpuErrchk(
266 cudaMemcpy(selfIdTable_d, selfIdTable,
267 sizeof(int) * mesh.size(), cudaMemcpyHostToDevice)
268 );
269
270 gpuErrchk(
271 cudaMemcpy(implicit_d, implicit,
272 sizeof(bool) * mesh.size(), cudaMemcpyHostToDevice)
273 );
274
275 delete[] tempMinImage;
276 hasMinimumImages_ = true;
277 }
278
279 template <int D>
280 void WaveList<D>::computeKSq(UnitCell<D> const & unitCell) {
281 //pass for now
282 }
283
284 template <int D>
286 {
287 // dkkbasis is something determined from unit cell size
288 // min image needs to be on device but okay since its only done once.
289 // Second to last parameter is number of stars originally
290
291 // Precondition
292 UTIL_CHECK (hasMinimumImages_);
293 UTIL_CHECK (unitCell.nParameter() > 0);
294 UTIL_CHECK (unitCell.lattice() != UnitCell<D>::Null);
295 UTIL_CHECK (unitCell.isInitialized());
296
297 // GPU resources
298 int nBlocks, nThreads;
299 ThreadGrid::setThreadsLogical(rSize_, nBlocks, nThreads);
300
301 int idx;
302 for(int i = 0 ; i < unitCell.nParameter(); ++i) {
303 for(int j = 0; j < D; ++j) {
304 for(int k = 0; k < D; ++k) {
305 idx = k + (j * D) + (i * D * D);
306 dkkBasis[idx] = unitCell.dkkBasis(i, j, k);
307 }
308 }
309 }
310
311 cudaMemcpy(dkkBasis_d, dkkBasis,
312 sizeof(cudaReal) * unitCell.nParameter() * D * D,
313 cudaMemcpyHostToDevice);
314
315 cudaMemset(dkSq_, 0, unitCell.nParameter() * rSize_ * sizeof(cudaReal));
316 makeDksqHelperWave<<<nBlocks, nThreads>>>
317 (dkSq_, minImage_d, dkkBasis_d, partnerIdTable_d,
318 selfIdTable_d, implicit_d, unitCell.nParameter(),
319 kSize_, rSize_, D);
320
321 makeDksqReduction<<<nBlocks, nThreads>>>
322 (dkSq_, partnerIdTable_d, unitCell.nParameter(),
323 kSize_, rSize_);
324 }
325
326
327}
328}
329#endif
An IntVec<D, T> is a D-component vector of elements of integer type T.
Definition: IntVec.h:27
Iterator over points in a Mesh<D>.
Definition: MeshIterator.h:29
int rank() const
Get the rank of current element.
Definition: MeshIterator.h:136
void begin()
Set iterator to the first point in the mesh.
bool atEnd() const
Is this the end (i.e., one past the last point)?
Definition: MeshIterator.h:141
IntVec< D > position() const
Get current position in the grid, as integer vector.
Definition: MeshIterator.h:122
Description of a regular grid of points in a periodic domain.
Definition: Mesh.h:61
IntVec< D > dimensions() const
Get an IntVec<D> of the grid dimensions.
Definition: Mesh.h:217
int size() const
Get total number of grid points.
Definition: Mesh.h:229
int rank(IntVec< D > const &position) const
Get the rank of a grid point with specified position.
Definition: Mesh.tpp:72
int shift(int &coordinate, int i) const
Shift a periodic coordinate into range.
Definition: Mesh.tpp:131
int dimension(int i) const
Get grid dimension along Cartesian direction i.
Definition: Mesh.h:221
WaveList()
Constructor.
Definition: WaveList.tpp:79
void computeMinimumImages(Mesh< D > const &mesh, UnitCell< D > const &unitCell)
Compute minimum images of wavevectors.
Definition: WaveList.tpp:172
void computeKSq(UnitCell< D > const &unitCell)
Compute square norm |k|^2 for all wavevectors.
Definition: WaveList.tpp:280
~WaveList()
Destructor.
Definition: WaveList.tpp:103
void computedKSq(UnitCell< D > const &unitCell)
Compute derivatives of |k|^2 w/ respect to unit cell parameters.
Definition: WaveList.tpp:285
void allocate(Mesh< D > const &mesh, UnitCell< D > const &unitCell)
Allocate memory for all arrays.
Definition: WaveList.tpp:117
int nParameter() const
Get the number of parameters in the unit cell.
Definition: UnitCellBase.h:243
virtual double ksq(IntVec< D > const &k) const
Compute square magnitude of reciprocal lattice vector.
Definition: UnitCellBase.h:352
bool isInitialized() const
Has this unit cell been initialized?
Definition: UnitCellBase.h:141
double dkkBasis(int k, int i, int j) const
Get derivative of dot product bi.bj with respect to parameter k.
Definition: UnitCellBase.h:305
Base template for UnitCell<D> classes, D=1, 2 or 3.
Definition: UnitCell.h:44
#define UTIL_CHECK(condition)
Assertion macro suitable for serial or parallel production code.
Definition: global.h:68
int nThreads()
Get the number of threads per block for execution.
Definition: ThreadGrid.cu:173
void setThreadsLogical(int nThreadsLogical)
Set the total number of threads required for execution.
Definition: ThreadGrid.cu:80
C++ namespace for polymer self-consistent field theory (PSCF).