WaveList.tpp
#ifndef RPG_WAVE_LIST_TPP
#define RPG_WAVE_LIST_TPP

/*
* PSCF - Polymer Self-Consistent Field Theory
*
* Copyright 2016 - 2022, The Regents of the University of Minnesota
* Distributed under the terms of the GNU General Public License.
*/

#include "WaveList.h"
#include <prdc/cuda/resources.h>
#include <pscf/mesh/MeshIterator.h>

namespace Pscf {
namespace Rpg
{

   // CUDA kernels:
   // (defined in anonymous namespace, used only in this file)

   namespace {
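      // Data layout note: the minImages array stores the D integer
      // indices of each of the kSize wavevectors in D contiguous blocks,
      // so index j of wavevector i is stored at minImages[i + j*kSize].
      // The kBasis array stores Cartesian component c of reciprocal
      // lattice basis vector j at kBasis[D*j + c].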

      /*
      * Compute minimum images of each wavevector, template declaration.
      */
      template <int D>
      __global__ void _computeMinimumImages(int* minImages, cudaReal* kSq,
                                            cudaReal const * kBasis,
                                            int const * meshDims,
                                            int const kSize);

      /*
      * Compute minimum images of each wavevector, 1D explicit specialization.
      *
      * Kernel must be launched with kSize threads. (One thread calculates
      * one minimum image.)
      *
      * When launched, this kernel requires no dynamic memory allocation.
      */
      template <>
      __global__ void _computeMinimumImages<1>(int* minImages, cudaReal* kSq,
                                               cudaReal const * kBasis,
                                               int const * meshDims,
                                               int const kSize)
      {
         unsigned int nThreads = blockDim.x * gridDim.x;
         unsigned int startID = blockIdx.x * blockDim.x + threadIdx.x;

         // Load kBasis and meshDims
         cudaReal kBasis_ = kBasis[0];
         int meshDims_ = meshDims[0];

         for (int i = startID; i < kSize; i += nThreads) {
            int img = minImages[i];

            while (img > (meshDims_>>1)) { // note: x>>1 is same as x/2
               img -= meshDims_;
            }
            while (img < -1*(meshDims_>>1)) {
               img += meshDims_;
            }

            minImages[i] = img;
            kSq[i] = img * img * kBasis_ * kBasis_;
         }
      }

      /*
      * Compute minimum images of each wavevector, 2D explicit specialization.
      *
      * This kernel should be launched with >= 32*kSize threads.
      *
      * 32 threads are used to calculate one minimum image. In the CPU code,
      * we compare 25 different images of a wave to determine the minimum
      * image, and here we choose 32 instead because it is a power of 2,
      * allowing us to use a reduction algorithm to compare the images.
      *
      * When launched, this kernel requires dynamic memory allocation of
      * (2*sizeof(int) + sizeof(cudaReal)) * nThreadsPerBlock.
      */
      template <>
      __global__ void _computeMinimumImages<2>(int* minImages, cudaReal* kSq,
                                               cudaReal const * kBasis,
                                               int const * meshDims,
                                               int const kSize)
      {
         unsigned int startID = blockIdx.x * blockDim.x + threadIdx.x;

         // Determine which wavevector and which image to evaluate
         unsigned int paramID = (startID >> 5); // equivalent to startID / 32
         unsigned int imageID = startID - (paramID * 32); // value in [0, 31]

         // Determine the image that will be evaluated by this thread
         // (uses integer division, not ideal for speed)
         int s0, s1;
         s0 = imageID / 5;
         s1 = imageID - (s0 * 5);
         s0 -= 2; // shift values to go from -2 to 4, not 0 to 6
         s1 -= 2; // shift values to go from -2 to 2, not 0 to 4
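         // note: imageID values 0-24 cover the 5 x 5 grid of images with
         // s0 and s1 in [-2, 2]; values 25-31 yield s0 in {3, 4}, which
         // are valid but more distant images, so they are harmless here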

         // Set epsilon
         #ifdef SINGLE_PRECISION
         cudaReal epsilon = 1.0E-4;
         #else
         cudaReal epsilon = 1.0E-8;
         #endif

         // Declare dynamically allocated arrays in shared memory
         // (allocated space is shared between cudaReal array and int array)
         extern __shared__ cudaReal kSqVals_[];
         int* images_ = (int*)&kSqVals_[blockDim.x];
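         // Shared memory layout: blockDim.x cudaReal kSq values, then
         // 2*blockDim.x ints (two image indices per thread), matching
         // the dynamic allocation size given in the kernel header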

         if (paramID < kSize) { // only evaluate if on the k-grid

            // Load image from global memory and shift based on s0 and s1
            images_[2*threadIdx.x] = minImages[paramID] + (s0 * meshDims[0]);
            images_[2*threadIdx.x+1] = minImages[kSize + paramID] +
                                       (s1 * meshDims[1]);

            // Calculate kSq for this wave
            cudaReal kVec0 = (kBasis[0] * images_[2*threadIdx.x]) +
                             (kBasis[2] * images_[2*threadIdx.x+1]);
            cudaReal kVec1 = (kBasis[1] * images_[2*threadIdx.x]) +
                             (kBasis[3] * images_[2*threadIdx.x+1]);

            kSqVals_[threadIdx.x] = (kVec0*kVec0) + (kVec1*kVec1);
         }
         __syncthreads(); // wait for all threads to finish

         // Perform a parallel reduction on 32 threads to find min image
         // (note: stride >>= 1 is equivalent to stride /= 2)
         for (int stride = 16; stride > 0; stride >>= 1) {
            if (paramID < kSize) { // only evaluate if on the k-grid
               if (imageID < stride) {
                  bool swap = false;
                  if (kSqVals_[threadIdx.x+stride] <
                      (kSqVals_[threadIdx.x] - epsilon)) {
                     swap = true;
                  } else if (kSqVals_[threadIdx.x+stride] <
                             (kSqVals_[threadIdx.x] + epsilon)) {
                     // kSq values effectively equal.
                     // Determine whether to swap based on hkl indices
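                     // (keep the image whose indices are lexicographically
                     // larger when kSq values are tied)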
                     for (int i = 0; i < 2; ++i) {
                        if (images_[2*threadIdx.x+i] >
                            images_[2*(threadIdx.x+stride)+i]) {
                           break;
                        } else
                        if (images_[2*threadIdx.x+i] <
                            images_[2*(threadIdx.x+stride)+i]) {
                           swap = true;
                           break;
                        }
                     }
                  }
                  if (swap) {
                     images_[2*threadIdx.x] =
                                       images_[2*(threadIdx.x+stride)];
                     images_[2*threadIdx.x+1] =
                                       images_[2*(threadIdx.x+stride)+1];
                     kSqVals_[threadIdx.x] = kSqVals_[threadIdx.x+stride];
                  }
               }
            }
            // note: no __syncthreads() is needed here because this
            // reduction occurs within a single warp, so all threads are
            // already synced
         }

         // At this point, for any thread with imageID == 0, the corresponding
         // entries in kSqVals_ and images_ should contain the kSq value and
         // the minimum image, respectively. Store values and exit
         if ((imageID == 0) && (paramID < kSize)) {
            kSq[paramID] = kSqVals_[threadIdx.x];
            minImages[paramID] = images_[2*threadIdx.x];
            minImages[paramID + kSize] = images_[2*threadIdx.x+1];
         }
      }

      /*
      * Compute minimum images of each wavevector, 3D explicit specialization.
      *
      * This kernel should be launched with >= 128*kSize threads.
      *
      * 128 threads are used to calculate one minimum image. In the CPU code,
      * we compare 125 different images of a wave to determine the minimum
      * image, and here we choose 128 instead because it is a power of 2,
      * allowing us to use a reduction algorithm to compare the images.
      *
      * When launched, this kernel requires dynamic memory allocation of
      * (3*sizeof(int) + sizeof(cudaReal)) * nThreadsPerBlock.
      */
      template <>
      __global__ void _computeMinimumImages<3>(int* minImages, cudaReal* kSq,
                                               cudaReal const * kBasis,
                                               int const * meshDims,
                                               int const kSize)
      {
         unsigned int startID = blockIdx.x * blockDim.x + threadIdx.x;

         // Determine which wavevector and which image to evaluate
         unsigned int paramID = (startID >> 7); // equivalent to startID / 128
         unsigned int imageID = startID - (paramID * 128); // value in [0, 127]

         // Determine the image that will be evaluated by this thread
         // (uses integer division, not ideal for speed)
         int s0, s1, s2;
         s0 = imageID / 25;
         s1 = (imageID - (s0 * 25)) / 5;
         s2 = imageID - (s0 * 25) - (s1 * 5);
         s0 -= 2; // shift values to go from -2 to 2
         s1 -= 2;
         s2 -= 2;
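         // note: imageID values 0-124 cover the 5 x 5 x 5 grid of images
         // with s0, s1, s2 in [-2, 2]; values 125-127 yield s0 = 3, which
         // are valid but more distant images, so they are harmless here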

         // Set epsilon
         #ifdef SINGLE_PRECISION
         cudaReal epsilon = 1.0E-4;
         #else
         cudaReal epsilon = 1.0E-8;
         #endif

         // Declare dynamically allocated arrays in shared memory
         // (allocated space is shared between cudaReal array and int array)
         extern __shared__ cudaReal kSqVals_[];
         int* images_ = (int*)&kSqVals_[blockDim.x];
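         // Shared memory layout: blockDim.x cudaReal kSq values, then
         // 3*blockDim.x ints (three image indices per thread), matching
         // the dynamic allocation size given in the kernel header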

         if (paramID < kSize) { // only evaluate if on the k-grid

            // Load image data from global memory
            images_[3*threadIdx.x] = minImages[paramID] + (s0 * meshDims[0]);
            images_[3*threadIdx.x+1] = minImages[kSize + paramID] +
                                       (s1 * meshDims[1]);
            images_[3*threadIdx.x+2] = minImages[kSize + kSize + paramID] +
                                       (s2 * meshDims[2]);

            // Calculate kSq for this wave
            cudaReal kVec0(0.0), kVec1(0.0), kVec2(0.0);
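            // kVec is the wavevector in Cartesian coordinates: component
            // c is the sum over j of images_[j] * kBasis[3*j + c]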
            for (int k = 0; k < 3; ++k) {
               kVec0 += kBasis[3*k] * images_[3*threadIdx.x + k];
               kVec1 += kBasis[3*k+1] * images_[3*threadIdx.x + k];
               kVec2 += kBasis[3*k+2] * images_[3*threadIdx.x + k];
            }

            kSqVals_[threadIdx.x] = (kVec0*kVec0) + (kVec1*kVec1) +
                                    (kVec2*kVec2);
         }
         __syncthreads(); // wait for all threads to finish

         // Perform a parallel reduction on 128 threads to find min image
         // (note: stride >>= 1 is equivalent to stride /= 2)
         for (int stride = 64; stride > 0; stride >>= 1) {
            if (paramID < kSize) { // only evaluate if on the k-grid
               if (imageID < stride) {
                  bool swap = false;
                  if (kSqVals_[threadIdx.x+stride] <
                      (kSqVals_[threadIdx.x] - epsilon)) {
                     swap = true;
                  } else if (kSqVals_[threadIdx.x+stride] <
                             (kSqVals_[threadIdx.x] + epsilon)) {
                     // kSq values effectively equal.
                     // Determine whether to swap based on hkl indices
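                     // (keep the image whose indices are lexicographically
                     // larger when kSq values are tied)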
                     for (int i = 0; i < 3; ++i) {
                        if (images_[3*threadIdx.x+i] >
                            images_[3*(threadIdx.x+stride)+i]) {
                           break;
                        } else
                        if (images_[3*threadIdx.x+i] <
                            images_[3*(threadIdx.x+stride)+i]) {
                           swap = true;
                           break;
                        }
                     }
                  }
                  if (swap) {
                     images_[3*threadIdx.x] =
                                       images_[3*(threadIdx.x+stride)];
                     images_[3*threadIdx.x+1] =
                                       images_[3*(threadIdx.x+stride)+1];
                     images_[3*threadIdx.x+2] =
                                       images_[3*(threadIdx.x+stride)+2];
                     kSqVals_[threadIdx.x] = kSqVals_[threadIdx.x+stride];
                  }
               }
            }
            __syncthreads(); // wait for all threads to finish
         }

         // At this point, for any thread with imageID == 0, the corresponding
         // entries in kSqVals_ and images_ should contain the kSq value and
         // the minimum image, respectively. Store values and exit
         if ((imageID == 0) && (paramID < kSize)) {
            kSq[paramID] = kSqVals_[threadIdx.x];
            minImages[paramID] = images_[3*threadIdx.x];
            minImages[paramID + kSize] = images_[3*threadIdx.x+1];
            minImages[paramID + kSize + kSize] = images_[3*threadIdx.x+2];
         }
      }

      /*
      * Compute the kSq array on the GPU.
      *
      * Each thread computes kSq = |k|^2 for one or more wavevectors,
      * where k = sum_j n_j b_j, the n_j are the integer indices of the
      * wave stored in waveBz, and the b_j are the reciprocal lattice
      * basis vectors stored in kBasis.
      */
      template <int D>
      __global__ void _computeKSq(cudaReal* kSq, int const * waveBz,
                                  cudaReal const * kBasis,
                                  int const nParams, int const kSize)
      {
         int nThreads = blockDim.x * gridDim.x;
         int startID = blockIdx.x * blockDim.x + threadIdx.x;

         // Load kBasis into shared memory for fast access
         __shared__ cudaReal kBasis_[D*D];
         if (threadIdx.x < D * D) {
            kBasis_[threadIdx.x] = kBasis[threadIdx.x];
         }
         __syncthreads(); // wait for all threads to finish

         // Variables to be used in the loop
         int i, j, k, waveBz_;
         cudaReal kVec[D], kSqVal;
         // Note: local arrays are usually very slow in a CUDA kernel, but
         // the compiler should be able to unroll the loops below and keep
         // the kVec array in registers, because the full structure of the
         // loops is known at compile time.

         // Loop through array
         for (i = startID; i < kSize; i += nThreads) {

            // Initialize kVec to 0
            for (j = 0; j < D; ++j) {
               kVec[j] = 0.0;
            }

            // Calculate kVec
            for (j = 0; j < D; ++j) {
               waveBz_ = waveBz[i + (j * kSize)];
               for (k = 0; k < D; ++k) {
                  kVec[k] += kBasis_[k + (D*j)] * waveBz_;
               }
            }

            // Compute kSq
            kSqVal = 0.0;
            for (j = 0; j < D; ++j) {
               kSqVal += kVec[j] * kVec[j];
            }

            // Store value in global memory
            kSq[i] = kSqVal;

         } // kSize
      }

      /*
      * Compute the dKSq array on the GPU.
      *
      * For each unit cell parameter p and each wavevector with integer
      * indices n_j, this computes the derivative of |k|^2 with respect
      * to p as dKSq = sum_{j,k} n_j n_k dkkBasis(p,j,k).
      */
      template <int D>
      __global__ void _computedKSq(cudaReal* dKSq, int const * waveBz,
                                   cudaReal const * dkkBasis,
                                   bool const * implicitInverse,
                                   int const nParams, int const kSize)
      {
         // Size of dKSq is kSize * nParams
         // Each thread does nParams calculations
         int nThreads = blockDim.x * gridDim.x;
         int startID = blockIdx.x * blockDim.x + threadIdx.x;

         // Load dkkBasis into shared memory for fast access
         // (max size of dkkBasis is 54 elements for triclinic unit cell)
         extern __shared__ cudaReal dkkBasis_[];
         if (threadIdx.x < nParams * D * D) {
            dkkBasis_[threadIdx.x] = dkkBasis[threadIdx.x];
         }
         __syncthreads(); // wait for all threads to finish

         // Variables to be used in the loop
         int param, i, j, k, waveBz_[D];
         cudaReal dKSqVal;

         // Loop through array
         for (param = 0; param < nParams; ++param) {
            for (i = startID; i < kSize; i += nThreads) {

               // Initialize to 0
               dKSqVal = 0.0;

               // Load waveBz to local memory
               for (j = 0; j < D; ++j) {
                  waveBz_[j] = waveBz[i + (j * kSize)];
               }

               // Compute dKSq
               for (j = 0; j < D; ++j) {
                  for (k = 0; k < D; ++k) {
                     dKSqVal += waveBz_[j] * waveBz_[k]
                                * dkkBasis_[k + (j * D) + (param * D * D)];
                  } // D
               } // D

               if (implicitInverse[i]) { // if element i's inverse is implicit
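                  // A wave and its inverse -k have equal dKSq, so the
                  // implicit partner's contribution is included by
                  // doubling the stored value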
                  dKSqVal *= 2;
               }

               dKSq[(param * kSize) + i] = dKSqVal;

            } // kSize
         } // nParams
      }
   }

   /*
   * Constructor.
   */
   template <int D>
   WaveList<D>::WaveList()
    : kSize_(0),
      isAllocated_(false),
      hasMinimumImages_(false),
      hasKSq_(false),
      hasdKSq_(false),
      unitCellPtr_(nullptr),
      meshPtr_(nullptr)
   {}

   /*
   * Destructor.
   */
   template <int D>
   WaveList<D>::~WaveList()
   {}

   /*
   * Allocate memory and set association with a Mesh and UnitCell object.
   */
   template <int D>
   void WaveList<D>::allocate(Mesh<D> const & m, UnitCell<D> const & c)
   {
      UTIL_CHECK(m.size() > 0);
      UTIL_CHECK(c.nParameter() > 0);
      UTIL_CHECK(!isAllocated_);

      // Create permanent associations with mesh and unit cell
      unitCellPtr_ = &c;
      meshPtr_ = &m;

      int nParams = unitCell().nParameter();

      // Compute DFT mesh size kSize_ and dimensions kMeshDimensions_
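      // (the last dimension is halved because the DFT of a real field is
      // stored in real-to-complex format, in which the conjugate half of
      // the wavevectors is left implicit)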
      kSize_ = 1;
      for (int i = 0; i < D; ++i) {
         if (i < D - 1) {
            kMeshDimensions_[i] = mesh().dimension(i);
         } else {
            kMeshDimensions_[i] = mesh().dimension(i) / 2 + 1;
         }
         kSize_ *= kMeshDimensions_[i];
      }

      minImages_.allocate(kSize_ * D);
      kSq_.allocate(kMeshDimensions_);
      dKSq_.allocate(kSize_ * nParams);
      dKSqSlices_.allocate(nParams);
      for (int i = 0; i < nParams; i++) {
         dKSqSlices_[i].associate(dKSq_, i*kSize_, kMeshDimensions_);
      }

      // Set up implicitInverse_ array (only depends on mesh dimensions)
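      // A wavevector's inverse -k is "implicit" if it is not itself
      // stored on the k-grid; each such wave represents two waves of
      // the full DFT mesh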
      implicitInverse_.allocate(kSize_);
      MeshIterator<D> kItr(kMeshDimensions_);
      HostDArray<bool> implicitInverse_h(kSize_);
      int inverseId;
      for (kItr.begin(); !kItr.atEnd(); ++kItr) {
         if (kItr.position(D-1) == 0) {
            inverseId = 0;
         } else {
            inverseId = mesh().dimension(D-1) - kItr.position(D-1);
         }
         // Implicit if the inverse falls outside the stored k-grid,
         // whose last-dimension indices run from 0 to N/2
         if (inverseId >= kMeshDimensions_[D-1]) {
            implicitInverse_h[kItr.rank()] = true;
         } else {
            implicitInverse_h[kItr.rank()] = false;
         }
      }
      implicitInverse_ = implicitInverse_h; // transfer to device memory

      isAllocated_ = true;
   }

   /*
   * Clear all internal data that depends on lattice parameters.
   */
   template <int D>
   void WaveList<D>::clearUnitCellData()
   {
      // Minimum images depend on the lattice parameters only if the
      // unit cell has a variable angle; otherwise they remain valid
      if (hasVariableAngle()) {
         hasMinimumImages_ = false;
      }
      hasKSq_ = false;
      hasdKSq_ = false;
   }

   /*
   * Compute minimum images of wavevectors.
   */
   template <int D>
   void WaveList<D>::computeMinimumImages()
   {
      if (hasMinimumImages_) return; // min images already calculated

      // Precondition
      UTIL_CHECK(isAllocated_);
      UTIL_CHECK(unitCell().lattice() != UnitCell<D>::Null);
      UTIL_CHECK(unitCell().isInitialized());
      UTIL_CHECK(minImages_.capacity() == kSize_ * D);

      // Set initial array of images to contain the k-grid points
      HostDArray<int> imagesTmp(D*kSize_);
      MeshIterator<D> kItr(kMeshDimensions_);
      for (int i = 0; i < D; i++) {
         for (kItr.begin(); !kItr.atEnd(); ++kItr) {
            imagesTmp[kItr.rank() + (i*kSize_)] = kItr.position(i);
         }
      }
      minImages_ = imagesTmp; // copy to device

      // Get kBasis and meshDims and store on device
      HostDArray<cudaReal> kBasis_h(D*D);
      HostDArray<int> meshDims_h(D);
      DeviceArray<cudaReal> kBasis(D*D);
      DeviceArray<int> meshDims(D);
      int idx = 0;
      for (int j = 0; j < D; ++j) {
         for (int k = 0; k < D; ++k) {
            kBasis_h[idx] = unitCell().kBasis(j)[k];
            idx++;
         }
         meshDims_h[j] = mesh().dimension(j);
      }
      kBasis = kBasis_h;
      meshDims = meshDims_h;

      // Set number of threads per gridpoint (depends on D)
      int threadsPerGP;
      if (D == 3) {
         threadsPerGP = 128;
      } else if (D == 2) {
         threadsPerGP = 32;
      } else if (D == 1) {
         threadsPerGP = 1;
      }

      // GPU resources
      int nBlocks, nThreads;
      ThreadArray::setThreadsLogical(kSize_*threadsPerGP, nBlocks, nThreads);

      if ((D == 3) && (nThreads < 128)) {
         // Thread blocks too small. Manually set nThreads to 128
         ThreadArray::setThreadsPerBlock(128);
         ThreadArray::setThreadsLogical(kSize_*threadsPerGP, nBlocks, nThreads);

         // If the above was successful, print warning
         Log::file() << "Warning: "
                     << "nThreads too small for computeMinimumImages.\n"
                     << "Setting nThreads equal to 128." << std::endl;
      }

      // Launch kernel
      size_t sz = (D * sizeof(int) + sizeof(cudaReal)) * nThreads;
      _computeMinimumImages<D><<<nBlocks, nThreads, sz>>>
            (minImages_.cArray(), kSq_.cArray(), kBasis.cArray(),
             meshDims.cArray(), kSize_);
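
      // note: the kernel also computes kSq for each wavevector as a
      // by-product of the minimum-image search, so hasKSq_ is set too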
      hasMinimumImages_ = true;
      hasKSq_ = true;
   }

   /*
   * Compute the kSq array (square magnitude of each wavevector).
   */
   template <int D>
   void WaveList<D>::computeKSq()
   {
      if (hasKSq_) return; // kSq already calculated

      if (!hasMinimumImages_) {
         computeMinimumImages(); // compute both min images and kSq
         return;
      }

      // If this point is reached, calculate kSq using _computeKSq kernel

      // Precondition
      UTIL_CHECK(unitCell().nParameter() > 0);
      UTIL_CHECK(unitCell().lattice() != UnitCell<D>::Null);
      UTIL_CHECK(unitCell().isInitialized());

      // Get kBasis and store on device
      HostDArray<cudaReal> kBasis_h(D*D);
      DeviceArray<cudaReal> kBasis(D*D);
      int idx = 0;
      for (int j = 0; j < D; ++j) {
         for (int k = 0; k < D; ++k) {
            kBasis_h[idx] = unitCell().kBasis(j)[k];
            idx++;
         }
      }
      kBasis = kBasis_h;

      // GPU resources
      int nBlocks, nThreads;
      ThreadArray::setThreadsLogical(kSize_, nBlocks, nThreads);

      // Launch kernel to calculate kSq on device
      _computeKSq<D><<<nBlocks, nThreads>>>
            (kSq_.cArray(), minImages_.cArray(), kBasis.cArray(),
             unitCell().nParameter(), kSize_);

      hasKSq_ = true;
   }

   /*
   * Compute derivatives of |k|^2 with respect to unit cell parameters.
   */
   template <int D>
   void WaveList<D>::computedKSq()
   {
      if (hasdKSq_) return; // dKSq already calculated

      // Compute minimum images if needed
      if (!hasMinimumImages_) {
         computeMinimumImages();
      }

      // Precondition
      UTIL_CHECK(unitCell().nParameter() > 0);
      UTIL_CHECK(unitCell().lattice() != UnitCell<D>::Null);
      UTIL_CHECK(unitCell().isInitialized());

      // Calculate dkkBasis and store on device
      int idx;
      HostDArray<cudaReal> dkkBasis_h(unitCell().nParameter() * D * D);
      DeviceArray<cudaReal> dkkBasis;
      for (int i = 0; i < unitCell().nParameter(); ++i) {
         for (int j = 0; j < D; ++j) {
            for (int k = 0; k < D; ++k) {
               idx = k + (j * D) + (i * D * D);
               dkkBasis_h[idx] = unitCell().dkkBasis(i, j, k);
            }
         }
      }
      dkkBasis = dkkBasis_h;

      // GPU resources
      int nBlocks, nThreads;
      ThreadArray::setThreadsLogical(kSize_, nBlocks, nThreads);

      // Kernel requires block size to be >= the size of dkkBasis.
      // Max size of dkkBasis is 54, so this should always be satisfied
      UTIL_CHECK(nThreads > dkkBasis.capacity());

      // Launch kernel to calculate dKSq on device
      size_t sz = sizeof(cudaReal)*dkkBasis.capacity();
      _computedKSq<D><<<nBlocks, nThreads, sz>>>
            (dKSq_.cArray(), minImages_.cArray(), dkkBasis.cArray(),
             implicitInverse_.cArray(), unitCell().nParameter(), kSize_);

      hasdKSq_ = true;
   }

}
}
#endif