PSCF v1.1
LinearAlgebra.cu
1#ifndef PSPG_LINEAR_ALGEBRA_CU
2#define PSPG_LINEAR_ALGEBRA_CU
3
4#include "LinearAlgebra.h"
5
6namespace Pscf {
7namespace Pspg {
8
9__global__ void subtractUniform(cudaReal* result, cudaReal rhs, int size)
10{
11 int nThreads = blockDim.x * gridDim.x;
12 int startID = blockIdx.x * blockDim.x + threadIdx.x;
13 for (int i = startID; i < size; i += nThreads) {
14 result[i] -= rhs;
15 }
16}
17
18__global__ void addUniform(cudaReal* result, cudaReal rhs, int size)
19{
20 int nThreads = blockDim.x * gridDim.x;
21 int startID = blockIdx.x * blockDim.x + threadIdx.x;
22 for (int i = startID; i < size; i += nThreads) {
23 result[i] += rhs;
24 }
25}
26
27__global__ void pointWiseSubtract(cudaReal* result, const cudaReal* rhs, int size)
28{
29 int nThreads = blockDim.x * gridDim.x;
30 int startID = blockIdx.x * blockDim.x + threadIdx.x;
31 for (int i = startID; i < size; i += nThreads) {
32 result[i] -= rhs[i];
33 }
34}
35
36__global__ void pointWiseSubtractFloat(cudaReal* result, const float rhs, int size)
37{
38 int nThreads = blockDim.x * gridDim.x;
39 int startID = blockIdx.x * blockDim.x + threadIdx.x;
40 for (int i = startID; i < size; i += nThreads) {
41 result[i] -= rhs;
42 }
43}
44
45__global__ void pointWiseBinarySubtract(const cudaReal* a, const cudaReal* b, cudaReal* result, int size)
46{
47 int nThreads = blockDim.x * gridDim.x;
48 int startID = blockIdx.x * blockDim.x + threadIdx.x;
49 for (int i = startID; i < size; i += nThreads) {
50 result[i] = a[i] - b[i];
51 }
52}
53
54__global__ void pointWiseAdd(cudaReal* result, const cudaReal* rhs, int size)
55{
56 int nThreads = blockDim.x * gridDim.x;
57 int startID = blockIdx.x * blockDim.x + threadIdx.x;
58 for (int i = startID; i < size; i += nThreads) {
59 result[i] += rhs[i];
60 }
61}
62
63__global__ void pointWiseBinaryAdd(const cudaReal* a, const cudaReal* b, cudaReal* result, int size)
64{
65 int nThreads = blockDim.x * gridDim.x;
66 int startID = blockIdx.x * blockDim.x + threadIdx.x;
67 for (int i = startID; i < size; i += nThreads) {
68 result[i] = a[i] + b[i];
69 }
70}
71
72__global__ void pointWiseAddScale(cudaReal* result, const cudaReal* rhs, double scale, int size)
73{
74 int nThreads = blockDim.x * gridDim.x;
75 int startID = blockIdx.x * blockDim.x + threadIdx.x;
76 for (int i = startID; i < size; i += nThreads) {
77 result[i] += scale * rhs[i];
78 }
79}
80
81__global__ void inPlacePointwiseMul(cudaReal* a, const cudaReal* b, int size)
82{
83 int nThreads = blockDim.x * gridDim.x;
84 int startID = blockIdx.x * blockDim.x + threadIdx.x;
85 for(int i = startID; i < size; i += nThreads) {
86 a[i] *= b[i];
87 }
88}
89
90__global__ void pointWiseBinaryMultiply(const cudaReal* a, const cudaReal* b, cudaReal* result, int size)
91{
92 int nThreads = blockDim.x * gridDim.x;
93 int startID = blockIdx.x * blockDim.x + threadIdx.x;
94 for (int i = startID; i < size; i += nThreads) {
95 result[i] = a[i] * b[i];
96 }
97}
98
99__global__ void assignUniformReal(cudaReal* result, cudaReal uniform, int size)
100{
101 int nThreads = blockDim.x * gridDim.x;
102 int startID = blockIdx.x * blockDim.x + threadIdx.x;
103 for(int i = startID; i < size; i += nThreads) {
104 result[i] = uniform;
105 }
106}
107
108__global__ void assignReal(cudaReal* result, const cudaReal* rhs, int size)
109{
110 int nThreads = blockDim.x * gridDim.x;
111 int startID = blockIdx.x * blockDim.x + threadIdx.x;
112 for(int i = startID; i < size; i += nThreads) {
113 result[i] = rhs[i];
114 }
115}
116
117__global__ void assignExp(cudaReal* out, const cudaReal* w, double constant, int size)
118{
119 int nThreads = blockDim.x * gridDim.x;
120 int startID = blockIdx.x * blockDim.x + threadIdx.x;
121 for(int i = startID; i < size; i += nThreads) {
122 out[i] = exp(-w[i]*constant);
123 }
124}
125
126__global__ void scaleReal(cudaReal* result, double scale, int size)
127{
128 int nThreads = blockDim.x * gridDim.x;
129 int startID = blockIdx.x * blockDim.x + threadIdx.x;
130
131 for (int i = startID; i < size; i += nThreads) {
132 result[i] *= scale;
133 }
134}
135
136
137
138}
139}
140#endif
int nThreads()
Get the number of threads per block for execution.
Definition: ThreadGrid.cu:173
C++ namespace for polymer self-consistent field theory (PSCF).