#ifndef PSPG_LINEAR_ALGEBRA_CU
#define PSPG_LINEAR_ALGEBRA_CU

#include "LinearAlgebra.h"

// C++ namespace for polymer self-consistent field theory (PSCF).
namespace Pscf {
namespace Pspg {
// Subtract the scalar rhs from every element: result[i] -= rhs
__global__ void subtractUniform(cudaReal* result, cudaReal rhs, int size)
{
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < size; i += nThreads) {
      result[i] -= rhs;
   }
}
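
// All kernels in this file use the grid-stride loop idiom above: the launch
// provides nThreads = blockDim.x * gridDim.x threads in total, each thread
// starts at its global index and advances by nThreads, so every element of
// an array of any size is visited exactly once, for any grid size.
//
// A minimal host-side launch sketch; nBlocks and nThreadsPerBlock are
// hypothetical launch parameters chosen by the caller, not defined here:
//
//    subtractUniform<<<nBlocks, nThreadsPerBlock>>>(d_result, 1.0, size);
//    cudaDeviceSynchronize();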

// Add the scalar rhs to every element: result[i] += rhs
__global__ void addUniform(cudaReal* result, cudaReal rhs, int size)
{
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < size; i += nThreads) {
      result[i] += rhs;
   }
}

// Element-wise subtraction: result[i] -= rhs[i]
__global__ void pointWiseSubtract(cudaReal* result, const cudaReal* rhs,
                                  int size)
{
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < size; i += nThreads) {
      result[i] -= rhs[i];
   }
}

// Subtract the float scalar rhs from every element: result[i] -= rhs
__global__ void pointWiseSubtractFloat(cudaReal* result, const float rhs,
                                       int size)
{
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < size; i += nThreads) {
      result[i] -= rhs;
   }
}

// Element-wise binary subtraction: result[i] = a[i] - b[i]
__global__ void pointWiseBinarySubtract(const cudaReal* a, const cudaReal* b,
                                        cudaReal* result, int size)
{
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < size; i += nThreads) {
      result[i] = a[i] - b[i];
   }
}
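
// Note: in the binary kernels, result may safely alias a or b. Each thread
// reads a[i] and b[i] before writing result[i], and distinct threads touch
// disjoint indices, so there is no cross-thread hazard.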

// Element-wise addition: result[i] += rhs[i]
__global__ void pointWiseAdd(cudaReal* result, const cudaReal* rhs, int size)
{
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < size; i += nThreads) {
      result[i] += rhs[i];
   }
}

// Element-wise binary addition: result[i] = a[i] + b[i]
__global__ void pointWiseBinaryAdd(const cudaReal* a, const cudaReal* b,
                                   cudaReal* result, int size)
{
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < size; i += nThreads) {
      result[i] = a[i] + b[i];
   }
}

// Scaled accumulation (axpy-like): result[i] += scale * rhs[i]
__global__ void pointWiseAddScale(cudaReal* result, const cudaReal* rhs,
                                  double scale, int size)
{
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < size; i += nThreads) {
      result[i] += scale * rhs[i];
   }
}

// In-place element-wise multiplication: a[i] *= b[i]
__global__ void inPlacePointwiseMul(cudaReal* a, const cudaReal* b, int size)
{
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < size; i += nThreads) {
      a[i] *= b[i];
   }
}

// Element-wise binary multiplication: result[i] = a[i] * b[i]
__global__ void pointWiseBinaryMultiply(const cudaReal* a, const cudaReal* b,
                                        cudaReal* result, int size)
{
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < size; i += nThreads) {
      result[i] = a[i] * b[i];
   }
}

// Assign the scalar uniform to every element: result[i] = uniform
__global__ void assignUniformReal(cudaReal* result, cudaReal uniform, int size)
{
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < size; i += nThreads) {
      result[i] = uniform;
   }
}

// Element-wise copy: result[i] = rhs[i]
__global__ void assignReal(cudaReal* result, const cudaReal* rhs, int size)
{
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < size; i += nThreads) {
      result[i] = rhs[i];
   }
}

// Element-wise exponential: out[i] = exp(-w[i] * constant)
__global__ void assignExp(cudaReal* out, const cudaReal* w, double constant,
                          int size)
{
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < size; i += nThreads) {
      out[i] = exp(-w[i] * constant);
   }
}
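
// Precision note: because constant is a double, -w[i] * constant is computed
// in double precision and exp resolves to the double-precision device
// function; the result is narrowed to cudaReal on assignment. (cudaReal is
// assumed to be a float/double typedef supplied via LinearAlgebra.h.)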

// Multiply every element by the scalar scale: result[i] *= scale
__global__ void scaleReal(cudaReal* result, double scale, int size)
{
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < size; i += nThreads) {
      result[i] *= scale;
   }
}
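
// Usage sketch combining several of these kernels from host code. The names
// d_a, d_b, nBlocks, and nThreadsPerBlock are hypothetical, and error
// checking is omitted:
//
//    cudaReal *d_a, *d_b;
//    cudaMalloc(&d_a, size * sizeof(cudaReal));
//    cudaMalloc(&d_b, size * sizeof(cudaReal));
//    assignUniformReal<<<nBlocks, nThreadsPerBlock>>>(d_a, 1.0, size);
//    assignExp<<<nBlocks, nThreadsPerBlock>>>(d_b, d_a, 0.5, size);
//    inPlacePointwiseMul<<<nBlocks, nThreadsPerBlock>>>(d_a, d_b, size);
//    cudaDeviceSynchronize();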

} // namespace Pspg
} // namespace Pscf
#endif