/*
* Vector assignment, a[i] = b[i], real data (GPU kernel).
*
* NOTE(review): the extracted source dropped kernel bodies and braces;
* the bodies below are reconstructed from the sibling overloads that
* survived extraction — confirm against version control.
*
* \param a  output array (LHS)
* \param b  input array (RHS)
* \param n  number of elements to process
*/
__global__ void _eqV(cudaReal* a, cudaReal const * b, const int n)
{
   // Grid-stride loop: correct for any launch configuration.
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i] = b[i];
   }
}

/*
* Vector assignment, a[i] = b[i], complex data (GPU kernel).
*
* \param a  output array (LHS)
* \param b  input array (RHS)
* \param n  number of elements to process
*/
__global__ void _eqV(cudaComplex* a, cudaComplex const * b, const int n)
{
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i].x = b[i].x;
      a[i].y = b[i].y;
   }
}
/*
* Scalar assignment, a[i] = b, real data (GPU kernel).
*
* NOTE(review): bodies reconstructed from surviving sibling kernels
* (extraction dropped bodies/braces) — confirm against version control.
*
* \param a  output array (LHS)
* \param b  input scalar (RHS)
* \param n  number of elements to process
*/
__global__ void _eqS(cudaReal* a, const cudaReal b, const int n)
{
   // Grid-stride loop: correct for any launch configuration.
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i] = b;
   }
}

/*
* Scalar assignment, a[i] = b, complex data (GPU kernel).
*
* \param a  output array (LHS)
* \param b  input scalar (RHS)
* \param n  number of elements to process
*/
__global__ void _eqS(cudaComplex* a, const cudaComplex b, const int n)
{
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i].x = b.x;
      a[i].y = b.y;
   }
}
/*
* Vector-vector addition, a[i] = b[i] + c[i], real data (GPU kernel).
*
* NOTE(review): braces and some body lines were lost in extraction and
* are reconstructed from the surviving complex overload — confirm.
*
* \param a  output array (LHS)
* \param b  first input array (RHS)
* \param c  second input array (RHS)
* \param n  number of elements to process
*/
__global__ void _addVV(cudaReal* a, cudaReal const * b,
                       cudaReal const * c, const int n)
{
   // Grid-stride loop: correct for any launch configuration.
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i] = b[i] + c[i];
   }
}

/*
* Vector-vector addition, a[i] = b[i] + c[i], complex data (GPU kernel).
*
* \param a  output array (LHS)
* \param b  first input array (RHS)
* \param c  second input array (RHS)
* \param n  number of elements to process
*/
__global__ void _addVV(cudaComplex* a, cudaComplex const * b,
                       cudaComplex const * c, const int n)
{
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i].x = b[i].x + c[i].x;
      a[i].y = b[i].y + c[i].y;
   }
}

/*
* Vector-vector addition, a[i] = b[i] + c[i], mixed real/complex data.
*
* \param a  output array (LHS)
* \param b  first input array, real (RHS)
* \param c  second input array, complex (RHS)
* \param n  number of elements to process
*/
__global__ void _addVV(cudaComplex* a, cudaReal const * b,
                       cudaComplex const * c, const int n)
{
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i].x = b[i] + c[i].x;
      a[i].y = c[i].y;  // real b contributes nothing imaginary
   }
}

/*
* Vector-vector addition, a[i] = b[i] + c[i], mixed complex/real data.
*
* \param a  output array (LHS)
* \param b  first input array, complex (RHS)
* \param c  second input array, real (RHS)
* \param n  number of elements to process
*/
__global__ void _addVV(cudaComplex* a, cudaComplex const * b,
                       cudaReal const * c, const int n)
{
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i].x = b[i].x + c[i];
      a[i].y = b[i].y;  // real c contributes nothing imaginary
   }
}
/*
* Vector-scalar addition, a[i] = b[i] + c, real data (GPU kernel).
*
* NOTE(review): braces and some body lines were lost in extraction and
* are reconstructed from the surviving complex overload — confirm.
*
* \param a  output array (LHS)
* \param b  input array (RHS)
* \param c  input scalar (RHS)
* \param n  number of elements to process
*/
__global__ void _addVS(cudaReal* a, cudaReal const * b,
                       const cudaReal c, const int n)
{
   // Grid-stride loop: correct for any launch configuration.
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i] = b[i] + c;
   }
}

/*
* Vector-scalar addition, a[i] = b[i] + c, complex data (GPU kernel).
*
* \param a  output array (LHS)
* \param b  input array (RHS)
* \param c  input scalar (RHS)
* \param n  number of elements to process
*/
__global__ void _addVS(cudaComplex* a, cudaComplex const * b,
                       const cudaComplex c, const int n)
{
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i].x = b[i].x + c.x;
      a[i].y = b[i].y + c.y;
   }
}

/*
* Vector-scalar addition, a[i] = b[i] + c, mixed real/complex data.
*
* \param a  output array (LHS)
* \param b  input array, real (RHS)
* \param c  input scalar, complex (RHS)
* \param n  number of elements to process
*/
__global__ void _addVS(cudaComplex* a, cudaReal const * b,
                       const cudaComplex c, const int n)
{
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i].x = b[i] + c.x;
      a[i].y = c.y;  // real b contributes nothing imaginary
   }
}

/*
* Vector-scalar addition, a[i] = b[i] + c, mixed complex/real data.
*
* \param a  output array (LHS)
* \param b  input array, complex (RHS)
* \param c  input scalar, real (RHS)
* \param n  number of elements to process
*/
__global__ void _addVS(cudaComplex* a, cudaComplex const * b,
                       const cudaReal c, const int n)
{
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i].x = b[i].x + c;
      a[i].y = b[i].y;  // real c contributes nothing imaginary
   }
}
/*
* Vector-vector subtraction, a[i] = b[i] - c[i], real data (GPU kernel).
*
* NOTE(review): braces and some body lines were lost in extraction and
* are reconstructed from the surviving overloads — confirm.
*
* \param a  output array (LHS)
* \param b  first input array (RHS)
* \param c  second input array (RHS)
* \param n  number of elements to process
*/
__global__ void _subVV(cudaReal* a, cudaReal const * b,
                       cudaReal const * c, const int n)
{
   // Grid-stride loop: correct for any launch configuration.
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i] = b[i] - c[i];
   }
}

/*
* Vector-vector subtraction, a[i] = b[i] - c[i], complex data (GPU kernel).
*
* \param a  output array (LHS)
* \param b  first input array (RHS)
* \param c  second input array (RHS)
* \param n  number of elements to process
*/
__global__ void _subVV(cudaComplex* a, cudaComplex const * b,
                       cudaComplex const * c, const int n)
{
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i].x = b[i].x - c[i].x;
      a[i].y = b[i].y - c[i].y;
   }
}

/*
* Vector-vector subtraction, a[i] = b[i] - c[i], mixed real/complex data.
*
* \param a  output array (LHS)
* \param b  first input array, real (RHS)
* \param c  second input array, complex (RHS)
* \param n  number of elements to process
*/
__global__ void _subVV(cudaComplex* a, cudaReal const * b,
                       cudaComplex const * c, const int n)
{
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i].x = b[i] - c[i].x;
      a[i].y = 0.0 - c[i].y;  // real b has zero imaginary part
   }
}

/*
* Vector-vector subtraction, a[i] = b[i] - c[i], mixed complex/real data.
*
* \param a  output array (LHS)
* \param b  first input array, complex (RHS)
* \param c  second input array, real (RHS)
* \param n  number of elements to process
*/
__global__ void _subVV(cudaComplex* a, cudaComplex const * b,
                       cudaReal const * c, const int n)
{
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i].x = b[i].x - c[i];
      a[i].y = b[i].y;  // real c contributes nothing imaginary
   }
}
/*
* Vector-scalar subtraction, a[i] = b[i] - c, real data (GPU kernel).
*
* NOTE(review): braces and some body lines were lost in extraction and
* are reconstructed from the surviving overloads — confirm.
*
* \param a  output array (LHS)
* \param b  input array (RHS)
* \param c  input scalar (RHS)
* \param n  number of elements to process
*/
__global__ void _subVS(cudaReal* a, cudaReal const * b,
                       const cudaReal c, const int n)
{
   // Grid-stride loop: correct for any launch configuration.
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i] = b[i] - c;
   }
}

/*
* Vector-scalar subtraction, a[i] = b[i] - c, complex data (GPU kernel).
*
* \param a  output array (LHS)
* \param b  input array (RHS)
* \param c  input scalar (RHS)
* \param n  number of elements to process
*/
__global__ void _subVS(cudaComplex* a, cudaComplex const * b,
                       const cudaComplex c, const int n)
{
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i].x = b[i].x - c.x;
      a[i].y = b[i].y - c.y;
   }
}

/*
* Vector-scalar subtraction, a[i] = b[i] - c, mixed real/complex data.
*
* \param a  output array (LHS)
* \param b  input array, real (RHS)
* \param c  input scalar, complex (RHS)
* \param n  number of elements to process
*/
__global__ void _subVS(cudaComplex* a, cudaReal const * b,
                       const cudaComplex c, const int n)
{
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i].x = b[i] - c.x;
      a[i].y = 0.0 - c.y;  // real b has zero imaginary part
   }
}

/*
* Vector-scalar subtraction, a[i] = b[i] - c, mixed complex/real data.
*
* \param a  output array (LHS)
* \param b  input array, complex (RHS)
* \param c  input scalar, real (RHS)
* \param n  number of elements to process
*/
__global__ void _subVS(cudaComplex* a, cudaComplex const * b,
                       const cudaReal c, const int n)
{
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i].x = b[i].x - c;
      a[i].y = b[i].y;  // real c contributes nothing imaginary
   }
}
/*
* Vector-vector multiplication, a[i] = b[i] * c[i], real data (GPU kernel).
*
* NOTE(review): braces and some body lines were lost in extraction and
* are reconstructed from the surviving overloads — confirm.
*
* \param a  output array (LHS)
* \param b  first input array (RHS)
* \param c  second input array (RHS)
* \param n  number of elements to process
*/
__global__ void _mulVV(cudaReal* a, cudaReal const * b,
                       cudaReal const * c, const int n)
{
   // Grid-stride loop: correct for any launch configuration.
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i] = b[i] * c[i];
   }
}

/*
* Vector-vector multiplication, a[i] = b[i] * c[i], complex data.
*
* Uses the full complex product (bx*cx - by*cy, bx*cy + by*cx).
*
* \param a  output array (LHS)
* \param b  first input array (RHS)
* \param c  second input array (RHS)
* \param n  number of elements to process
*/
__global__ void _mulVV(cudaComplex* a, cudaComplex const * b,
                       cudaComplex const * c, const int n)
{
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i].x = (b[i].x * c[i].x) - (b[i].y * c[i].y);
      a[i].y = (b[i].x * c[i].y) + (b[i].y * c[i].x);
   }
}

/*
* Vector-vector multiplication, a[i] = b[i] * c[i], mixed real/complex.
*
* \param a  output array (LHS)
* \param b  first input array, real (RHS)
* \param c  second input array, complex (RHS)
* \param n  number of elements to process
*/
__global__ void _mulVV(cudaComplex* a, cudaReal const * b,
                       cudaComplex const * c, const int n)
{
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i].x = b[i] * c[i].x;
      a[i].y = b[i] * c[i].y;
   }
}

/*
* Vector-vector multiplication, a[i] = b[i] * c[i], mixed complex/real.
*
* \param a  output array (LHS)
* \param b  first input array, complex (RHS)
* \param c  second input array, real (RHS)
* \param n  number of elements to process
*/
__global__ void _mulVV(cudaComplex* a, cudaComplex const * b,
                       cudaReal const * c, const int n)
{
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i].x = b[i].x * c[i];
      a[i].y = b[i].y * c[i];
   }
}
/*
* Vector-scalar multiplication, a[i] = b[i] * c, real data (GPU kernel).
*
* NOTE(review): braces and some body lines were lost in extraction and
* are reconstructed from the surviving overloads — confirm.
*
* \param a  output array (LHS)
* \param b  input array (RHS)
* \param c  input scalar (RHS)
* \param n  number of elements to process
*/
__global__ void _mulVS(cudaReal* a, cudaReal const * b,
                       const cudaReal c, const int n)
{
   // Grid-stride loop: correct for any launch configuration.
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i] = b[i] * c;
   }
}

/*
* Vector-scalar multiplication, a[i] = b[i] * c, complex data.
*
* Uses the full complex product (bx*cx - by*cy, bx*cy + by*cx).
*
* \param a  output array (LHS)
* \param b  input array (RHS)
* \param c  input scalar (RHS)
* \param n  number of elements to process
*/
__global__ void _mulVS(cudaComplex* a, cudaComplex const * b,
                       const cudaComplex c, const int n)
{
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i].x = (b[i].x * c.x) - (b[i].y * c.y);
      a[i].y = (b[i].x * c.y) + (b[i].y * c.x);
   }
}

/*
* Vector-scalar multiplication, a[i] = b[i] * c, mixed real/complex.
*
* \param a  output array (LHS)
* \param b  input array, real (RHS)
* \param c  input scalar, complex (RHS)
* \param n  number of elements to process
*/
__global__ void _mulVS(cudaComplex* a, cudaReal const * b,
                       const cudaComplex c, const int n)
{
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i].x = b[i] * c.x;
      a[i].y = b[i] * c.y;
   }
}

/*
* Vector-scalar multiplication, a[i] = b[i] * c, mixed complex/real.
*
* \param a  output array (LHS)
* \param b  input array, complex (RHS)
* \param c  input scalar, real (RHS)
* \param n  number of elements to process
*/
__global__ void _mulVS(cudaComplex* a, cudaComplex const * b,
                       const cudaReal c, const int n)
{
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i].x = b[i].x * c;
      a[i].y = b[i].y * c;
   }
}
/*
* Vector-vector division, a[i] = b[i] / c[i], real data (GPU kernel).
*
* NOTE(review): braces and the real-case body were lost in extraction
* and are reconstructed from the surviving overloads — confirm. Note
* that no complex/complex division overload appears in this file.
*
* \param a  output array (LHS)
* \param b  numerator array (RHS)
* \param c  denominator array (RHS); elements must be nonzero
* \param n  number of elements to process
*/
__global__ void _divVV(cudaReal* a, cudaReal const * b,
                       cudaReal const * c, const int n)
{
   // Grid-stride loop: correct for any launch configuration.
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i] = b[i] / c[i];
   }
}

/*
* Vector-vector division, a[i] = b[i] / c[i], complex numerator and
* real denominator (GPU kernel).
*
* \param a  output array (LHS)
* \param b  numerator array, complex (RHS)
* \param c  denominator array, real (RHS); elements must be nonzero
* \param n  number of elements to process
*/
__global__ void _divVV(cudaComplex* a, cudaComplex const * b,
                       cudaReal const * c, const int n)
{
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i].x = b[i].x / c[i];
      a[i].y = b[i].y / c[i];
   }
}
/*
* Vector-scalar division, a[i] = b[i] / c, real data (GPU kernel).
*
* NOTE(review): braces and bodies were lost in extraction and are
* reconstructed from the surviving sibling kernels — confirm.
*
* \param a  output array (LHS)
* \param b  numerator array (RHS)
* \param c  denominator scalar (RHS); must be nonzero
* \param n  number of elements to process
*/
__global__ void _divVS(cudaReal* a, cudaReal const * b,
                       const cudaReal c, const int n)
{
   // Grid-stride loop: correct for any launch configuration.
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i] = b[i] / c;
   }
}

/*
* Vector-scalar division, a[i] = b[i] / c, complex numerator and real
* denominator (GPU kernel).
*
* \param a  output array (LHS)
* \param b  numerator array, complex (RHS)
* \param c  denominator scalar, real (RHS); must be nonzero
* \param n  number of elements to process
*/
__global__ void _divVS(cudaComplex* a, cudaComplex const * b,
                       const cudaReal c, const int n)
{
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i].x = b[i].x / c;
      a[i].y = b[i].y / c;
   }
}
/*
* Scalar-vector division, a[i] = b / c[i], real data (GPU kernel).
*
* NOTE(review): braces and body were lost in extraction; body
* reconstructed from the kernel name and sibling kernels — confirm.
*
* \param a  output array (LHS)
* \param b  numerator scalar (RHS)
* \param c  denominator array (RHS); elements must be nonzero
* \param n  number of elements to process
*/
__global__ void _divSV(cudaReal* a, const cudaReal b,
                       cudaReal const * c, const int n)
{
   // Grid-stride loop: correct for any launch configuration.
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i] = b / c[i];
   }
}
/*
* Vector exponentiation, a[i] = exp(b[i]), real data (GPU kernel).
*
* NOTE(review): braces and the real-case body were lost in extraction
* and are reconstructed from the surviving complex overload — confirm.
*
* \param a  output array (LHS)
* \param b  input array (RHS)
* \param n  number of elements to process
*/
__global__ void _expV(cudaReal* a, cudaReal const * b, const int n)
{
   // Grid-stride loop: correct for any launch configuration.
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i] = exp(b[i]);
   }
}

/*
* Vector exponentiation, a[i] = exp(b[i]), complex data (GPU kernel).
*
* Uses Euler's formula: exp(x + iy) = exp(x) * (cos(y) + i sin(y)).
*
* \param a  output array (LHS)
* \param b  input array (RHS)
* \param n  number of elements to process
*/
__global__ void _expV(cudaComplex* a, cudaComplex const * b, const int n)
{
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i].x = exp(b[i].x) * cos(b[i].y);
      a[i].y = exp(b[i].x) * sin(b[i].y);
   }
}
/*
* Vector-vector in-place addition, a[i] += b[i], real data (GPU kernel).
*
* NOTE(review): braces and bodies were lost in extraction and are
* reconstructed from the surviving non-in-place kernels — confirm.
*
* \param a  output array (LHS), also an input
* \param b  input array (RHS)
* \param n  number of elements to process
*/
__global__ void _addEqV(cudaReal* a, cudaReal const * b, const int n)
{
   // Grid-stride loop: correct for any launch configuration.
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i] += b[i];
   }
}

/*
* Vector-vector in-place addition, a[i] += b[i], complex data.
*
* \param a  output array (LHS), also an input
* \param b  input array (RHS)
* \param n  number of elements to process
*/
__global__ void _addEqV(cudaComplex* a, cudaComplex const * b, const int n)
{
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i].x += b[i].x;
      a[i].y += b[i].y;
   }
}

/*
* Vector-vector in-place addition, a[i] += b[i], complex += real.
*
* \param a  output array (LHS), also an input
* \param b  input array, real (RHS)
* \param n  number of elements to process
*/
__global__ void _addEqV(cudaComplex* a, cudaReal const * b, const int n)
{
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i].x += b[i];  // imaginary part unchanged: real b adds nothing to it
   }
}
/*
* Vector-scalar in-place addition, a[i] += b, real data (GPU kernel).
*
* NOTE(review): braces, bodies, and (for the complex overload) the
* `const int n` parameter line were lost in extraction and are
* reconstructed from the sibling kernels — confirm.
*
* \param a  output array (LHS), also an input
* \param b  input scalar (RHS)
* \param n  number of elements to process
*/
__global__ void _addEqS(cudaReal* a, const cudaReal b, const int n)
{
   // Grid-stride loop: correct for any launch configuration.
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i] += b;
   }
}

/*
* Vector-scalar in-place addition, a[i] += b, complex data (GPU kernel).
*
* \param a  output array (LHS), also an input
* \param b  input scalar (RHS)
* \param n  number of elements to process
*/
__global__ void _addEqS(cudaComplex* a, const cudaComplex b, const int n)
{
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i].x += b.x;
      a[i].y += b.y;
   }
}

/*
* Vector-scalar in-place addition, a[i] += b, complex += real.
*
* \param a  output array (LHS), also an input
* \param b  input scalar, real (RHS)
* \param n  number of elements to process
*/
__global__ void _addEqS(cudaComplex* a, const cudaReal b, const int n)
{
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i].x += b;  // imaginary part unchanged: real b adds nothing to it
   }
}
/*
* Vector-vector in-place subtraction, a[i] -= b[i], real data (GPU kernel).
*
* NOTE(review): braces and bodies were lost in extraction and are
* reconstructed from the surviving non-in-place kernels — confirm.
*
* \param a  output array (LHS), also an input
* \param b  input array (RHS)
* \param n  number of elements to process
*/
__global__ void _subEqV(cudaReal* a, cudaReal const * b, const int n)
{
   // Grid-stride loop: correct for any launch configuration.
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i] -= b[i];
   }
}

/*
* Vector-vector in-place subtraction, a[i] -= b[i], complex data.
*
* \param a  output array (LHS), also an input
* \param b  input array (RHS)
* \param n  number of elements to process
*/
__global__ void _subEqV(cudaComplex* a, cudaComplex const * b, const int n)
{
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i].x -= b[i].x;
      a[i].y -= b[i].y;
   }
}

/*
* Vector-vector in-place subtraction, a[i] -= b[i], complex -= real.
*
* \param a  output array (LHS), also an input
* \param b  input array, real (RHS)
* \param n  number of elements to process
*/
__global__ void _subEqV(cudaComplex* a, cudaReal const * b, const int n)
{
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i].x -= b[i];  // imaginary part unchanged: real b removes nothing from it
   }
}
/*
* Vector-scalar in-place subtraction, a[i] -= b, real data (GPU kernel).
*
* NOTE(review): braces and bodies were lost in extraction and are
* reconstructed from the sibling kernels — confirm.
*
* \param a  output array (LHS), also an input
* \param b  input scalar (RHS)
* \param n  number of elements to process
*/
__global__ void _subEqS(cudaReal* a, const cudaReal b, const int n)
{
   // Grid-stride loop: correct for any launch configuration.
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i] -= b;
   }
}

/*
* Vector-scalar in-place subtraction, a[i] -= b, complex data (GPU kernel).
*
* \param a  output array (LHS), also an input
* \param b  input scalar (RHS)
* \param n  number of elements to process
*/
__global__ void _subEqS(cudaComplex* a, const cudaComplex b, const int n)
{
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i].x -= b.x;
      a[i].y -= b.y;
   }
}

/*
* Vector-scalar in-place subtraction, a[i] -= b, complex -= real.
*
* \param a  output array (LHS), also an input
* \param b  input scalar, real (RHS)
* \param n  number of elements to process
*/
__global__ void _subEqS(cudaComplex* a, const cudaReal b, const int n)
{
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i].x -= b;  // imaginary part unchanged: real b removes nothing from it
   }
}
/*
* Vector-vector in-place multiplication, a[i] *= b[i], real data.
*
* NOTE(review): braces, bodies, and the local `cudaComplex c`
* declaration in the complex overload were lost in extraction and are
* reconstructed from the surviving statements — confirm.
*
* \param a  output array (LHS), also an input
* \param b  input array (RHS)
* \param n  number of elements to process
*/
__global__ void _mulEqV(cudaReal* a, cudaReal const * b, const int n)
{
   // Grid-stride loop: correct for any launch configuration.
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i] *= b[i];
   }
}

/*
* Vector-vector in-place multiplication, a[i] *= b[i], complex data.
*
* The product is accumulated in a temporary because a[i] appears on
* both sides of the complex multiply.
*
* \param a  output array (LHS), also an input
* \param b  input array (RHS)
* \param n  number of elements to process
*/
__global__ void _mulEqV(cudaComplex* a, cudaComplex const * b, const int n)
{
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   cudaComplex c;
   for (int i = startID; i < n; i += nThreads) {
      c.x = (a[i].x * b[i].x) - (a[i].y * b[i].y);
      c.y = (a[i].x * b[i].y) + (a[i].y * b[i].x);
      a[i] = c;
   }
}

/*
* Vector-vector in-place multiplication, a[i] *= b[i], complex *= real.
*
* \param a  output array (LHS), also an input
* \param b  input array, real (RHS)
* \param n  number of elements to process
*/
__global__ void _mulEqV(cudaComplex* a, cudaReal const * b, const int n)
{
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i].x *= b[i];
      a[i].y *= b[i];
   }
}
/*
* Vector-scalar in-place multiplication, a[i] *= b, real data.
*
* NOTE(review): braces, bodies, and the local `cudaComplex c`
* declaration in the complex overload were lost in extraction and are
* reconstructed from the surviving statements — confirm.
*
* \param a  output array (LHS), also an input
* \param b  input scalar (RHS)
* \param n  number of elements to process
*/
__global__ void _mulEqS(cudaReal* a, const cudaReal b, const int n)
{
   // Grid-stride loop: correct for any launch configuration.
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i] *= b;
   }
}

/*
* Vector-scalar in-place multiplication, a[i] *= b, complex data.
*
* The product is accumulated in a temporary because a[i] appears on
* both sides of the complex multiply.
*
* \param a  output array (LHS), also an input
* \param b  input scalar (RHS)
* \param n  number of elements to process
*/
__global__ void _mulEqS(cudaComplex* a, const cudaComplex b, const int n)
{
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   cudaComplex c;
   for (int i = startID; i < n; i += nThreads) {
      c.x = (a[i].x * b.x) - (a[i].y * b.y);
      c.y = (a[i].x * b.y) + (a[i].y * b.x);
      a[i] = c;
   }
}

/*
* Vector-scalar in-place multiplication, a[i] *= b, complex *= real.
*
* \param a  output array (LHS), also an input
* \param b  input scalar, real (RHS)
* \param n  number of elements to process
*/
__global__ void _mulEqS(cudaComplex* a, const cudaReal b, const int n)
{
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i].x *= b;
      a[i].y *= b;
   }
}
/*
* Vector-vector in-place division, a[i] /= b[i], real data (GPU kernel).
*
* NOTE(review): braces and bodies were lost in extraction and are
* reconstructed from the surviving non-in-place kernels — confirm.
* Note that no complex /= complex overload appears in this file.
*
* \param a  output array (LHS), also an input
* \param b  denominator array (RHS); elements must be nonzero
* \param n  number of elements to process
*/
__global__ void _divEqV(cudaReal* a, cudaReal const * b, const int n)
{
   // Grid-stride loop: correct for any launch configuration.
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i] /= b[i];
   }
}

/*
* Vector-vector in-place division, a[i] /= b[i], complex /= real.
*
* \param a  output array (LHS), also an input
* \param b  denominator array, real (RHS); elements must be nonzero
* \param n  number of elements to process
*/
__global__ void _divEqV(cudaComplex* a, cudaReal const * b, const int n)
{
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i].x /= b[i];
      a[i].y /= b[i];
   }
}
/*
* Vector-scalar in-place division, a[i] /= b, real data (GPU kernel).
*
* NOTE(review): braces and bodies were lost in extraction and are
* reconstructed from the sibling kernels — confirm.
*
* \param a  output array (LHS), also an input
* \param b  denominator scalar (RHS); must be nonzero
* \param n  number of elements to process
*/
__global__ void _divEqS(cudaReal* a, const cudaReal b, const int n)
{
   // Grid-stride loop: correct for any launch configuration.
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i] /= b;
   }
}

/*
* Vector-scalar in-place division, a[i] /= b, complex /= real.
*
* \param a  output array (LHS), also an input
* \param b  denominator scalar, real (RHS); must be nonzero
* \param n  number of elements to process
*/
__global__ void _divEqS(cudaComplex* a, const cudaReal b, const int n)
{
   int nThreads = blockDim.x * gridDim.x;
   int startID = blockIdx.x * blockDim.x + threadIdx.x;
   for (int i = startID; i < n; i += nThreads) {
      a[i].x /= b;
      a[i].y /= b;
   }
}
1040 const int beginIdA,
const int beginIdB,
const int n)
1046 int nBlocks, nThreads;
1050 _eqV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA,
1052 cudaErrorCheck( cudaGetLastError() );
1057 const int beginIdA,
const int beginIdB,
const int n)
1063 int nBlocks, nThreads;
1067 _eqV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA,
1069 cudaErrorCheck( cudaGetLastError() );
1074 const int beginIdA,
const int n)
1079 int nBlocks, nThreads;
1083 _eqS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b, n);
1084 cudaErrorCheck( cudaGetLastError() );
1089 const int beginIdA,
const int n)
1094 int nBlocks, nThreads;
1098 _eqS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b, n);
1099 cudaErrorCheck( cudaGetLastError() );
1105 const int beginIdB,
const int beginIdC,
const int n)
1112 int nBlocks, nThreads;
1116 _addVV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1118 cudaErrorCheck( cudaGetLastError() );
1124 const int beginIdB,
const int beginIdC,
const int n)
1131 int nBlocks, nThreads;
1135 _addVV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1137 cudaErrorCheck( cudaGetLastError() );
1143 const int beginIdB,
const int beginIdC,
const int n)
1150 int nBlocks, nThreads;
1154 _addVV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1156 cudaErrorCheck( cudaGetLastError() );
1162 const int beginIdB,
const int beginIdC,
const int n)
1169 int nBlocks, nThreads;
1173 _addVV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1175 cudaErrorCheck( cudaGetLastError() );
1180 const cudaReal c,
const int beginIdA,
const int beginIdB,
int n)
1186 int nBlocks, nThreads;
1190 _addVS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1192 cudaErrorCheck( cudaGetLastError() );
1197 const cudaComplex c,
const int beginIdA,
const int beginIdB,
int n)
1203 int nBlocks, nThreads;
1207 _addVS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1209 cudaErrorCheck( cudaGetLastError() );
1214 const cudaComplex c,
const int beginIdA,
const int beginIdB,
int n)
1220 int nBlocks, nThreads;
1224 _addVS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1226 cudaErrorCheck( cudaGetLastError() );
1231 const cudaReal c,
const int beginIdA,
const int beginIdB,
int n)
1237 int nBlocks, nThreads;
1241 _addVS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1243 cudaErrorCheck( cudaGetLastError() );
1249 const int beginIdB,
const int beginIdC,
const int n)
1256 int nBlocks, nThreads;
1260 _subVV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1262 cudaErrorCheck( cudaGetLastError() );
1268 const int beginIdB,
const int beginIdC,
const int n)
1275 int nBlocks, nThreads;
1279 _subVV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1281 cudaErrorCheck( cudaGetLastError() );
1287 const int beginIdB,
const int beginIdC,
const int n)
1294 int nBlocks, nThreads;
1298 _subVV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1300 cudaErrorCheck( cudaGetLastError() );
1306 const int beginIdB,
const int beginIdC,
const int n)
1313 int nBlocks, nThreads;
1317 _subVV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1319 cudaErrorCheck( cudaGetLastError() );
1325 const int beginIdA,
const int beginIdB,
const int n)
1331 int nBlocks, nThreads;
1335 _subVS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1337 cudaErrorCheck( cudaGetLastError() );
1342 const cudaComplex c,
const int beginIdA,
const int beginIdB,
1349 int nBlocks, nThreads;
1353 _subVS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1355 cudaErrorCheck( cudaGetLastError() );
1360 const cudaComplex c,
const int beginIdA,
const int beginIdB,
1367 int nBlocks, nThreads;
1371 _subVS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1373 cudaErrorCheck( cudaGetLastError() );
1378 const cudaReal c,
const int beginIdA,
const int beginIdB,
1385 int nBlocks, nThreads;
1389 _subVS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1391 cudaErrorCheck( cudaGetLastError() );
1397 const int beginIdB,
const int beginIdC,
const int n)
1404 int nBlocks, nThreads;
1408 _mulVV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1410 cudaErrorCheck( cudaGetLastError() );
1416 const int beginIdB,
const int beginIdC,
const int n)
1423 int nBlocks, nThreads;
1427 _mulVV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1429 cudaErrorCheck( cudaGetLastError() );
1435 const int beginIdB,
const int beginIdC,
const int n)
1442 int nBlocks, nThreads;
1446 _mulVV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1448 cudaErrorCheck( cudaGetLastError() );
1454 const int beginIdB,
const int beginIdC,
const int n)
1461 int nBlocks, nThreads;
1465 _mulVV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1467 cudaErrorCheck( cudaGetLastError() );
1472 const cudaReal c,
const int beginIdA,
const int beginIdB,
1479 int nBlocks, nThreads;
1483 _mulVS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1485 cudaErrorCheck( cudaGetLastError() );
1490 const cudaComplex c,
const int beginIdA,
const int beginIdB,
1497 int nBlocks, nThreads;
1501 _mulVS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1503 cudaErrorCheck( cudaGetLastError() );
1509 const cudaComplex c,
1510 const int beginIdA,
const int beginIdB,
const int n)
1516 int nBlocks, nThreads;
1520 _mulVS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1522 cudaErrorCheck( cudaGetLastError() );
1527 const cudaReal c,
const int beginIdA,
const int beginIdB,
1534 int nBlocks, nThreads;
1538 _mulVS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1540 cudaErrorCheck( cudaGetLastError() );
1546 const int beginIdB,
const int beginIdC,
const int n)
1553 int nBlocks, nThreads;
1557 _divVV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1559 cudaErrorCheck( cudaGetLastError() );
1565 const int beginIdB,
const int beginIdC,
const int n)
1572 int nBlocks, nThreads;
1576 _divVV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1578 cudaErrorCheck( cudaGetLastError() );
1583 const cudaReal c,
const int beginIdA,
const int beginIdB,
1590 int nBlocks, nThreads;
1594 _divVS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1596 cudaErrorCheck( cudaGetLastError() );
1602 const cudaReal c,
const int beginIdA,
const int beginIdB,
1609 int nBlocks, nThreads;
1613 _divVS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1615 cudaErrorCheck( cudaGetLastError() );
1622 const int beginIdA,
const int beginIdC,
const int n)
1628 int nBlocks, nThreads;
1632 _divSV<<<nBlocks, nThreads>>>(a.
cArray() + beginIdA, b,
1633 c.
cArray() + beginIdC, n);
1634 cudaErrorCheck( cudaGetLastError() );
1639 const int beginIdA,
const int beginIdB,
const int n)
1645 int nBlocks, nThreads;
1649 _expV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA,
1651 cudaErrorCheck( cudaGetLastError() );
1656 const int beginIdA,
const int beginIdB,
const int n)
1662 int nBlocks, nThreads;
1666 _expV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA,
1668 cudaErrorCheck( cudaGetLastError() );
1673 const int beginIdA,
const int beginIdB,
const int n)
1679 int nBlocks, nThreads;
1683 _addEqV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA,
1685 cudaErrorCheck( cudaGetLastError() );
1691 const int beginIdA,
const int beginIdB,
const int n)
1697 int nBlocks, nThreads;
1701 _addEqV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA,
1703 cudaErrorCheck( cudaGetLastError() );
1708 const int beginIdA,
const int beginIdB,
const int n)
1714 int nBlocks, nThreads;
1718 _addEqV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA,
1720 cudaErrorCheck( cudaGetLastError() );
1725 const int beginIdA,
const int n)
1730 int nBlocks, nThreads;
1734 _addEqS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b, n);
1735 cudaErrorCheck( cudaGetLastError() );
1740 const int beginIdA,
const int n)
1745 int nBlocks, nThreads;
1749 _addEqS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b, n);
1750 cudaErrorCheck( cudaGetLastError() );
1755 const int beginIdA,
const int n)
1760 int nBlocks, nThreads;
1764 _addEqS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b, n);
1765 cudaErrorCheck( cudaGetLastError() );
1771 const int beginIdA,
const int beginIdB,
const int n)
1777 int nBlocks, nThreads;
1781 _subEqV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA,
1783 cudaErrorCheck( cudaGetLastError() );
1789 const int beginIdA,
const int beginIdB,
const int n)
1795 int nBlocks, nThreads;
1799 _subEqV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA,
1801 cudaErrorCheck( cudaGetLastError() );
1806 const int beginIdA,
const int beginIdB,
const int n)
1812 int nBlocks, nThreads;
1816 _subEqV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA,
1818 cudaErrorCheck( cudaGetLastError() );
1823 const int beginIdA,
const int n)
1828 int nBlocks, nThreads;
1832 _subEqS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b, n);
1833 cudaErrorCheck( cudaGetLastError() );
1838 const int beginIdA,
const int n)
1843 int nBlocks, nThreads;
1847 _subEqS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b, n);
1848 cudaErrorCheck( cudaGetLastError() );
1853 const int beginIdA,
const int n)
1858 int nBlocks, nThreads;
1862 _subEqS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b, n);
1863 cudaErrorCheck( cudaGetLastError() );
1868 const int beginIdA,
const int beginIdB,
const int n)
1874 int nBlocks, nThreads;
1878 _mulEqV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA,
1880 cudaErrorCheck( cudaGetLastError() );
1885 const int beginIdA,
const int beginIdB,
const int n)
1891 int nBlocks, nThreads;
1895 _mulEqV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA,
1897 cudaErrorCheck( cudaGetLastError() );
1902 const int beginIdA,
const int beginIdB,
const int n)
1908 int nBlocks, nThreads;
1912 _mulEqV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA,
1914 cudaErrorCheck( cudaGetLastError() );
1919 const int beginIdA,
const int n)
1924 int nBlocks, nThreads;
1928 _mulEqS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b, n);
1929 cudaErrorCheck( cudaGetLastError() );
1934 const cudaComplex b,
1935 const int beginIdA,
const int n)
1940 int nBlocks, nThreads;
1944 _mulEqS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b, n);
1945 cudaErrorCheck( cudaGetLastError() );
1950 const int beginIdA,
const int n)
1955 int nBlocks, nThreads;
1959 _mulEqS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b, n);
1960 cudaErrorCheck( cudaGetLastError() );
1965 const int beginIdA,
const int beginIdB,
const int n)
1971 int nBlocks, nThreads;
1975 _divEqV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA,
1977 cudaErrorCheck( cudaGetLastError() );
1982 const int beginIdA,
const int beginIdB,
const int n)
1988 int nBlocks, nThreads;
1992 _divEqV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA,
1994 cudaErrorCheck( cudaGetLastError() );
1999 const int beginIdA,
const int n)
2004 int nBlocks, nThreads;
2008 _divEqS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b, n);
2009 cudaErrorCheck( cudaGetLastError() );
2014 const int beginIdA,
const int n)
2019 int nBlocks, nThreads;
2023 _divEqS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b, n);
2024 cudaErrorCheck( cudaGetLastError() );