30 __global__
void _eqV(cudaReal* a, cudaReal
const * b,
const int n)
32 int nThreads = blockDim.x * gridDim.x;
33 int startID = blockIdx.x * blockDim.x + threadIdx.x;
34 for(
int i = startID; i < n; i += nThreads) {
46 __global__
void _eqV(cudaComplex* a, cudaComplex
const * b,
const int n)
48 int nThreads = blockDim.x * gridDim.x;
49 int startID = blockIdx.x * blockDim.x + threadIdx.x;
50 for(
int i = startID; i < n; i += nThreads) {
63 __global__
void _eqS(cudaReal* a, cudaReal
const b,
const int n)
65 int nThreads = blockDim.x * gridDim.x;
66 int startID = blockIdx.x * blockDim.x + threadIdx.x;
67 for(
int i = startID; i < n; i += nThreads) {
79 __global__
void _eqS(cudaComplex* a, cudaComplex
const b,
const int n)
81 int nThreads = blockDim.x * gridDim.x;
82 int startID = blockIdx.x * blockDim.x + threadIdx.x;
83 for(
int i = startID; i < n; i += nThreads) {
97 __global__
void _addVV(cudaReal* a, cudaReal
const * b,
98 cudaReal
const * c,
const int n)
100 int nThreads = blockDim.x * gridDim.x;
101 int startID = blockIdx.x * blockDim.x + threadIdx.x;
102 for(
int i = startID; i < n; i += nThreads) {
/*
* Element-wise complex addition kernel: a[i] = b[i] + c[i] for 0 <= i < n.
*
* Real and imaginary parts are summed separately. Each thread begins at its
* global x-index and advances by the total number of threads in the x
* dimension of the launch until it passes n.
*/
__global__ void _addVV(cudaComplex* a, cudaComplex const * b,
                       cudaComplex const * c, const int n)
{
    int const stride = blockDim.x * gridDim.x;
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    while (idx < n) {
        a[idx].x = b[idx].x + c[idx].x;
        a[idx].y = b[idx].y + c[idx].y;
        idx += stride;
    }
}
134 __global__
void _addVV(cudaComplex* a, cudaReal
const * b,
135 cudaComplex
const * c,
const int n)
137 int nThreads = blockDim.x * gridDim.x;
138 int startID = blockIdx.x * blockDim.x + threadIdx.x;
139 for(
int i = startID; i < n; i += nThreads) {
140 a[i].x = b[i] + c[i].x;
153 __global__
void _addVV(cudaComplex* a, cudaComplex
const * b,
154 cudaReal
const * c,
const int n)
156 int nThreads = blockDim.x * gridDim.x;
157 int startID = blockIdx.x * blockDim.x + threadIdx.x;
158 for(
int i = startID; i < n; i += nThreads) {
159 a[i].x = b[i].x + c[i];
172 __global__
void _addVS(cudaReal* a, cudaReal
const * b,
173 cudaReal
const c,
const int n)
175 int nThreads = blockDim.x * gridDim.x;
176 int startID = blockIdx.x * blockDim.x + threadIdx.x;
177 for(
int i = startID; i < n; i += nThreads) {
/*
* Complex vector-plus-scalar kernel: a[i] = b[i] + c for 0 <= i < n.
*
* The scalar c is passed by value and added component-wise. Each thread
* begins at its global x-index and advances by the total number of threads
* in the x dimension of the launch until it passes n.
*/
__global__ void _addVS(cudaComplex* a, cudaComplex const * b,
                       cudaComplex const c, const int n)
{
    int const stride = blockDim.x * gridDim.x;
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    while (idx < n) {
        a[idx].x = b[idx].x + c.x;
        a[idx].y = b[idx].y + c.y;
        idx += stride;
    }
}
209 __global__
void _addVS(cudaComplex* a, cudaReal
const * b,
210 cudaComplex
const c,
const int n)
212 int nThreads = blockDim.x * gridDim.x;
213 int startID = blockIdx.x * blockDim.x + threadIdx.x;
214 for(
int i = startID; i < n; i += nThreads) {
228 __global__
void _addVS(cudaComplex* a, cudaComplex
const * b,
229 cudaReal
const c,
const int n)
231 int nThreads = blockDim.x * gridDim.x;
232 int startID = blockIdx.x * blockDim.x + threadIdx.x;
233 for(
int i = startID; i < n; i += nThreads) {
247 __global__
void _subVV(cudaReal* a, cudaReal
const * b,
248 cudaReal
const * c,
const int n)
250 int nThreads = blockDim.x * gridDim.x;
251 int startID = blockIdx.x * blockDim.x + threadIdx.x;
252 for(
int i = startID; i < n; i += nThreads) {
/*
* Element-wise complex subtraction kernel: a[i] = b[i] - c[i] for 0 <= i < n.
*
* Real and imaginary parts are subtracted separately. Each thread begins at
* its global x-index and advances by the total number of threads in the x
* dimension of the launch until it passes n.
*/
__global__ void _subVV(cudaComplex* a, cudaComplex const * b,
                       cudaComplex const * c, const int n)
{
    int const stride = blockDim.x * gridDim.x;
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    while (idx < n) {
        a[idx].x = b[idx].x - c[idx].x;
        a[idx].y = b[idx].y - c[idx].y;
        idx += stride;
    }
}
/*
* Mixed real/complex subtraction kernel: a[i] = b[i] - c[i] for 0 <= i < n,
* where b is real and c is complex.
*
* The real part is b[i] minus the real part of c[i]; the imaginary part is
* zero minus the imaginary part of c[i]. Each thread begins at its global
* x-index and advances by the total number of threads in the x dimension of
* the launch until it passes n.
*/
__global__ void _subVV(cudaComplex* a, cudaReal const * b,
                       cudaComplex const * c, const int n)
{
    int const stride = blockDim.x * gridDim.x;
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    while (idx < n) {
        a[idx].x = b[idx] - c[idx].x;
        // Written as a subtraction from zero (not unary minus) on purpose:
        // the two differ in the sign of the result when c[idx].y is zero.
        a[idx].y = 0.0 - c[idx].y;
        idx += stride;
    }
}
303 __global__
void _subVV(cudaComplex* a, cudaComplex
const * b,
304 cudaReal
const * c,
const int n)
306 int nThreads = blockDim.x * gridDim.x;
307 int startID = blockIdx.x * blockDim.x + threadIdx.x;
308 for(
int i = startID; i < n; i += nThreads) {
309 a[i].x = b[i].x - c[i];
322 __global__
void _subVS(cudaReal* a, cudaReal
const * b,
323 cudaReal
const c,
const int n)
325 int nThreads = blockDim.x * gridDim.x;
326 int startID = blockIdx.x * blockDim.x + threadIdx.x;
327 for(
int i = startID; i < n; i += nThreads) {
/*
* Complex vector-minus-scalar kernel: a[i] = b[i] - c for 0 <= i < n.
*
* The scalar c is passed by value and subtracted component-wise. Each thread
* begins at its global x-index and advances by the total number of threads
* in the x dimension of the launch until it passes n.
*/
__global__ void _subVS(cudaComplex* a, cudaComplex const * b,
                       cudaComplex const c, const int n)
{
    int const stride = blockDim.x * gridDim.x;
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    while (idx < n) {
        a[idx].x = b[idx].x - c.x;
        a[idx].y = b[idx].y - c.y;
        idx += stride;
    }
}
359 __global__
void _subVS(cudaComplex* a, cudaReal
const * b,
360 cudaComplex
const c,
const int n)
362 int nThreads = blockDim.x * gridDim.x;
363 int startID = blockIdx.x * blockDim.x + threadIdx.x;
364 for(
int i = startID; i < n; i += nThreads) {
378 __global__
void _subVS(cudaComplex* a, cudaComplex
const * b,
379 cudaReal
const c,
const int n)
381 int nThreads = blockDim.x * gridDim.x;
382 int startID = blockIdx.x * blockDim.x + threadIdx.x;
383 for(
int i = startID; i < n; i += nThreads) {
397 __global__
void _mulVV(cudaReal* a, cudaReal
const * b,
398 cudaReal
const * c,
const int n)
400 int nThreads = blockDim.x * gridDim.x;
401 int startID = blockIdx.x * blockDim.x + threadIdx.x;
402 for(
int i = startID; i < n; i += nThreads) {
/*
* Element-wise complex multiplication kernel: a[i] = b[i] * c[i], 0 <= i < n.
*
* Uses the usual complex product
*    re = b.re * c.re - b.im * c.im
*    im = b.re * c.im + b.im * c.re
*
* Both operands are copied into locals before anything is stored to a[i].
* Without that, storing a[i].x first would corrupt the operand used for
* a[i].y whenever a points to the same array as b or c (in-place use).
*
* Each thread begins at its global x-index and advances by the total number
* of threads in the x dimension of the launch until it passes n.
*
* \param a  output array (may alias b or c)
* \param b  first input array
* \param c  second input array
* \param n  number of elements to process
*/
__global__ void _mulVV(cudaComplex* a, cudaComplex const * b,
                       cudaComplex const * c, const int n)
{
    int nThreads = blockDim.x * gridDim.x;
    int startID = blockIdx.x * blockDim.x + threadIdx.x;
    for (int i = startID; i < n; i += nThreads) {
        // Snapshot the inputs so the stores below cannot affect them.
        cudaComplex const bi = b[i];
        cudaComplex const ci = c[i];
        a[i].x = (bi.x * ci.x) - (bi.y * ci.y);
        a[i].y = (bi.x * ci.y) + (bi.y * ci.x);
    }
}
/*
* Mixed real/complex multiplication kernel: a[i] = b[i] * c[i], 0 <= i < n,
* where b is real and c is complex.
*
* Both components of c[i] are scaled by the real value b[i]. Each thread
* begins at its global x-index and advances by the total number of threads
* in the x dimension of the launch until it passes n.
*/
__global__ void _mulVV(cudaComplex* a, cudaReal const * b,
                       cudaComplex const * c, const int n)
{
    int const stride = blockDim.x * gridDim.x;
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    while (idx < n) {
        a[idx].x = b[idx] * c[idx].x;
        a[idx].y = b[idx] * c[idx].y;
        idx += stride;
    }
}
/*
* Mixed complex/real multiplication kernel: a[i] = b[i] * c[i], 0 <= i < n,
* where b is complex and c is real.
*
* Both components of b[i] are scaled by the real value c[i]. Each thread
* begins at its global x-index and advances by the total number of threads
* in the x dimension of the launch until it passes n.
*/
__global__ void _mulVV(cudaComplex* a, cudaComplex const * b,
                       cudaReal const * c, const int n)
{
    int const stride = blockDim.x * gridDim.x;
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    while (idx < n) {
        a[idx].x = b[idx].x * c[idx];
        a[idx].y = b[idx].y * c[idx];
        idx += stride;
    }
}
472 __global__
void _mulVS(cudaReal* a, cudaReal
const * b,
473 cudaReal
const c,
const int n)
475 int nThreads = blockDim.x * gridDim.x;
476 int startID = blockIdx.x * blockDim.x + threadIdx.x;
477 for(
int i = startID; i < n; i += nThreads) {
/*
* Complex vector-times-scalar kernel: a[i] = b[i] * c for 0 <= i < n.
*
* Uses the usual complex product
*    re = b.re * c.re - b.im * c.im
*    im = b.re * c.im + b.im * c.re
*
* b[i] is copied into a local before anything is stored to a[i]. Without
* that, storing a[i].x first would corrupt the b[i].x used for a[i].y
* whenever a points to the same array as b (in-place use).
*
* Each thread begins at its global x-index and advances by the total number
* of threads in the x dimension of the launch until it passes n.
*
* \param a  output array (may alias b)
* \param b  input array
* \param c  complex scalar, passed by value
* \param n  number of elements to process
*/
__global__ void _mulVS(cudaComplex* a, cudaComplex const * b,
                       cudaComplex const c, const int n)
{
    int nThreads = blockDim.x * gridDim.x;
    int startID = blockIdx.x * blockDim.x + threadIdx.x;
    for (int i = startID; i < n; i += nThreads) {
        // Snapshot the input so the stores below cannot affect it.
        cudaComplex const bi = b[i];
        a[i].x = (bi.x * c.x) - (bi.y * c.y);
        a[i].y = (bi.x * c.y) + (bi.y * c.x);
    }
}
509 __global__
void _mulVS(cudaComplex* a, cudaReal
const * b,
510 cudaComplex
const c,
const int n)
512 int nThreads = blockDim.x * gridDim.x;
513 int startID = blockIdx.x * blockDim.x + threadIdx.x;
514 for(
int i = startID; i < n; i += nThreads) {
528 __global__
void _mulVS(cudaComplex* a, cudaComplex
const * b,
529 cudaReal
const c,
const int n)
531 int nThreads = blockDim.x * gridDim.x;
532 int startID = blockIdx.x * blockDim.x + threadIdx.x;
533 for(
int i = startID; i < n; i += nThreads) {
547 __global__
void _divVV(cudaReal* a, cudaReal
const * b,
548 cudaReal
const * c,
const int n)
550 int nThreads = blockDim.x * gridDim.x;
551 int startID = blockIdx.x * blockDim.x + threadIdx.x;
552 for(
int i = startID; i < n; i += nThreads) {
/*
* Mixed complex/real division kernel: a[i] = b[i] / c[i] for 0 <= i < n,
* where b is complex and c is real.
*
* Both components of b[i] are divided by the real value c[i]; no check is
* made for a zero divisor. Each thread begins at its global x-index and
* advances by the total number of threads in the x dimension of the launch
* until it passes n.
*/
__global__ void _divVV(cudaComplex* a, cudaComplex const * b,
                       cudaReal const * c, const int n)
{
    int const stride = blockDim.x * gridDim.x;
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    while (idx < n) {
        a[idx].x = b[idx].x / c[idx];
        a[idx].y = b[idx].y / c[idx];
        idx += stride;
    }
}
584 __global__
void _divVS(cudaReal* a, cudaReal
const * b,
585 cudaReal
const c,
const int n)
587 int nThreads = blockDim.x * gridDim.x;
588 int startID = blockIdx.x * blockDim.x + threadIdx.x;
589 for(
int i = startID; i < n; i += nThreads) {
602 __global__
void _divVS(cudaComplex* a, cudaComplex
const * b,
603 cudaReal
const c,
const int n)
605 int nThreads = blockDim.x * gridDim.x;
606 int startID = blockIdx.x * blockDim.x + threadIdx.x;
607 for(
int i = startID; i < n; i += nThreads) {
620 __global__
void _expV(cudaReal* a, cudaReal
const * b,
const int n)
622 int nThreads = blockDim.x * gridDim.x;
623 int startID = blockIdx.x * blockDim.x + threadIdx.x;
624 for(
int i = startID; i < n; i += nThreads) {
/*
* Element-wise complex exponential kernel: a[i] = exp(b[i]) for 0 <= i < n.
*
* Uses exp(x + iy) = exp(x) * (cos(y) + i sin(y)).
*
* b[i] is copied into a local before anything is stored to a[i]. Without
* that, storing a[i].x first would replace the b[i].x fed to exp() for the
* imaginary part whenever a points to the same array as b (in-place use).
*
* Each thread begins at its global x-index and advances by the total number
* of threads in the x dimension of the launch until it passes n.
*
* \param a  output array (may alias b)
* \param b  input array
* \param n  number of elements to process
*/
__global__ void _expV(cudaComplex* a, cudaComplex const * b, const int n)
{
    int nThreads = blockDim.x * gridDim.x;
    int startID = blockIdx.x * blockDim.x + threadIdx.x;
    for (int i = startID; i < n; i += nThreads) {
        // Snapshot the input so the stores below cannot affect it.
        cudaComplex const bi = b[i];
        a[i].x = exp(bi.x) * cos(bi.y);
        a[i].y = exp(bi.x) * sin(bi.y);
    }
}
653 __global__
void _addEqV(cudaReal* a, cudaReal
const * b,
const int n)
655 int nThreads = blockDim.x * gridDim.x;
656 int startID = blockIdx.x * blockDim.x + threadIdx.x;
657 for(
int i = startID; i < n; i += nThreads) {
669 __global__
void _addEqV(cudaComplex* a, cudaComplex
const * b,
const int n)
671 int nThreads = blockDim.x * gridDim.x;
672 int startID = blockIdx.x * blockDim.x + threadIdx.x;
673 for(
int i = startID; i < n; i += nThreads) {
686 __global__
void _addEqV(cudaComplex* a, cudaReal
const * b,
const int n)
688 int nThreads = blockDim.x * gridDim.x;
689 int startID = blockIdx.x * blockDim.x + threadIdx.x;
690 for(
int i = startID; i < n; i += nThreads) {
702 __global__
void _addEqS(cudaReal* a, cudaReal
const b,
const int n)
704 int nThreads = blockDim.x * gridDim.x;
705 int startID = blockIdx.x * blockDim.x + threadIdx.x;
706 for(
int i = startID; i < n; i += nThreads) {
718 __global__
void _addEqS(cudaComplex* a, cudaComplex
const b,
const int n)
720 int nThreads = blockDim.x * gridDim.x;
721 int startID = blockIdx.x * blockDim.x + threadIdx.x;
722 for(
int i = startID; i < n; i += nThreads) {
735 __global__
void _addEqS(cudaComplex* a, cudaReal
const b,
const int n)
737 int nThreads = blockDim.x * gridDim.x;
738 int startID = blockIdx.x * blockDim.x + threadIdx.x;
739 for(
int i = startID; i < n; i += nThreads) {
751 __global__
void _subEqV(cudaReal* a, cudaReal
const * b,
const int n)
753 int nThreads = blockDim.x * gridDim.x;
754 int startID = blockIdx.x * blockDim.x + threadIdx.x;
755 for(
int i = startID; i < n; i += nThreads) {
767 __global__
void _subEqV(cudaComplex* a, cudaComplex
const * b,
const int n)
769 int nThreads = blockDim.x * gridDim.x;
770 int startID = blockIdx.x * blockDim.x + threadIdx.x;
771 for(
int i = startID; i < n; i += nThreads) {
784 __global__
void _subEqV(cudaComplex* a, cudaReal
const * b,
const int n)
786 int nThreads = blockDim.x * gridDim.x;
787 int startID = blockIdx.x * blockDim.x + threadIdx.x;
788 for(
int i = startID; i < n; i += nThreads) {
800 __global__
void _subEqS(cudaReal* a, cudaReal
const b,
const int n)
802 int nThreads = blockDim.x * gridDim.x;
803 int startID = blockIdx.x * blockDim.x + threadIdx.x;
804 for(
int i = startID; i < n; i += nThreads) {
816 __global__
void _subEqS(cudaComplex* a, cudaComplex
const b,
const int n)
818 int nThreads = blockDim.x * gridDim.x;
819 int startID = blockIdx.x * blockDim.x + threadIdx.x;
820 for(
int i = startID; i < n; i += nThreads) {
833 __global__
void _subEqS(cudaComplex* a, cudaReal
const b,
const int n)
835 int nThreads = blockDim.x * gridDim.x;
836 int startID = blockIdx.x * blockDim.x + threadIdx.x;
837 for(
int i = startID; i < n; i += nThreads) {
849 __global__
void _mulEqV(cudaReal* a, cudaReal
const * b,
const int n)
851 int nThreads = blockDim.x * gridDim.x;
852 int startID = blockIdx.x * blockDim.x + threadIdx.x;
853 for(
int i = startID; i < n; i += nThreads) {
865 __global__
void _mulEqV(cudaComplex* a, cudaComplex
const * b,
const int n)
867 int nThreads = blockDim.x * gridDim.x;
868 int startID = blockIdx.x * blockDim.x + threadIdx.x;
870 for(
int i = startID; i < n; i += nThreads) {
871 c.x = (a[i].x * b[i].x) - (a[i].y * b[i].y);
872 c.y = (a[i].x * b[i].y) + (a[i].y * b[i].x);
885 __global__
void _mulEqV(cudaComplex* a, cudaReal
const * b,
const int n)
887 int nThreads = blockDim.x * gridDim.x;
888 int startID = blockIdx.x * blockDim.x + threadIdx.x;
889 for(
int i = startID; i < n; i += nThreads) {
902 __global__
void _mulEqS(cudaReal* a, cudaReal
const b,
const int n)
904 int nThreads = blockDim.x * gridDim.x;
905 int startID = blockIdx.x * blockDim.x + threadIdx.x;
906 for(
int i = startID; i < n; i += nThreads) {
918 __global__
void _mulEqS(cudaComplex* a, cudaComplex
const b,
const int n)
920 int nThreads = blockDim.x * gridDim.x;
921 int startID = blockIdx.x * blockDim.x + threadIdx.x;
923 for(
int i = startID; i < n; i += nThreads) {
924 c.x = (a[i].x * b.x) - (a[i].y * b.y);
925 c.y = (a[i].x * b.y) + (a[i].y * b.x);
938 __global__
void _mulEqS(cudaComplex* a, cudaReal
const b,
const int n)
940 int nThreads = blockDim.x * gridDim.x;
941 int startID = blockIdx.x * blockDim.x + threadIdx.x;
942 for(
int i = startID; i < n; i += nThreads) {
955 __global__
void _divEqV(cudaReal* a, cudaReal
const * b,
const int n)
957 int nThreads = blockDim.x * gridDim.x;
958 int startID = blockIdx.x * blockDim.x + threadIdx.x;
959 for(
int i = startID; i < n; i += nThreads) {
971 __global__
void _divEqV(cudaComplex* a, cudaReal
const * b,
const int n)
973 int nThreads = blockDim.x * gridDim.x;
974 int startID = blockIdx.x * blockDim.x + threadIdx.x;
975 for(
int i = startID; i < n; i += nThreads) {
988 __global__
void _divEqS(cudaReal* a, cudaReal
const b,
const int n)
990 int nThreads = blockDim.x * gridDim.x;
991 int startID = blockIdx.x * blockDim.x + threadIdx.x;
992 for(
int i = startID; i < n; i += nThreads) {
1004 __global__
void _divEqS(cudaComplex* a, cudaReal
const b,
const int n)
1006 int nThreads = blockDim.x * gridDim.x;
1007 int startID = blockIdx.x * blockDim.x + threadIdx.x;
1008 for(
int i = startID; i < n; i += nThreads) {
1021 const int beginIdA,
const int beginIdB,
const int n)
1027 int nBlocks, nThreads;
1031 _eqV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA,
1033 cudaErrorCheck( cudaGetLastError() );
1038 const int beginIdA,
const int beginIdB,
const int n)
1044 int nBlocks, nThreads;
1048 _eqV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA,
1050 cudaErrorCheck( cudaGetLastError() );
1055 const int beginIdA,
const int n)
1060 int nBlocks, nThreads;
1064 _eqS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b, n);
1065 cudaErrorCheck( cudaGetLastError() );
1070 const int beginIdA,
const int n)
1075 int nBlocks, nThreads;
1079 _eqS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b, n);
1080 cudaErrorCheck( cudaGetLastError() );
1086 const int beginIdB,
const int beginIdC,
const int n)
1093 int nBlocks, nThreads;
1097 _addVV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1099 cudaErrorCheck( cudaGetLastError() );
1105 const int beginIdB,
const int beginIdC,
const int n)
1112 int nBlocks, nThreads;
1116 _addVV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1118 cudaErrorCheck( cudaGetLastError() );
1124 const int beginIdB,
const int beginIdC,
const int n)
1131 int nBlocks, nThreads;
1135 _addVV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1137 cudaErrorCheck( cudaGetLastError() );
1143 const int beginIdB,
const int beginIdC,
const int n)
1150 int nBlocks, nThreads;
1154 _addVV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1156 cudaErrorCheck( cudaGetLastError() );
1161 cudaReal
const c,
const int beginIdA,
const int beginIdB,
int n)
1167 int nBlocks, nThreads;
1171 _addVS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1173 cudaErrorCheck( cudaGetLastError() );
1178 cudaComplex
const c,
const int beginIdA,
const int beginIdB,
int n)
1184 int nBlocks, nThreads;
1188 _addVS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1190 cudaErrorCheck( cudaGetLastError() );
1195 cudaComplex
const c,
const int beginIdA,
const int beginIdB,
int n)
1201 int nBlocks, nThreads;
1205 _addVS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1207 cudaErrorCheck( cudaGetLastError() );
1212 cudaReal
const c,
const int beginIdA,
const int beginIdB,
int n)
1218 int nBlocks, nThreads;
1222 _addVS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1224 cudaErrorCheck( cudaGetLastError() );
1230 const int beginIdB,
const int beginIdC,
const int n)
1237 int nBlocks, nThreads;
1241 _subVV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1243 cudaErrorCheck( cudaGetLastError() );
1249 const int beginIdB,
const int beginIdC,
const int n)
1256 int nBlocks, nThreads;
1260 _subVV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1262 cudaErrorCheck( cudaGetLastError() );
1268 const int beginIdB,
const int beginIdC,
const int n)
1275 int nBlocks, nThreads;
1279 _subVV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1281 cudaErrorCheck( cudaGetLastError() );
1287 const int beginIdB,
const int beginIdC,
const int n)
1294 int nBlocks, nThreads;
1298 _subVV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1300 cudaErrorCheck( cudaGetLastError() );
1306 const int beginIdA,
const int beginIdB,
const int n)
1312 int nBlocks, nThreads;
1316 _subVS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1318 cudaErrorCheck( cudaGetLastError() );
1323 cudaComplex
const c,
const int beginIdA,
const int beginIdB,
1330 int nBlocks, nThreads;
1334 _subVS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1336 cudaErrorCheck( cudaGetLastError() );
1341 cudaComplex
const c,
const int beginIdA,
const int beginIdB,
1348 int nBlocks, nThreads;
1352 _subVS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1354 cudaErrorCheck( cudaGetLastError() );
1359 cudaReal
const c,
const int beginIdA,
const int beginIdB,
1366 int nBlocks, nThreads;
1370 _subVS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1372 cudaErrorCheck( cudaGetLastError() );
1378 const int beginIdB,
const int beginIdC,
const int n)
1385 int nBlocks, nThreads;
1389 _mulVV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1391 cudaErrorCheck( cudaGetLastError() );
1397 const int beginIdB,
const int beginIdC,
const int n)
1404 int nBlocks, nThreads;
1408 _mulVV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1410 cudaErrorCheck( cudaGetLastError() );
1416 const int beginIdB,
const int beginIdC,
const int n)
1423 int nBlocks, nThreads;
1427 _mulVV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1429 cudaErrorCheck( cudaGetLastError() );
1435 const int beginIdB,
const int beginIdC,
const int n)
1442 int nBlocks, nThreads;
1446 _mulVV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1448 cudaErrorCheck( cudaGetLastError() );
1453 cudaReal
const c,
const int beginIdA,
const int beginIdB,
1460 int nBlocks, nThreads;
1464 _mulVS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1466 cudaErrorCheck( cudaGetLastError() );
1471 cudaComplex
const c,
const int beginIdA,
const int beginIdB,
1478 int nBlocks, nThreads;
1482 _mulVS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1484 cudaErrorCheck( cudaGetLastError() );
1489 cudaComplex
const c,
const int beginIdA,
const int beginIdB,
1496 int nBlocks, nThreads;
1500 _mulVS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1502 cudaErrorCheck( cudaGetLastError() );
1507 cudaReal
const c,
const int beginIdA,
const int beginIdB,
1514 int nBlocks, nThreads;
1518 _mulVS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1520 cudaErrorCheck( cudaGetLastError() );
1526 const int beginIdB,
const int beginIdC,
const int n)
1533 int nBlocks, nThreads;
1537 _divVV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1539 cudaErrorCheck( cudaGetLastError() );
1545 const int beginIdB,
const int beginIdC,
const int n)
1552 int nBlocks, nThreads;
1556 _divVV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1558 cudaErrorCheck( cudaGetLastError() );
1563 cudaReal
const c,
const int beginIdA,
const int beginIdB,
1570 int nBlocks, nThreads;
1574 _divVS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1576 cudaErrorCheck( cudaGetLastError() );
1581 cudaReal
const c,
const int beginIdA,
const int beginIdB,
1588 int nBlocks, nThreads;
1592 _divVS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b.
cArray()+beginIdB,
1594 cudaErrorCheck( cudaGetLastError() );
1599 const int beginIdA,
const int beginIdB,
const int n)
1605 int nBlocks, nThreads;
1609 _expV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA,
1611 cudaErrorCheck( cudaGetLastError() );
1616 const int beginIdA,
const int beginIdB,
const int n)
1622 int nBlocks, nThreads;
1626 _expV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA,
1628 cudaErrorCheck( cudaGetLastError() );
1633 const int beginIdA,
const int beginIdB,
const int n)
1639 int nBlocks, nThreads;
1643 _addEqV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA,
1645 cudaErrorCheck( cudaGetLastError() );
1650 const int beginIdA,
const int beginIdB,
const int n)
1656 int nBlocks, nThreads;
1660 _addEqV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA,
1662 cudaErrorCheck( cudaGetLastError() );
1667 const int beginIdA,
const int beginIdB,
const int n)
1673 int nBlocks, nThreads;
1677 _addEqV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA,
1679 cudaErrorCheck( cudaGetLastError() );
1684 const int beginIdA,
const int n)
1689 int nBlocks, nThreads;
1693 _addEqS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b, n);
1694 cudaErrorCheck( cudaGetLastError() );
1699 const int beginIdA,
const int n)
1704 int nBlocks, nThreads;
1708 _addEqS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b, n);
1709 cudaErrorCheck( cudaGetLastError() );
1714 const int beginIdA,
const int n)
1719 int nBlocks, nThreads;
1723 _addEqS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b, n);
1724 cudaErrorCheck( cudaGetLastError() );
1729 const int beginIdA,
const int beginIdB,
const int n)
1735 int nBlocks, nThreads;
1739 _subEqV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA,
1741 cudaErrorCheck( cudaGetLastError() );
1746 const int beginIdA,
const int beginIdB,
const int n)
1752 int nBlocks, nThreads;
1756 _subEqV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA,
1758 cudaErrorCheck( cudaGetLastError() );
1763 const int beginIdA,
const int beginIdB,
const int n)
1769 int nBlocks, nThreads;
1773 _subEqV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA,
1775 cudaErrorCheck( cudaGetLastError() );
1780 const int beginIdA,
const int n)
1785 int nBlocks, nThreads;
1789 _subEqS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b, n);
1790 cudaErrorCheck( cudaGetLastError() );
1795 const int beginIdA,
const int n)
1800 int nBlocks, nThreads;
1804 _subEqS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b, n);
1805 cudaErrorCheck( cudaGetLastError() );
1810 const int beginIdA,
const int n)
1815 int nBlocks, nThreads;
1819 _subEqS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b, n);
1820 cudaErrorCheck( cudaGetLastError() );
1825 const int beginIdA,
const int beginIdB,
const int n)
1831 int nBlocks, nThreads;
1835 _mulEqV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA,
1837 cudaErrorCheck( cudaGetLastError() );
1842 const int beginIdA,
const int beginIdB,
const int n)
1848 int nBlocks, nThreads;
1852 _mulEqV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA,
1854 cudaErrorCheck( cudaGetLastError() );
1859 const int beginIdA,
const int beginIdB,
const int n)
1865 int nBlocks, nThreads;
1869 _mulEqV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA,
1871 cudaErrorCheck( cudaGetLastError() );
1876 const int beginIdA,
const int n)
1881 int nBlocks, nThreads;
1885 _mulEqS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b, n);
1886 cudaErrorCheck( cudaGetLastError() );
1891 const int beginIdA,
const int n)
1896 int nBlocks, nThreads;
1900 _mulEqS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b, n);
1901 cudaErrorCheck( cudaGetLastError() );
1906 const int beginIdA,
const int n)
1911 int nBlocks, nThreads;
1915 _mulEqS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b, n);
1916 cudaErrorCheck( cudaGetLastError() );
1921 const int beginIdA,
const int beginIdB,
const int n)
1927 int nBlocks, nThreads;
1931 _divEqV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA,
1933 cudaErrorCheck( cudaGetLastError() );
1938 const int beginIdA,
const int beginIdB,
const int n)
1944 int nBlocks, nThreads;
1948 _divEqV<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA,
1950 cudaErrorCheck( cudaGetLastError() );
1955 const int beginIdA,
const int n)
1960 int nBlocks, nThreads;
1964 _divEqS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b, n);
1965 cudaErrorCheck( cudaGetLastError() );
1970 const int beginIdA,
const int n)
1975 int nBlocks, nThreads;
1979 _divEqS<<<nBlocks, nThreads>>>(a.
cArray()+beginIdA, b, n);
1980 cudaErrorCheck( cudaGetLastError() );