0%

05 Kernel Params

CUDA syntax

https://docs.nvidia.com/cuda/cuda-c-programming-guide/#execution-configuration
https://blog.csdn.net/qq_39575835/article/details/83027440

block + thread

1
// Kernel launch with an execution configuration between <<< and >>>:
//   Dg - grid dimensions (how many blocks are launched)
//   Db - block dimensions (how many threads each block has)
//   Ns - bytes of dynamic shared memory per block (optional)
//   S  - stream the launch is issued on (optional)
kernal_func<<< Dg, Db, Ns, S >>>(...);

dim3 index

calc index

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
// Flat global thread index for a 3D grid of 3D blocks (x varies fastest, then y, then z).
// Rank of this block within the grid.
const unsigned int blockRank = gridDim.x * (gridDim.y * blockIdx.z + blockIdx.y) + blockIdx.x;
// Number of threads contained in a single block.
const unsigned int threadsPerBlock = blockDim.x * blockDim.y * blockDim.z;
// Rank of this thread within its own block, using the same x-fastest ordering.
const unsigned int threadRank = blockDim.x * (blockDim.y * threadIdx.z + threadIdx.y) + threadIdx.x;
// All threads of lower-ranked blocks come first, then this thread's offset in its block.
int idx = blockRank * threadsPerBlock + threadRank;

// or

/**
 * Folds a tuple of mixed-radix digits into one linear index.
 *
 * Digits are consumed most-significant first: at each position i the running
 * value is multiplied by base[i] and str[i] is added (Horner's scheme).
 * base[0] only ever multiplies the initial zero, so it does not influence
 * the result.
 *
 * Callable from both host and device code so the same routine can serve as a
 * CPU reference for the device computation.
 *
 * @param str   digits (coordinates), most-significant first; sz entries are read
 * @param base  radix for each digit position; sz entries are read
 * @param sz    number of digit positions; a value <= 0 yields 0
 * @return      the combined index converted to int
 */
__host__ __device__ int alphabitOrderIndex(const unsigned int *str, const unsigned int *base, int sz)
{
    // Accumulate in unsigned arithmetic: the operands are unsigned, so this is
    // the type the multiplication and addition are carried out in anyway.
    unsigned int acc = 0u;
    for (int i = 0; i < sz; ++i)
    {
        acc = acc * base[i] + str[i];
    }
    return static_cast<int>(acc);
}
// Coordinates and their extents, listed from the slowest-varying (block z)
// to the fastest-varying (thread x) position. The built-in variable is
// spelled gridDim (lowercase g); GridDim does not exist.
unsigned int idxs[] = {blockIdx.z, blockIdx.y, blockIdx.x, threadIdx.z, threadIdx.y, threadIdx.x},
             bases[] = {gridDim.z, gridDim.y, gridDim.x, blockDim.z, blockDim.y, blockDim.x};
// Fold the six coordinates into a single flat global thread index.
int idx = alphabitOrderIndex(idxs, bases, 6);

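As you can see, blockIdx is a coordinate within gridDim and threadIdx is a coordinate within blockDim. All four are three-component vectors with unsigned integer fields x, y, z (blockIdx and threadIdx are of type uint3; gridDim and blockDim are of type dim3).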

source code