You first have to allocate an array of pointers in host memory, then allocate device memory for each array and store its device pointer in that host array.
Then allocate device memory large enough to hold the array of pointers,
and finally copy the host array of pointers into that device memory.
One example is worth 1000 words:
/*
 * multi_array_kernel - placeholder kernel taking a table of array pointers.
 *
 *   N      : integer count supplied by the caller (presumably the number of
 *            entries in `arrays` -- confirm against the launch site).
 *   arrays : pointer to a table of untyped (void*) pointers.
 *
 * The body is intentionally empty in this example; real work would go where
 * the "stuff" marker is.
 */
__global__ void multi_array_kernel(int N, void **arrays)
{
    // stuff
}
/*
 * Example driver: builds a device-resident table of N_ARRAYS device pointers
 * and hands it to multi_array_kernel.
 *
 * Steps:
 *   1. Allocate a host table (h_array) with one void* slot per array.
 *   2. cudaMalloc each array and record its device pointer in the host table.
 *   3. cudaMalloc a device table (d_array) of the same size and copy the host
 *      table into it, so the kernel can read the pointers.
 *   4. Launch the kernel, wait for it, then release everything.
 *
 * Returns 0 on success and 1 if the host allocation or any CUDA call failed.
 * Cleanup always runs, so a failure part-way through does not leak the
 * allocations made before it.
 */
int main(){
    const int N_ARRAYS = 20;

    // Host table of device pointers. It must be void** (not void*) so that
    // individual slots can be indexed and their addresses passed to cudaMalloc.
    void **h_array = (void**)malloc(sizeof(void*) * N_ARRAYS);
    if (h_array == NULL) {
        return 1;
    }
    // Start with every slot empty so the cleanup loop below can free all
    // slots unconditionally (cudaFree on a null pointer performs no operation).
    for (int i = 0; i < N_ARRAYS; i++) {
        h_array[i] = NULL;
    }

    void **d_array = NULL;          // device copy of the pointer table
    cudaError_t err = cudaSuccess;  // first error encountered, if any

    // Allocate each device array; array i gets room for i pointer-sized
    // elements, so array 0 is a zero-byte request.
    // NOTE(review): confirm a zero-size cudaMalloc is acceptable for your use,
    // or change the size expression.
    for (int i = 0; i < N_ARRAYS && err == cudaSuccess; i++) {
        err = cudaMalloc(&h_array[i], i * sizeof(void*));
        if (err != cudaSuccess) {
            h_array[i] = NULL;  // do not try to free a slot that failed to allocate
        }
    }

    // Device memory that will hold the table of pointers itself.
    if (err == cudaSuccess) {
        err = cudaMalloc((void**)&d_array, sizeof(void*) * N_ARRAYS);
        if (err != cudaSuccess) {
            d_array = NULL;
        }
    }

    // Copy the pointer table (not the arrays' contents) to device memory.
    if (err == cudaSuccess) {
        err = cudaMemcpy(d_array, h_array, sizeof(void*) * N_ARRAYS,
                         cudaMemcpyHostToDevice);
    }

    if (err == cudaSuccess) {
        multi_array_kernel<<<1, 1>>>(N_ARRAYS, d_array);
        // A launch does not return a status; launch errors are read back here.
        err = cudaGetLastError();
    }

    // Wait for the kernel; errors raised during execution surface here.
    // (cudaDeviceSynchronize replaces the deprecated cudaThreadSynchronize.)
    if (err == cudaSuccess) {
        err = cudaDeviceSynchronize();
    }

    // Release the device arrays. The pointers live in host memory (h_array),
    // but they refer to device allocations, hence cudaFree.
    for (int i = 0; i < N_ARRAYS; i++) {
        cudaError_t free_err = cudaFree(h_array[i]);
        if (err == cudaSuccess) {
            err = free_err;  // keep the first failure for the exit status
        }
    }

    // Release the device-side pointer table, then the host-side one.
    cudaError_t table_err = cudaFree(d_array);
    if (err == cudaSuccess) {
        err = table_err;
    }
    free(h_array);

    return (err == cudaSuccess) ? 0 : 1;
}
与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…