Here is my issue:
I have a 3D array of float3
on my device:
int size[3] = {416,464,512};
cudaExtent extent = make_cudaExtent(size[0]*sizeof(float3),size[1],size[2]);
cudaPitchedPtr renderedVolume;
int ret = cudaMalloc3D(&renderedVolume, extent);
size_t pitch = renderedVolume.pitch; //pitch = 5,120
size_t slicePitch = pitch * size[1]; //slicePitch = 2,375,680
Then I work with it and make it full of outstanding data.
After that I wish to copy it into a 1D linear buffer on my host:
float *host_memory = (float*)malloc(size[0]*size[1]*size[2]*sizeof(float3));
cudaMemcpy3DParms p = {0};
p.srcPtr = renderedVolume;
p.dstPtr = make_cudaPitchedPtr(host_memory,size[0]*sizeof(float3),size[0],size[1]);
p.extent = make_cudaExtent(size[0]*sizeof(float3),size[1],size[2]);
p.srcPos = make_cudaPos(0,0,0);
p.dstPos = make_cudaPos(0,0,0);
p.kind=cudaMemcpyDeviceToHost;
cudaMemcpy3D(&p);
I am comparing the result in host_memory with the data I initially wrote to renderedVolume
(myData
) and with the data I read back from my 3D memory
, slice by slice:
float* test1 = (float*)malloc(size[0]*size[1]*sizeof(float3));
cudaMemcpy(test1, myData, size[0]*size[1]*sizeof(float3) , cudaMemcpyDeviceToHost);
float* test2 = (float*)malloc(size[0]*size[1]*sizeof(float3));
cudaMemcpy(test2,(char*)renderedVolume.ptr + slicePitch * i,size[0]*size[1]*sizeof(float3), cudaMemcpyDeviceToHost);
Problem:
- The first slice (
i=0
) is ok, I have the same data in host_memory, test1
and test2
.
- In the second slice, I have the same data in
test1
and test2
. However, I should find this data in host_memory+579072
(= number of floats
per slice, also height*pitch
of the destination pitched pointer) and I find it in host_memory+577504
. It is off by 1568
floats (host_memory is a float*, so these offsets are counted in floats; that is 6,272 bytes), which corresponds to nothing that I am aware of. I would very much appreciate any idea of what the problem in my code might be.
See Questions & Answers for more detail:
os 与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…