I'm writing CUDA program that adds blur effect onto BMP files. I wrote working program that does this on CPU, now I'm trying to convert the code to CUDA. This is the function I want to work on CUDA:
void blur(bitmap_header* hp, unsigned char *data)
{
int xx,yy,x,y, avgB, avgG, avgR, ile;
int blurSize = 5;
for(xx = 0; xx < hp->width; xx++)
{
for(yy = 0; yy < hp->height; yy++)
{
avgB = avgG = avgR = 0;
ile = 0;
for(x = xx; x < hp->width && x < xx + blurSize; x++)
{
for(y = yy; y < hp->height && y < yy + blurSize; y++)
{
avgB += data[x*3 + y*hp->width*3 + 0];
avgG += data[x*3 + y*hp->width*3 + 1];
avgR += data[x*3 + y*hp->width*3 + 2];
ile++;
}
}
avgB = avgB / ile;
avgG = avgG / ile;
avgR = avgR / ile;
data[xx*3 + yy*hp->width*3 + 0] = avgB;
data[xx*3 + yy*hp->width*3 + 1] = avgG;
data[xx*3 + yy*hp->width*3 + 2] = avgR;
}
}
}
How do I convert this function to work on CUDA device?
Every tutorial covers only one for loop and uses
int i = threadIdx.x
My previous question about this program:
Blur effect on bitmap using C
EDIT
FULL CODE with CUDA edits:
#include <stdio.h>
#include <stdlib.h>
#include <Windows.h>
#pragma pack(push,1)
/* Windows 3.x bitmap file header */
typedef struct {
char filetype[2]; /* magic - always 'B' 'M' */
unsigned int filesize;
short reserved1;
short reserved2;
unsigned int dataoffset; /* offset in bytes to actual bitmap data */
} file_header;
/* Windows 3.x bitmap full header, including file header */
typedef struct {
file_header fileheader;
unsigned int headersize;
int width;
int height;
short planes;
short bitsperpixel; /* we only support the value 24 here */
unsigned int compression; /* we do not support compression */
unsigned int bitmapsize;
int horizontalres;
int verticalres;
unsigned int numcolors;
unsigned int importantcolors;
} bitmap_header;
#pragma pack(pop)
__global__ void blur(bitmap_header* hp, unsigned char *data)
{
int xx,yy,x,y, avgB, avgG, avgR, ile;
int blurSize = 5;
xx = blockIdy.y * blockDim.y + threadIdx.y;
yy = blockIdx.x * blockDim.x + threadIdx.x;
if(xx >= hp->width || yy >= hp->height)
return;
avgB = avgG = avgR = 0;
ile = 0;
for(x = xx; x < hp->width && x < xx + blurSize; x++)
{
for(y = yy; y < hp->height && y < yy + blurSize; y++)
{
avgB += data[x*3 + y*hp->width*3 + 0];
avgG += data[x*3 + y*hp->width*3 + 1];
avgR += data[x*3 + y*hp->width*3 + 2];
ile++;
}
}
avgB = avgB / ile;
avgG = avgG / ile;
avgR = avgR / ile;
data[xx*3 + yy*hp->width*3 + 0] = avgB;
data[xx*3 + yy*hp->width*3 + 1] = avgG;
data[xx*3 + yy*hp->width*3 + 2] = avgR;
}
int filter(char* input, char *output)
{
//variable dec:
FILE *fp,*out;
bitmap_header* hp;
bitmap_header* d_hp;
unsigned char *data;
unsigned char *d_data;
//Open input file:
fp = fopen(input, "r");
if(fp==NULL)
return 1;
//Read the input file headers:
hp=(bitmap_header*)malloc(sizeof(bitmap_header));
cudaMalloc( &d_hp, (sizeof(bitmap_header));
if(hp==NULL)
return 1;
fread(hp, sizeof(bitmap_header), 1, fp);
cudaMemcpy(d_hp, &hp, (sizeof(bitmap_header), cudaMemcpyHostToDevice);
//Read the data of the image:
data = (unsigned char*)malloc(sizeof(char)*hp->bitmapsize);
cudaMalloc( &d_data, (sizeof(char)*hp->bitmapsize));
fseek(fp,sizeof(char)*hp->fileheader.dataoffset,SEEK_SET);
fread(data,sizeof(char),hp->bitmapsize, fp);
cudaMemcpy(d_data, &data, (sizeof(char)*hp->bitmapsize), cudaMemcpyHostToDevice);
dim3 block(16,16);
dim3 grid ( (hp->height + 15)/16, (hp->width + 15)/16 );
blur<<<grid,block>>>(d_hp, d_data);
cudaMemcpy(data, d_data, (sizeof(char)*hp->bitmapsize), cudaMemcpyDeviceToHost);
//Open output file:
out = fopen(output, "wb");
if(out==NULL)
{
fclose(fp);
free(hp);
free(data);
cudaFree(d_data);
cudaFree(d_hp);
return 1;
}
fwrite(hp,sizeof(char),sizeof(bitmap_header),out);
fseek(out,sizeof(char)*hp->fileheader.dataoffset,SEEK_SET);
fwrite(data,sizeof(char),hp->bitmapsize,out);
fclose(fp);
fclose(out);
free(hp);
free(data);
cudaFree(d_data);
cudaFree(d_hp);
return 0;
}
int main(int argc, char* argv[])
{
int frames;
int frame = 1;
char path[100000];
system("rd /s/q temp");
system("mkdir temp");
system("cls");
printf("Zapis wszystkich klatek do folderu temp.
");
system("ffmpeg.exe -i test.mp4 -r 29.970 -vcodec bmp temp/%d.bmp");
printf("Ile jest klatek w folderze temp?
");
scanf("%d", &frames);
for(frame = 1;frame<=frames;frame++)
{
sprintf(path,"temp\%d.bmp",frame);
printf("Nakladam filtr na ");
printf(path);
printf("
");
filter(path,path);
}
system("cls");
printf("Wszystkie klatki do filmu mp4.
");
system("ffmpeg -r 29.970 -i temp/%d.bmp -c:v libx264 -preset slow -crf 21 temp/out.mp4");
system("cls");
printf("Wyciecie dzwieku z filmu do mp3
");
system("ffmpeg -i test.mp4 -vn -ar 44100 -ac 2 -ab 192 -f wav temp/sound.wav");
system("cls");
printf("Polaczenie mp3 z mp4.
");
system("ffmpeg -i temp/sound.wav -i temp/out.mp4 final.mp4");
system("cls");
printf("Delete ");
system("rd /s temp");
system("pause");
return 0;
}
FULL ERROR LIST:
Warning 1 warning : The 'compute_10' and 'sm_10' architectures are deprecated, and may be removed in a future release. C:UsersKarpińscydocumentsvisual studio 2012Projectslurlur
vcc blur
Error 2 error : identifier "blockIdy" is undefined C:UsersKarpi?scydocumentsvisual studio 2012Projectslurlurkernel.cu blur
Error 3 error : expected a ")" C:UsersKarpi?scydocumentsvisual studio 2012Projectslurlurkernel.cu blur
Warning 4 warning : expression has no effect C:UsersKarpi?scydocumentsvisual studio 2012Projectslurlurkernel.cu blur
Error 5 error : expected a ")" C:UsersKarpi?scydocumentsvisual studio 2012Projectslurlurkernel.cu blur
Error 6 error : too few arguments in function call C:UsersKarpi?scydocumentsvisual studio 2012Projectslurlurkernel.cu blur
Error 7 error MSB3721: The command ""C:Program FilesNVIDIA GPU Computing ToolkitCUDAv6.0in
vcc.exe" -gencode=arch=compute_10,code="sm_10,compute_10" --use-local-env --cl-version 2012 -ccbin "C:Program Files (x86)Microsoft Visual Studio 11.0VCin" -I"C:Program FilesNVIDIA GPU Computing ToolkitCUDAv6.0include" -I"C:Program FilesNVIDIA GPU Computing ToolkitCUDAv6.0include" -G -maxrregcount=0 --machine 32 --compile -cudart static -I -g -DWIN32 -D_DEBUG -D_CONSOLE -D_MBCS -Xcompiler "/EHsc /W3 /nologo /Od /Zi /RTC1 /MDd " -o Debugkernel.cu.obj "C:UsersKarpińscydocumentsvisual studio 2012Projectslurlurkernel.cu"" exited with code 2. C:Program Files (x86)MSBuildMicrosoft.Cppv4.0V110BuildCustomizationsCUDA 6.0.targets 597 9 blur
8 IntelliSense: this declaration has no storage class or type specifier c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu 33 1 blur
9 IntelliSense: expected a ';' c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu 33 12 blur
10 IntelliSense: this declaration has no storage class or type specifier c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu 56 4 blur
11 IntelliSense: expected a ';' c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu 56 9 blur
12 IntelliSense: this declaration has no storage class or type specifier c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu 57 4 blur
13 IntelliSense: expected a ';' c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu 57 7 blur
14 IntelliSense: expected a declaration c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu 58 3 blur
15 IntelliSense: this declaration has no storage class or type specifier c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu 62 2 blur
16 IntelliSense: this declaration has no storage class or type specifier c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu 63 2 blur
17 IntelliSense: identifier "xx" is undefined c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu 65 7 blur
18 IntelliSense: identifier "yy" is undefined c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu 65 14 blur
19 IntelliSense: identifier "avgB" is undefined c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu 65 36 blur
20 IntelliSense: identifier "xx" is undefined c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu 66 7 blur
21 IntelliSense: identifier "yy" is undefined c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu 66 14 blur
22 IntelliSense: identifier "xx" is undefined c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu 67 7 blur
23 IntelliSense: identifier "yy" is undefined c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu 67 14 blur
24 IntelliSense: expected a declaration c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu 68 1 blur
25 IntelliSense: this declaration has no storage class or type specifier c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu 94 5 blur
26 IntelliSense: this declaration has no storage class or type specifier c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu 95 2 blur
27 IntelliSense: identifier "d_data" is undefined c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu 95 15 blur
28 IntelliSense: expected a ')' c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu 95 21 blur
29 IntelliSense: this declaration has no storage class or type specifier c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu 97 5 blur
30 IntelliSense: identifier "fp" is undefined c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu 97 11 blur
31 IntelliSense: expected a ')' c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu 97 13 blur
32 IntelliSense: this declaration has no storage class or type specifier c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu 98 5 blur
33 IntelliSense: expected a ')' c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu 98 15 blur
34 IntelliSense: this declaration has no storage class or type specifier c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu 99 2 blur
35 IntelliSense: identifier "d_data" is undefined c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu 99 13 blur
36 IntelliSense: expected a ')' c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu 99 19 blur
37 IntelliSense: identifier "dim3" is u