Welcome to OGeek Q&A Community for programmer and developer-Open, Learning and Share
Welcome To Ask or Share your Answers For Others

Categories

0 votes
192 views
in Technique[技术] by (71.8m points)

c - Translating four nested loops into a CUDA kernel

I'm writing CUDA program that adds blur effect onto BMP files. I wrote working program that does this on CPU, now I'm trying to convert the code to CUDA. This is the function I want to work on CUDA:

void blur(bitmap_header* hp, unsigned char *data)
{
  int xx,yy,x,y, avgB, avgG, avgR, ile;
  int blurSize = 5;
    for(xx = 0; xx < hp->width; xx++)
    {
      for(yy = 0; yy < hp->height; yy++)
    {
        avgB = avgG = avgR = 0;
        ile = 0;

        for(x = xx; x < hp->width && x < xx + blurSize; x++)
        {


            for(y = yy; y < hp->height && y < yy + blurSize; y++)
            {
                avgB += data[x*3 + y*hp->width*3 + 0];
                avgG += data[x*3 + y*hp->width*3 + 1];
                avgR += data[x*3 + y*hp->width*3 + 2];
                ile++;
            }
        }

        avgB = avgB / ile;
        avgG = avgG / ile;
        avgR = avgR / ile;

        data[xx*3 + yy*hp->width*3 + 0] = avgB;
        data[xx*3 + yy*hp->width*3 + 1] = avgG;
        data[xx*3 + yy*hp->width*3 + 2] = avgR;
    }
}
}

How do I convert this function to work on CUDA device? Every tutorial covers only one for loop and uses

int i = threadIdx.x

My previous question about this program: Blur effect on bitmap using C

EDIT

FULL CODE with CUDA edits:

#include <stdio.h>
#include <stdlib.h>
#include <Windows.h>


#pragma pack(push,1)
/* Windows 3.x bitmap file header */
typedef struct {
char         filetype[2];   /* magic - always 'B' 'M' */
unsigned int filesize;
short        reserved1;
short        reserved2;
unsigned int dataoffset;    /* offset in bytes to actual bitmap data */
} file_header;

/* Windows 3.x bitmap full header, including file header */
typedef struct {
file_header  fileheader;
unsigned int headersize;
int          width;
int          height;
short        planes;
short        bitsperpixel;  /* we only support the value 24 here */
unsigned int compression;   /* we do not support compression */
unsigned int bitmapsize;
int          horizontalres;
int          verticalres;
unsigned int numcolors;
unsigned int importantcolors;
} bitmap_header;
#pragma pack(pop)

__global__ void blur(bitmap_header* hp, unsigned char *data)
{
int xx,yy,x,y, avgB, avgG, avgR, ile;
int blurSize = 5;

xx = blockIdy.y * blockDim.y + threadIdx.y;
yy = blockIdx.x * blockDim.x + threadIdx.x;

if(xx >= hp->width || yy >= hp->height)
    return;


avgB = avgG = avgR = 0;
ile = 0;

for(x = xx; x < hp->width && x < xx + blurSize; x++)
{


    for(y = yy; y < hp->height && y < yy + blurSize; y++)
    {
        avgB += data[x*3 + y*hp->width*3 + 0];
        avgG += data[x*3 + y*hp->width*3 + 1];
        avgR += data[x*3 + y*hp->width*3 + 2];
        ile++;
    }
}

avgB = avgB / ile;
avgG = avgG / ile;
avgR = avgR / ile;

data[xx*3 + yy*hp->width*3 + 0] = avgB;
data[xx*3 + yy*hp->width*3 + 1] = avgG;
data[xx*3 + yy*hp->width*3 + 2] = avgR;
}

int filter(char* input, char *output)
{
//variable dec:
FILE *fp,*out;
bitmap_header* hp;
bitmap_header* d_hp;
unsigned char *data;
unsigned char *d_data;

//Open input file:
fp = fopen(input, "r");
if(fp==NULL)
    return 1;

//Read the input file headers:
hp=(bitmap_header*)malloc(sizeof(bitmap_header));
cudaMalloc( &d_hp, (sizeof(bitmap_header));
if(hp==NULL)
    return 1;

fread(hp, sizeof(bitmap_header), 1, fp);
cudaMemcpy(d_hp, &hp, (sizeof(bitmap_header), cudaMemcpyHostToDevice);

//Read the data of the image:
data = (unsigned char*)malloc(sizeof(char)*hp->bitmapsize);
cudaMalloc( &d_data, (sizeof(char)*hp->bitmapsize));

fseek(fp,sizeof(char)*hp->fileheader.dataoffset,SEEK_SET);
fread(data,sizeof(char),hp->bitmapsize, fp);
cudaMemcpy(d_data, &data, (sizeof(char)*hp->bitmapsize), cudaMemcpyHostToDevice);


dim3 block(16,16);
dim3 grid ( (hp->height + 15)/16, (hp->width + 15)/16 );

blur<<<grid,block>>>(d_hp, d_data);
cudaMemcpy(data, d_data, (sizeof(char)*hp->bitmapsize), cudaMemcpyDeviceToHost);

//Open output file:
out = fopen(output, "wb");
if(out==NULL)
{
    fclose(fp);
    free(hp);
    free(data);
    cudaFree(d_data);
    cudaFree(d_hp);
    return 1;
}

fwrite(hp,sizeof(char),sizeof(bitmap_header),out);

fseek(out,sizeof(char)*hp->fileheader.dataoffset,SEEK_SET);
fwrite(data,sizeof(char),hp->bitmapsize,out);

fclose(fp);
fclose(out);
free(hp);
free(data);

cudaFree(d_data);
cudaFree(d_hp);
return 0;
}

int main(int argc, char* argv[])
{
int frames;
int frame = 1;
char path[100000];

system("rd /s/q temp");
system("mkdir temp");

system("cls");
printf("Zapis wszystkich klatek do folderu temp.
");
system("ffmpeg.exe -i test.mp4 -r 29.970 -vcodec bmp temp/%d.bmp");

printf("Ile jest klatek w folderze temp?
");
scanf("%d", &frames);

for(frame = 1;frame<=frames;frame++)
{
    sprintf(path,"temp\%d.bmp",frame);
    printf("Nakladam filtr na ");
    printf(path);
    printf("
");
    filter(path,path);
}

system("cls");
printf("Wszystkie klatki do filmu mp4.
");
system("ffmpeg -r 29.970 -i temp/%d.bmp  -c:v libx264 -preset slow -crf 21 temp/out.mp4");

system("cls");
printf("Wyciecie dzwieku z filmu do mp3
");
system("ffmpeg -i test.mp4 -vn -ar 44100 -ac 2 -ab 192 -f wav temp/sound.wav");

system("cls");
printf("Polaczenie mp3 z mp4.
");
system("ffmpeg -i temp/sound.wav -i temp/out.mp4 final.mp4");

system("cls");
printf("Delete ");
system("rd /s temp");
system("pause");

return 0;
} 

FULL ERROR LIST:

Warning    1    warning : The 'compute_10' and 'sm_10' architectures are deprecated, and may be removed in a future release. C:UsersKarpińscydocumentsvisual studio 2012Projectslurlur
vcc    blur
Error    2    error : identifier "blockIdy" is undefined C:UsersKarpi?scydocumentsvisual studio 2012Projectslurlurkernel.cu    blur
Error    3    error : expected a ")" C:UsersKarpi?scydocumentsvisual studio 2012Projectslurlurkernel.cu    blur
Warning    4    warning : expression has no effect C:UsersKarpi?scydocumentsvisual studio 2012Projectslurlurkernel.cu    blur
Error    5    error : expected a ")" C:UsersKarpi?scydocumentsvisual studio 2012Projectslurlurkernel.cu    blur
Error    6    error : too few arguments in function call C:UsersKarpi?scydocumentsvisual studio 2012Projectslurlurkernel.cu    blur
Error    7    error MSB3721: The command ""C:Program FilesNVIDIA GPU Computing ToolkitCUDAv6.0in
vcc.exe" -gencode=arch=compute_10,code="sm_10,compute_10" --use-local-env --cl-version 2012 -ccbin "C:Program Files (x86)Microsoft Visual Studio 11.0VCin"  -I"C:Program FilesNVIDIA GPU Computing ToolkitCUDAv6.0include" -I"C:Program FilesNVIDIA GPU Computing ToolkitCUDAv6.0include"  -G    -maxrregcount=0  --machine 32 --compile -cudart static -I  -g   -DWIN32 -D_DEBUG -D_CONSOLE -D_MBCS -Xcompiler "/EHsc /W3 /nologo /Od /Zi /RTC1 /MDd  " -o Debugkernel.cu.obj "C:UsersKarpińscydocumentsvisual studio 2012Projectslurlurkernel.cu"" exited with code 2. C:Program Files (x86)MSBuildMicrosoft.Cppv4.0V110BuildCustomizationsCUDA 6.0.targets    597    9    blur
    8    IntelliSense: this declaration has no storage class or type specifier    c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu    33    1    blur
    9    IntelliSense: expected a ';' c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu    33    12    blur
    10    IntelliSense: this declaration has no storage class or type specifier    c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu    56    4    blur
    11    IntelliSense: expected a ';' c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu    56    9    blur
    12    IntelliSense: this declaration has no storage class or type specifier    c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu    57    4    blur
    13    IntelliSense: expected a ';' c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu    57    7    blur
    14    IntelliSense: expected a declaration c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu    58    3    blur
    15    IntelliSense: this declaration has no storage class or type specifier    c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu    62    2    blur
    16    IntelliSense: this declaration has no storage class or type specifier    c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu    63    2    blur
    17    IntelliSense: identifier "xx" is undefined c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu    65    7    blur
    18    IntelliSense: identifier "yy" is undefined c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu    65    14    blur
    19    IntelliSense: identifier "avgB" is undefined c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu    65    36    blur
    20    IntelliSense: identifier "xx" is undefined c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu    66    7    blur
    21    IntelliSense: identifier "yy" is undefined c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu    66    14    blur
    22    IntelliSense: identifier "xx" is undefined c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu    67    7    blur
    23    IntelliSense: identifier "yy" is undefined c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu    67    14    blur
    24    IntelliSense: expected a declaration c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu    68    1    blur
    25    IntelliSense: this declaration has no storage class or type specifier    c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu    94    5    blur
    26    IntelliSense: this declaration has no storage class or type specifier    c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu    95    2    blur
    27    IntelliSense: identifier "d_data" is undefined c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu    95    15    blur
    28    IntelliSense: expected a ')' c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu    95    21    blur
    29    IntelliSense: this declaration has no storage class or type specifier    c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu    97    5    blur
    30    IntelliSense: identifier "fp" is undefined c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu    97    11    blur
    31    IntelliSense: expected a ')' c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu    97    13    blur
    32    IntelliSense: this declaration has no storage class or type specifier    c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu    98    5    blur
    33    IntelliSense: expected a ')' c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu    98    15    blur
    34    IntelliSense: this declaration has no storage class or type specifier    c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu    99    2    blur
    35    IntelliSense: identifier "d_data" is undefined c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu    99    13    blur
    36    IntelliSense: expected a ')' c:UsersKarpińscyDocumentsVisual Studio 2012Projectslurlurkernel.cu    99    19    blur
    37    IntelliSense: identifier "dim3" is u

与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
Welcome To Ask or Share your Answers For Others

1 Reply

0 votes
by (71.8m points)

There are quite a few syntax errors in your code. Really they don't have anything to do with CUDA. Perhaps you should improve your basic C coding skills and understanding how to interpret compiler errors.

This line:

cudaMalloc( &d_hp, (sizeof(bitmap_header));

is missing a close parenthesis. You can't see that, and you also can't figure out the compiler error that indicates that?

Error    3    error : expected a ")" C:UsersKarpi?scydocumentsvisual studio 2012Projectslurlurkernel.cu    blur

In cuda, there is no built-in variable blockIdy. Perhaps you meant blockIdx.y?

Error    2    error : identifier "blockIdy" is undefined C:UsersKarpi?scydocumentsvisual studio 2012Projectslurlurkernel.cu    blur

Another compile error on this line:

cudaMemcpy(d_hp, &hp, (sizeof(bitmap_header), cudaMemcpyHostToDevice);

It should be this:

cudaMemcpy(d_hp, &hp, sizeof(bitmap_header), cudaMemcpyHostToDevice);

If you still need help after fixing those compile errors, post a new question with a complete but simple code that doesn't depend on reading images from files. Just create a simple test case and use that to validate the kernel. Add the error checking I mentioned and run your code with cuda-memcheck. If you refuse to do those things, I can't help you.


与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
OGeek|极客中国-欢迎来到极客的世界,一个免费开放的程序员编程交流平台!开放,进步,分享!让技术改变生活,让极客改变未来! Welcome to OGeek Q&A Community for programmer and developer-Open, Learning and Share
Click Here to Ask a Question

...