#include "cuda.h"
#include "common/book.h"
#include "common/cpu_bitmap.h"
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "device_functions.h"
//#include "GL/GLU.h"
//#include "GL/glut.h"

#define DIM 1024
#define PI 3.1415926535897932f
// Edge length, in threads, of the square thread block. The kernel's shared
// tile and the launch configuration in main() must both use this value.
#define BLOCK_DIM 16

/*
 * Fills an RGBA image (4 bytes per pixel) with a green sine-product pattern
 * in which every BLOCK_DIM x BLOCK_DIM tile is mirrored in both axes.
 *
 * Launch requirements:
 *   - block size must be exactly (BLOCK_DIM, BLOCK_DIM); the shared tile is
 *     statically sized to that shape;
 *   - the grid must cover the image exactly, because there is no bounds
 *     check: ptr must hold gridDim.x*blockDim.x * gridDim.y*blockDim.y * 4
 *     bytes.
 *
 * ptr: device pointer to the output pixel buffer.
 */
__global__ void kernel( unsigned char *ptr ) {
    // map from threadIdx/BlockIdx to pixel position
    int x = threadIdx.x + blockIdx.x * blockDim.x;
    int y = threadIdx.y + blockIdx.y * blockDim.y;
    int offset = x + y * blockDim.x * gridDim.x;

    // Per-block scratch tile. It is indexed [y][x] so that threads that are
    // adjacent in threadIdx.x touch adjacent floats, instead of addresses
    // BLOCK_DIM floats apart (which map onto only a few shared-memory banks).
    __shared__ float shared[BLOCK_DIM][BLOCK_DIM];

    // now calculate the value at that position
    const float period = 128.0f;

    shared[threadIdx.y][threadIdx.x] =
            255 * (sinf(x*2.0f*PI/ period) + 1.0f) *
                  (sinf(y*2.0f*PI/ period) + 1.0f) / 4.0f;

    // Each thread reads a value written by a *different* thread of the
    // block below, so every write must have completed first. Without this
    // barrier the read may see uninitialized shared memory.
    __syncthreads();

    ptr[offset*4 + 0] = 0;
    // Read the point-mirrored element of the tile, i.e. the value computed
    // by thread (BLOCK_DIM-1-threadIdx.x, BLOCK_DIM-1-threadIdx.y).
    ptr[offset*4 + 1] = (unsigned char)
            shared[BLOCK_DIM-1-threadIdx.y][BLOCK_DIM-1-threadIdx.x];
    ptr[offset*4 + 2] = 0;
    ptr[offset*4 + 3] = 255;
}

// globals needed by the update routine
struct DataBlock {
    unsigned char   *dev_bitmap;
};

/*
 * Renders the pattern on the GPU, copies it into a host-side CPUBitmap and
 * hands that bitmap to display_and_exit().
 */
int main( void ) {
    DataBlock   data;
    CPUBitmap bitmap( DIM, DIM, &data );
    unsigned char    *dev_bitmap;

    HANDLE_ERROR( cudaMalloc( (void**)&dev_bitmap,
                              bitmap.image_size() ) );
    data.dev_bitmap = dev_bitmap;

    // DIM is a multiple of BLOCK_DIM, so the grid tiles the image exactly
    // and the kernel needs no bounds check.
    dim3    grids(DIM/BLOCK_DIM,DIM/BLOCK_DIM);
    dim3    threads(BLOCK_DIM,BLOCK_DIM);
    kernel<<<grids,threads>>>( dev_bitmap );
    // A kernel launch returns no status of its own; pick up launch errors
    // (for example an invalid configuration) here instead of letting them
    // surface at a later, unrelated call.
    HANDLE_ERROR( cudaGetLastError() );

    // This blocking copy also waits for the kernel to finish.
    HANDLE_ERROR( cudaMemcpy( bitmap.get_ptr(), dev_bitmap,
                              bitmap.image_size(),
                              cudaMemcpyDeviceToHost ) );

    HANDLE_ERROR( cudaFree( dev_bitmap ) );

    bitmap.display_and_exit();
}
CPUBitmap 结构体(common/cpu_bitmap.h):
#ifndef __CPU_BITMAP_H__ #define __CPU_BITMAP_H__ #include "gl_helper.h" struct CPUBitmap { unsigned char *pixels; int x, y; void *dataBlock; void (*bitmapExit)(void*); CPUBitmap( int width, int height, void *d = NULL ) { pixels = new unsigned char[width * height * 4]; x = width; y = height; dataBlock = d; } ~CPUBitmap() { delete [] pixels; } unsigned char* get_ptr( void ) const { return pixels; } long image_size( void ) const { return x * y * 4; } void display_and_exit( void(*e)(void*) = NULL ) { CPUBitmap** bitmap = get_bitmap_ptr(); *bitmap = this; bitmapExit = e; // a bug in the Windows GLUT implementation prevents us from // passing zero arguments to glutInit() int c=1; char* dummy = ""; glutInit( &c, &dummy ); glutInitDisplayMode( GLUT_SINGLE | GLUT_RGBA ); glutInitWindowSize( x, y ); glutCreateWindow( "bitmap" ); glutKeyboardFunc(Key); glutDisplayFunc(Draw); glutMainLoop(); } // static method used for glut callbacks static CPUBitmap** get_bitmap_ptr( void ) { static CPUBitmap *gBitmap; return &gBitmap; } // static method used for glut callbacks static void Key(unsigned char key, int x, int y) { switch (key) { case 27: CPUBitmap* bitmap = *(get_bitmap_ptr()); if (bitmap->dataBlock != NULL && bitmap->bitmapExit != NULL) bitmap->bitmapExit( bitmap->dataBlock ); exit(0); } } // static method used for glut callbacks static void Draw( void ) { CPUBitmap* bitmap = *(get_bitmap_ptr()); glClearColor( 0.0, 0.0, 0.0, 1.0 ); glClear( GL_COLOR_BUFFER_BIT ); glDrawPixels( bitmap->x, bitmap->y, GL_RGBA, GL_UNSIGNED_BYTE, bitmap->pixels ); glFlush(); } }; #endif
添加正确同步(__syncthreads())后的显示效果
同步不正确(缺少 __syncthreads())时的显示效果