现在的位置: 首页 > 综合 > 正文

OpenCL-hello world

2013年01月04日 ⁄ 综合 ⁄ 共 2905字 ⁄ 字号 评论关闭

忙了好久,期中考试考完了,要开始研究OpenCL了,这是自己的第一个hello world。就是数组相加的并行化,当然这里的kernel函数没有写在单独的文件里,主要是程序不大,姑且就这样吧,便于阅读。以下是源代码

#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <CL/cl.h>

// OpenCL C source of the "vecadd" kernel, compiled at run time by the host.
// Each work-item adds one pair of elements: C[id] = A[id] + B[id].
// The element type is int so that it matches the int arrays the host code
// allocates, uploads and verifies; a float kernel would reinterpret the
// integer bit patterns and produce results the host-side check rejects.
// The kernel performs no bounds check, so the global work size must not
// exceed the number of elements in the three buffers.
const char* programSource=
"__kernel                                         \n"
"void vecadd(__global const int* A,               \n"
"__global const int* B,                           \n"
" __global int* C)                                \n"
"{                                                \n"
"   int id = get_global_id(0);                    \n"
"   C[id] = A[id] + B[id];                        \n"
"}               \n"
;
int main()
{
	int *A = NULL; // 输入数组
	int *B = NULL; // 输入数组
	int *C = NULL; // 输出数组

	// 数组的大小
	const int  elements = 2048;
	
	// 计算内存大小
	size_t datasize = sizeof(int)*elements;
	// 分配内存空间
	A = (int*)malloc(datasize);
	B = (int*)malloc(datasize);
	C = (int*)malloc(datasize);

	// 初始化输入数组
	for(int i = 0;i < elements;i++)
	{
		A[i] = std::rand();
		B[i] = std::rand();
	}
	
	// 获取并初始化平台
	cl_int status;
	cl_uint numPlatforms = 0;
	cl_platform_id *platforms = NULL;


	status = clGetPlatformIDs(0,NULL,&numPlatforms);
	platforms = (cl_platform_id*)malloc(numPlatforms*sizeof(cl_platform_id));

	//
	status = clGetPlatformIDs(numPlatforms,platforms,NULL);
	cl_uint numDevices = 0;
	cl_device_id *devices = NULL;

	status = clGetDeviceIDs(platforms[0],CL_DEVICE_TYPE_ALL,0,NULL,&numDevices);
	// 分配内存空间
	devices = (cl_device_id*)malloc(numDevices*sizeof(cl_device_id));
	status = clGetDeviceIDs(platforms[0],CL_DEVICE_TYPE_ALL,numDevices,devices,NULL);

	cl_context context = NULL;
	//创建上下文,管理设备之间的资料
	context = clCreateContext(NULL,
		numDevices,
		devices,
		NULL,
		NULL,
		&status);
	cl_command_queue cmdQueue;
	//创建命令队列
	cmdQueue = clCreateCommandQueue(context,
		devices[0],
		0,
		&status);
	//初始化数组内存
	cl_mem bufferA;
	cl_mem bufferB;
	cl_mem bufferC;

	bufferA = clCreateBuffer(
		context,
		CL_MEM_READ_ONLY,
		datasize,
		NULL,
		&status);

	bufferB = clCreateBuffer(
		context,
		CL_MEM_READ_ONLY,
		datasize,
		NULL,
		&status);

	bufferC = clCreateBuffer(
		context,
		CL_MEM_WRITE_ONLY,
		datasize,
		NULL,
		&status);
	//将主机端的数据写入设备
	status = clEnqueueWriteBuffer(
		cmdQueue,
		bufferA,
		CL_FALSE,
		0,
		datasize,
		A,
		0,
		NULL,
		NULL);

	status = clEnqueueWriteBuffer(
		cmdQueue,
		bufferB,
		CL_FALSE,
		0,
		datasize,
		B,
		0,
		NULL,
		NULL);
	//编译函数
	cl_program program = clCreateProgramWithSource(
		context,
		1,
		(const char**)&programSource,
		NULL,
		&status);
	status = clBuildProgram(
		program,
		numDevices,
		devices,
		NULL,
		NULL,
		NULL);
	//创建Kernel函数
	cl_kernel kernel = NULL;
	kernel = clCreateKernel(program,"vecadd",&status);
	//设置参数
	status = clSetKernelArg(kernel,0,sizeof(cl_mem),&bufferA);
	status = clSetKernelArg(kernel,1,sizeof(cl_mem),&bufferB);
	status = clSetKernelArg(kernel,2,sizeof(cl_mem),&bufferC);
	//初始化线程的映射
	size_t globalWorkSize[1];
	globalWorkSize[0] = elements;
	//运行kernel
	status = clEnqueueNDRangeKernel(
		cmdQueue,
		kernel,
		1,
		NULL,
		globalWorkSize,
		NULL,
		0,
		NULL,
		NULL);
	//从设备中读回数据结果
	clEnqueueReadBuffer(
		cmdQueue,
		bufferC,
		CL_TRUE,
		0,
		datasize,
		C,
		0,
		NULL,
		NULL);

	bool result = true;
	for(int i = 0;i < elements;i++)
	{
		//std::cout<<C[i]<<std::endl;
		if(C[i]!=A[i]+B[i])
		{
				result = false;
				//break;
		}
	}

	if(result)
	{
		printf("Output is correct\n");
	}
	else
	{
		printf("Output is incorrect\n");
	}

	//清理数据
	clReleaseKernel(kernel);
	clReleaseProgram(program);
	clReleaseCommandQueue(cmdQueue);
	clReleaseMemObject(bufferA);
	clReleaseMemObject(bufferB);
	clReleaseMemObject(bufferC);
	clReleaseContext(context);

	free(A);
	free(B);
	free(C);
	free(platforms);
	free(devices);

	return 0;
}

 

抱歉!评论已关闭.