#define PROGRAM_FILE_PATH "regression_analysis.cl" #include #include /* CL Globals */ cl_device_id device; cl_context context; cl_program program; cl_command_queue queue; cl_mem int_buf_1, int_buf_2, int_buf_3, int_buf_4, d_mat_1, d_mat_2, d_mat_3, d_mat_4, d_mat_5, d_mat_6, d_mat_ptr_1, d_mat_ptr_2, d_vec_1, return_flt_buf_1, return_flt_buf_2, return_flt_buf_3, return_flt_buf_4, return_int_ptr_1, return_int_ptr_2, return_vec_1, return_vec_2, return_vec_3, return_flt_ptr_1, return_flt_ptr_2, d_vec_1, d_vec_2, d_vec_3, return_temp_buf; cl_kernel kernel; /* Find a GPU or CPU associated with the first available platform */ cl_device_id create_device() { cl_platform_id platform; cl_device_id dev; char name[128]; int err; /* Identify a platform */ err = clGetPlatformIDs(1, &platform, NULL); if (err < 0) { perror("Couldn't identify a platform"); exit(1); } /* Access a device */ err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ACCELERATOR, 1, &dev, NULL); if (err == CL_DEVICE_NOT_FOUND) { err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &dev, NULL); if (err == CL_DEVICE_NOT_FOUND) { err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 1, &dev, NULL); if (err < 0) { perror("Couldn't access any devices"); exit(1); } else { clGetDeviceInfo(dev, CL_DEVICE_NAME, 128, name, NULL); fprintf(stdout, "Created a dispatch queue using the %s\n", name); printf("Accessing CPU... \n"); } } else { clGetDeviceInfo(dev, CL_DEVICE_NAME, 128, name, NULL); fprintf(stdout, "Created a dispatch queue using the %s\n", name); printf("Accessing GPU... \n"); } } else { clGetDeviceInfo(dev, CL_DEVICE_NAME, 128, name, NULL); fprintf(stdout, "Created a dispatch queue using the %s\n", name); printf("Accessing Accelerator... \n"); } return dev; } /* Create program from a file and compile it */ cl_program build_program(cl_context ctx, cl_device_id dev, const char* filename) { cl_program program; FILE *program_handle; char *program_buffer, *program_log; size_t program_size, log_size; int err; /* Read program file and place content into buffer */ program_handle = fopen(filename, "r"); if (program_handle == NULL) { perror("Couldn't find the program file"); exit(1); } fseek(program_handle, 0, SEEK_END); program_size = ftell(program_handle); rewind(program_handle); program_buffer = (char*) malloc(program_size + 1); program_buffer[program_size] = '\0'; fread(program_buffer, sizeof(char), program_size, program_handle); fclose(program_handle); /* Create program from file */ program = clCreateProgramWithSource(ctx, 1, (const char**) &program_buffer, &program_size, &err); if (err < 0) { perror("Couldn't create the program"); exit(1); } free(program_buffer); /* Build program */ err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); if (err < 0) { /* Find size of log and print to std output */ clGetProgramBuildInfo(program, dev, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size); program_log = (char*) malloc(log_size + 1); program_log[log_size] = '\0'; clGetProgramBuildInfo(program, dev, CL_PROGRAM_BUILD_LOG, log_size + 1, program_log, NULL); printf("%s\n", program_log); free(program_log); exit(1); } return program; } void initialize_cl() { /* Error var */ cl_int err; /* Create device and context */ printf("Getting OpenCL device ID... \n"); cl_device_id device = create_device(); printf("Creating the OpenCL context... \n"); context = clCreateContext(NULL, 1, &device, NULL, NULL, &err); if (err < 0) { perror("Couldn't create a context"); exit(1); } /* Build programs */ program = build_program(context, device, PROGRAM_FILE_PATH); /* Create a command queue */ printf("Creating OpenCL command queue... \n"); queue = clCreateCommandQueue(context, device, 0, &err); if (err < 0) { perror("Couldn't create a command queue"); exit(1); }; // Create the kernels printf("Creating the OpenCL kernel... \n"); kernel = clCreateKernel(program, "regression_analysis", &err); if (err < 0) { perror("Couldn't create a kernel"); exit(1); }; } int main() { printf("Initializing... \n"); initialize_cl(); printf("Done!\n"); int mi, mj, mk; int count = 0; int size_x_rdcd = 0; int size_xtxinvxt_rdcd = 0; int voxels = 4; int nt = 450; int ixyz_bot = 0; int ixyz_top = voxels; /* Variables */ int N = 450; int p = 14; int q = 12; int num_stimts = 8; float rms_min = 0.0; float min_lag[8] = {0, 0, 0, 0, 0, 0, 0, 0}; float max_lag[8] = {0, 0, 0, 0, 0, 0, 0, 0}; double * y_elts_buf = (double *) malloc(sizeof(double) * N * voxels); for(mi = 0; mi<450*voxels; ++mi) y_elts_buf[mi] = 0.9; int x_full_rows = 450; int x_full_cols = 14; double * x_full_elts = (double *)malloc(sizeof(double)*450*14); for(mi = 0; mi<450*14; ++mi) x_full_elts[mi] = 0.8; int xtxinv_full_rows = 14; int xtxinv_full_cols = 14; double * xtxinv_full_elts = (double *)malloc(sizeof(double)*14*14); for(mi = 0; mi<14*14; ++mi) xtxinv_full_elts[mi] = 0.7; int xtxinvxt_full_rows = 14; int xtxinvxt_full_cols = 450; double * xtxinvxt_full_elts = (double *)malloc(sizeof(double)*14*450); for(mi = 0; mi<14*450; ++mi) xtxinvxt_full_elts[mi] = 0.6; int x_base_rows = 450; int x_base_cols = 12; double * x_base_elts = (double *)malloc(sizeof(double)*450*12); for(mi = 0; mi<450*12; ++mi) x_base_elts[mi] = 0.5; int xtxinvxt_base_rows = 12; int xtxinvxt_base_cols = 450; double * xtxinvxt_base_elts = (double *)malloc(sizeof(double)*12*450); for(mi = 0; mi<12*450; ++mi) xtxinvxt_base_elts[mi] = (double)0.4; int * dims_x_rdcd = (int *)malloc(sizeof(int)*2*num_stimts); int * dims_xtxinvxt_rdcd = (int *)malloc(sizeof(int)*2*num_stimts); for(mi = 0; mi < num_stimts; ++mi){ dims_x_rdcd[mi+count] = 450; dims_x_rdcd[mi+count+1] = 13; size_x_rdcd += 450*13; dims_xtxinvxt_rdcd[mi+count] = 13; dims_xtxinvxt_rdcd[mi+count+1] = 450; size_xtxinvxt_rdcd += 13*450; ++count; } double * x_rdcd_1d = (double *)malloc(sizeof(double)*size_x_rdcd); for(mi = 0; mi