#include #include #define CHECK_ERR(f, err) do { \ (err) = (f); \ if ((err) != E_SUCCESS) { \ printf("[%s:%d] err (%d)\n", __FILE__, __LINE__, err); \ goto bail_out; \ } \ } while(0) #define dimension (4) static dnnError_t init_conversion(dnnPrimitive_t *cv, float **ptr_out, dnnLayout_t lt_pr, dnnLayout_t lt_us, float *ptr_us) { dnnError_t err; *ptr_out = NULL; if (!dnnLayoutCompare_F32(lt_pr, lt_us)) { dnnConversionCreate_F32(cv, lt_us, lt_pr); dnnAllocateBuffer_F32((void**)ptr_out, lt_pr); } else { *ptr_out = ptr_us; } return E_SUCCESS; bail_out: if (*ptr_out) dnnReleaseBuffer_F32(*ptr_out); return err; } void naive_conv(float *weights, float *input, float *output1, int avx_flag) { //x[416,416,3] //y[416,416,32] //w[3,3,3,32] int flag = 0; for (int ofm = 0; ofm < 32; ofm++) { for (int ofh = 0; ofh < 416; ofh++) { for (int ofw = 0; ofw < 416; ofw++) { if (flag > 10) goto label; float s = 0.0f; for (int ifm = 0; ifm < 3; ifm++) { for (int kh = 0; kh < 3; kh++) { for (int kw = 0; kw < 3; kw++) { float v = input[ifm * 416 * 416 + (ofh + kh) * 416 + ofw + kw];//ifm*IFH*IFW+(ofh+kh)*IFW+(ofw+kw) float w = weights[ofm* 416 *3*3 + ifm*3*3 + kh*3 + kw];//ofm*IFM*KH*KW+ifm*KH*KW+kh*KW+kw s += v*w; } } } output1[ofm * 416 * 416 + ofh * 416 + ofw] = s;//ofm*OFH*OFW+ofh*OFW+ofw flag++; } } } label: ; } void mkl_conv_primitive(float *weights, float *input, float *output, int avx_flag) { if (avx_flag == 1) { mkl_enable_instructions(MKL_ENABLE_AVX); } else { mkl_enable_instructions(MKL_ENABLE_AVX2); } size_t inp_size[4] = { 416, 416, 3, 1 }; size_t inputStrides[4] = { 1, 416, 416 * 416, 416 * 416 * 3 }; size_t out_size[4] = { 416, 416, 32, 1 }; size_t outputStrides[4] = { 1, 416, 416 * 416, 416 * 416 * 32 }; size_t filt_size[4] = { 3, 3, 3, 32 }; size_t filterStrides[4] = { 1, 3, 3 * 3, 3 * 3 * 3 }; // size_t stride[2] = { 1, 1 }; int pad[2] = { 0, 0 }; dnnError_t err; dnnPrimitive_t conv = NULL; dnnPrimitiveAttributes_t attributes = NULL; float *res_conv[dnnResourceNumber] = { 0 }; dnnLayout_t lt_user_input = NULL, lt_user_filt = NULL, //lt_user_bias = NULL, lt_user_output = NULL; dnnLayout_t lt_conv1_input = NULL, lt_conv1_filt = NULL, //lt_conv1_bias = NULL, lt_conv1_output = NULL; dnnPrimitive_t cv_user_to_conv1_input = NULL, cv_user_to_conv1_filt = NULL, //cv_user_to_conv1_bias = NULL, cv_conv1_to_user_output = NULL; /*** User's data description ***/ CHECK_ERR(dnnLayoutCreate_F32(<_user_input, dimension, inp_size, inputStrides), err); CHECK_ERR(dnnLayoutCreate_F32(<_user_filt, dimension, filt_size, filterStrides), err); CHECK_ERR(dnnLayoutCreate_F32(<_user_output, dimension, out_size, outputStrides), err); CHECK_ERR(dnnPrimitiveAttributesCreate_F32(&attributes), err); CHECK_ERR(dnnConvolutionCreateForward_F32(&conv, attributes, dnnAlgorithmConvolutionDirect, 4, inp_size, out_size, filt_size, stride, pad, dnnBorderZeros), err); // Convolution describes what layout it expects CHECK_ERR(dnnLayoutCreateFromPrimitive_F32(<_conv1_input, conv, dnnResourceSrc), err); CHECK_ERR(dnnLayoutCreateFromPrimitive_F32(<_conv1_filt, conv, dnnResourceFilter), err); CHECK_ERR(dnnLayoutCreateFromPrimitive_F32(<_conv1_output, conv, dnnResourceDst), err); init_conversion(&cv_user_to_conv1_input, &res_conv[dnnResourceSrc], lt_conv1_input, lt_user_input, input); init_conversion(&cv_user_to_conv1_filt, &res_conv[dnnResourceFilter], lt_conv1_filt, lt_user_filt, weights); dnnAllocateBuffer_F32((void**)&res_conv[dnnResourceDst], lt_conv1_output); init_conversion(&cv_conv1_to_user_output, &output, lt_user_output, lt_conv1_output, res_conv[dnnResourceDst]); /*** Execution ***/ if (cv_user_to_conv1_filt) dnnConversionExecute_F32(cv_user_to_conv1_filt, weights, res_conv[dnnResourceFilter]); if (cv_user_to_conv1_input) dnnConversionExecute_F32(cv_user_to_conv1_input, input, res_conv[dnnResourceSrc]); dnnExecute_F32(conv, (void**)res_conv); if (cv_conv1_to_user_output) dnnConversionExecute_F32(cv_conv1_to_user_output, res_conv[dnnResourceDst], output); printf("mkldnn_output:%f, %f, %f, %f, %f\n", output[0], output[1], output[2], output[3], output[4]); bail_out: dnnDelete_F32(conv); dnnDelete_F32(cv_user_to_conv1_input); dnnDelete_F32(cv_user_to_conv1_filt); dnnDelete_F32(cv_conv1_to_user_output); dnnLayoutDelete_F32(lt_user_input); dnnLayoutDelete_F32(lt_user_filt); dnnLayoutDelete_F32(lt_user_output); dnnLayoutDelete_F32(lt_conv1_input); dnnLayoutDelete_F32(lt_conv1_filt); dnnLayoutDelete_F32(lt_conv1_output); dnnPrimitiveAttributesDestroy_F32(attributes); } void mkl_conv(float *weights, float *input, float *output, int avx_flag) { //mkl_set_num_threads(4); if (avx_flag == 1) { mkl_enable_instructions(MKL_ENABLE_AVX); } else { mkl_enable_instructions(MKL_ENABLE_AVX2); } size_t inp_size[4] = { 416, 416, 3, 1 }; size_t out_size[4] = { 416, 416, 32, 1 }; size_t filt_size[4] = { 3, 3, 3, 32 }; size_t stride[2] = { 1, 1 }; int pad[2] = { 0, 0 }; dnnError_t err; dnnPrimitive_t conv = NULL; dnnPrimitiveAttributes_t attributes = NULL; float *res_conv[dnnResourceNumber] = { 0 }; CHECK_ERR(dnnPrimitiveAttributesCreate_F32(&attributes), err); CHECK_ERR(dnnConvolutionCreateForward_F32(&conv, attributes, dnnAlgorithmConvolutionDirect, 4, inp_size, out_size, filt_size, stride, pad, dnnBorderZeros), err); res_conv[dnnResourceSrc] = (void*)input; res_conv[dnnResourceFilter] = (void*)weights; res_conv[dnnResourceDst] = (void*)output; CHECK_ERR(dnnExecute_F32(conv, (void **)res_conv), err); bail_out: dnnDelete_F32(conv); dnnPrimitiveAttributesDestroy_F32(attributes); } int main() { mkl_set_num_threads(1); float *weights = (float *)mkl_malloc(32 * 3 * 3 * 3*sizeof(float),128); float *input = (float *)mkl_malloc(3 * 416 * 416*sizeof(float),128); float *output = (float *)mkl_malloc(32 * 416 * 416 * sizeof(float), 128); float *output1 = (float *)mkl_malloc(32 * 416 * 416 * sizeof(float), 128); FILE *f_c = fopen("data/conv1.test", "rb"); FILE *f_w = fopen("data/weights1.test", "rb"); if (f_c != NULL) fread(input, sizeof(float), 3 * 416 * 416, f_c); if (f_w != NULL) fread(weights, sizeof(float), 32 * 3 * 3 * 3, f_w); fclose(f_c); fclose(f_w); /*FILE *f_i = fopen("data/input.txt", "wb"); FILE *f_iw = fopen("data/weight.txt", "wb"); if (f_i != NULL) fwrite(input, sizeof(float), 3 * 416 * 416, f_i); if (f_iw != NULL) fwrite(weights, sizeof(float), 32 * 3 * 3 * 3, f_iw); fclose(f_i); fclose(f_iw);*/ printf("input:%f, %f, %f\n", input[0], input[1], input[2]); printf("weights:%f, %f, %f\n", weights[0], weights[1], weights[2]); int avx_flag; printf("Please chose the mkl enable instruction, 1 or 2: "); // correspond to avx or avx2 scanf("%d", &avx_flag); naive_conv(weights, input, output1, avx_flag); //mkl_conv(weights, input, output, avx_flag); mkl_conv_primitive(weights, input, output, avx_flag); printf("naive_output:%f, %f, %f, %f, %f\n", output1[0], output1[1], output1[2], output1[3], output1[4]); /*char filename[256]; sprintf(filename, "data/result_avx%d.txt", avx_flag); FILE *f = fopen(filename, "w"); if (f != NULL) { fprintf(f, "results:\n"); for (int i = 0; i < 32; ++i) { for (int j = 0; j < 416; ++j) { for (int k = 0; k < 416; ++k) { fprintf(f, "%f\t", output[i*416*416 + j * 416 + k]); } fprintf(f, "\n"); } fprintf(f, "\n"); } fclose(f); }*/ mkl_free(input); mkl_free(weights); mkl_free(output); getchar(); return 0; }