/* // Copyright 2015 2017 Intel Corporation All Rights Reserved. // // The source code, information and material ("Material") contained herein is // owned by Intel Corporation or its suppliers or licensors, and title // to such Material remains with Intel Corporation or its suppliers or // licensors. The Material contains proprietary information of Intel // or its suppliers and licensors. The Material is protected by worldwide // copyright laws and treaty provisions. No part of the Material may be used, // copied, reproduced, modified, published, uploaded, posted, transmitted, // distributed or disclosed in any way without Intel's prior express written // permission. No license under any patent, copyright or other intellectual // property rights in the Material is granted to or conferred upon you, // either expressly, by implication, inducement, estoppel or otherwise. // Any license under such intellectual property rights must be express and // approved by Intel in writing. // // Unless otherwise agreed by Intel in writing, // you may not remove or alter this notice or any other notice embedded in // Materials by Intel or Intel's suppliers or licensors in any way. */ // A simple example of performing a filtering an image using a general integer rectangular kernel // implemented with Intel IPP functions: // ippiImageJaehne_32f_C1R // ippiFilterGaussianInit // ippiFilterGaussianBorder_32f_C1R #include #include "ipp.h" #include #define WIDTH 2048 /* image width */ #define HEIGHT 2048 /* image height */ /* Next two defines are created to simplify code reading and understanding */ #define EXIT_MAIN exitLine: /* Label for Exit */ #define check_sts(st) if((status = (st)) < ippStsNoErr) goto exitLine; /* Go to Exit if IPP function returned status different from ippStsNoErr */ #define IPPS_FREE(PTR) ippsFree(PTR); (PTR) = NULL; #define IPPI_FREE(PTR) ippiFree(PTR); (PTR) = NULL; /* Results of ippMalloc() are not validated because IPP functions perform bad arguments check and will return an appropriate status */ int main(void) { IppStatus status; Ipp32f *pSrc = NULL; Ipp32f *pDst1 = NULL; /* Pointers to source/destination images */ Ipp32f *pDst2 = NULL; Ipp32f *pDstS = NULL; int srcStep = 0; int dstStep = 0; /* Steps, in bytes, through the source/destination images */ IppiSize size = { WIDTH, HEIGHT }; /* Size of source/destination ROI in pixels */ Ipp32u kernelSize = 3; Ipp32f sigma = 0.35f; IppiBorderType border = ippBorderRepl; Ipp32f borderValue = 0; int channels = 1; int threads = omp_get_max_threads(); // Get threads number int chunkSize = (HEIGHT + threads - 1)/(threads*2); pSrc = ippiMalloc_32f_C1(size.width, size.height, &srcStep); pDst1 = ippiMalloc_32f_C1(size.width, size.height, &dstStep); pDst2 = ippiMalloc_32f_C1(size.width, size.height, &dstStep); pDstS = ippiMalloc_32f_C1(size.width, size.height, &dstStep); check_sts(ippiImageJaehne_32f_C1R(pSrc, srcStep, size)); /* fill source image */ // parallel classic API { int tmpSize = 0; int specSize = 0; IppiSize roi = {WIDTH, chunkSize}; //each thread take a block of image data (only a few lines) check_sts(ippiFilterGaussianGetBufferSize(roi, kernelSize, ipp32f, channels, &specSize, &tmpSize)); #pragma omp parallel num_threads(threads) { IppFilterGaussianSpec *pSpec = NULL; /* context structure */ Ipp8u *pBuffer = NULL; /* Pointer to the work buffer */ IppStatus tStatus; Ipp32f *pSrcT; Ipp32f *pDstT; IppiSize tRoi; int row; IppiBorderType tBorder; pSpec = (IppFilterGaussianSpec*)ippsMalloc_8u(specSize); pBuffer = ippsMalloc_8u(tmpSize); #pragma omp for for(row = 0; row < size.height; row += roi.height) { if(status < 0) continue; tBorder = border; tRoi = roi; if(row) tBorder |= ippBorderInMemTop; if(row + roi.height >= size.height) tRoi.height = size.height - row; else tBorder |= ippBorderInMemBottom; pSrcT = (Ipp32f*)(((Ipp8u*)pSrc) + srcStep*row); pDstT = (Ipp32f*)(((Ipp8u*)pDst1) + dstStep*row); tStatus = ippiFilterGaussianInit(tRoi, kernelSize, sigma, tBorder, ipp32f, channels, pSpec, pBuffer); if(tStatus < 0) { status = tStatus; continue; } tStatus = ippiFilterGaussianBorder_32f_C1R(pSrcT, srcStep, pDstT, dstStep, tRoi, borderValue, pSpec, pBuffer); if(tStatus < 0) { status = tStatus; continue; } } ippsFree(pBuffer); ippsFree(pSpec); } check_sts(status); } // parallel Platform-Aware API { IppFilterGaussianSpec *pSpec = NULL; /* context structure */ Ipp8u *pInitBuffer = NULL; IppSizeL specSize = 0; IppSizeL initSize = 0; IppiSizeL roi = {WIDTH, chunkSize}; //each thread take a block of image data (only a few lines) check_sts(ippiFilterGaussianGetSpecSize_L(kernelSize, ipp32f, channels, &specSize, &initSize)); pSpec = (IppFilterGaussianSpec*)ippsMalloc_8u_L(specSize); pInitBuffer = ippsMalloc_8u_L(initSize); status = ippiFilterGaussianInit_L(roi, kernelSize, sigma, border, ipp32f, channels, pSpec, pInitBuffer); ippsFree(pInitBuffer); if(status < 0) { ippsFree(pSpec); check_sts(status); } #pragma omp parallel num_threads(threads) { IppSizeL tmpSize = 0; Ipp8u *pBuffer = NULL; /* Pointer to the work buffer */ IppStatus tStatus; Ipp32f *pSrcT; Ipp32f *pDstT; IppiSizeL tRoi; IppSizeL row; IppiBorderType tBorder; tStatus = ippiFilterGaussianGetBufferSize_L(roi, kernelSize, ipp32f, border, channels, &tmpSize); if(tStatus < 0) status = tStatus; else { pBuffer = ippsMalloc_8u_L(tmpSize); #pragma omp for for(row = 0; row < size.height; row += roi.height) { if(status < 0) continue; tBorder = border; tRoi = roi; if(row) tBorder |= ippBorderInMemTop; if(row + roi.height >= size.height) tRoi.height = size.height - row; else tBorder |= ippBorderInMemBottom; pSrcT = (Ipp32f*)(((Ipp8u*)pSrc) + srcStep*row); pDstT = (Ipp32f*)(((Ipp8u*)pDst2) + dstStep*row); tStatus = ippiFilterGaussian_32f_C1R_L(pSrcT, srcStep, pDstT, dstStep, tRoi, tBorder, &borderValue, pSpec, pBuffer); if(tStatus < 0) { status = tStatus; continue; } } ippsFree(pBuffer); } } ippsFree(pSpec); check_sts(status); } // sequential { IppSizeL initSize = 0; IppSizeL tmpSize = 0; IppSizeL specSize = 0; IppFilterGaussianSpec *pSpec = NULL; /* context structure */ Ipp8u *pBuffer = NULL; /* Pointer to the work buffer */ Ipp8u *pInitBuffer = NULL; IppiSizeL sizeL = {size.width, size.height}; check_sts(ippiFilterGaussianGetSpecSize_L(kernelSize, ipp32f, channels, &specSize, &initSize)); check_sts(ippiFilterGaussianGetBufferSize_L(sizeL, kernelSize, ipp32f, border, channels, &tmpSize)); pSpec = (IppFilterGaussianSpec*)ippsMalloc_8u_L(specSize); pBuffer = ippsMalloc_8u_L(tmpSize); pInitBuffer = ippsMalloc_8u_L(initSize); status = ippiFilterGaussianInit_L(sizeL, kernelSize, sigma, border, ipp32f, channels, pSpec, pInitBuffer); ippsFree(pInitBuffer); if(status < 0) { ippsFree(pBuffer); ippsFree(pSpec); check_sts(status); } status = ippiFilterGaussian_32f_C1R_L(pSrc, srcStep, pDstS, dstStep, sizeL, border, &borderValue, pSpec, pBuffer); ippsFree(pBuffer); ippsFree(pSpec); check_sts(status); } // compare { Ipp64f diff; check_sts(ippiNormDiff_Inf_32f_C1R(pDstS, dstStep, pDst1, dstStep, size, &diff)); printf("DIFF1: %.12f\n", diff); check_sts(ippiNormDiff_Inf_32f_C1R(pDstS, dstStep, pDst2, dstStep, size, &diff)); printf("DIFF2: %.12f\n", diff); } EXIT_MAIN ippiFree(pSrc); ippiFree(pDst1); ippiFree(pDst2); ippiFree(pDstS); printf("Exit status %d (%s)\n", (int)status, ippGetStatusString(status)); return (int)status; }