Link Copied
Hallo Michael,
Thanks for posting a general example regarding the convolution with separable filters. I am trying to modify your code so that it can be called from within Matlab (in a MEX file). There are a few things that still are not clear to me. I hope you can help me.
I do not clearly understand what this section of the code does:
/* organize dst buffer */
pTmp = (Ipp16s*)(ppDst+size.height);
for(i=0;i
ppSrc[i+2]=pTmp;
}
/* organize replicate border for ippiFilterColumnPipeline_16s_C1R */
/* top */
ppSrc[0]=ppSrc[2];
ppSrc[1]=ppSrc[2];
/* bottom */
ppSrc[i-1+4]=ppDst[size.height-1];
ppSrc[i-2+4]=ppDst[size.height-1];
I understand that you are arranging the pointers to the image borders for the replication, but the whole process is not very clear. I would appreciate some more specific comments in the code so that I can generalize this to any kernel size (I suspect that some coefficients, like the 2 and the 4 in ppSrc[2] and ppSrc[i-2+4], are somehow related to the semisize of the filter that you are using in this example, but it is not clear why and how).
I also include my version of the code so that you may spot major mistakes and bugs:
#ifdef MEX_CONV2_SEP_SINGLE_USE_IPP
// Report a failed IPP call to MATLAB.
//
// status: value returned by an IPP routine. ippStsNoErr is a no-op; any other
//         value is printed through ippGetStatusString() and then raised with
//         mexErrMsgTxt().
//
// Declared `static inline`: in C99 and later a plain `inline` definition does
// not emit an out-of-line copy of the function, so a call that the compiler
// decides not to inline can fail to link.
static inline
void CheckIPPStatus(IppStatus status){
    if(status != ippStsNoErr)
    {
        mexPrintf(" status = %s ", ippGetStatusString(status));
        mexErrMsgTxt("IPP error");
    }
}
// Adapted from:
// http://softwarecommunity.intel.com/isn/Community/en-US/forums/5482632/PostAttachment.aspx
//
// INTEL CORPORATION PROPRIETARY INFORMATION
// This software is supplied under the terms of a license agreement or
// nondisclosure agreement with Intel Corporation and may not be copied
// or disclosed except in accordance with the terms of that agreement.
// Copyright (c) 2005 Intel Corporation. All Rights Reserved.
//
// Separable 2D convolution example
//
// SepConv: separable 2D convolution of a single-precision image.
// As written, the row pass (ippiFilterRowBorderPipeline_32f_C1R, kernel hc of
// length Nc, replicated border) is meant to fill the row table ppDst, and the
// column pass (ippiFilterColumnPipeline_32f_C1R, kernel hr of length Nr) reads
// the row table ppSrc and writes pDstBuffer.
//
// NOTE(review): this listing looks damaged by text extraction and does not
// compile as shown:
//   - several statements are fused onto the end of `//` comment lines, so the
//     yAnchor declaration, the ppDst / pBufferCol / pTmp assignments, the first
//     buffer-size query and the first bottom-border assignment are commented
//     out as shown;
//   - the `for` header is truncated and `ppDst = pTmp;` presumably lost an
//     `[i]` subscript;
//   - a `/*` that opens inside a `//` comment leaves a stray `*/` behind;
//   - `ippsF ree` contains a stray space.
// NOTE(review): the border offsets 2 and 4 are hard-coded; presumably they
// correspond to a 5-tap column kernel (two replicated rows above and below the
// image) and would have to depend on Nr for other kernel sizes -- confirm.
// NOTE(review): CheckIPPStatus() and mexErrMsgTxt() are called while the IPP
// buffers are still allocated; no ippsFree precedes those calls.
void
SepConv(const Ipp32f *pSrcBuffer, Ipp32f *pDstBuffer, IppiSize *size, Ipp32f *hc, int Nc, Ipp32f *hr, int Nr){
int sizerow, sizecol, i; int maxKernelSize = (Nc > Nr) ? Nc : Nr;Ipp32f **ppDst, **ppSrc, *pTmp;
Ipp8u *pBufferCol, *pBufferRow;
IppStatus status;
//int xAnchor = (Nr >> 1) + 1; int yAnchor = (Nc >> 1) + 1; // allocate temporary dst bufferppDst = (Ipp32f**)ippsMalloc_8u(size->width*size->height*
sizeof(Ipp32f) + (size->height)*sizeof(Ipp32f*));ppSrc = (Ipp32f**)ippsMalloc_8u((size->height+maxKernelSize)*
sizeof(Ipp32f*)); // size of temporary buffersstatus = ippiFilterRowBorderPipelineGetBufferSize_32f_C1R(*size, maxKernelSize, &sizerow);
CheckIPPStatus(status);
status = ippiFilterColumnPipelineGetBufferSize_32f_C1R(*size, maxKernelSize, &sizecol);
CheckIPPStatus(status);
//mexPrintf(" size = {%d,%d}", size->height, size->width); // allocate temporary bufferspBufferCol = ippsMalloc_8u(sizecol);
if (pBufferCol == NULL) mexErrMsgTxt("IPP error: failed to allocate column buffer");pBufferRow = ippsMalloc_8u(sizerow);
if (pBufferRow == NULL) mexErrMsgTxt("IPP error: failed to allocate row buffer"); // organize dst bufferpTmp = (Ipp32f*)(ppDst+size->height);
for(i = 0; i{
ppDst = pTmp;
ppSrc[i+2] = pTmp;
}
ppSrc[0] = ppSrc[2];
ppSrc[1] = ppSrc[2];
// bottomppSrc[i-1+4] = ppDst[size->height-1];
ppSrc[i-2+4] = ppDst[size->height-1];
// perform the convolution /*mexPrintf(" pSrcBuffer = {%f, %f, %f, %f}", pSrcBuffer[0], pSrcBuffer[1], pSrcBuffer[2], pSrcBuffer[3]);
mexPrintf(" hc = {%f, %f, %f, %f}", hc[0], hc[1], hc[2], hc[3]);
mexPrintf(" size = {%d, %d}", size->height, size->width);
mexPrintf(" sizerow = %d, sizecol = %d", sizerow, sizecol);
*/
status = ippiFilterRowBorderPipeline_32f_C1R((
const Ipp32f*) pSrcBuffer, size->width*sizeof(Ipp32f), ppDst,*size, hc, Nc, yAnchor, ippBorderRepl, 0, pBufferRow);
CheckIPPStatus(status);
status = ippiFilterColumnPipeline_32f_C1R((
const Ipp32f **)ppSrc, pDstBuffer, size->width*sizeof(Ipp32f), *size,hr, Nr, pBufferCol);
CheckIPPStatus(status);
ippsF ree(ppSrc);
ippsFree(ppDst);
ippsFree(pBufferCol);
ippsFree(pBufferRow);
}
#endif
Thanks in advance for your help,
Marco
This is the version of the code that I would expect to generalize the one originally posted by Michael. In particular I modified the section for the organization of the buffers as follows:
// organize dst buffer// (the offset takes care of the double pointer structure)
pTmp = (Ipp32f*)(ppDst+size->height);
for(i = 0; iheight; i++, pTmp += size->width) for(i = 0; i < yAnchor; i++){
ppDst = pTmp;
ppSrc[i+yAnchor] = pTmp;
}
{
// topppSrc = ppSrc[yAnchor];
// bottomppSrc[i + size->height] = ppSrc[size->height-1];
}
Unfortunately, when performing the second convolution (within the MEX file, ippiFilterColumnPipeline_32f_C1R), the status returned by IPP says "Null pointer error".
I am reposting the original code for your convenience. Note that hc and hr are the convolution kernels, whose lengths are Nc and Nr respectively.
#ifdef MEX_CONV2_SEP_SINGLE_USE_IPP
// Report a failed IPP call to MATLAB.
//
// status: value returned by an IPP routine. ippStsNoErr is a no-op; any other
//         value is printed through ippGetStatusString() and then raised with
//         mexErrMsgTxt().
//
// Declared `static inline`: in C99 and later a plain `inline` definition does
// not emit an out-of-line copy of the function, so a call that the compiler
// decides not to inline can fail to link.
static inline
void CheckIPPStatus(IppStatus status){
    if(status != ippStsNoErr){
        mexPrintf(" status = %s ", ippGetStatusString(status));
        mexErrMsgTxt("IPP error");
    }
}
// Adapted from:
// http://softwarecommunity.intel.com/isn/Community/en-US/forums/5482632/PostAttachment.aspx
//
// INTEL CORPORATION PROPRIETARY INFORMATION
// This software is supplied under the terms of a license agreement or
// nondisclosure agreement with Intel Corporation and may not be copied
// or disclosed except in accordance with the terms of that agreement.
// Copyright (c) 2005 Intel Corporation. All Rights Reserved.
//
// Separable 2D convolution example
//
// SepConv: separable 2D convolution of a single-precision image (second
// revision, intended to generalize the border handling to any kernel size).
// The row pass (ippiFilterRowBorderPipeline_32f_C1R, kernel hc of length Nc,
// replicated border) targets the row table ppDst; the column pass
// (ippiFilterColumnPipeline_32f_C1R, kernel hr of length Nr) reads the row
// table ppSrc and writes pDstBuffer.
//
// NOTE(review): this listing looks damaged by text extraction and does not
// compile as shown:
//   - the yAnchor declaration sits on a `//` line, so yAnchor is undeclared
//     where it is used below;
//   - two `for` headers are fused on one line and the first one is truncated
//     (`iheight`); `ppDst = pTmp;` presumably lost an `[i]` subscript;
//   - both border assignments (top and bottom) are fused onto `//` comment
//     lines, so as shown no live statement fills the border entries of ppSrc.
// NOTE(review): the commented bottom-border statement copies from
// ppSrc[size->height-1] into ppSrc[i + size->height]; with the image rows
// stored at ppSrc[i+yAnchor] both indices look shifted by yAnchor -- confirm
// against the intended layout before reusing.
// NOTE(review): CheckIPPStatus() and mexErrMsgTxt() are called while the IPP
// buffers are still allocated; no ippsFree precedes those calls.
void
SepConv(const Ipp32f *pSrcBuffer, Ipp32f *pDstBuffer, IppiSize *size, Ipp32f *hc, int Nc, Ipp32f *hr, int Nr){
int sizerow, sizecol, i;int maxKernelSize = (Nc > Nr) ? Nc : Nr;Ipp32f **ppDst, **ppSrc, *pTmp;
Ipp8u *pBufferCol, *pBufferRow;
IppStatus status;
//int xAnchor = (Nr >> 1) + 1;int yAnchor = (Nc >> 1) + 1;// allocate temporary dst buffer
ppDst = (Ipp32f**)ippsMalloc_8u(size->width*size->height*
sizeof(Ipp32f) + (size->height)*sizeof(Ipp32f*));ppSrc = (Ipp32f**)ippsMalloc_8u((size->height+maxKernelSize)*
sizeof(Ipp32f*));// size of temporary buffers
status = ippiFilterRowBorderPipelineGetBufferSize_32f_C1R(*size, maxKernelSize, &sizerow);
CheckIPPStatus(status);
status = ippiFilterColumnPipelineGetBufferSize_32f_C1R(*size, maxKernelSize, &sizecol);
CheckIPPStatus(status);
// allocate temporary buffers
pBufferCol = ippsMalloc_8u(sizecol);
if (pBufferCol == NULL) mexErrMsgTxt("IPP error: failed to allocate column buffer");pBufferRow = ippsMalloc_8u(sizerow);
if (pBufferRow == NULL) mexErrMsgTxt("IPP error: failed to allocate row buffer");// organize dst buffer (the offset takes care of the double pointer structure)
pTmp = (Ipp32f*)(ppDst+size->height);
for(i = 0; iheight; i++, pTmp += size->width) for(i = 0; i < yAnchor; i++){
ppDst = pTmp;
ppSrc[i+yAnchor] = pTmp;
}
{
// topppSrc = ppSrc[yAnchor];
// bottomppSrc[i + size->height] = ppSrc[size->height-1];
}
// perform the convolutions
status = ippiFilterRowBorderPipeline_32f_C1R((
const Ipp32f*) pSrcBuffer, size->width*sizeof(Ipp32f), ppDst, *size, hc, Nc, yAnchor, ippBorderRepl, 0, pBufferRow);CheckIPPStatus(status);
status = ippiFilterColumnPipeline_32f_C1R((
const Ipp32f **)ppSrc, pDstBuffer, size->width*sizeof(Ipp32f), *size, hr, Nr, pBufferCol);CheckIPPStatus(status);
ippsFree(ppSrc);
ippsFree(ppDst);
ippsFree(pBufferCol);
ippsFree(pBufferRow);
}
#endif
The above function is called as:
// in version for the different byte ordering between Matlab and IPPIppiSize size;
size.height = width;
size.width = height;
SepConv((Ipp32f *)f, (Ipp32f *)g, &size, (Ipp32f *)hc, Nc, (Ipp32f *)hr, Nr);
Again thanks in advance for any useful insight,
Marco
Hi,
Example 9-4 (p 9-55 of IPP manual vol2, Jan 2007) described how to use ippiFilterRow/ColumnBorderPipeline functions to calculate the separable convolution without the intermediate buffer for the whole image.
The ring buffer (the double pointer) for convolved rows is used there. You should define the border type for the row convolution because there is no data outside the image. But the border rows for the column convolution are formed manually, so you do not need an extra argument for them.
Eg for the 3x3 convolution with replicate border you need to replicate the pointer to the first convolved row.
Thanks,
Alexander
Dear Alexander,
thanks for your prompt response. I must say that I found Example 9-4 extremely cryptic (30 lines of dense code without a single comment...). However, I came up with a routine that seems to achieve the task. Note that there are a bunch of tricks to handle kernel sizes that can be even or odd. The flipping of the kernel is meant to make my routine match the conv2 routine of Matlab (and for the same reason one might notice an "inversion" between rows and columns, since Matlab uses column-major ordering of the data, as in Fortran).
I hope that this can be of some help. I would appreciate any help from the community as far as bugs/improvements are concerned.
#ifdef MEX_CONV2_SEP_SINGLE_USE_IPP
// Report a failed IPP call to MATLAB.
//
// status: value returned by an IPP routine. ippStsNoErr is a no-op; any other
//         value is printed through ippGetStatusString() and then raised with
//         mexErrMsgTxt().
//
// Declared `static inline`: in C99 and later a plain `inline` definition does
// not emit an out-of-line copy of the function, so a call that the compiler
// decides not to inline can fail to link.
static inline
void CheckIPPStatus(IppStatus status){
    if(status != ippStsNoErr){
        mexPrintf(" status = %s ", ippGetStatusString(status));
        mexErrMsgTxt("IPP error");
    }
}
// Adapted by Marco Zuliani (zuliani@mayachitra.com) from:
//
// http://softwarecommunity.intel.com/isn/Community/en-US/forums/5482632/PostAttachment.aspx
//
// INTEL CORPORATION PROPRIETARY INFORMATION
// This software is supplied under the terms of a license agreement or
// nondisclosure agreement with Intel Corporation and may not be copied
// or disclosed except in accordance with the terms of that agreement.
// Copyright (c) 2005 Intel Corporation. All Rights Reserved.
//
// Separable 2D convolution example
//
// SepConv: separable 2D convolution of a single-precision image with IPP.
//
//   pSrcBuffer  source image, size->height rows of size->width floats, densely
//               packed (row step = size->width * sizeof(Ipp32f))
//   pDstBuffer  destination image, same layout as the source
//   size        image size as seen by IPP
//   hc, Nc      kernel (and its length) applied by the row pass
//   hr, Nr      kernel (and its length) applied by the column pass
//
// Both kernels are reversed before being handed to IPP. The row pass runs
// ippiFilterRowBorderPipeline_32f_C1R with anchor (Nc/2 - co) and a constant
// zero border, writing into an intermediate image addressed through ppDst.
// The column pass runs ippiFilterColumnPipeline_32f_C1R over ppSrc, a table
// of height + Nr - 1 row pointers: the intermediate rows shifted by
// (Nr/2 - ro), framed by border entries that alias the first / last
// intermediate row. co and ro are 1 for even-length kernels, 0 for odd ones.
//
// Every temporary is released before a failure is reported through
// CheckIPPStatus(), so an error raised there does not leak IPP memory.
//
// NOTE(review): the row pass pads with zeros while the column border
// replicates the edge rows; kept as in the original listing -- confirm that
// this mix is what the caller expects.
void
SepConv(const Ipp32f *pSrcBuffer, Ipp32f *pDstBuffer, IppiSize *size, Ipp32f *hc, int Nc, Ipp32f *hr, int Nr){
    const int height = size->height;
    const int width = size->width;
    const int rowBytes = (int)(sizeof(Ipp32f) * width);
    // kernel semisizes
    const int Ncss = Nc >> 1;
    const int Nrss = Nr >> 1;
    // kernel offsets (0 -> odd length, 1 -> even length)
    const int co = 1 - (Nc % 2);
    const int ro = 1 - (Nr % 2);
    int sizerow = 0, sizecol = 0, i, j;
    Ipp32f **ppDst = NULL, **ppSrc = NULL, *pTmp = NULL;
    Ipp32f *hc_flipped = NULL, *hr_flipped = NULL;
    Ipp8u *pBufferCol = NULL, *pBufferRow = NULL;
    IppStatus status = ippStsNoErr;

    if(height <= 0 || width <= 0 || Nc <= 0 || Nr <= 0){
        status = ippStsSizeErr;
        goto cleanup;
    }

    // flipped copies of the kernels (ippsMalloc_32f takes an element count)
    hc_flipped = ippsMalloc_32f(Nc);
    hr_flipped = ippsMalloc_32f(Nr);
    // row-pointer table followed by the intermediate image itself
    ppDst = (Ipp32f**)ippsMalloc_8u((int)(sizeof(Ipp32f)*width*height + sizeof(Ipp32f*)*height));
    // the column pass reads height + Nr - 1 row pointers, so size by Nr
    ppSrc = (Ipp32f**)ippsMalloc_8u((int)(sizeof(Ipp32f*)*(height + Nr)));
    if(hc_flipped == NULL || hr_flipped == NULL || ppDst == NULL || ppSrc == NULL){
        status = ippStsMemAllocErr;
        goto cleanup;
    }
    for(i = 0; i < Nc; i++) hc_flipped[i] = hc[Nc-i-1];
    for(j = 0; j < Nr; j++) hr_flipped[j] = hr[Nr-j-1];

    // size of temporary buffers
    status = ippiFilterRowBorderPipelineGetBufferSize_32f_C1R(*size, Nc, &sizerow);
    if(status != ippStsNoErr) goto cleanup;
    status = ippiFilterColumnPipelineGetBufferSize_32f_C1R(*size, Nr, &sizecol);
    if(status != ippStsNoErr) goto cleanup;

    // allocate temporary buffers
    pBufferCol = ippsMalloc_8u(sizecol);
    pBufferRow = ippsMalloc_8u(sizerow);
    if(pBufferCol == NULL || pBufferRow == NULL){
        status = ippStsMemAllocErr;
        goto cleanup;
    }

    // organize dst buffer: the image data starts right after the pointer table
    pTmp = (Ipp32f*)(ppDst + height);
    for(i = 0; i < height; i++, pTmp += width){
        ppDst[i] = pTmp;
        ppSrc[i+Nrss-ro] = pTmp;
    }
    // organize replicate border for ippiFilterColumnPipeline_32f_C1R
    for(j = 0; j < Nrss; j++){
        // top
        ppSrc[j] = ppSrc[Nrss-ro];
        // bottom
        ppSrc[height-j+Nr-2] = ppDst[height-1];
    }

    // perform the actual convolutions
    status = ippiFilterRowBorderPipeline_32f_C1R(pSrcBuffer, rowBytes, ppDst, *size,
                                                 hc_flipped, Nc, Ncss-co, ippBorderConst, 0, pBufferRow);
    if(status != ippStsNoErr) goto cleanup;
    status = ippiFilterColumnPipeline_32f_C1R((const Ipp32f**)ppSrc, pDstBuffer, rowBytes, *size,
                                              hr_flipped, Nr, pBufferCol);

cleanup:
    // release the memory first: CheckIPPStatus() does not return on failure
    if(hc_flipped) ippsFree(hc_flipped);
    if(hr_flipped) ippsFree(hr_flipped);
    if(ppSrc) ippsFree(ppSrc);
    if(ppDst) ippsFree(ppDst);
    if(pBufferCol) ippsFree(pBufferCol);
    if(pBufferRow) ippsFree(pBufferRow);
    CheckIPPStatus(status);
}
#endif
Hi Jay
Thanks a lot for sharing your improved version of the convolution code. I was wondering if you could check a couple of things:
1] Should the line
ppSrc = new Ipp32f*[roiSize.height + (Nrss * 2) - co];
read instead as:
ppSrc = new Ipp32f*[roiSize.height + (Ncss * 2) - co];
so that the loop:
for( int ii=0,jj=roiSize.height+Ncss;ii
ppSrc[ii] = pTmp + ii * tmpw;
ppSrc[jj] = pTmp + jj * tmpw;
(...)
will not exceed the boundaries?
2] Should the lines:
// size of temporary buffers
if( sts = ippiFilterRowBorderPipelineGetBufferSize_32f_C1R( roiSize, Nc, &sizerow) )
return sts;
if( sts = ippiFilterColumnPipelineGetBufferSize_32f_C1R( roiSize, Nr, &sizecol) )
return sts;
Hi Jay
Thanks a lot for sharing your improved version of the convolution code. I was wondering if you could check a couple of things:
1] Should the line
ppSrc = new Ipp32f*[roiSize.height + (Nrss * 2) - co];
read instead as:
ppSrc = new Ipp32f*[roiSize.height + (Ncss * 2) - co];
so that the loop:
for( int ii=0,jj=roiSize.height+Ncss;ii
ppSrc[ii] = pTmp + ii * tmpw;
ppSrc[jj] = pTmp + jj * tmpw;
(...)
will not exceed the boundaries?
2] Should the lines:
// size of temporary buffers
if( sts = ippiFilterRowBorderPipelineGetBufferSize_32f_C1R( roiSize, Nc, &sizerow) )
return sts;
if( sts = ippiFilterColumnPipelineGetBufferSize_32f_C1R( roiSize, Nr, &sizecol) )
return sts;
[cpp]/*********************************************************************************
Copyright(C) 2004-2009, Riverain Medical Group LLC. All Rights Reserved.
This is UNPUBLISHED PROPRIETARY SOURCE CODE of Riverain Medical Group, LLC.
The contents of this file may not be disclosed to third parties, copied or
duplicated in any form, in whole or in part, for use or transmittal, without
the prior written permission of Riverain Medical Group LLC
**********************************************************************************/
/*!
* file IppSepFilter.cpp
* brief Implementation of Separable Filter with IPP
* author J. Schamus, jschamus@riverainmedical.com
*/
# pragma once
# include
// Row major version
// Separable filter, row-major variant. From the visible code: both kernels
// are reversed with ippsFlip_32f, a temporary image of roiSize.width x
// (roiSize.height + Nc + 1) is allocated (and filled with val when padType is
// CONSTANT), work-buffer sizes are queried for a row pass with Nr taps and a
// column pass with Nc taps, and the column pass reads the row table ppSrc and
// writes pDst. On the normal path all temporaries are freed and sts is
// returned.
//
// NOTE(review): the listing is damaged by text extraction. Everything between
// the start of the "organize dst buffer" loop and the error handler of the
// row pass is missing (loop body, border set-up, the row-filter call), so the
// function does not compile as shown.
// NOTE(review): ppSrc and ppDst hold Ipp32f* entries but are sized with
// ippsMalloc_32f, i.e. in units of Ipp32f; where pointers are wider than
// Ipp32f the tables look undersized -- confirm.
// NOTE(review): none of the throw statements is preceded by a free of the
// buffers allocated so far; whether CATCH_AUTO releases them cannot be seen
// from here.
// NOTE(review): tmpHeight and tmpWidth are not used in the visible code.
static inline IppStatus IppSepFilterRC(
Ipp32f* pDst, // Destination Image
const int& dstStep, // Destination step
const Ipp32f* pSrc, // Source Image
const int& srcStep, // Source step
const IppiSize& roiSize, // Source/Destination size
const Ipp32f* hr, // Row filter
const int& Nr, // Row filter size
const Ipp32f* hc, // Column filter
const int& Nc, // Column filter size
const PadType& padType = CONSTANT, // Padding type
const Ipp32f& val = 0.0f ) // Value to use with CONSTANT padding
{
TRY_AUTO
{
IppStatus sts;
int sizerow, sizecol;
Ipp32f *pTmp = NULL;
Ipp32f *pTmpLocal = NULL;
Ipp8u *pBufferCol = NULL, *pBufferRow = NULL;
// flip the kernels and align the memory to please IPP
Ipp32f *hc_flipped = (Ipp32f *)ippsMalloc_32f( Nc );
Ipp32f *hr_flipped = (Ipp32f *)ippsMalloc_32f( Nr );
ippsFlip_32f((const Ipp32f*)hc, hc_flipped, Nc );
ippsFlip_32f((const Ipp32f*)hr, hr_flipped, Nr );
// compute the kernel semisizes
int Ncss = Nc >> 1;
int Nrss = Nr >> 1;
// compute the kernel offsets (0 -> odd, 1 -> even)
int co = 1 - ( Nc % 2 );
int ro = 1 - ( Nr % 2 );
// allocate temporary dst buffer
int tmpStep;
int tmpw;
// The IPP filter functions seem to need 1 more row allocated
// than is obvious or they sometimes crash.
int tmpHeight = roiSize.height+Nc+1;
int tmpWidth = roiSize.width;
if( !( pTmpLocal = ippiMalloc_32f_C1( roiSize.width, roiSize.height + Nc + 1, &tmpStep ) ) )
throw exception( "nIppSepFilterRC, mem-alloc error. " );
pTmp = pTmpLocal;
// row pitch of the temporary image, in Ipp32f elements
tmpw = tmpStep / sizeof(Ipp32f);
Ipp32f **ppSrc, **ppDst;
ppSrc = (Ipp32f**) ippsMalloc_32f( roiSize.height + Nc + 1 );
ppDst = (Ipp32f**) ippsMalloc_32f( roiSize.height );
if( padType == CONSTANT )
{
IppiSize tmpSize;
tmpSize.height = roiSize.height + Nc + 1;
tmpSize.width = roiSize.width;
ippiSet_32f_C1R( val, pTmp, tmpStep, tmpSize );
}
// size of temporary buffers
if( sts = ippiFilterRowBorderPipelineGetBufferSize_32f_C1R( roiSize, Nr, &sizerow) )
throw exception( "nIppSepFilterRC, ipp-row-mem-size error. ");
if( sts = ippiFilterColumnPipelineGetBufferSize_32f_C1R( roiSize, Nc, &sizecol) )
throw exception( "nIppSepFilterRC, ipp-col-mem-size error. ");
// allocate temporary buffers
if( !( pBufferCol = ippsMalloc_8u( sizecol ) ) )
throw exception( "nIppSepFilterRC, ipp-col-temp mem-alloc error. ");
if( !( pBufferRow = ippsMalloc_8u( sizerow ) ) )
throw exception( "nIppSepFilterRC, ipp-row-temp mem-alloc error. ");
// even-length kernels use a semisize reduced by one
Nrss -= ro;
Ncss -= co;
// organize dst buffer
// NOTE(review): the next line is where the listing is truncated; the rest of
// the loop, the border set-up and the row-filter call are missing.
for( int ii=0,jj=Ncss;ii<< "IPP Error: " << ippGetStatusString( sts ) << endl;
throw exception( "nIppSepFilterRC, ipp-row-filter error. ");
return sts;
}
if( sts = ippiFilterColumnPipeline_32f_C1R( (const Ipp32f**)ppSrc, pDst, dstStep,
roiSize, hc_flipped, Nc, pBufferCol) )
{
cout << "IPP Error: " << ippGetStatusString( sts ) << endl;
throw exception( "nIppSepFilterRC, ipp-column-filter error. ");
return sts;
}
// normal path: release every temporary
if( ppSrc ) { ippsFree(ppSrc); ppSrc = NULL; }
if( ppDst ) { ippsFree(ppDst); ppDst = NULL; }
if( pTmpLocal ) { ippiFree(pTmpLocal); pTmpLocal = NULL; }
if( pBufferCol ) { ippsFree(pBufferCol); pBufferCol = NULL; }
if( pBufferRow ) { ippsFree(pBufferRow); pBufferRow = NULL; }
if( hr_flipped ) { ippsFree(hr_flipped); hr_flipped = NULL; }
if( hc_flipped ) { ippsFree(hc_flipped); hc_flipped = NULL; }
return sts;
}
CATCH_AUTO
}
//
//
// Column major version
// Separable filter, column pass first. The column kernel hc is applied with
// ippiFilterColumnPipeline_32f_C1R into a temporary image, then the row kernel
// hr is applied with ippiFilterRowBorderPipeline_32f_C1R into pDst. Both
// kernels are reversed with ippsFlip_32f before use.
//
// The column pass reads a table of roiSize.height + Nc - 1 row pointers: the
// source rows framed by border entries chosen by padType (CONSTANT rows filled
// with val, or REPLICATE / SYMMETRIC / CIRCULAR aliases of source rows). The
// row pass uses the matching IPP border type.
//
// Returns ippStsNoErr on success and ippStsPaddingSchemeErr for an unknown
// padType; allocation and IPP failures are thrown as exception. A local guard
// object releases every temporary on all exit paths, including throws.
//
// NOTE(review): SYMMETRIC and CIRCULAR alias source rows up to Nc/2 away from
// the image edge, so they presumably require roiSize.height >= Nc/2 -- confirm.
static inline IppStatus IppSepFilterCR(
    Ipp32f* pDst, // Destination Image
    const int& dstStep, // Destination step
    const Ipp32f* pSrc, // Source Image
    const int& srcStep, // Source step
    const IppiSize& roiSize, // Source/Destination size
    const Ipp32f* hc, // Column filter
    const int& Nc, // Column filter size
    const Ipp32f* hr, // Row filter
    const int& Nr, // Row filter size
    const PadType& padType = CONSTANT, // Padding type
    const Ipp32f& val = 0.0f ) // Value to use with CONSTANT padding
{
    TRY_AUTO
    {
        // Owns the temporaries; the destructor frees whatever was allocated.
        struct Scratch
        {
            Ipp32f *pTmp, *pPad, *hc_flipped, *hr_flipped;
            Ipp32f **ppSrc, **ppDst;
            Ipp8u *pBufferCol, *pBufferRow;
            Scratch()
                : pTmp(NULL), pPad(NULL), hc_flipped(NULL), hr_flipped(NULL),
                  ppSrc(NULL), ppDst(NULL), pBufferCol(NULL), pBufferRow(NULL) {}
            ~Scratch()
            {
                if( ppSrc ) ippsFree(ppSrc);
                if( ppDst ) ippsFree(ppDst);
                if( pTmp ) ippiFree(pTmp);   // ippiMalloc pairs with ippiFree
                if( pPad ) ippiFree(pPad);
                if( pBufferCol ) ippsFree(pBufferCol);
                if( pBufferRow ) ippsFree(pBufferRow);
                if( hr_flipped ) ippsFree(hr_flipped);
                if( hc_flipped ) ippsFree(hc_flipped);
            }
        } buf;

        IppStatus sts;
        int sizerow, sizecol;

        // flip the kernels and align the memory to please IPP
        buf.hc_flipped = ippsMalloc_32f( Nc );
        buf.hr_flipped = ippsMalloc_32f( Nr );
        if( !buf.hc_flipped || !buf.hr_flipped )
            throw exception( "nIppSepFilterCR mem-alloc error." );
        ippsFlip_32f( hc, buf.hc_flipped, Nc );
        ippsFlip_32f( hr, buf.hr_flipped, Nr );

        // compute the kernel semisizes
        int Ncss = Nc >> 1;
        int Nrss = Nr >> 1;
        // compute the kernel offsets (0 -> odd, 1 -> even)
        const int co = 1 - ( Nc % 2 );
        const int ro = 1 - ( Nr % 2 );

        // allocate temporary dst buffer
        // The IPP filter functions seem to need 1 more row allocated
        // than is obvious or they sometimes crash.
        int tmpStep = 0;
        IppiSize tmpSize;
        tmpSize.width = roiSize.width; tmpSize.height = roiSize.height + Nc + 1;
        if( !( buf.pTmp = ippiMalloc_32f_C1( tmpSize.width, tmpSize.height, &tmpStep ) ) )
            throw exception( "nIppSepFilterCR mem-alloc error." );
        const int srcw = srcStep / sizeof(Ipp32f);
        const int dstw = dstStep / sizeof(Ipp32f);
        ippiSet_32f_C1R( 0.0f, buf.pTmp, tmpStep, tmpSize );

        // Only need pad space for CONSTANT (and only if the kernel has a border)
        int padStep = 0, padw = 0;
        const int padRows = (Ncss*2) - co;
        if( padType == CONSTANT && padRows > 0 )
        {
            IppiSize padSize;
            padSize.height = padRows; padSize.width = roiSize.width;
            if( !( buf.pPad = ippiMalloc_32f_C1( padSize.width, padSize.height, &padStep ) ) )
                throw exception( "nIppSepFilterCR mem-alloc error." );
            padw = padStep / sizeof(Ipp32f);
            ippiSet_32f_C1R( val, buf.pPad, padStep, padSize );
        }

        // row-pointer tables: sized in pointers, not in Ipp32f elements
        buf.ppSrc = (Ipp32f**) ippsMalloc_8u( (int)( sizeof(Ipp32f*) * ( roiSize.height + Nc + 1 ) ) );
        buf.ppDst = (Ipp32f**) ippsMalloc_8u( (int)( sizeof(Ipp32f*) * roiSize.height ) );
        if( !buf.ppSrc || !buf.ppDst )
            throw exception( "nIppSepFilterCR mem-alloc error." );
        Ipp32f **ppSrc = buf.ppSrc, **ppDst = buf.ppDst;

        // size of temporary buffers
        if( ( sts = ippiFilterRowBorderPipelineGetBufferSize_32f_C1R( roiSize, Nr, &sizerow) ) != ippStsNoErr )
            throw exception( "nIppSepFilterCR, ipp-row-mem-size error. ");
        if( ( sts = ippiFilterColumnPipelineGetBufferSize_32f_C1R( roiSize, Nc, &sizecol) ) != ippStsNoErr )
            throw exception( "nIppSepFilterCR, ipp-col-mem-size error. ");
        // allocate temporary buffers
        if( !( buf.pBufferCol = ippsMalloc_8u( sizecol ) ) )
            throw exception( "nIppSepFilterCR, ipp-col-mem-alloc error. ");
        if( !( buf.pBufferRow = ippsMalloc_8u( sizerow ) ) )
            throw exception( "nIppSepFilterCR, ipp-row-mem-alloc error. ");

        // even-length kernels use a semisize reduced by one
        Nrss -= ro;
        Ncss -= co;
        const int lastBorder = roiSize.height + (Ncss*2); // extra bottom row, even Nc only

        // organize dst buffer: source rows sit after the Ncss top border rows
        for( int ii = 0, jj = Ncss; ii < roiSize.height; ++ii, ++jj )
        {
            ppDst[ii] = pDst + ii * dstw;
            ppSrc[jj] = (Ipp32f *)pSrc + ii * srcw;
        }

        // fill the Ncss top entries (ii) and Ncss bottom entries (jj)
        IppiBorderType borderType;
        switch( padType )
        {
        case CONSTANT:
            for( int ii = 0, jj = roiSize.height + Ncss; ii < Ncss; ++ii, ++jj )
            {
                ppSrc[ii] = buf.pPad + ii * padw;
                ppSrc[jj] = buf.pPad + (ii + Ncss) * padw;
            }
            if( co )
            {
                // pad rows are padw apart (not srcw)
                ppSrc[lastBorder] = buf.pPad + (Ncss*2) * padw;
            }
            borderType = ippBorderConst;
            break;
        case REPLICATE:
            for( int ii = 0, jj = roiSize.height + Ncss; ii < Ncss; ++ii, ++jj )
            {
                ppSrc[ii] = ppSrc[Ncss];
                ppSrc[jj] = ppSrc[roiSize.height+Ncss-1];
            }
            if( co )
            {
                ppSrc[lastBorder] = ppSrc[roiSize.height+Ncss-1];
            }
            borderType = ippBorderRepl;
            break;
        case SYMMETRIC:
            for( int ii = 0, jj = roiSize.height + Ncss; ii < Ncss; ++ii, ++jj )
            {
                ppSrc[ii] = ppSrc[(Ncss*2)-ii-1];
                ppSrc[jj] = ppSrc[roiSize.height+Ncss-ii-1];
            }
            if( co )
            {
                ppSrc[lastBorder] = ppSrc[roiSize.height-1];
            }
            borderType = ippBorderMirrorR;
            break;
        case CIRCULAR:
            for( int ii = 0, jj = roiSize.height + Ncss; ii < Ncss; ++ii, ++jj )
            {
                ppSrc[ii] = ppSrc[roiSize.height+ii];
                ppSrc[jj] = ppSrc[ii+Ncss];
            }
            if( co )
            {
                ppSrc[lastBorder] = ppSrc[Ncss*2];
            }
            borderType = ippBorderWrap;
            break;
        default:
            // ippStsBorderErr missing from ippdefs.h
            return ippStsPaddingSchemeErr;
        }

        // perform the actual convolutions
        if( ( sts = ippiFilterColumnPipeline_32f_C1R( (const Ipp32f**)ppSrc, buf.pTmp, tmpStep,
                roiSize, buf.hc_flipped, Nc, buf.pBufferCol) ) != ippStsNoErr )
        {
            cout << "IPP Error: " << ippGetStatusString( sts ) << endl;
            throw exception( "nIppSepFilterCR col-filter error." );
        }
        if( ( sts = ippiFilterRowBorderPipeline_32f_C1R( (const Ipp32f*)buf.pTmp, tmpStep,
                ppDst, roiSize, buf.hr_flipped, Nr, Nrss, borderType, val, buf.pBufferRow) ) != ippStsNoErr )
        {
            cout << "IPP Error: " << ippGetStatusString( sts ) << endl;
            throw exception( "nIppSepFilterCR row-filter error." );
        }
        return sts;
    }
    CATCH_AUTO
}
[/cpp]
[cpp]//
//
// Column major version
//
// Separable filter, column pass first. The column kernel hc is applied with
// ippiFilterColumnPipeline_32f_C1R into a temporary image, then the row kernel
// hr is applied with ippiFilterRowBorderPipeline_32f_C1R into pDst. Both
// kernels are reversed with ippsFlip_32f before use.
//
// The column pass reads a table of roiSize.height + Nc - 1 row pointers: the
// source rows framed by border entries chosen by padType (CONSTANT rows filled
// with val, or REPLICATE / SYMMETRIC / CIRCULAR aliases of source rows). The
// row pass uses the matching IPP border type.
//
// Returns ippStsNoErr on success and ippStsPaddingSchemeErr for an unknown
// padType; allocation and IPP failures are thrown as exception. A local guard
// object releases every temporary on all exit paths, including throws.
//
// NOTE(review): SYMMETRIC and CIRCULAR alias source rows up to Nc/2 away from
// the image edge, so they presumably require roiSize.height >= Nc/2 -- confirm.
static inline IppStatus IppSepFilterCR(
    Ipp32f* pDst, // Destination Image
    const int& dstStep, // Destination step
    const Ipp32f* pSrc, // Source Image
    const int& srcStep, // Source step
    const IppiSize& roiSize, // Source/Destination size
    const Ipp32f* hc, // Column filter
    const int& Nc, // Column filter size
    const Ipp32f* hr, // Row filter
    const int& Nr, // Row filter size
    const PadType& padType = CONSTANT, // Padding type
    const Ipp32f& val = 0.0f ) // Value to use with CONSTANT padding
{
    TRY_AUTO
    {
        // Owns the temporaries; the destructor frees whatever was allocated.
        struct Scratch
        {
            Ipp32f *pTmp, *pPad, *hc_flipped, *hr_flipped;
            Ipp32f **ppSrc, **ppDst;
            Ipp8u *pBufferCol, *pBufferRow;
            Scratch()
                : pTmp(NULL), pPad(NULL), hc_flipped(NULL), hr_flipped(NULL),
                  ppSrc(NULL), ppDst(NULL), pBufferCol(NULL), pBufferRow(NULL) {}
            ~Scratch()
            {
                if( ppSrc ) ippsFree(ppSrc);
                if( ppDst ) ippsFree(ppDst);
                if( pTmp ) ippiFree(pTmp);   // ippiMalloc pairs with ippiFree
                if( pPad ) ippiFree(pPad);
                if( pBufferCol ) ippsFree(pBufferCol);
                if( pBufferRow ) ippsFree(pBufferRow);
                if( hr_flipped ) ippsFree(hr_flipped);
                if( hc_flipped ) ippsFree(hc_flipped);
            }
        } buf;

        IppStatus sts;
        int sizerow, sizecol;

        // flip the kernels and align the memory to please IPP
        buf.hc_flipped = ippsMalloc_32f( Nc );
        buf.hr_flipped = ippsMalloc_32f( Nr );
        if( !buf.hc_flipped || !buf.hr_flipped )
            throw exception( "nIppSepFilterCR mem-alloc error." );
        ippsFlip_32f( hc, buf.hc_flipped, Nc );
        ippsFlip_32f( hr, buf.hr_flipped, Nr );

        // compute the kernel semisizes
        int Ncss = Nc >> 1;
        int Nrss = Nr >> 1;
        // compute the kernel offsets (0 -> odd, 1 -> even)
        const int co = 1 - ( Nc % 2 );
        const int ro = 1 - ( Nr % 2 );

        // allocate temporary dst buffer
        // The IPP filter functions seem to need 1 more row allocated
        // than is obvious or they sometimes crash.
        int tmpStep = 0;
        IppiSize tmpSize;
        tmpSize.width = roiSize.width; tmpSize.height = roiSize.height + Nc + 1;
        if( !( buf.pTmp = ippiMalloc_32f_C1( tmpSize.width, tmpSize.height, &tmpStep ) ) )
            throw exception( "nIppSepFilterCR mem-alloc error." );
        const int srcw = srcStep / sizeof(Ipp32f);
        const int dstw = dstStep / sizeof(Ipp32f);
        ippiSet_32f_C1R( 0.0f, buf.pTmp, tmpStep, tmpSize );

        // Only need pad space for CONSTANT (and only if the kernel has a border)
        int padStep = 0, padw = 0;
        const int padRows = (Ncss*2) - co;
        if( padType == CONSTANT && padRows > 0 )
        {
            IppiSize padSize;
            padSize.height = padRows; padSize.width = roiSize.width;
            if( !( buf.pPad = ippiMalloc_32f_C1( padSize.width, padSize.height, &padStep ) ) )
                throw exception( "nIppSepFilterCR mem-alloc error." );
            padw = padStep / sizeof(Ipp32f);
            ippiSet_32f_C1R( val, buf.pPad, padStep, padSize );
        }

        // row-pointer tables: sized in pointers, not in Ipp32f elements
        buf.ppSrc = (Ipp32f**) ippsMalloc_8u( (int)( sizeof(Ipp32f*) * ( roiSize.height + Nc + 1 ) ) );
        buf.ppDst = (Ipp32f**) ippsMalloc_8u( (int)( sizeof(Ipp32f*) * roiSize.height ) );
        if( !buf.ppSrc || !buf.ppDst )
            throw exception( "nIppSepFilterCR mem-alloc error." );
        Ipp32f **ppSrc = buf.ppSrc, **ppDst = buf.ppDst;

        // size of temporary buffers
        if( ( sts = ippiFilterRowBorderPipelineGetBufferSize_32f_C1R( roiSize, Nr, &sizerow) ) != ippStsNoErr )
            throw exception( "nIppSepFilterCR, ipp-row-mem-size error. ");
        if( ( sts = ippiFilterColumnPipelineGetBufferSize_32f_C1R( roiSize, Nc, &sizecol) ) != ippStsNoErr )
            throw exception( "nIppSepFilterCR, ipp-col-mem-size error. ");
        // allocate temporary buffers
        if( !( buf.pBufferCol = ippsMalloc_8u( sizecol ) ) )
            throw exception( "nIppSepFilterCR, ipp-col-mem-alloc error. ");
        if( !( buf.pBufferRow = ippsMalloc_8u( sizerow ) ) )
            throw exception( "nIppSepFilterCR, ipp-row-mem-alloc error. ");

        // even-length kernels use a semisize reduced by one
        Nrss -= ro;
        Ncss -= co;
        const int lastBorder = roiSize.height + (Ncss*2); // extra bottom row, even Nc only

        // organize dst buffer: source rows sit after the Ncss top border rows
        for( int ii = 0, jj = Ncss; ii < roiSize.height; ++ii, ++jj )
        {
            ppDst[ii] = pDst + ii * dstw;
            ppSrc[jj] = (Ipp32f *)pSrc + ii * srcw;
        }

        // fill the Ncss top entries (ii) and Ncss bottom entries (jj)
        IppiBorderType borderType;
        switch( padType )
        {
        case CONSTANT:
            for( int ii = 0, jj = roiSize.height + Ncss; ii < Ncss; ++ii, ++jj )
            {
                ppSrc[ii] = buf.pPad + ii * padw;
                ppSrc[jj] = buf.pPad + (ii + Ncss) * padw;
            }
            if( co )
            {
                // pad rows are padw apart (not srcw)
                ppSrc[lastBorder] = buf.pPad + (Ncss*2) * padw;
            }
            borderType = ippBorderConst;
            break;
        case REPLICATE:
            for( int ii = 0, jj = roiSize.height + Ncss; ii < Ncss; ++ii, ++jj )
            {
                ppSrc[ii] = ppSrc[Ncss];
                ppSrc[jj] = ppSrc[roiSize.height+Ncss-1];
            }
            if( co )
            {
                ppSrc[lastBorder] = ppSrc[roiSize.height+Ncss-1];
            }
            borderType = ippBorderRepl;
            break;
        case SYMMETRIC:
            for( int ii = 0, jj = roiSize.height + Ncss; ii < Ncss; ++ii, ++jj )
            {
                ppSrc[ii] = ppSrc[(Ncss*2)-ii-1];
                ppSrc[jj] = ppSrc[roiSize.height+Ncss-ii-1];
            }
            if( co )
            {
                ppSrc[lastBorder] = ppSrc[roiSize.height-1];
            }
            borderType = ippBorderMirrorR;
            break;
        case CIRCULAR:
            for( int ii = 0, jj = roiSize.height + Ncss; ii < Ncss; ++ii, ++jj )
            {
                ppSrc[ii] = ppSrc[roiSize.height+ii];
                ppSrc[jj] = ppSrc[ii+Ncss];
            }
            if( co )
            {
                ppSrc[lastBorder] = ppSrc[Ncss*2];
            }
            borderType = ippBorderWrap;
            break;
        default:
            // ippStsBorderErr missing from ippdefs.h
            return ippStsPaddingSchemeErr;
        }

        // perform the actual convolutions
        if( ( sts = ippiFilterColumnPipeline_32f_C1R( (const Ipp32f**)ppSrc, buf.pTmp, tmpStep,
                roiSize, buf.hc_flipped, Nc, buf.pBufferCol) ) != ippStsNoErr )
        {
            cout << "IPP Error: " << ippGetStatusString( sts ) << endl;
            throw exception( "nIppSepFilterCR col-filter error." );
        }
        if( ( sts = ippiFilterRowBorderPipeline_32f_C1R( (const Ipp32f*)buf.pTmp, tmpStep,
                ppDst, roiSize, buf.hr_flipped, Nr, Nrss, borderType, val, buf.pBufferRow) ) != ippStsNoErr )
        {
            cout << "IPP Error: " << ippGetStatusString( sts ) << endl;
            throw exception( "nIppSepFilterCR row-filter error." );
        }
        return sts;
    }
    CATCH_AUTO
}
[/cpp]
[cpp]//
//
// Column major version
//
// Separable filter, column pass first. The column kernel hc is applied with
// ippiFilterColumnPipeline_32f_C1R into a temporary image, then the row kernel
// hr is applied with ippiFilterRowBorderPipeline_32f_C1R into pDst. Both
// kernels are reversed with ippsFlip_32f before use.
//
// The column pass reads a table of roiSize.height + Nc - 1 row pointers: the
// source rows framed by border entries chosen by padType (CONSTANT rows filled
// with val, or REPLICATE / SYMMETRIC / CIRCULAR aliases of source rows). The
// row pass uses the matching IPP border type.
//
// Returns ippStsNoErr on success and ippStsPaddingSchemeErr for an unknown
// padType; allocation and IPP failures are thrown as exception. A local guard
// object releases every temporary on all exit paths, including throws.
//
// NOTE(review): SYMMETRIC and CIRCULAR alias source rows up to Nc/2 away from
// the image edge, so they presumably require roiSize.height >= Nc/2 -- confirm.
static inline IppStatus IppSepFilterCR(
    Ipp32f* pDst, // Destination Image
    const int& dstStep, // Destination step
    const Ipp32f* pSrc, // Source Image
    const int& srcStep, // Source step
    const IppiSize& roiSize, // Source/Destination size
    const Ipp32f* hc, // Column filter
    const int& Nc, // Column filter size
    const Ipp32f* hr, // Row filter
    const int& Nr, // Row filter size
    const PadType& padType = CONSTANT, // Padding type
    const Ipp32f& val = 0.0f ) // Value to use with CONSTANT padding
{
    TRY_AUTO
    {
        // Owns the temporaries; the destructor frees whatever was allocated.
        struct Scratch
        {
            Ipp32f *pTmp, *pPad, *hc_flipped, *hr_flipped;
            Ipp32f **ppSrc, **ppDst;
            Ipp8u *pBufferCol, *pBufferRow;
            Scratch()
                : pTmp(NULL), pPad(NULL), hc_flipped(NULL), hr_flipped(NULL),
                  ppSrc(NULL), ppDst(NULL), pBufferCol(NULL), pBufferRow(NULL) {}
            ~Scratch()
            {
                if( ppSrc ) ippsFree(ppSrc);
                if( ppDst ) ippsFree(ppDst);
                if( pTmp ) ippiFree(pTmp);   // ippiMalloc pairs with ippiFree
                if( pPad ) ippiFree(pPad);
                if( pBufferCol ) ippsFree(pBufferCol);
                if( pBufferRow ) ippsFree(pBufferRow);
                if( hr_flipped ) ippsFree(hr_flipped);
                if( hc_flipped ) ippsFree(hc_flipped);
            }
        } buf;

        IppStatus sts;
        int sizerow, sizecol;

        // flip the kernels and align the memory to please IPP
        buf.hc_flipped = ippsMalloc_32f( Nc );
        buf.hr_flipped = ippsMalloc_32f( Nr );
        if( !buf.hc_flipped || !buf.hr_flipped )
            throw exception( "nIppSepFilterCR mem-alloc error." );
        ippsFlip_32f( hc, buf.hc_flipped, Nc );
        ippsFlip_32f( hr, buf.hr_flipped, Nr );

        // compute the kernel semisizes
        int Ncss = Nc >> 1;
        int Nrss = Nr >> 1;
        // compute the kernel offsets (0 -> odd, 1 -> even)
        const int co = 1 - ( Nc % 2 );
        const int ro = 1 - ( Nr % 2 );

        // allocate temporary dst buffer
        // The IPP filter functions seem to need 1 more row allocated
        // than is obvious or they sometimes crash.
        int tmpStep = 0;
        IppiSize tmpSize;
        tmpSize.width = roiSize.width; tmpSize.height = roiSize.height + Nc + 1;
        if( !( buf.pTmp = ippiMalloc_32f_C1( tmpSize.width, tmpSize.height, &tmpStep ) ) )
            throw exception( "nIppSepFilterCR mem-alloc error." );
        const int srcw = srcStep / sizeof(Ipp32f);
        const int dstw = dstStep / sizeof(Ipp32f);
        ippiSet_32f_C1R( 0.0f, buf.pTmp, tmpStep, tmpSize );

        // Only need pad space for CONSTANT (and only if the kernel has a border)
        int padStep = 0, padw = 0;
        const int padRows = (Ncss*2) - co;
        if( padType == CONSTANT && padRows > 0 )
        {
            IppiSize padSize;
            padSize.height = padRows; padSize.width = roiSize.width;
            if( !( buf.pPad = ippiMalloc_32f_C1( padSize.width, padSize.height, &padStep ) ) )
                throw exception( "nIppSepFilterCR mem-alloc error." );
            padw = padStep / sizeof(Ipp32f);
            ippiSet_32f_C1R( val, buf.pPad, padStep, padSize );
        }

        // row-pointer tables: sized in pointers, not in Ipp32f elements
        buf.ppSrc = (Ipp32f**) ippsMalloc_8u( (int)( sizeof(Ipp32f*) * ( roiSize.height + Nc + 1 ) ) );
        buf.ppDst = (Ipp32f**) ippsMalloc_8u( (int)( sizeof(Ipp32f*) * roiSize.height ) );
        if( !buf.ppSrc || !buf.ppDst )
            throw exception( "nIppSepFilterCR mem-alloc error." );
        Ipp32f **ppSrc = buf.ppSrc, **ppDst = buf.ppDst;

        // size of temporary buffers
        if( ( sts = ippiFilterRowBorderPipelineGetBufferSize_32f_C1R( roiSize, Nr, &sizerow) ) != ippStsNoErr )
            throw exception( "nIppSepFilterCR, ipp-row-mem-size error. ");
        if( ( sts = ippiFilterColumnPipelineGetBufferSize_32f_C1R( roiSize, Nc, &sizecol) ) != ippStsNoErr )
            throw exception( "nIppSepFilterCR, ipp-col-mem-size error. ");
        // allocate temporary buffers
        if( !( buf.pBufferCol = ippsMalloc_8u( sizecol ) ) )
            throw exception( "nIppSepFilterCR, ipp-col-mem-alloc error. ");
        if( !( buf.pBufferRow = ippsMalloc_8u( sizerow ) ) )
            throw exception( "nIppSepFilterCR, ipp-row-mem-alloc error. ");

        // even-length kernels use a semisize reduced by one
        Nrss -= ro;
        Ncss -= co;
        const int lastBorder = roiSize.height + (Ncss*2); // extra bottom row, even Nc only

        // organize dst buffer: source rows sit after the Ncss top border rows
        for( int ii = 0, jj = Ncss; ii < roiSize.height; ++ii, ++jj )
        {
            ppDst[ii] = pDst + ii * dstw;
            ppSrc[jj] = (Ipp32f *)pSrc + ii * srcw;
        }

        // fill the Ncss top entries (ii) and Ncss bottom entries (jj)
        IppiBorderType borderType;
        switch( padType )
        {
        case CONSTANT:
            for( int ii = 0, jj = roiSize.height + Ncss; ii < Ncss; ++ii, ++jj )
            {
                ppSrc[ii] = buf.pPad + ii * padw;
                ppSrc[jj] = buf.pPad + (ii + Ncss) * padw;
            }
            if( co )
            {
                // pad rows are padw apart (not srcw)
                ppSrc[lastBorder] = buf.pPad + (Ncss*2) * padw;
            }
            borderType = ippBorderConst;
            break;
        case REPLICATE:
            for( int ii = 0, jj = roiSize.height + Ncss; ii < Ncss; ++ii, ++jj )
            {
                ppSrc[ii] = ppSrc[Ncss];
                ppSrc[jj] = ppSrc[roiSize.height+Ncss-1];
            }
            if( co )
            {
                ppSrc[lastBorder] = ppSrc[roiSize.height+Ncss-1];
            }
            borderType = ippBorderRepl;
            break;
        case SYMMETRIC:
            for( int ii = 0, jj = roiSize.height + Ncss; ii < Ncss; ++ii, ++jj )
            {
                ppSrc[ii] = ppSrc[(Ncss*2)-ii-1];
                ppSrc[jj] = ppSrc[roiSize.height+Ncss-ii-1];
            }
            if( co )
            {
                ppSrc[lastBorder] = ppSrc[roiSize.height-1];
            }
            borderType = ippBorderMirrorR;
            break;
        case CIRCULAR:
            for( int ii = 0, jj = roiSize.height + Ncss; ii < Ncss; ++ii, ++jj )
            {
                ppSrc[ii] = ppSrc[roiSize.height+ii];
                ppSrc[jj] = ppSrc[ii+Ncss];
            }
            if( co )
            {
                ppSrc[lastBorder] = ppSrc[Ncss*2];
            }
            borderType = ippBorderWrap;
            break;
        default:
            // ippStsBorderErr missing from ippdefs.h
            return ippStsPaddingSchemeErr;
        }

        // perform the actual convolutions
        if( ( sts = ippiFilterColumnPipeline_32f_C1R( (const Ipp32f**)ppSrc, buf.pTmp, tmpStep,
                roiSize, buf.hc_flipped, Nc, buf.pBufferCol) ) != ippStsNoErr )
        {
            throw exception( "nIppSepFilterCR col-filter error." );
        }
        if( ( sts = ippiFilterRowBorderPipeline_32f_C1R( (const Ipp32f*)buf.pTmp, tmpStep,
                ppDst, roiSize, buf.hr_flipped, Nr, Nrss, borderType, val, buf.pBufferRow) ) != ippStsNoErr )
        {
            throw exception( "nIppSepFilterCR row-filter error." );
        }
        return sts;
    }
    CATCH_AUTO
}
[/cpp]
[cpp]//
//
// Column major version
static inline IppStatus IppSepFilterCR(
Ipp32f* pDst, // Destination Image
const int& dstStep, // Destination step
const Ipp32f* pSrc, // Source Image
const int& srcStep, // Source step
const IppiSize& roiSize, // Source/Destination size
const Ipp32f* hc, // Column filter
const int& Nc, // Column filter size
const Ipp32f* hr, // Row filter
const int& Nr, // Row filter size
const PadType& padType = CONSTANT, // Padding type
const Ipp32f& val = 0.0f ) // Value to use with CONSTANT padding
{
TRY_AUTO
{
IppStatus sts;
int sizerow, sizecol;
Ipp32f *pTmp = NULL, *pPad = NULL;
Ipp8u *pBufferCol = NULL, *pBufferRow = NULL;
// flip the kernels and align the memory to please IPP
Ipp32f *hc_flipped = (Ipp32f *)ippsMalloc_32f( Nc );
Ipp32f *hr_flipped = (Ipp32f *)ippsMalloc_32f( Nr );
ippsFlip_32f((const Ipp32f*)hc, hc_flipped, Nc );
ippsFlip_32f((const Ipp32f*)hr, hr_flipped, Nr );
// compute the kernel semisizes
int Ncss = Nc >> 1;
int Nrss = Nr >> 1;
// compute the kernel offsets (0 -> odd, 1 -> even)
int co = 1 - ( Nc % 2 );
int ro = 1 - ( Nr % 2 );
// allocate temporary dst buffer
int tmpStep, padStep;
// The IPP filter functions seem to need 1 more row allocated
// than is obvious or they sometimes crash.
IppiSize tmpSize;
tmpSize.width = roiSize.width; tmpSize.height = roiSize.height + Nc + 1;
if( !( pTmp = ippiMalloc_32f_C1( tmpSize.width, tmpSize.height, &tmpStep ) ) )
throw exception( "nIppSepFilterCR mem-alloc error." );
int srcw = srcStep / sizeof(Ipp32f);
int dstw = dstStep / sizeof(Ipp32f);
int tmpw = tmpStep / sizeof(Ipp32f);
ippiSet_32f_C1R( 0.0f, pTmp, tmpStep, tmpSize );
int padw;
IppiSize padSize;
// Only need pad space for CONSTANT
if( padType == CONSTANT )
{
if( !( pPad = ippiMalloc_32f_C1( roiSize.width, (Ncss*2) - co, &padStep ) ) )
throw exception( "nIppSepFilterCR mem-alloc error." );
padw = padStep / sizeof(Ipp32f);
padSize.height = (Ncss*2) - co; padSize.width = roiSize.width;
ippiSet_32f_C1R( val, pPad, padStep, padSize );
}
Ipp32f **ppSrc, **ppDst;
ppSrc = (Ipp32f**) ippsMalloc_32f( roiSize.height + Nc + 1 );
ppDst = (Ipp32f**) ippsMalloc_32f( roiSize.height );
// size of temporary buffers
if( sts = ippiFilterRowBorderPipelineGetBufferSize_32f_C1R( roiSize, Nr, &sizerow) )
throw exception( "nIppSepFilterCR, ipp-row-mem-size error. ");
if( sts = ippiFilterColumnPipelineGetBufferSize_32f_C1R( roiSize, Nc, &sizecol) )
throw exception( "nIppSepFilterCR, ipp-row-mem-size error. ");
// allocate temporary buffers
if( !( pBufferCol = ippsMalloc_8u( sizecol ) ) )
throw exception( "nIppSepFilterCR, ipp-col-mem-alloc error. ");
if( !( pBufferRow = ippsMalloc_8u( sizerow ) ) )
throw exception( "nIppSepFilterCR, ipp-row-mem-alloc error. ");
Nrss -= ro;
Ncss -= co;
// organize dst buffer
for( int ii = 0, jj = Ncss; ii < roiSize.height; ++ii, ++jj )
{
ppDst[ii] = pDst + ii * dstw;
ppSrc[jj] = (Ipp32f *)pSrc + ii * srcw;
}
IppiBorderType borderType;
switch( padType )
{
case CONSTANT:
for( int ii=0,jj=roiSize.height+Ncss;ii,++jj )
{
ppSrc[ii] = pPad + ii * padw;
ppSrc[jj] = pPad + (ii + Ncss) * padw;
}
if( co )
{
ppSrc[roiSize.height+(Ncss*2)] = pPad + (Ncss*2) * srcw;
}
borderType = ippBorderConst;
break;
case REPLICATE:
for( int ii=0,jj=roiSize.height+Ncss;ii{
ppSrc[ii] = ppSrc[Ncss];
ppSrc[jj] = ppSrc[roiSize.height+Ncss-1];
}
if( co )
{
ppSrc[roiSize.height+(Ncss*2)] = ppSrc[roiSize.height+Ncss-1];
}
borderType = ippBorderRepl;
break;
case SYMMETRIC:
for( int ii=0,jj=roiSize.height+Ncss;ii{
ppSrc[ii] = ppSrc[(Ncss*2)-ii-1];
ppSrc[jj] = ppSrc[roiSize.height+Ncss-ii-1];
}
if( co )
{
ppSrc[roiSize.height+(Ncss*2)] = ppSrc[roiSize.height-1];
}
borderType = ippBorderMirrorR;
break;
case CIRCULAR:
for( int ii=0,jj=roiSize.height+Ncss;ii{
ppSrc[ii] = ppSrc[roiSize.height+ii];
ppSrc[jj] = ppSrc[ii+Ncss];
}
if( co )
{
ppSrc[roiSize.height+(Ncss*2)] = ppSrc[Ncss*2];
}
borderType = ippBorderWrap;
break;
default:
// ippStsBorderErr missing from ippdefs.h
return ippStsPaddingSchemeErr;
}
// perform the actual convolutions
if( sts = ippiFilterColumnPipeline_32f_C1R( (const Ipp32f**)ppSrc, pTmp, tmpStep,
roiSize, hc_flipped, Nc, pBufferCol) )
{
//cout << "IPP Error: " << ippGetStatusString( sts ) << endl;
throw exception( "nIppSepFilterCR col-filter error." );
return sts;
}
if( sts = ippiFilterRowBorderPipeline_32f_C1R( (const Ipp32f*)pTmp, tmpStep,
ppDst, roiSize, hr_flipped, Nr, Nrss, borderType, val, pBufferRow) )
{
//cout << "IPP Error: " << ippGetStatusString( sts ) << endl;
throw exception( "nIppSepFilterCR row-filter error." );
return sts;
}
if( ppSrc ) { ippsFree(ppSrc); ppSrc = NULL; }
if( ppDst ) { ippsFree(ppDst); ppDst = NULL; }
if( pTmp ) { ippiFree(pTmp); pTmp = NULL; }
if( pPad ) { ippsFree(pPad); pPad = NULL; };
if( pBufferCol ) { ippsFree(pBufferCol); pBufferCol = NULL; };
if( pBufferRow ) { ippsFree(pBufferRow); pBufferRow = NULL; };
if( hr_flipped ) { ippsFree(hr_flipped); hr_flipped = NULL; };
if( hc_flipped ) { ippsFree(hc_flipped); hc_flipped = NULL; };
return sts;
}
CATCH_AUTO
}
[/cpp]
[cpp]// Row major version
//
// Separable 2-D filter, rows first: the row kernel hr is applied to pSrc and
// the result is written into a temporary image; the column kernel hc is then
// applied to that temporary image and the result is written to pDst.
//
// Both kernels are copied and reversed (ippsFlip_32f) before being handed to
// the IPP pipeline functions.
//
// Returns the last IPP status (ippStsNoErr on success), or
// ippStsPaddingSchemeErr for an unknown padType. Allocation failures and IPP
// errors are reported by throwing exception(...) inside TRY_AUTO/CATCH_AUTO.
// All scratch memory is released on every exit path, including throws.
static inline IppStatus IppSepFilterRC( Ipp32f* pDst,                      // Destination Image
                                        const int& dstStep,                // Destination step
                                        const Ipp32f* pSrc,                // Source Image
                                        const int& srcStep,                // Source step
                                        const IppiSize& roiSize,           // Source/Destination size
                                        const Ipp32f* hr,                  // Row filter
                                        const int& Nr,                     // Row filter size
                                        const Ipp32f* hc,                  // Column filter
                                        const int& Nc,                     // Column filter size
                                        const PadType& padType = CONSTANT, // Padding type
                                        const Ipp32f& val = 0.0f )         // Value to use with CONSTANT padding
{
    TRY_AUTO
    {
        // Owns every scratch allocation so that throws and early returns
        // cannot leak; the destructor runs on all exit paths.
        struct Scratch
        {
            Ipp32f  *pTmp;
            Ipp32f  *hcFlipped;
            Ipp32f  *hrFlipped;
            Ipp32f **ppSrc;
            Ipp32f **ppDst;
            Ipp8u   *pBufferCol;
            Ipp8u   *pBufferRow;

            Scratch()
                : pTmp( NULL ), hcFlipped( NULL ), hrFlipped( NULL ),
                  ppSrc( NULL ), ppDst( NULL ),
                  pBufferCol( NULL ), pBufferRow( NULL ) {}

            ~Scratch()
            {
                if( ppSrc )      ippsFree( ppSrc );
                if( ppDst )      ippsFree( ppDst );
                if( pTmp )       ippiFree( pTmp );
                if( pBufferCol ) ippsFree( pBufferCol );
                if( pBufferRow ) ippsFree( pBufferRow );
                if( hrFlipped )  ippsFree( hrFlipped );
                if( hcFlipped )  ippsFree( hcFlipped );
            }
        } s;

        IppStatus sts;
        int sizerow, sizecol;

        // flip the kernels and align the memory to please IPP
        s.hcFlipped = ippsMalloc_32f( Nc );
        s.hrFlipped = ippsMalloc_32f( Nr );
        if( !s.hcFlipped || !s.hrFlipped )
            throw exception( "nIppSepFilterRC, mem-alloc error. " );
        ippsFlip_32f( hc, s.hcFlipped, Nc );
        ippsFlip_32f( hr, s.hrFlipped, Nr );

        // compute the kernel semisizes
        int Ncss = Nc >> 1;
        int Nrss = Nr >> 1;

        // compute the kernel offsets (0 -> odd, 1 -> even)
        const int co = 1 - ( Nc % 2 );
        const int ro = 1 - ( Nr % 2 );

        // allocate temporary dst buffer
        // The IPP filter functions seem to need 1 more row allocated
        // than is obvious or they sometimes crash.
        IppiSize tmpSize;
        tmpSize.width  = roiSize.width;
        tmpSize.height = roiSize.height + Nc + 1;

        int tmpStep;
        if( !( s.pTmp = ippiMalloc_32f_C1( tmpSize.width, tmpSize.height, &tmpStep ) ) )
            throw exception( "nIppSepFilterRC, mem-alloc error. " );
        const int tmpw = tmpStep / sizeof(Ipp32f); // row pitch of pTmp in floats

        // Row-pointer tables. They hold pointers, so they are sized with
        // sizeof(Ipp32f*): ippsMalloc_32f(n) only provides 4 bytes per entry,
        // which is too small when pointers are 8 bytes wide.
        s.ppSrc = (Ipp32f**) ippsMalloc_8u( tmpSize.height * (int)sizeof(Ipp32f*) );
        s.ppDst = (Ipp32f**) ippsMalloc_8u( roiSize.height * (int)sizeof(Ipp32f*) );
        if( !s.ppSrc || !s.ppDst )
            throw exception( "nIppSepFilterRC, mem-alloc error. " );

        // For CONSTANT padding the border rows live in pTmp itself, so fill
        // the whole temporary image with the pad value up front.
        if( padType == CONSTANT )
            ippiSet_32f_C1R( val, s.pTmp, tmpStep, tmpSize );

        // size of temporary buffers
        sts = ippiFilterRowBorderPipelineGetBufferSize_32f_C1R( roiSize, Nr, &sizerow );
        if( sts )
            throw exception( "nIppSepFilterRC, ipp-row-mem-size error. ");
        sts = ippiFilterColumnPipelineGetBufferSize_32f_C1R( roiSize, Nc, &sizecol );
        if( sts )
            throw exception( "nIppSepFilterRC, ipp-col-mem-size error. ");

        // allocate temporary buffers
        if( !( s.pBufferCol = ippsMalloc_8u( sizecol ) ) )
            throw exception( "nIppSepFilterRC, ipp-col-temp mem-alloc error. ");
        if( !( s.pBufferRow = ippsMalloc_8u( sizerow ) ) )
            throw exception( "nIppSepFilterRC, ipp-row-temp mem-alloc error. ");

        // Even-sized kernels use one border row/column less before the image.
        Nrss -= ro;
        Ncss -= co;

        Ipp32f **ppSrc = s.ppSrc;
        Ipp32f **ppDst = s.ppDst;
        Ipp32f  *pTmp  = s.pTmp;

        // NOTE(review): the loop bounds below were reconstructed from a
        // damaged listing. They are chosen so that the table holds Ncss
        // border rows, roiSize.height image rows, Ncss border rows and (for
        // even Nc) one extra row, i.e. roiSize.height + Nc - 1 entries.

        // organize dst buffer: the row filter writes image row ii into pTmp
        // row ii+Ncss, and the column filter reads it back from there.
        for( int ii = 0, jj = Ncss; ii < roiSize.height; ++ii, ++jj )
        {
            ppDst[ii] = pTmp + jj * tmpw;
            ppSrc[jj] = pTmp + jj * tmpw;
        }

        // organize the top/bottom border rows for the column filter
        IppiBorderType borderType;
        switch( padType )
        {
        case CONSTANT:
            // border rows are the pre-filled rows of pTmp
            for( int ii = 0, jj = roiSize.height + Ncss; ii < Ncss; ++ii, ++jj )
            {
                ppSrc[ii] = pTmp + ii * tmpw;
                ppSrc[jj] = pTmp + jj * tmpw;
            }
            if( co )
            {
                ppSrc[roiSize.height+(Ncss*2)] = pTmp + (roiSize.height+(Ncss*2)) * tmpw;
            }
            borderType = ippBorderConst;
            break;
        case REPLICATE:
            // border rows alias the first / last image row
            for( int ii = 0, jj = roiSize.height + Ncss; ii < Ncss; ++ii, ++jj )
            {
                ppSrc[ii] = ppSrc[Ncss];
                ppSrc[jj] = ppSrc[roiSize.height+Ncss-1];
            }
            if( co )
            {
                ppSrc[roiSize.height+(Ncss*2)] = ppSrc[roiSize.height+Ncss-1];
            }
            borderType = ippBorderRepl;
            break;
        case SYMMETRIC:
            // border rows alias image rows in reversed order
            for( int ii = 0, jj = roiSize.height + Ncss; ii < Ncss; ++ii, ++jj )
            {
                ppSrc[ii] = ppSrc[(Ncss*2)-ii-1];
                ppSrc[jj] = ppSrc[roiSize.height+Ncss-ii-1];
            }
            if( co )
            {
                ppSrc[roiSize.height+(Ncss*2)] = ppSrc[roiSize.height-1];
            }
            borderType = ippBorderMirrorR;
            break;
        case CIRCULAR:
            // top border aliases the last image rows, bottom border the first
            for( int ii = 0, jj = roiSize.height + Ncss; ii < Ncss; ++ii, ++jj )
            {
                ppSrc[ii] = ppSrc[roiSize.height+ii];
                ppSrc[jj] = ppSrc[ii+Ncss];
            }
            if( co )
            {
                ppSrc[roiSize.height+(Ncss*2)] = ppSrc[Ncss*2];
            }
            borderType = ippBorderWrap;
            break;
        default:
            // ippStsBorderErr missing from ippdefs.h
            return ippStsPaddingSchemeErr;
        }

        // perform the actual convolutions
        sts = ippiFilterRowBorderPipeline_32f_C1R( (const Ipp32f*)pSrc, srcStep,
                ppDst, roiSize, s.hrFlipped, Nr, Nrss, borderType, val, s.pBufferRow );
        if( sts )
        {
            cout << "IPP Error: " << ippGetStatusString( sts ) << endl;
            throw exception( "nIppSepFilterRC, ipp-row-filter error. ");
        }

        sts = ippiFilterColumnPipeline_32f_C1R( (const Ipp32f**)ppSrc, pDst, dstStep,
                roiSize, s.hcFlipped, Nc, s.pBufferCol );
        if( sts )
        {
            cout << "IPP Error: " << ippGetStatusString( sts ) << endl;
            throw exception( "nIppSepFilterRC, ipp-column-filter error. ");
        }

        return sts;
    }
    CATCH_AUTO
}
[/cpp]
What is wrong with the following code? It crashes only when I call IppRowFilter multiple times.
// Row-only filter: reverses the kernel hr into a scratch copy and runs
// ippiFilterRowBorderPipeline_32f_C1R with anchor 0 and replicated borders.
//
// Returns the IPP status of the first failing step, ippStsNoMemErr when a
// scratch allocation fails, or the status of the filter call otherwise.
// All scratch memory is released before returning.
static inline IppStatus IppRowFilter( Ipp32f* pDst,            // Destination Image
                                      const int dstStep,       // Destination step
                                      const Ipp32f* pSrc,      // Source Image
                                      const int srcStep,       // Source step
                                      const IppiSize dstSize,  // Destination size
                                      const Ipp32f* hr,        // Row filter
                                      const int Nr)            // Row filter size
{
    // Everything is declared up front so that no "goto cleanup" jumps over
    // an initialization.
    IppStatus sts;
    int sizerow;
    Ipp8u *pBufferRow = NULL;
    Ipp32f *hr_flipped = NULL;
    Ipp32f **ppDst = NULL;
    IppiSize srcSize = { dstSize.width + Nr - 1, dstSize.height };

    // ippsMalloc_32f takes an element count, not a byte count.
    hr_flipped = ippsMalloc_32f(Nr);
    // The row table stores pointers, so size it with sizeof(Ipp32f*); this
    // stays correct when pointers are wider than a float.
    ppDst = (Ipp32f**)ippsMalloc_8u(dstSize.height * (int)sizeof(Ipp32f*));
    if (!hr_flipped || !ppDst)
    {
        sts = ippStsNoMemErr;
        goto cleanup;
    }

    // flip the kernel and align the memory to please IPP
    sts = ippsFlip_32f(hr, hr_flipped, Nr);
    if (sts)
    {
        goto cleanup;
    }

    // size of temporary buffer
    sts = ippiFilterRowBorderPipelineGetBufferSize_32f_C1R(srcSize, Nr, &sizerow);
    if (sts)
    {
        goto cleanup;
    }

    // allocate temporary buffer
    pBufferRow = ippsMalloc_8u(sizerow);
    if (!pBufferRow)
    {
        sts = ippStsNoMemErr;
        goto cleanup;
    }

    // organize dst buffer
    for (int ii = 0; ii < dstSize.height; ii++)
    {
        ppDst[ii] = pDst + ii * (dstStep / sizeof(Ipp32f));
    }

    // perform the actual convolution
    // NOTE(review): the ROI passed here is srcSize, which is Nr-1 pixels
    // wider than dstSize. If IPP writes a full ROI width into each ppDst
    // row, every destination row must have room for dstSize.width + Nr - 1
    // floats or the call writes past the row end -- confirm against the
    // caller's buffers and the IPP documentation.
    sts = ippiFilterRowBorderPipeline_32f_C1R((const Ipp32f*)pSrc, srcStep,
            ppDst, srcSize, hr_flipped, Nr, 0, ippBorderRepl, 0, pBufferRow);

cleanup:
    if (ppDst) { ippsFree(ppDst); ppDst = NULL; }
    if (pBufferRow) { ippsFree(pBufferRow); pBufferRow = NULL; }
    if (hr_flipped) { ippsFree(hr_flipped); hr_flipped = NULL; }
    return sts;
}
Hi,
I can't reproduce crash with your code.
Could you provide your version of IPP and the values of dstSize, srcStep, dstStep, the filter and the filter size?
BR,
Alexander
Hi Piotr,
The code looks OK; there is just one small problem, concerning these two statements (line 21):
Ipp32f **ppDst = (Ipp32f**)ippsMalloc_32f(dstSize.height*sizeof(Ipp32f));
and
ppDst[ii] = pDst + ii * (dstStep / sizeof(Ipp32f));
Here ppDst is a pointer to pointers.
In a 32-bit application this is OK, because sizeof(Ipp32f) = 4 and sizeof(Ipp32f*) = 4.
But in a 64-bit application sizeof(Ipp32f*) = 8, not 4. So, for safety, it is better to change the allocation to:
Ipp32f **ppDst = (Ipp32f**)ippsMalloc_32f(dstSize.height*sizeof(Ipp32f*));
Best Regards,
Ying
For more complete information about compiler optimizations, see our Optimization Notice.