- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Link Copied
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hallo Michael,
Thanks for posting a general example regarding the convolution with separable filters. I am trying to modify your code so that it can be called from within Matlab (in a MEX file). There are a few things that still are not clear to me. I hope you can help me.
I do not clearly understand what this section of the code does:
/* organize dst buffer */
pTmp = (Ipp16s*)(ppDst+size.height);
for(i=0;i
ppSrc[i+2]=pTmp;
}
/* organize replicate border for ippiFilterColumnPipeline_16s_C1R */
/* top */
ppSrc[0]=ppSrc[2];
ppSrc[1]=ppSrc[2];
/* bottom */
ppSrc[i-1+4]=ppDst[size.height-1];
ppSrc[i-2+4]=ppDst[size.height-1];
I understand that you are arranging the pointers to the image borders for the replication, but the whole process is not very clear. I would appreciate some more specific comments in the code so that I can generalize this to any kernel size (I suspect that some coefficients like the 2 and the 4 in ppSrc[2] and ppSrc[i-2+4] are somehow related to the semisize of the filter that you are using in this example, but it is not clear why and how).
I also include my version of the code so that you may spot major mistakes and bugs:
#ifdef MEX_CONV2_SEP_SINGLE_USE_IPP
/* Report a failed IPP call; does nothing when status equals ippStsNoErr. */
inline
void CheckIPPStatus(IppStatus status){
    if (ippStsNoErr == status) {
        return;
    }
    /* Print the IPP status string first, then hand over to mexErrMsgTxt. */
    mexPrintf(" status = %s ", ippGetStatusString(status));
    mexErrMsgTxt("IPP error");
}
// Adapted from:
// http://softwarecommunity.intel.com/isn/Community/en-US/forums/5482632/PostAttachment.aspx
//
// INTEL CORPORATION PROPRIETARY INFORMATION
// This software is supplied under the terms of a license agreement or
// nondisclosure agreement with Intel Corporation and may not be copied
// or disclosed except in accordance with the terms of that agreement.
// Copyright (c) 2005 Intel Corporation. All Rights Reserved.
//
// Separable 2D convolution example
//
/*
 * Separable 2D convolution of a single-precision image with the IPP
 * pipeline filters: a row pass (kernel hc, Nc taps) into a temporary image,
 * followed by a column pass (kernel hr, Nr taps) into pDstBuffer.
 *
 * pSrcBuffer  source image; rows are size->width floats with no padding
 * pDstBuffer  destination image, same layout as the source
 * size        image size
 * hc, Nc      kernel handed to the row pass and its length
 * hr, Nr      kernel handed to the column pass and its length
 *
 * IPP failures and failed allocations are reported through
 * CheckIPPStatus / mexErrMsgTxt.
 *
 * NOTE(review): the border set-up below still hard-codes 2 rows above and
 * 2 rows below the image, which presumably matches a 5-tap column kernel
 * only; it also writes ppSrc[height+3], so it needs maxKernelSize >= 4.
 * Confirm before using other kernel lengths.
 */
void
SepConv(const Ipp32f *pSrcBuffer, Ipp32f *pDstBuffer, IppiSize *size, Ipp32f *hc, int Nc, Ipp32f *hr, int Nr){
    int sizerow, sizecol, i;
    int maxKernelSize = (Nc > Nr) ? Nc : Nr;
    Ipp32f **ppDst, **ppSrc, *pTmp;
    Ipp8u *pBufferCol, *pBufferRow;
    IppStatus status;
    int yAnchor = (Nc >> 1) + 1;

    // allocate temporary dst buffer: size->height row pointers followed by the pixels
    ppDst = (Ipp32f**)ippsMalloc_8u(size->width*size->height*sizeof(Ipp32f) + (size->height)*sizeof(Ipp32f*));
    if (ppDst == NULL) mexErrMsgTxt("IPP error: failed to allocate temporary image");
    // row-pointer table for the column pass (image rows plus border rows)
    ppSrc = (Ipp32f**)ippsMalloc_8u((size->height+maxKernelSize)*sizeof(Ipp32f*));
    if (ppSrc == NULL) mexErrMsgTxt("IPP error: failed to allocate row pointers");

    // size of temporary buffers
    status = ippiFilterRowBorderPipelineGetBufferSize_32f_C1R(*size, maxKernelSize, &sizerow);
    CheckIPPStatus(status);
    status = ippiFilterColumnPipelineGetBufferSize_32f_C1R(*size, maxKernelSize, &sizecol);
    CheckIPPStatus(status);

    // allocate temporary buffers
    pBufferCol = ippsMalloc_8u(sizecol);
    if (pBufferCol == NULL) mexErrMsgTxt("IPP error: failed to allocate column buffer");
    pBufferRow = ippsMalloc_8u(sizerow);
    if (pBufferRow == NULL) mexErrMsgTxt("IPP error: failed to allocate row buffer");

    // organize dst buffer: the pixel storage starts right after the row pointers
    pTmp = (Ipp32f*)(ppDst+size->height);
    for(i = 0; i < size->height; i++, pTmp += size->width){
        ppDst[i] = pTmp;
        ppSrc[i+2] = pTmp;
    }

    // organize replicate border for the column pass
    // top
    ppSrc[0] = ppSrc[2];
    ppSrc[1] = ppSrc[2];
    // bottom (i == size->height after the loop above)
    ppSrc[i-1+4] = ppDst[size->height-1];
    ppSrc[i-2+4] = ppDst[size->height-1];

    // perform the convolution
    status = ippiFilterRowBorderPipeline_32f_C1R((const Ipp32f*) pSrcBuffer, size->width*sizeof(Ipp32f), ppDst, *size, hc, Nc, yAnchor, ippBorderRepl, 0, pBufferRow);
    CheckIPPStatus(status);
    status = ippiFilterColumnPipeline_32f_C1R((const Ipp32f **)ppSrc, pDstBuffer, size->width*sizeof(Ipp32f), *size, hr, Nr, pBufferCol);
    CheckIPPStatus(status);

    ippsFree(ppSrc);
    ippsFree(ppDst);
    ippsFree(pBufferCol);
    ippsFree(pBufferRow);
}
#endif
Thanks in advance for your help,
Marco
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
This is the version of the code that I would expect to generalize the one originally posted by Michael. In particular I modified the section for the organization of the buffers as follows:
// organize dst buffer// (the offset takes care of the double pointer structure)
pTmp = (Ipp32f*)(ppDst+size->height);
for(i = 0; iheight; i++, pTmp += size->width) for(i = 0; i < yAnchor; i++){
ppDst = pTmp;
ppSrc[i+yAnchor] = pTmp;
}
{
// topppSrc = ppSrc[yAnchor];
// bottomppSrc[i + size->height] = ppSrc[size->height-1];
}
Unfortunately performing the second convolution (within the MEX file, ippiFilterColumnPipeline_32f_C1R) the status returned by IPP says Null pointer error
I am reposting the original code for your convenience. Note that hc and hr are the convolution kernels, whose lengths are Nc and Nr respectively.
#ifdef MEX_CONV2_SEP_SINGLE_USE_IPP
/* Report a failed IPP call: print the IPP status string, then call mexErrMsgTxt. */
inline
void CheckIPPStatus(IppStatus status){
    if(status != ippStsNoErr){
        mexPrintf(" status = %s ", ippGetStatusString(status));
        mexErrMsgTxt("IPP error");
    }
}
// Adapted from:
// http://softwarecommunity.intel.com/isn/Community/en-US/forums/5482632/PostAttachment.aspx
//
// INTEL CORPORATION PROPRIETARY INFORMATION
// This software is supplied under the terms of a license agreement or
// nondisclosure agreement with Intel Corporation and may not be copied
// or disclosed except in accordance with the terms of that agreement.
// Copyright (c) 2005 Intel Corporation. All Rights Reserved.
//
// Separable 2D convolution example
//
void
SepConv(const Ipp32f *pSrcBuffer, Ipp32f *pDstBuffer, IppiSize *size, Ipp32f *hc, int Nc, Ipp32f *hr, int Nr){
int sizerow, sizecol, i;int maxKernelSize = (Nc > Nr) ? Nc : Nr;Ipp32f **ppDst, **ppSrc, *pTmp;
Ipp8u *pBufferCol, *pBufferRow;
IppStatus status;
//int xAnchor = (Nr >> 1) + 1;int yAnchor = (Nc >> 1) + 1;// allocate temporary dst buffer
ppDst = (Ipp32f**)ippsMalloc_8u(size->width*size->height*
sizeof(Ipp32f) + (size->height)*sizeof(Ipp32f*));ppSrc = (Ipp32f**)ippsMalloc_8u((size->height+maxKernelSize)*
sizeof(Ipp32f*));// size of temporary buffers
status = ippiFilterRowBorderPipelineGetBufferSize_32f_C1R(*size, maxKernelSize, &sizerow);
CheckIPPStatus(status);
status = ippiFilterColumnPipelineGetBufferSize_32f_C1R(*size, maxKernelSize, &sizecol);
CheckIPPStatus(status);
// allocate temporary buffers
pBufferCol = ippsMalloc_8u(sizecol);
if (pBufferCol == NULL) mexErrMsgTxt("IPP error: failed to allocate column buffer");pBufferRow = ippsMalloc_8u(sizerow);
if (pBufferRow == NULL) mexErrMsgTxt("IPP error: failed to allocate row buffer");// organize dst buffer (the offset takes care of the double pointer structure)
pTmp = (Ipp32f*)(ppDst+size->height);
for(i = 0; iheight; i++, pTmp += size->width) for(i = 0; i < yAnchor; i++){
ppDst = pTmp;
ppSrc[i+yAnchor] = pTmp;
}
{
// topppSrc = ppSrc[yAnchor];
// bottomppSrc[i + size->height] = ppSrc[size->height-1];
}
// perform the convolutions
status = ippiFilterRowBorderPipeline_32f_C1R((
const Ipp32f*) pSrcBuffer, size->width*sizeof(Ipp32f), ppDst, *size, hc, Nc, yAnchor, ippBorderRepl, 0, pBufferRow);CheckIPPStatus(status);
status = ippiFilterColumnPipeline_32f_C1R((
const Ipp32f **)ppSrc, pDstBuffer, size->width*sizeof(Ipp32f), *size, hr, Nr, pBufferCol);CheckIPPStatus(status);
ippsFree(ppSrc);
ippsFree(ppDst);
ippsFree(pBufferCol);
ippsFree(pBufferRow);
}
#endif
The above function is called as:
// in version for the different byte ordering between Matlab and IPPIppiSize size;
size.height = width;
size.width = height;
SepConv((Ipp32f *)f, (Ipp32f *)g, &size, (Ipp32f *)hc, Nc, (Ipp32f *)hr, Nr);
Again thanks in advance for any useful insight,
Marco
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi,
Example 9-4 (p 9-55 of IPP manual vol2, Jan 2007) described how to use ippiFilterRow/ColumnBorderPipeline functions to calculate the separable convolution without the intermediate buffer for the whole image.
The ring buffer (the double pointer) for convolved rows is used there. You should define the border type for the row convolution because there is no data outside the image. But the border rows for the column convolution are formed manually, so you do not need an extra argument for them.
Eg for the 3x3 convolution with replicate border you need to replicate the pointer to the first convolved row.
Thanks,
Alexander
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Dear Alexander,
thanks for your prompt response. I must say that I found Example 9-4 extremely cryptic (30 lines of dense code without a single comment...). However, I came up with a routine that seems to achieve the task. Note that there are a bunch of tricks to handle kernel sizes that can be even or odd. The flipping of the kernel is meant to make my routine match the conv2 routine of Matlab (and for the same reason one might notice an "inversion" between rows and columns, since Matlab uses column-major ordering of the data, as in Fortran).
I hope that this can be of some help. I would appreciate any help from the community as far as bugs/improvements are concerned.
#ifdef MEX_CONV2_SEP_SINGLE_USE_IPP
/* Report a failed IPP call: print the IPP status string, then call mexErrMsgTxt. */
inline
void CheckIPPStatus(IppStatus status){
    if(status != ippStsNoErr){
        mexPrintf(" status = %s ", ippGetStatusString(status));
        mexErrMsgTxt("IPP error");
    }
}
// Adapted by Marco Zuliani (zuliani@mayachitra.com) from:
//
// http://softwarecommunity.intel.com/isn/Community/en-US/forums/5482632/PostAttachment.aspx
//
// INTEL CORPORATION PROPRIETARY INFORMATION
// This software is supplied under the terms of a license agreement or
// nondisclosure agreement with Intel Corporation and may not be copied
// or disclosed except in aNcssordance with the terms of that agreement.
// Copyright (c) 2005 Intel Corporation. All Rights Reserved.
//
// Separable 2D convolution example
//
/*
 * Separable 2D convolution of a single-precision image with the IPP
 * pipeline filters: a row pass into a temporary image followed by a column
 * pass into pDstBuffer. Both kernels are reversed before filtering, and
 * even as well as odd kernel lengths are handled through the co/ro offsets.
 *
 * pSrcBuffer  source image; rows are size->width floats with no padding
 * pDstBuffer  destination image, same layout as the source
 * size        image size
 * hc, Nc      kernel handed to the row pass and its length
 * hr, Nr      kernel handed to the column pass and its length
 *
 * IPP failures and failed allocations are reported through
 * CheckIPPStatus / mexErrMsgTxt.
 *
 * NOTE(review): the row pass uses ippBorderConst with value 0 while the
 * column pass gets replicated border rows; confirm this mix is intended.
 */
void
SepConv(const Ipp32f *pSrcBuffer, Ipp32f *pDstBuffer, IppiSize *size, Ipp32f *hc, int Nc, Ipp32f *hr, int Nr){
    int sizerow, sizecol, i, j;
    Ipp32f **ppDst, **ppSrc, *pTmp;
    Ipp8u *pBufferCol, *pBufferRow;
    IppStatus status;

    // flip the kernels and align the memory to please IPP
    // (ippsMalloc_32f takes an element count, not a byte count)
    Ipp32f *hc_flipped = ippsMalloc_32f(Nc);
    Ipp32f *hr_flipped = ippsMalloc_32f(Nr);
    if (hc_flipped == NULL || hr_flipped == NULL) mexErrMsgTxt("IPP error: failed to allocate flipped kernels");
    for(i = 0; i < Nc; i++) hc_flipped[i] = hc[Nc-i-1];
    for(j = 0; j < Nr; j++) hr_flipped[j] = hr[Nr-j-1];

    // compute the kernel semisizes
    int Ncss = Nc >> 1;
    int Nrss = Nr >> 1;
    // compute the kernel offsets (0 -> odd, 1 -> even)
    int co = 1-(Nc%2);
    int ro = 1-(Nr%2);

    // allocate temporary dst buffer: size->height row pointers followed by the pixels
    ppDst = (Ipp32f**)ippsMalloc_8u(size->width*size->height*sizeof(Ipp32f)+(size->height)*sizeof(Ipp32f*));
    if (ppDst == NULL) mexErrMsgTxt("IPP error: failed to allocate temporary image");
    // The column pass indexes ppSrc up to size->height+Nr-2, so the table is
    // sized from the column kernel length Nr (it used to be sized from Nc).
    ppSrc = (Ipp32f**)ippsMalloc_8u((size->height+Nr)*sizeof(Ipp32f*));
    if (ppSrc == NULL) mexErrMsgTxt("IPP error: failed to allocate row pointers");

    // size of temporary buffers
    status = ippiFilterRowBorderPipelineGetBufferSize_32f_C1R(*size, Nc, &sizerow);
    CheckIPPStatus(status);
    status = ippiFilterColumnPipelineGetBufferSize_32f_C1R(*size, Nr, &sizecol);
    CheckIPPStatus(status);

    // allocate temporary buffers
    pBufferCol = ippsMalloc_8u(sizecol);
    if (pBufferCol == NULL) mexErrMsgTxt("IPP error: failed to allocate column buffer");
    pBufferRow = ippsMalloc_8u(sizerow);
    if (pBufferRow == NULL) mexErrMsgTxt("IPP error: failed to allocate row buffer");

    // organize dst buffer: the pixel storage starts right after the row pointers
    pTmp = (Ipp32f*)(ppDst + size->height);
    for(i = 0; i < size->height; i++, pTmp += size->width){
        ppDst[i] = pTmp;
        ppSrc[i+Nrss-ro] = pTmp;
    }

    // organize replicate border for ippiFilterColumnPipeline_32f_C1R
    // (i == size->height after the loop above)
    for(j = 0; j < Nrss; j++){
        // top
        ppSrc[j] = ppSrc[Nrss-ro];
        // bottom
        ppSrc[i-j+Nr-2] = ppDst[size->height-1];
    }

    // perform the actual convolutions
    status = ippiFilterRowBorderPipeline_32f_C1R((const Ipp32f*) pSrcBuffer, size->width*sizeof(Ipp32f), ppDst, *size, hc_flipped, Nc, Ncss-co, ippBorderConst, 0, pBufferRow);
    CheckIPPStatus(status);
    status = ippiFilterColumnPipeline_32f_C1R((const Ipp32f**)ppSrc, pDstBuffer, size->width*sizeof(Ipp32f), *size, hr_flipped, Nr, pBufferCol);
    CheckIPPStatus(status);

    // release some memory
    ippsFree(hc_flipped);
    ippsFree(hr_flipped);
    ippsFree(ppSrc);
    ippsFree(ppDst);
    ippsFree(pBufferCol);
    ippsFree(pBufferRow);
}
#endif
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Copyright(C) 2004-2009, Riverain Medical Group LLC. All Rights Reserved.
This is UNPUBLISHED PROPRIETARY SOURCE CODE of Riverain Medical Group, LLC.
The contents of this file may not be disclosed to third parties, copied or
duplicated in any form, in whole or in part, for use or transmittal, without
the prior written permission of Riverain Medical Group LLC
**********************************************************************************/
/*!
* file IppSepFilter.cpp
* brief Implementation of Separable Filter with IPP
* author J. Schamus, jschamus@riverainmedical.com
*/
# pragma once
# include "Image.h"
// Separable 2D filter built on the IPP pipeline functions: a row pass
// (kernel hr, Nr taps) from pSrc into a temporary image, then a column pass
// (kernel hc, Nc taps) from that temporary image into pDst.
//
// The temporary image holds the filtered rows plus the border rows needed by
// the column pass; how the border rows are filled depends on padType.
//
// Returns ippStsNoErr on success, ippStsMemAllocErr when an allocation
// fails, ippStsPaddingSchemeErr for an unknown padType, or the status
// reported by the failing IPP call. All temporaries are released on every
// path.
static inline IppStatus ippSepFilter(
    const Ipp32f* pSrc,                 // Source Image
    const int srcStep,                  // Source step
    Ipp32f* pDst,                       // Destination Image
    const int dstStep,                  // Destination step
    const IppiSize roiSize,             // Source/Destination size
    const Ipp32f* hc,                   // Column filter
    const int Nc,                       // Column filter size
    const Ipp32f* hr,                   // Row filter
    const int Nr,                       // Row filter size
    const PadType& padType = CONSTANT,  // Padding type
    const Ipp32f val = 0.0f )           // Value to use with CONSTANT padding
{
    // Everything is declared up front so that "goto cleanup" never jumps
    // over an initialization.
    IppStatus sts = ippStsNoErr;
    Ipp8u *pBufferCol = NULL, *pBufferRow = NULL;
    Ipp32f *pTmp = NULL;
    Ipp32f **ppDst = NULL, **ppSrc = NULL;
    int sizerow = 0, sizecol = 0;
    int tmpStep = 0, tmpw = 0;
    IppiSize tmpSize;
    IppiBorderType borderType = ippBorderConst;

    // compute the kernel semisizes
    int Ncss = Nc >> 1;
    int Nrss = Nr >> 1;

    // compute the kernel offsets (0 -> odd, 1 -> even)
    int co = 1 - ( Nc % 2 );
    int ro = 1 - ( Nr % 2 );

    // allocate temporary dst buffer
    pTmp = ippiMalloc_32f_C1( roiSize.width, roiSize.height + (Ncss * 2), &tmpStep );
    if( !pTmp ) { sts = ippStsMemAllocErr; goto cleanup; }
    tmpw = tmpStep / sizeof(Ipp32f);
    tmpSize.height = roiSize.height + (Ncss * 2) - co;
    tmpSize.width = roiSize.width;
    ippiSet_32f_C1R( 0.0f, pTmp, tmpStep, tmpSize );

    // Row-pointer tables. ppSrc feeds the column pass, so its length follows
    // the column kernel (Ncss), not the row kernel.
    ppDst = (Ipp32f**) ippsMalloc_8u( roiSize.height * sizeof(Ipp32f*) );
    ppSrc = (Ipp32f**) ippsMalloc_8u( (roiSize.height + (Ncss * 2) - co) * sizeof(Ipp32f*) );
    if( !ppDst || !ppSrc ) { sts = ippStsMemAllocErr; goto cleanup; }

    // size of temporary buffers (row pass uses Nr taps, column pass Nc taps)
    sts = ippiFilterRowBorderPipelineGetBufferSize_32f_C1R( roiSize, Nr, &sizerow );
    if( sts != ippStsNoErr ) goto cleanup;
    sts = ippiFilterColumnPipelineGetBufferSize_32f_C1R( roiSize, Nc, &sizecol );
    if( sts != ippStsNoErr ) goto cleanup;

    // allocate temporary buffers
    pBufferCol = ippsMalloc_8u( sizecol );
    pBufferRow = ippsMalloc_8u( sizerow );
    if( !pBufferCol || !pBufferRow ) { sts = ippStsMemAllocErr; goto cleanup; }

    Nrss -= ro;
    Ncss -= co;

    // organize dst buffer: the row pass writes row ii into temporary row
    // ii+Ncss, leaving Ncss border rows above it for the column pass
    for( int ii = 0, jj = Ncss; ii < roiSize.height; ++ii, ++jj )
    {
        ppDst[ii] = pTmp + jj * tmpw;
        ppSrc[jj] = pTmp + jj * tmpw;
    }

    switch( padType )
    {
    case CONSTANT:
        // border rows are real rows of the temporary image filled with val
        for( int ii = 0, jj = roiSize.height + Ncss; ii < Ncss; ++ii, ++jj )
        {
            ppSrc[ii] = pTmp + ii * tmpw;
            ppSrc[jj] = pTmp + jj * tmpw;
            if( val )
            {
                ippsSet_32f( val, ppSrc[ii], roiSize.width );
                ippsSet_32f( val, ppSrc[jj], roiSize.width );
            }
            else
            {
                ippsZero_32f( ppSrc[ii], roiSize.width );
                ippsZero_32f( ppSrc[jj], roiSize.width );
            }
        }
        if( co )
        {
            // even kernel: one extra border row below the image
            ppSrc[roiSize.height+(Ncss*2)] = pTmp + (roiSize.height+(Ncss*2)) * tmpw;
            ippsSet_32f( val, ppSrc[roiSize.height+(Ncss*2)], roiSize.width );
        }
        borderType = ippBorderConst;
        break;
    case REPLICATE:
        // border rows alias the first / last filtered row
        for( int ii = 0, jj = roiSize.height + Ncss; ii < Ncss; ++ii, ++jj )
        {
            ppSrc[ii] = ppSrc[Ncss];
            ppSrc[jj] = ppSrc[roiSize.height+Ncss-1];
        }
        if( co )
        {
            ppSrc[roiSize.height+(Ncss*2)] = ppSrc[roiSize.height+Ncss-1];
        }
        borderType = ippBorderRepl;
        break;
    case SYMMETRIC:
        // border rows alias the filtered rows mirrored about the image edge
        for( int ii = 0, jj = roiSize.height + Ncss; ii < Ncss; ++ii, ++jj )
        {
            ppSrc[ii] = ppSrc[(Ncss*2)-ii-1];
            ppSrc[jj] = ppSrc[roiSize.height+Ncss-ii-1];
        }
        if( co )
        {
            ppSrc[roiSize.height+(Ncss*2)] = ppSrc[roiSize.height-1];
        }
        borderType = ippBorderMirrorR;
        break;
    case CIRCULAR:
        // border rows alias the filtered rows from the opposite image edge
        for( int ii = 0, jj = roiSize.height + Ncss; ii < Ncss; ++ii, ++jj )
        {
            ppSrc[ii] = ppSrc[roiSize.height+ii];
            ppSrc[jj] = ppSrc[ii+Ncss];
        }
        if( co )
        {
            ppSrc[roiSize.height+(Ncss*2)] = ppSrc[Ncss*2];
        }
        borderType = ippBorderWrap;
        break;
    default:
        sts = ippStsPaddingSchemeErr;   // ippStsBorderErr missing from ippdefs.h
        goto cleanup;
    }

    // perform the actual convolutions
    sts = ippiFilterRowBorderPipeline_32f_C1R( (const Ipp32f*) pSrc, srcStep,
            ppDst, roiSize, hr, Nr, Nrss, borderType, val, pBufferRow );
    if( sts != ippStsNoErr ) goto cleanup;
    sts = ippiFilterColumnPipeline_32f_C1R( (const Ipp32f**) ppSrc, pDst, dstStep,
            roiSize, hc, Nc, pBufferCol );

cleanup:
    // pTmp came from ippiMalloc, so it is released with ippiFree
    if( pTmp ) ippiFree( pTmp );
    if( pBufferCol ) ippsFree( pBufferCol );
    if( pBufferRow ) ippsFree( pBufferRow );
    if( ppSrc ) ippsFree( ppSrc );
    if( ppDst ) ippsFree( ppDst );
    return sts;
}
No guarantees with this, but it has been tested with both even- and odd-sized kernels and for all padding types.
Enjoy,
Jay Schamus
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Thanks for sharing your expertise with the IPP developer community. However, the copyright notice in your code may look misleading. Is it permissible to disclose this code?
Regards,
Vladimir
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
thanks for sharing of your expertise with IPP developers community. Although the copyright notice of your code may look misleading. Is it possible to disclosure this code?
Regards,
Vladimir
Sure fine. Just acknowledge the source.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi Jay
Thanks a lot for sharing your improved version of the convolution code. I was wondering if you could check a couple of things:
1] Should the line
ppSrc = new Ipp32f*[roiSize.height + (Nrss * 2) - co];
read instead as:
ppSrc = new Ipp32f*[roiSize.height + (Ncss * 2) - co];
so that the loop:
for( int ii=0,jj=roiSize.height+Ncss;ii
ppSrc[ii] = pTmp + ii * tmpw;
ppSrc[jj] = pTmp + jj * tmpw;
(...)
will not exceed the boundaries?
2] Should the lines:
// size of temporary buffers
if( sts = ippiFilterRowBorderPipelineGetBufferSize_32f_C1R( roiSize, Nc, &sizerow) )
return sts;
if( sts = ippiFilterColumnPipelineGetBufferSize_32f_C1R( roiSize, Nr, &sizecol) )
return sts;
be instead:
// size of temporary buffers
if( sts = ippiFilterRowBorderPipelineGetBufferSize_32f_C1R( roiSize, Nr, &sizerow) )
return sts;
if( sts = ippiFilterColumnPipelineGetBufferSize_32f_C1R( roiSize, Nc, &sizecol) )
return sts;
i.e. with Nr and Nc swapped?
Thanks,
Marco
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi Jay
Thanks a lot for sharing your improved version of the convolution code. I was wondering if you could check a couple of things:
1] Should the line
ppSrc = new Ipp32f*[roiSize.height + (Nrss * 2) - co];
read instead as:
ppSrc = new Ipp32f*[roiSize.height + (Ncss * 2) - co];
so that the loop:
for( int ii=0,jj=roiSize.height+Ncss;ii
ppSrc[ii] = pTmp + ii * tmpw;
ppSrc[jj] = pTmp + jj * tmpw;
(...)
will not exceed the boundaries?
2] Should the lines:
// size of temporary buffers
if( sts = ippiFilterRowBorderPipelineGetBufferSize_32f_C1R( roiSize, Nc, &sizerow) )
return sts;
if( sts = ippiFilterColumnPipelineGetBufferSize_32f_C1R( roiSize, Nr, &sizecol) )
return sts;
be instead:
// size of temporary buffers
if( sts = ippiFilterRowBorderPipelineGetBufferSize_32f_C1R( roiSize, Nr, &sizerow) )
return sts;
if( sts = ippiFilterColumnPipelineGetBufferSize_32f_C1R( roiSize, Nc, &sizecol) )
return sts;
i.e. with Nr and Nc swapped?
Thanks,
Marco
Marco,
Sorry to take so long to get back, but I've been busy. Here is the corrected code for this (you can't use new and delete, or it will crash once every ~1000 times you run it). Also, I included a version that runs the filter in reverse order (CR vs. RC), for those who need to match MatLab. Note the copyright notice is there, but it only means that if you use this code you should acknowledge the source. Also, one of our guys who is experimenting with 64-bit under Windows 2008 gets a crash every time at the call to ippiFilterRowBorderPipelineGetBufferSize_32f_C1R when he builds this as a 64-bit DLL.
[cpp]/*********************************************************************************
Copyright(C) 2004-2009, Riverain Medical Group LLC. All Rights Reserved.
This is UNPUBLISHED PROPRIETARY SOURCE CODE of Riverain Medical Group, LLC.
The contents of this file may not be disclosed to third parties, copied or
duplicated in any form, in whole or in part, for use or transmittal, without
the prior written permission of Riverain Medical Group LLC
**********************************************************************************/
/*!
* file IppSepFilter.cpp
* brief Implementation of Separable Filter with IPP
* author J. Schamus, jschamus@riverainmedical.com
*/
# pragma once
# include
// Row major version
static inline IppStatus IppSepFilterRC(
Ipp32f* pDst, // Destination Image
const int& dstStep, // Destination step
const Ipp32f* pSrc, // Source Image
const int& srcStep, // Source step
const IppiSize& roiSize, // Source/Destination size
const Ipp32f* hr, // Row filter
const int& Nr, // Row filter size
const Ipp32f* hc, // Column filter
const int& Nc, // Column filter size
const PadType& padType = CONSTANT, // Padding type
const Ipp32f& val = 0.0f ) // Value to use with CONSTANT padding
{
TRY_AUTO
{
IppStatus sts;
int sizerow, sizecol;
Ipp32f *pTmp = NULL;
Ipp32f *pTmpLocal = NULL;
Ipp8u *pBufferCol = NULL, *pBufferRow = NULL;
// flip the kernels and align the memory to please IPP
Ipp32f *hc_flipped = (Ipp32f *)ippsMalloc_32f( Nc );
Ipp32f *hr_flipped = (Ipp32f *)ippsMalloc_32f( Nr );
ippsFlip_32f((const Ipp32f*)hc, hc_flipped, Nc );
ippsFlip_32f((const Ipp32f*)hr, hr_flipped, Nr );
// compute the kernel semisizes
int Ncss = Nc >> 1;
int Nrss = Nr >> 1;
// compute the kernel offsets (0 -> odd, 1 -> even)
int co = 1 - ( Nc % 2 );
int ro = 1 - ( Nr % 2 );
// allocate temporary dst buffer
int tmpStep;
int tmpw;
// The IPP filter functions seem to need 1 more row allocated
// than is obvious or they sometimes crash.
int tmpHeight = roiSize.height+Nc+1;
int tmpWidth = roiSize.width;
if( !( pTmpLocal = ippiMalloc_32f_C1( roiSize.width, roiSize.height + Nc + 1, &tmpStep ) ) )
throw exception( "nIppSepFilterRC, mem-alloc error. " );
pTmp = pTmpLocal;
tmpw = tmpStep / sizeof(Ipp32f);
Ipp32f **ppSrc, **ppDst;
ppSrc = (Ipp32f**) ippsMalloc_32f( roiSize.height + Nc + 1 );
ppDst = (Ipp32f**) ippsMalloc_32f( roiSize.height );
if( padType == CONSTANT )
{
IppiSize tmpSize;
tmpSize.height = roiSize.height + Nc + 1;
tmpSize.width = roiSize.width;
ippiSet_32f_C1R( val, pTmp, tmpStep, tmpSize );
}
// size of temporary buffers
if( sts = ippiFilterRowBorderPipelineGetBufferSize_32f_C1R( roiSize, Nr, &sizerow) )
throw exception( "nIppSepFilterRC, ipp-row-mem-size error. ");
if( sts = ippiFilterColumnPipelineGetBufferSize_32f_C1R( roiSize, Nc, &sizecol) )
throw exception( "nIppSepFilterRC, ipp-col-mem-size error. ");
// allocate temporary buffers
if( !( pBufferCol = ippsMalloc_8u( sizecol ) ) )
throw exception( "nIppSepFilterRC, ipp-col-temp mem-alloc error. ");
if( !( pBufferRow = ippsMalloc_8u( sizerow ) ) )
throw exception( "nIppSepFilterRC, ipp-row-temp mem-alloc error. ");
Nrss -= ro;
Ncss -= co;
// organize dst buffer
for( int ii=0,jj=Ncss;ii<< "IPP Error: " << ippGetStatusString( sts ) << endl;
throw exception( "nIppSepFilterRC, ipp-row-filter error. ");
return sts;
}
if( sts = ippiFilterColumnPipeline_32f_C1R( (const Ipp32f**)ppSrc, pDst, dstStep,
roiSize, hc_flipped, Nc, pBufferCol) )
{
cout << "IPP Error: " << ippGetStatusString( sts ) << endl;
throw exception( "nIppSepFilterRC, ipp-column-filter error. ");
return sts;
}
if( ppSrc ) { ippsFree(ppSrc); ppSrc = NULL; }
if( ppDst ) { ippsFree(ppDst); ppDst = NULL; }
if( pTmpLocal ) { ippiFree(pTmpLocal); pTmpLocal = NULL; }
if( pBufferCol ) { ippsFree(pBufferCol); pBufferCol = NULL; }
if( pBufferRow ) { ippsFree(pBufferRow); pBufferRow = NULL; }
if( hr_flipped ) { ippsFree(hr_flipped); hr_flipped = NULL; }
if( hc_flipped ) { ippsFree(hc_flipped); hc_flipped = NULL; }
return sts;
}
CATCH_AUTO
}
//
//
// Column major version
static inline IppStatus IppSepFilterCR(
Ipp32f* pDst, // Destination Image
const int& dstStep, // Destination step
const Ipp32f* pSrc, // Source Image
const int& srcStep, // Source step
const IppiSize& roiSize, // Source/Destination size
const Ipp32f* hc, // Column filter
const int& Nc, // Column filter size
const Ipp32f* hr, // Row filter
const int& Nr, // Row filter size
const PadType& padType = CONSTANT, // Padding type
const Ipp32f& val = 0.0f ) // Value to use with CONSTANT padding
{
TRY_AUTO
{
IppStatus sts;
int sizerow, sizecol;
Ipp32f *pTmp = NULL, *pPad = NULL;
Ipp8u *pBufferCol = NULL, *pBufferRow = NULL;
// flip the kernels and align the memory to please IPP
Ipp32f *hc_flipped = (Ipp32f *)ippsMalloc_32f( Nc );
Ipp32f *hr_flipped = (Ipp32f *)ippsMalloc_32f( Nr );
ippsFlip_32f((const Ipp32f*)hc, hc_flipped, Nc );
ippsFlip_32f((const Ipp32f*)hr, hr_flipped, Nr );
// compute the kernel semisizes
int Ncss = Nc >> 1;
int Nrss = Nr >> 1;
// compute the kernel offsets (0 -> odd, 1 -> even)
int co = 1 - ( Nc % 2 );
int ro = 1 - ( Nr % 2 );
// allocate temporary dst buffer
int tmpStep, padStep;
// The IPP filter functions seem to need 1 more row allocated
// than is obvious or they sometimes crash.
IppiSize tmpSize;
tmpSize.width = roiSize.width; tmpSize.height = roiSize.height + Nc + 1;
if( !( pTmp = ippiMalloc_32f_C1( tmpSize.width, tmpSize.height, &tmpStep ) ) )
throw exception( "nIppSepFilterCR mem-alloc error." );
int srcw = srcStep / sizeof(Ipp32f);
int dstw = dstStep / sizeof(Ipp32f);
int tmpw = tmpStep / sizeof(Ipp32f);
ippiSet_32f_C1R( 0.0f, pTmp, tmpStep, tmpSize );
int padw;
IppiSize padSize;
// Only need pad space for CONSTANT
if( padType == CONSTANT )
{
if( !( pPad = ippiMalloc_32f_C1( roiSize.width, (Ncss*2) - co, &padStep ) ) )
throw exception( "nIppSepFilterCR mem-alloc error." );
padw = padStep / sizeof(Ipp32f);
padSize.height = (Ncss*2) - co; padSize.width = roiSize.width;
ippiSet_32f_C1R( val, pPad, padStep, padSize );
}
Ipp32f **ppSrc, **ppDst;
ppSrc = (Ipp32f**) ippsMalloc_32f( roiSize.height + Nc + 1 );
ppDst = (Ipp32f**) ippsMalloc_32f( roiSize.height );
// size of temporary buffers
if( sts = ippiFilterRowBorderPipelineGetBufferSize_32f_C1R( roiSize, Nr, &sizerow) )
throw exception( "nIppSepFilterCR, ipp-row-mem-size error. ");
if( sts = ippiFilterColumnPipelineGetBufferSize_32f_C1R( roiSize, Nc, &sizecol) )
throw exception( "nIppSepFilterCR, ipp-row-mem-size error. ");
// allocate temporary buffers
if( !( pBufferCol = ippsMalloc_8u( sizecol ) ) )
throw exception( "nIppSepFilterCR, ipp-col-mem-alloc error. ");
if( !( pBufferRow = ippsMalloc_8u( sizerow ) ) )
throw exception( "nIppSepFilterCR, ipp-row-mem-alloc error. ");
Nrss -= ro;
Ncss -= co;
// organize dst buffer
for( int ii = 0, jj = Ncss; ii < roiSize.height; ++ii, ++jj )
{
ppDst[ii] = pDst + ii * dstw;
ppSrc[jj] = (Ipp32f *)pSrc + ii * srcw;
}
IppiBorderType borderType;
switch( padType )
{
case CONSTANT:
for( int ii=0,jj=roiSize.height+Ncss;ii<< "IPP Error: " << ippGetStatusString( sts ) << endl;
throw exception( "nIppSepFilterCR col-filter error." );
return sts;
}
if( sts = ippiFilterRowBorderPipeline_32f_C1R( (const Ipp32f*)pTmp, tmpStep,
ppDst, roiSize, hr_flipped, Nr, Nrss, borderType, val, pBufferRow) )
{
cout << "IPP Error: " << ippGetStatusString( sts ) << endl;
throw exception( "nIppSepFilterCR row-filter error." );
return sts;
}
if( ppSrc ) { ippsFree(ppSrc); ppSrc = NULL; }
if( ppDst ) { ippsFree(ppDst); ppDst = NULL; }
if( pTmp ) { ippiFree(pTmp); pTmp = NULL; }
if( pPad ) { ippsFree(pPad); pPad = NULL; };
if( pBufferCol ) { ippsFree(pBufferCol); pBufferCol = NULL; };
if( pBufferRow ) { ippsFree(pBufferRow); pBufferRow = NULL; };
if( hr_flipped ) { ippsFree(hr_flipped); hr_flipped = NULL; };
if( hc_flipped ) { ippsFree(hc_flipped); hc_flipped = NULL; };
return sts;
}
CATCH_AUTO
}
[/cpp]
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
[cpp]// // // Column major version static inline IppStatus IppSepFilterCR( Ipp32f* pDst, // Destination Image const int& dstStep, // Destination step const Ipp32f* pSrc, // Source Image const int& srcStep, // Source step const IppiSize& roiSize, // Source/Destination size const Ipp32f* hc, // Column filter const int& Nc, // Column filter size const Ipp32f* hr, // Row filter const int& Nr, // Row filter size const PadType& padType = CONSTANT, // Padding type const Ipp32f& val = 0.0f ) // Value to use with CONSTANT padding { TRY_AUTO { IppStatus sts; int sizerow, sizecol; Ipp32f *pTmp = NULL, *pPad = NULL; Ipp8u *pBufferCol = NULL, *pBufferRow = NULL; // flip the kernels and align the memory to please IPP Ipp32f *hc_flipped = (Ipp32f *)ippsMalloc_32f( Nc ); Ipp32f *hr_flipped = (Ipp32f *)ippsMalloc_32f( Nr ); ippsFlip_32f((const Ipp32f*)hc, hc_flipped, Nc ); ippsFlip_32f((const Ipp32f*)hr, hr_flipped, Nr ); // compute the kernel semisizes int Ncss = Nc >> 1; int Nrss = Nr >> 1; // compute the kernel offsets (0 -> odd, 1 -> even) int co = 1 - ( Nc % 2 ); int ro = 1 - ( Nr % 2 ); // allocate temporary dst buffer int tmpStep, padStep; // The IPP filter functions seem to need 1 more row allocated // than is obvious or they sometimes crash. IppiSize tmpSize; tmpSize.width = roiSize.width; tmpSize.height = roiSize.height + Nc + 1; if( !( pTmp = ippiMalloc_32f_C1( tmpSize.width, tmpSize.height, &tmpStep ) ) ) throw exception( "nIppSepFilterCR mem-alloc error." ); int srcw = srcStep / sizeof(Ipp32f); int dstw = dstStep / sizeof(Ipp32f); int tmpw = tmpStep / sizeof(Ipp32f); ippiSet_32f_C1R( 0.0f, pTmp, tmpStep, tmpSize ); int padw; IppiSize padSize; // Only need pad space for CONSTANT if( padType == CONSTANT ) { if( !( pPad = ippiMalloc_32f_C1( roiSize.width, (Ncss*2) - co, &padStep ) ) ) throw exception( "nIppSepFilterCR mem-alloc error." 
); padw = padStep / sizeof(Ipp32f); padSize.height = (Ncss*2) - co; padSize.width = roiSize.width; ippiSet_32f_C1R( val, pPad, padStep, padSize ); } Ipp32f **ppSrc, **ppDst; ppSrc = (Ipp32f**) ippsMalloc_32f( roiSize.height + Nc + 1 ); ppDst = (Ipp32f**) ippsMalloc_32f( roiSize.height ); // size of temporary buffers if( sts = ippiFilterRowBorderPipelineGetBufferSize_32f_C1R( roiSize, Nr, &sizerow) ) throw exception( "nIppSepFilterCR, ipp-row-mem-size error. "); if( sts = ippiFilterColumnPipelineGetBufferSize_32f_C1R( roiSize, Nc, &sizecol) ) throw exception( "nIppSepFilterCR, ipp-row-mem-size error. "); // allocate temporary buffers if( !( pBufferCol = ippsMalloc_8u( sizecol ) ) ) throw exception( "nIppSepFilterCR, ipp-col-mem-alloc error. "); if( !( pBufferRow = ippsMalloc_8u( sizerow ) ) ) throw exception( "nIppSepFilterCR, ipp-row-mem-alloc error. "); Nrss -= ro; Ncss -= co; // organize dst buffer for( int ii = 0, jj = Ncss; ii < roiSize.height; ++ii, ++jj ) { ppDst[ii] = pDst + ii * dstw; ppSrc[jj] = (Ipp32f *)pSrc + ii * srcw; } IppiBorderType borderType; switch( padType ) { case CONSTANT: for( int ii=0,jj=roiSize.height+Ncss;ii<< "IPP Error: " << ippGetStatusString( sts ) << endl; throw exception( "nIppSepFilterCR col-filter error." ); return sts; } if( sts = ippiFilterRowBorderPipeline_32f_C1R( (const Ipp32f*)pTmp, tmpStep, ppDst, roiSize, hr_flipped, Nr, Nrss, borderType, val, pBufferRow) ) { cout << "IPP Error: " << ippGetStatusString( sts ) << endl; throw exception( "nIppSepFilterCR row-filter error." 
); return sts; } if( ppSrc ) { ippsFree(ppSrc); ppSrc = NULL; } if( ppDst ) { ippsFree(ppDst); ppDst = NULL; } if( pTmp ) { ippiFree(pTmp); pTmp = NULL; } if( pPad ) { ippsFree(pPad); pPad = NULL; }; if( pBufferCol ) { ippsFree(pBufferCol); pBufferCol = NULL; }; if( pBufferRow ) { ippsFree(pBufferRow); pBufferRow = NULL; }; if( hr_flipped ) { ippsFree(hr_flipped); hr_flipped = NULL; }; if( hc_flipped ) { ippsFree(hc_flipped); hc_flipped = NULL; }; return sts; } CATCH_AUTO } [/cpp]
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
[cpp]//
//
// Column major version
//
// Separable 2D filter that runs the column pass first and the row pass
// second: the column pass (kernel hc, Nc taps) reads rows of pSrc through a
// row-pointer table and writes a temporary image; the row pass (kernel hr,
// Nr taps) reads that temporary image and writes the rows of pDst.
// Both kernels are reversed with ippsFlip_32f before filtering.
//
// Returns the status of the last IPP call, or ippStsPaddingSchemeErr for an
// unknown padType. Allocation and IPP failures throw `exception`.
// NOTE(review): buffers are not released on the throw paths unless
// CATCH_AUTO does so - confirm against the macro definition.
static inline IppStatus IppSepFilterCR(
    Ipp32f* pDst,                       // Destination Image
    const int& dstStep,                 // Destination step
    const Ipp32f* pSrc,                 // Source Image
    const int& srcStep,                 // Source step
    const IppiSize& roiSize,            // Source/Destination size
    const Ipp32f* hc,                   // Column filter
    const int& Nc,                      // Column filter size
    const Ipp32f* hr,                   // Row filter
    const int& Nr,                      // Row filter size
    const PadType& padType = CONSTANT,  // Padding type
    const Ipp32f& val = 0.0f )          // Value to use with CONSTANT padding
{
    TRY_AUTO
    {
        IppStatus sts;
        int sizerow, sizecol;
        Ipp32f *pTmp = NULL, *pPad = NULL;
        Ipp8u *pBufferCol = NULL, *pBufferRow = NULL;

        // flip the kernels and align the memory to please IPP
        Ipp32f *hc_flipped = (Ipp32f *)ippsMalloc_32f( Nc );
        Ipp32f *hr_flipped = (Ipp32f *)ippsMalloc_32f( Nr );
        ippsFlip_32f((const Ipp32f*)hc, hc_flipped, Nc );
        ippsFlip_32f((const Ipp32f*)hr, hr_flipped, Nr );

        // compute the kernel semisizes
        int Ncss = Nc >> 1;
        int Nrss = Nr >> 1;

        // compute the kernel offsets (0 -> odd, 1 -> even)
        int co = 1 - ( Nc % 2 );
        int ro = 1 - ( Nr % 2 );

        // allocate temporary dst buffer
        int tmpStep, padStep;
        // The IPP filter functions seem to need 1 more row allocated
        // than is obvious or they sometimes crash.
        IppiSize tmpSize;
        tmpSize.width = roiSize.width;
        tmpSize.height = roiSize.height + Nc + 1;
        if( !( pTmp = ippiMalloc_32f_C1( tmpSize.width, tmpSize.height, &tmpStep ) ) )
            throw exception( "nIppSepFilterCR mem-alloc error." );
        int srcw = srcStep / sizeof(Ipp32f);
        int dstw = dstStep / sizeof(Ipp32f);
        int tmpw = tmpStep / sizeof(Ipp32f);
        ippiSet_32f_C1R( 0.0f, pTmp, tmpStep, tmpSize );

        int padw = 0;
        IppiSize padSize;
        // Only need pad space for CONSTANT
        if( padType == CONSTANT )
        {
            if( !( pPad = ippiMalloc_32f_C1( roiSize.width, (Ncss*2) - co, &padStep ) ) )
                throw exception( "nIppSepFilterCR mem-alloc error." );
            padw = padStep / sizeof(Ipp32f);
            padSize.height = (Ncss*2) - co;
            padSize.width = roiSize.width;
            ippiSet_32f_C1R( val, pPad, padStep, padSize );
        }

        // Row-pointer tables. They hold pointers, so they are sized with
        // sizeof(Ipp32f*); sizing them as Ipp32f arrays is too small when
        // pointers are wider than 4 bytes (64-bit builds).
        Ipp32f **ppSrc, **ppDst;
        ppSrc = (Ipp32f**) ippsMalloc_8u( (roiSize.height + Nc + 1) * sizeof(Ipp32f*) );
        ppDst = (Ipp32f**) ippsMalloc_8u( roiSize.height * sizeof(Ipp32f*) );
        if( !ppSrc || !ppDst )
            throw exception( "nIppSepFilterCR mem-alloc error." );

        // size of temporary buffers
        if( ( sts = ippiFilterRowBorderPipelineGetBufferSize_32f_C1R( roiSize, Nr, &sizerow) ) )
            throw exception( "nIppSepFilterCR, ipp-row-mem-size error. ");
        if( ( sts = ippiFilterColumnPipelineGetBufferSize_32f_C1R( roiSize, Nc, &sizecol) ) )
            throw exception( "nIppSepFilterCR, ipp-row-mem-size error. ");

        // allocate temporary buffers
        if( !( pBufferCol = ippsMalloc_8u( sizecol ) ) )
            throw exception( "nIppSepFilterCR, ipp-col-mem-alloc error. ");
        if( !( pBufferRow = ippsMalloc_8u( sizerow ) ) )
            throw exception( "nIppSepFilterCR, ipp-row-mem-alloc error. ");

        Nrss -= ro;
        Ncss -= co;

        // organize dst buffer: destination rows for the row pass, and source
        // rows for the column pass shifted down by Ncss border entries
        for( int ii = 0, jj = Ncss; ii < roiSize.height; ++ii, ++jj )
        {
            ppDst[ii] = pDst + ii * dstw;
            ppSrc[jj] = (Ipp32f *)pSrc + ii * srcw;
        }

        IppiBorderType borderType;
        switch( padType )
        {
        case CONSTANT:
            // border rows point into the pad image that was filled with val
            for( int ii = 0, jj = roiSize.height + Ncss; ii < Ncss; ++ii, ++jj )
            {
                ppSrc[ii] = pPad + ii * padw;
                ppSrc[jj] = pPad + (ii + Ncss) * padw;
            }
            if( co )
            {
                // rows of pPad are padw floats apart (this used srcw before)
                ppSrc[roiSize.height+(Ncss*2)] = pPad + (Ncss*2) * padw;
            }
            borderType = ippBorderConst;
            break;
        case REPLICATE:
            for( int ii = 0, jj = roiSize.height + Ncss; ii < Ncss; ++ii, ++jj )
            {
                ppSrc[ii] = ppSrc[Ncss];
                ppSrc[jj] = ppSrc[roiSize.height+Ncss-1];
            }
            if( co )
            {
                ppSrc[roiSize.height+(Ncss*2)] = ppSrc[roiSize.height+Ncss-1];
            }
            borderType = ippBorderRepl;
            break;
        case SYMMETRIC:
            for( int ii = 0, jj = roiSize.height + Ncss; ii < Ncss; ++ii, ++jj )
            {
                ppSrc[ii] = ppSrc[(Ncss*2)-ii-1];
                ppSrc[jj] = ppSrc[roiSize.height+Ncss-ii-1];
            }
            if( co )
            {
                ppSrc[roiSize.height+(Ncss*2)] = ppSrc[roiSize.height-1];
            }
            borderType = ippBorderMirrorR;
            break;
        case CIRCULAR:
            for( int ii = 0, jj = roiSize.height + Ncss; ii < Ncss; ++ii, ++jj )
            {
                ppSrc[ii] = ppSrc[roiSize.height+ii];
                ppSrc[jj] = ppSrc[ii+Ncss];
            }
            if( co )
            {
                ppSrc[roiSize.height+(Ncss*2)] = ppSrc[Ncss*2];
            }
            borderType = ippBorderWrap;
            break;
        default:
            // release everything before the early return
            ippsFree(ppSrc);
            ippsFree(ppDst);
            ippiFree(pTmp);
            if( pPad ) ippiFree(pPad);
            ippsFree(pBufferCol);
            ippsFree(pBufferRow);
            if( hr_flipped ) ippsFree(hr_flipped);
            if( hc_flipped ) ippsFree(hc_flipped);
            // ippStsBorderErr missing from ippdefs.h
            return ippStsPaddingSchemeErr;
        }

        // perform the actual convolutions
        if( ( sts = ippiFilterColumnPipeline_32f_C1R( (const Ipp32f**)ppSrc, pTmp, tmpStep,
                roiSize, hc_flipped, Nc, pBufferCol) ) )
        {
            throw exception( "nIppSepFilterCR col-filter error." );
        }
        if( ( sts = ippiFilterRowBorderPipeline_32f_C1R( (const Ipp32f*)pTmp, tmpStep,
                ppDst, roiSize, hr_flipped, Nr, Nrss, borderType, val, pBufferRow) ) )
        {
            throw exception( "nIppSepFilterCR row-filter error." );
        }

        if( ppSrc ) { ippsFree(ppSrc); ppSrc = NULL; }
        if( ppDst ) { ippsFree(ppDst); ppDst = NULL; }
        if( pTmp ) { ippiFree(pTmp); pTmp = NULL; }
        // pPad came from ippiMalloc, so it is released with ippiFree
        if( pPad ) { ippiFree(pPad); pPad = NULL; }
        if( pBufferCol ) { ippsFree(pBufferCol); pBufferCol = NULL; }
        if( pBufferRow ) { ippsFree(pBufferRow); pBufferRow = NULL; }
        if( hr_flipped ) { ippsFree(hr_flipped); hr_flipped = NULL; }
        if( hc_flipped ) { ippsFree(hc_flipped); hc_flipped = NULL; }
        return sts;
    }
    CATCH_AUTO
}
[/cpp]
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
[cpp]//
//
// Column major version
// Separable 2-D convolution of a 32-bit float image: the column kernel hc is
// applied first (source -> temporary image), then the row kernel hr
// (temporary image -> destination).
//
// Both kernels are reversed with ippsFlip_32f before being handed to the IPP
// pipeline functions. Vertical borders are produced by building a table of
// row pointers (ppSrc) that holds Ncss border rows, roiSize.height image
// rows, Ncss border rows and, for an even-length column kernel, one extra
// trailing row. Horizontal borders are delegated to
// ippiFilterRowBorderPipeline_32f_C1R through borderType / val.
//
// srcStep and dstStep are byte strides (they are divided by sizeof(Ipp32f)
// to obtain element strides).
//
// Returns the status of the last IPP call, or ippStsPaddingSchemeErr when
// padType is not one of CONSTANT / REPLICATE / SYMMETRIC / CIRCULAR.
// Throws `exception` on allocation failure or when an IPP call reports a
// non-zero status. All scratch memory is released on every exit path.
static inline IppStatus IppSepFilterCR(
    Ipp32f* pDst, // Destination Image
    const int& dstStep, // Destination step
    const Ipp32f* pSrc, // Source Image
    const int& srcStep, // Source step
    const IppiSize& roiSize, // Source/Destination size
    const Ipp32f* hc, // Column filter
    const int& Nc, // Column filter size
    const Ipp32f* hr, // Row filter
    const int& Nr, // Row filter size
    const PadType& padType = CONSTANT, // Padding type
    const Ipp32f& val = 0.0f ) // Value to use with CONSTANT padding
{
    TRY_AUTO
    {
        // Owns every scratch allocation so that a throw or an early return
        // cannot leak: the destructor runs whenever the scope is left.
        struct Scratch
        {
            Ipp32f *pTmp, *pPad, *hc_flipped, *hr_flipped;
            Ipp32f **ppSrc, **ppDst;
            Ipp8u *pBufferCol, *pBufferRow;

            Scratch()
                : pTmp( NULL ), pPad( NULL ), hc_flipped( NULL ), hr_flipped( NULL ),
                  ppSrc( NULL ), ppDst( NULL ), pBufferCol( NULL ), pBufferRow( NULL ) {}

            ~Scratch()
            {
                if( ppSrc ) ippsFree( ppSrc );
                if( ppDst ) ippsFree( ppDst );
                // Image buffers come from ippiMalloc, so they go back through ippiFree.
                if( pTmp ) ippiFree( pTmp );
                if( pPad ) ippiFree( pPad );
                if( pBufferCol ) ippsFree( pBufferCol );
                if( pBufferRow ) ippsFree( pBufferRow );
                if( hr_flipped ) ippsFree( hr_flipped );
                if( hc_flipped ) ippsFree( hc_flipped );
            }
        };
        Scratch s;

        IppStatus sts;
        int sizerow = 0, sizecol = 0;

        // flip the kernels and align the memory to please IPP
        s.hc_flipped = ippsMalloc_32f( Nc );
        s.hr_flipped = ippsMalloc_32f( Nr );
        if( !s.hc_flipped || !s.hr_flipped )
            throw exception( "nIppSepFilterCR mem-alloc error." );
        ippsFlip_32f( hc, s.hc_flipped, Nc );
        ippsFlip_32f( hr, s.hr_flipped, Nr );

        // kernel offsets (0 -> odd length, 1 -> even length)
        const int co = 1 - ( Nc % 2 );
        const int ro = 1 - ( Nr % 2 );
        // kernel semisizes, already reduced by the even-length offset
        const int Ncss = ( Nc >> 1 ) - co; // border rows above (and below) the image
        const int Nrss = ( Nr >> 1 ) - ro; // anchor passed to the row filter
        // total number of border rows: 2*Ncss + co == Nc - 1
        const int padRows = ( Ncss * 2 ) + co;

        // allocate temporary dst buffer
        // The IPP filter functions seem to need 1 more row allocated
        // than is obvious or they sometimes crash.
        int tmpStep = 0, padStep = 0;
        IppiSize tmpSize;
        tmpSize.width = roiSize.width;
        tmpSize.height = roiSize.height + Nc + 1;
        if( !( s.pTmp = ippiMalloc_32f_C1( tmpSize.width, tmpSize.height, &tmpStep ) ) )
            throw exception( "nIppSepFilterCR mem-alloc error." );
        ippiSet_32f_C1R( 0.0f, s.pTmp, tmpStep, tmpSize );

        const int srcw = srcStep / (int)sizeof(Ipp32f);
        const int dstw = dstStep / (int)sizeof(Ipp32f);

        // Only need pad space for CONSTANT. A 1-tap column kernel needs no
        // border rows at all, so no zero-height image is requested then.
        int padw = 0;
        if( padType == CONSTANT && padRows > 0 )
        {
            IppiSize padSize;
            padSize.width = roiSize.width;
            padSize.height = padRows;
            if( !( s.pPad = ippiMalloc_32f_C1( padSize.width, padSize.height, &padStep ) ) )
                throw exception( "nIppSepFilterCR mem-alloc error." );
            padw = padStep / (int)sizeof(Ipp32f);
            ippiSet_32f_C1R( val, s.pPad, padStep, padSize );
        }

        // Row-pointer tables. They hold pointers, so they are sized with
        // sizeof(Ipp32f*); ippsMalloc_32f( n ) would only reserve 4 bytes per
        // entry, which is too small on 64-bit builds.
        s.ppSrc = (Ipp32f**) ippsMalloc_8u( ( roiSize.height + Nc + 1 ) * (int)sizeof(Ipp32f*) );
        s.ppDst = (Ipp32f**) ippsMalloc_8u( roiSize.height * (int)sizeof(Ipp32f*) );
        if( !s.ppSrc || !s.ppDst )
            throw exception( "nIppSepFilterCR mem-alloc error." );

        // size of temporary buffers
        if( ( sts = ippiFilterRowBorderPipelineGetBufferSize_32f_C1R( roiSize, Nr, &sizerow ) ) != ippStsNoErr )
            throw exception( "nIppSepFilterCR, ipp-row-mem-size error. ");
        if( ( sts = ippiFilterColumnPipelineGetBufferSize_32f_C1R( roiSize, Nc, &sizecol ) ) != ippStsNoErr )
            throw exception( "nIppSepFilterCR, ipp-col-mem-size error. ");

        // allocate temporary buffers
        if( !( s.pBufferCol = ippsMalloc_8u( sizecol ) ) )
            throw exception( "nIppSepFilterCR, ipp-col-mem-alloc error. ");
        if( !( s.pBufferRow = ippsMalloc_8u( sizerow ) ) )
            throw exception( "nIppSepFilterCR, ipp-row-mem-alloc error. ");

        // organize dst buffer: image rows occupy ppSrc[Ncss .. Ncss+height-1]
        for( int ii = 0, jj = Ncss; ii < roiSize.height; ++ii, ++jj )
        {
            s.ppDst[ii] = pDst + ii * dstw;
            s.ppSrc[jj] = (Ipp32f *)pSrc + ii * srcw;
        }

        // Fill the Ncss rows above and below the image (plus one extra
        // trailing row for an even-length kernel) according to padType.
        IppiBorderType borderType;
        switch( padType )
        {
        case CONSTANT:
            for( int ii = 0, jj = roiSize.height + Ncss; ii < Ncss; ++ii, ++jj )
            {
                s.ppSrc[ii] = s.pPad + ii * padw;
                s.ppSrc[jj] = s.pPad + (ii + Ncss) * padw;
            }
            if( co )
            {
                // Rows of pPad are padw elements apart (not srcw).
                s.ppSrc[roiSize.height+(Ncss*2)] = s.pPad + (Ncss*2) * padw;
            }
            borderType = ippBorderConst;
            break;
        case REPLICATE:
            for( int ii = 0, jj = roiSize.height + Ncss; ii < Ncss; ++ii, ++jj )
            {
                s.ppSrc[ii] = s.ppSrc[Ncss];
                s.ppSrc[jj] = s.ppSrc[roiSize.height+Ncss-1];
            }
            if( co )
            {
                s.ppSrc[roiSize.height+(Ncss*2)] = s.ppSrc[roiSize.height+Ncss-1];
            }
            borderType = ippBorderRepl;
            break;
        case SYMMETRIC:
            for( int ii = 0, jj = roiSize.height + Ncss; ii < Ncss; ++ii, ++jj )
            {
                s.ppSrc[ii] = s.ppSrc[(Ncss*2)-ii-1];
                s.ppSrc[jj] = s.ppSrc[roiSize.height+Ncss-ii-1];
            }
            if( co )
            {
                s.ppSrc[roiSize.height+(Ncss*2)] = s.ppSrc[roiSize.height-1];
            }
            borderType = ippBorderMirrorR;
            break;
        case CIRCULAR:
            for( int ii = 0, jj = roiSize.height + Ncss; ii < Ncss; ++ii, ++jj )
            {
                s.ppSrc[ii] = s.ppSrc[roiSize.height+ii];
                s.ppSrc[jj] = s.ppSrc[ii+Ncss];
            }
            if( co )
            {
                s.ppSrc[roiSize.height+(Ncss*2)] = s.ppSrc[Ncss*2];
            }
            borderType = ippBorderWrap;
            break;
        default:
            // ippStsBorderErr missing from ippdefs.h
            return ippStsPaddingSchemeErr;
        }

        // perform the actual convolutions
        if( ( sts = ippiFilterColumnPipeline_32f_C1R( (const Ipp32f**)s.ppSrc, s.pTmp, tmpStep,
                roiSize, s.hc_flipped, Nc, s.pBufferCol ) ) != ippStsNoErr )
        {
            //cout << "IPP Error: " << ippGetStatusString( sts ) << endl;
            throw exception( "nIppSepFilterCR col-filter error." );
        }
        if( ( sts = ippiFilterRowBorderPipeline_32f_C1R( (const Ipp32f*)s.pTmp, tmpStep,
                s.ppDst, roiSize, s.hr_flipped, Nr, Nrss, borderType, val, s.pBufferRow ) ) != ippStsNoErr )
        {
            //cout << "IPP Error: " << ippGetStatusString( sts ) << endl;
            throw exception( "nIppSepFilterCR row-filter error." );
        }
        return sts;
    }
    CATCH_AUTO
}
[/cpp]
switch( padType )
{
case CONSTANT:
for( int ii=0,jj=roiSize.height+Ncss;ii
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
[cpp]// Row major version
//
// Separable 2-D convolution of a 32-bit float image: the row kernel hr is
// applied first (source -> temporary image), then the column kernel hc
// (temporary image -> destination).
//
// Both kernels are reversed with ippsFlip_32f before being handed to the IPP
// pipeline functions. The row filter writes image row ii into row (Ncss + ii)
// of the temporary image; the column filter then reads through a table of
// row pointers (ppSrc) whose first and last Ncss entries (plus one extra
// trailing entry for an even-length column kernel) are set up according to
// padType. For CONSTANT padding the whole temporary image is pre-filled with
// val, so the border rows simply point at untouched rows of that image.
//
// srcStep and dstStep are passed to IPP unchanged as strides.
//
// Returns the status of the last IPP call, or ippStsPaddingSchemeErr when
// padType is not one of CONSTANT / REPLICATE / SYMMETRIC / CIRCULAR.
// Throws `exception` on allocation failure or when an IPP call reports a
// non-zero status. All scratch memory is released on every exit path.
static inline IppStatus IppSepFilterRC(
    Ipp32f* pDst, // Destination Image
    const int& dstStep, // Destination step
    const Ipp32f* pSrc, // Source Image
    const int& srcStep, // Source step
    const IppiSize& roiSize, // Source/Destination size
    const Ipp32f* hr, // Row filter
    const int& Nr, // Row filter size
    const Ipp32f* hc, // Column filter
    const int& Nc, // Column filter size
    const PadType& padType = CONSTANT, // Padding type
    const Ipp32f& val = 0.0f ) // Value to use with CONSTANT padding
{
    TRY_AUTO
    {
        // Owns every scratch allocation so that a throw or an early return
        // cannot leak: the destructor runs whenever the scope is left.
        struct Scratch
        {
            Ipp32f *pTmp, *hc_flipped, *hr_flipped;
            Ipp32f **ppSrc, **ppDst;
            Ipp8u *pBufferCol, *pBufferRow;

            Scratch()
                : pTmp( NULL ), hc_flipped( NULL ), hr_flipped( NULL ),
                  ppSrc( NULL ), ppDst( NULL ), pBufferCol( NULL ), pBufferRow( NULL ) {}

            ~Scratch()
            {
                if( ppSrc ) ippsFree( ppSrc );
                if( ppDst ) ippsFree( ppDst );
                if( pTmp ) ippiFree( pTmp );
                if( pBufferCol ) ippsFree( pBufferCol );
                if( pBufferRow ) ippsFree( pBufferRow );
                if( hr_flipped ) ippsFree( hr_flipped );
                if( hc_flipped ) ippsFree( hc_flipped );
            }
        };
        Scratch s;

        IppStatus sts;
        int sizerow = 0, sizecol = 0;

        // flip the kernels and align the memory to please IPP
        s.hc_flipped = ippsMalloc_32f( Nc );
        s.hr_flipped = ippsMalloc_32f( Nr );
        if( !s.hc_flipped || !s.hr_flipped )
            throw exception( "nIppSepFilterRC, mem-alloc error. " );
        ippsFlip_32f( hc, s.hc_flipped, Nc );
        ippsFlip_32f( hr, s.hr_flipped, Nr );

        // kernel offsets (0 -> odd length, 1 -> even length)
        const int co = 1 - ( Nc % 2 );
        const int ro = 1 - ( Nr % 2 );
        // kernel semisizes, already reduced by the even-length offset
        const int Ncss = ( Nc >> 1 ) - co; // border rows above (and below) the image
        const int Nrss = ( Nr >> 1 ) - ro; // anchor passed to the row filter

        // allocate temporary dst buffer
        // The IPP filter functions seem to need 1 more row allocated
        // than is obvious or they sometimes crash.
        int tmpStep = 0;
        IppiSize tmpSize;
        tmpSize.width = roiSize.width;
        tmpSize.height = roiSize.height + Nc + 1;
        if( !( s.pTmp = ippiMalloc_32f_C1( tmpSize.width, tmpSize.height, &tmpStep ) ) )
            throw exception( "nIppSepFilterRC, mem-alloc error. " );
        const int tmpw = tmpStep / (int)sizeof(Ipp32f);

        // Row-pointer tables. They hold pointers, so they are sized with
        // sizeof(Ipp32f*); ippsMalloc_32f( n ) would only reserve 4 bytes per
        // entry, which is too small on 64-bit builds.
        s.ppSrc = (Ipp32f**) ippsMalloc_8u( ( roiSize.height + Nc + 1 ) * (int)sizeof(Ipp32f*) );
        s.ppDst = (Ipp32f**) ippsMalloc_8u( roiSize.height * (int)sizeof(Ipp32f*) );
        if( !s.ppSrc || !s.ppDst )
            throw exception( "nIppSepFilterRC, mem-alloc error. " );

        // For CONSTANT padding the border rows live in the temporary image
        // itself, so the whole image is pre-filled with the pad value.
        if( padType == CONSTANT )
        {
            ippiSet_32f_C1R( val, s.pTmp, tmpStep, tmpSize );
        }

        // size of temporary buffers
        if( ( sts = ippiFilterRowBorderPipelineGetBufferSize_32f_C1R( roiSize, Nr, &sizerow ) ) != ippStsNoErr )
            throw exception( "nIppSepFilterRC, ipp-row-mem-size error. ");
        if( ( sts = ippiFilterColumnPipelineGetBufferSize_32f_C1R( roiSize, Nc, &sizecol ) ) != ippStsNoErr )
            throw exception( "nIppSepFilterRC, ipp-col-mem-size error. ");

        // allocate temporary buffers
        if( !( s.pBufferCol = ippsMalloc_8u( sizecol ) ) )
            throw exception( "nIppSepFilterRC, ipp-col-temp mem-alloc error. ");
        if( !( s.pBufferRow = ippsMalloc_8u( sizerow ) ) )
            throw exception( "nIppSepFilterRC, ipp-row-temp mem-alloc error. ");

        // organize dst buffer: the row filter writes image row ii into
        // temporary row jj = Ncss + ii, which the column filter reads back.
        for( int ii = 0, jj = Ncss; ii < roiSize.height; ++ii, ++jj )
        {
            s.ppDst[ii] = s.pTmp + jj * tmpw;
            s.ppSrc[jj] = s.pTmp + jj * tmpw;
        }

        // Fill the Ncss rows above and below the image (plus one extra
        // trailing row for an even-length kernel) according to padType.
        IppiBorderType borderType;
        switch( padType )
        {
        case CONSTANT:
            for( int ii = 0, jj = roiSize.height + Ncss; ii < Ncss; ++ii, ++jj )
            {
                s.ppSrc[ii] = s.pTmp + ii * tmpw;
                s.ppSrc[jj] = s.pTmp + jj * tmpw;
            }
            if( co )
            {
                s.ppSrc[roiSize.height+(Ncss*2)] = s.pTmp + (roiSize.height+(Ncss*2)) * tmpw;
            }
            borderType = ippBorderConst;
            break;
        case REPLICATE:
            for( int ii = 0, jj = roiSize.height + Ncss; ii < Ncss; ++ii, ++jj )
            {
                s.ppSrc[ii] = s.ppSrc[Ncss];
                s.ppSrc[jj] = s.ppSrc[roiSize.height+Ncss-1];
            }
            if( co )
            {
                s.ppSrc[roiSize.height+(Ncss*2)] = s.ppSrc[roiSize.height+Ncss-1];
            }
            borderType = ippBorderRepl;
            break;
        case SYMMETRIC:
            for( int ii = 0, jj = roiSize.height + Ncss; ii < Ncss; ++ii, ++jj )
            {
                s.ppSrc[ii] = s.ppSrc[(Ncss*2)-ii-1];
                s.ppSrc[jj] = s.ppSrc[roiSize.height+Ncss-ii-1];
            }
            if( co )
            {
                s.ppSrc[roiSize.height+(Ncss*2)] = s.ppSrc[roiSize.height-1];
            }
            borderType = ippBorderMirrorR;
            break;
        case CIRCULAR:
            for( int ii = 0, jj = roiSize.height + Ncss; ii < Ncss; ++ii, ++jj )
            {
                s.ppSrc[ii] = s.ppSrc[roiSize.height+ii];
                s.ppSrc[jj] = s.ppSrc[ii+Ncss];
            }
            if( co )
            {
                s.ppSrc[roiSize.height+(Ncss*2)] = s.ppSrc[Ncss*2];
            }
            borderType = ippBorderWrap;
            break;
        default:
            // ippStsBorderErr missing from ippdefs.h
            return ippStsPaddingSchemeErr;
        }

        // perform the actual convolutions
        if( ( sts = ippiFilterRowBorderPipeline_32f_C1R( (const Ipp32f*)pSrc, srcStep,
                s.ppDst, roiSize, s.hr_flipped, Nr, Nrss, borderType, val, s.pBufferRow ) ) != ippStsNoErr )
        {
            cout << "IPP Error: " << ippGetStatusString( sts ) << endl;
            throw exception( "nIppSepFilterRC, ipp-row-filter error. ");
        }
        if( ( sts = ippiFilterColumnPipeline_32f_C1R( (const Ipp32f**)s.ppSrc, pDst, dstStep,
                roiSize, s.hc_flipped, Nc, s.pBufferCol ) ) != ippStsNoErr )
        {
            cout << "IPP Error: " << ippGetStatusString( sts ) << endl;
            throw exception( "nIppSepFilterRC, ipp-column-filter error. ");
        }
        return sts;
    }
    CATCH_AUTO
}
[/cpp]
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Thanks for updating your sample of IPP-based convolution. I would recommend using an attachment for big code chunks instead of inserting the code into the post, as inserting it may cause truncation.
If you can provide us with a sample that causes the crash in 64-bit mode, we will investigate the reason (you may also quickly check whether the issue is related to optimized code by dispatching the generic PX version of the IPP library).
Regards,
Vladimir
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
What is wrong with the following code? It crashes only when I call IppRowFilter multiple times.
static inline IppStatus IppRowFilter( Ipp32f* pDst, // Destination Image const int dstStep, // Destination step const Ipp32f* pSrc, // Source Image const int srcStep, // Source step const IppiSize dstSize, // Destination size const Ipp32f* hr, // Row filter const int Nr) // Row filter size { IppStatus sts; int sizerow; Ipp8u *pBufferRow = NULL; IppiSize srcSize = { dstSize.width + Nr - 1, dstSize.height }; // flip the kernel and align the memory to please IPP Ipp32f *hr_flipped = (Ipp32f*)ippsMalloc_32f(Nr*sizeof(Ipp32f)); ippsFlip_32f((const Ipp32f*)hr, hr_flipped, Nr); Ipp32f **ppDst = (Ipp32f**)ippsMalloc_32f(dstSize.height*sizeof(Ipp32f)); // size of temporary buffer if (sts = ippiFilterRowBorderPipelineGetBufferSize_32f_C1R(srcSize, Nr, &sizerow)) { goto cleanup; } // allocate temporary buffer if (!(pBufferRow = ippsMalloc_8u(sizerow))) { sts = ippStsNoMemErr; goto cleanup; } // organize dst buffer for (int ii = 0; ii < dstSize.height; ii++) { ppDst[ii] = pDst + ii * (dstStep / sizeof(Ipp32f)); } // perform the actual convolution if (sts = ippiFilterRowBorderPipeline_32f_C1R((const Ipp32f*)pSrc, srcStep, ppDst, srcSize, hr_flipped, Nr, 0, ippBorderRepl, 0, pBufferRow)) { goto cleanup; } cleanup: if (ppDst) { ippsFree(ppDst); ppDst = NULL; } if (pBufferRow) { ippsFree(pBufferRow); pBufferRow = NULL; } if (hr_flipped) { ippsFree(hr_flipped); hr_flipped = NULL; } return sts; }
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi,
I can't reproduce crash with your code.
Could you provide your version of IPP and the values of dstStep, srcStep, dstSize, the filter and the filter size?
BR,
Alexander
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi Piotr,
The code looks ok, just one small problem about
21
Ipp32f **ppDst = (Ipp32f**)ippsMalloc_32f(dstSize.height*
sizeof
(Ipp32f));
and
ppDst[ii] = pDst + ii * (dstStep /
sizeof
(Ipp32f));
Here ppDst is a pointer to pointers.
In a 32-bit application this is OK, as sizeof(Ipp32f) = 4 and sizeof(Ipp32f*) = 4,
but in a 64-bit application sizeof(Ipp32f*) = 8, not 4. So, for safety, it is better to change it to
Ipp32f **ppDst = (Ipp32f**)ippsMalloc_32f(dstSize.height*
sizeof
(Ipp32f*));
Best Regards,
Ying
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
I'm using IPP 9.0.1 for Windows. Here is the function that calls IppRowFilter:
/*
 * Applies IppRowFilter to every channel of img and stores the result in a
 * newly allocated image returned through pres.
 *
 * The result is (width - kernel_length + 1) pixels wide and has the same
 * height and channel count as img. Source and destination strides are
 * computed as width * sizeof(Ipp32f), i.e. rows are assumed to be tightly
 * packed 32-bit floats.
 *
 * Returns TRUE on success. Returns FALSE when the result image cannot be
 * allocated, or when filtering any channel fails (in which case the result
 * image is passed to Image_delete).
 */
mbool ImageRowFilter(Image * const pres, const Image img, const float * kernel, const mint kernel_length)
{
    const int w = (int)ImageWidth(img);
    const int h = (int)ImageHeight(img);
    const int channels = (int)ImageChannels(img);
    IppiSize srcSize = { w, h };
    int srcStep = srcSize.width * sizeof(Ipp32f);
    IppiSize dstSize = { (int)(w - kernel_length + 1), (int)h };
    int dstStep = dstSize.width * sizeof(Ipp32f);
    Ipp32f* pSrc = NULL, *pDst = NULL;
    /* Initialised so that an image with zero channels is reported as success
       instead of reading an indeterminate status below. */
    IppStatus stat = ippStsNoErr;

    *pres = Image_newAllocated_size(img, dstSize.width, dstSize.height, channels);
    if (*pres == NOT_AN_IMAGE) {
        return FALSE;
    }

    for (mint c = 0; c < channels; c++) {
        pSrc = ImageSliceElemPointer(img, raw_t_real32, 0, 0, 0, c);
        pDst = ImageSliceElemPointer(*pres, raw_t_real32, 0, 0, 0, c);
        stat = IppRowFilter(pDst, dstStep, pSrc, srcStep, dstSize, kernel, (int)kernel_length);
        if (stat != ippStsNoErr) {
            /* stop at the first failing channel */
            break;
        }
    }

    if (stat != ippStsNoErr) {
        Image_delete(pres);
        return FALSE;
    }
    return TRUE;
}
It uses my data structures so you will not be able to compile it, but it shows how I set srcSize, srcStep, dstSize, and dstStep.
I have noticed that it does not crash when I increase the height of the output image by 1, i.e.:
*pres = Image_newAllocated_size(img, dstSize.width, dstSize.height + 1, channels);
I'm testing this code using an RGB image (width = 150, height = 116) and the following kernel: {0.0216149, 0.0439554, 0.0778778, 0.118718, 0.153857, 0.167953, 0.153857, 0.118718, 0.0778778, 0.0439554, 0.0216149}.
- Subscribe to RSS Feed
- Mark Topic as New
- Mark Topic as Read
- Float this Topic for Current User
- Bookmark
- Subscribe
- Printer Friendly Page