- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Link Copied
- « Previous
-
- 1
- 2
- Next »
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi Piotr,
You haven't mentioned whether you are building a 32-bit or a 64-bit application. I guess you are building 32-bit, right?
Besides the 32-bit vs. 64-bit issue (you mentioned that using more memory works around the crash), there are other factors you may need to take care of:
1) about the Bytes
https://software.intel.com/en-us/articles/intel-integrated-performance-primitives-intel-ipp-processing-an-image-from-edge-to-edge
Note the image stepBytes parameter of the function. It is the distance in bytes between the starts of consecutive image rows. It depends on your array's memory layout and data type. In most cases it is equal to width*sizeof(datatype)*channels. But sometimes it is not, especially for BMP images and ippMalloc buffers, which require rows aligned to 4 bytes and 32 bytes respectively, so there are padding zeros at the end of each row. Please take care when you use stepBytes or shift a pointer by stepBytes.
Not sure how you allocate the memory and how the function Image_newAllocated_size, ImageSliceElemPointer works
But usually, for an RGB image (width = 150, height = 116), the row step in bytes is
((m_width*m_channels*(Depth()>>3)+3)>>2)<<2. Suppose Depth() = 8 (8-bit RGB, 8u); then the row step is
((150*3*1 + 3) >> 2) << 2 = 452, not 150*3*sizeof(Ipp8u) = 450.
For 32-bit float it should be OK: 150*3*4 = 150*3*sizeof(Ipp32f).
2) about the channel, how do you convert 8bit RGB to 1 channel IPP32f array?
Best Regards,
Ying
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
I'm building 64-bit application.
1) I don't think this is the problem because many other IPP functions work fine with this image.
2) My input image is of 32-bit floating point type so I don't need any conversion. You should be able to reproduce the crash when you use
Ipp32f * img = (Ipp32f*)ippsMalloc_32f(w*h*channels*
sizeof
(Ipp32f))
and
Ipp32f * pres = (Ipp32f*)ippsMalloc_32f(dstSize.width*dstSize.height*channels*
sizeof
(Ipp32f))
Have you considered that there might be a bug in
ippiFilterRowBorderPipeline_32f_C1R?
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi Piotr,
It may be a bug in the function. Could you please quickly check how your program runs?
Please replace the code
// organize dst buffer
for (int ii = 0; ii < dstSize.height; ii++) {
ppDst[ii] = pDst + ii * (dstStep / sizeof(Ipp32f));
}
// perform the actual convolution
if (sts = ippiFilterRowBorderPipeline_32f_C1R((const Ipp32f*)pSrc, srcStep, ppDst, srcSize, hr_flipped, Nr, 0, ippBorderRepl, 0, pBufferRow))
{
goto cleanup;
}
With
// organize dst buffer
for (int ii = 0; ii < dstSize.height; ii++) {
ppDst[ii] = pDst + ii * (dstStep / sizeof(Ipp32f));
printf("ppDst[%d] = %llx , %lf \n", ii, ppDst[ii], *ppDst[ii]);
}
/*// perform the actual convolution
if (sts = ippiFilterRowBorderPipeline_32f_C1R((const Ipp32f*)pSrc, srcStep, ppDst, srcSize, hr_flipped, Nr, 0, ippBorderRepl, 0, pBufferRow))
{
goto cleanup;
}.
*/
It seems to me that the access to ppDst may also have some issues.
Best Regards,
Ying
P.S. About Ipp32f **ppDst = (Ipp32f**)ippsMalloc_32f(dstSize.height*sizeof(Ipp32f));: it has the issue I mentioned last time. But because you are using ippsMalloc_32f, which actually allocates mallocSize x 4 bytes, you are lucky: it seems to work on both ia32 and 64-bit.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
the test case seems runs fine. #include "stdio.h" #include "stdlib.h" #include "memory.h" //#include "stdafx.h" #include "math.h" #include "ipp.h" static inline IppStatus IppRowFilter( Ipp32f* pDst, // Destination Image const int dstStep, // Destination step const Ipp32f* pSrc, // Source Image const int srcStep, // Source step const IppiSize dstSize, // Destination size const Ipp32f* hr, // Row filter const int Nr) // Row filter size { IppStatus sts; int sizerow; Ipp8u *pBufferRow = NULL; IppiSize srcSize = { dstSize.width + Nr - 1, dstSize.height }; // flip the kernel and align the memory to please IPP Ipp32f *hr_flipped = (Ipp32f*)ippsMalloc_32f(Nr*sizeof(Ipp32f)); ippsFlip_32f((const Ipp32f*)hr, hr_flipped, Nr); Ipp32f **ppDst = (Ipp32f**)ippsMalloc_32f(dstSize.height*sizeof(Ipp32f*)); // size of temporary buffer if (sts = ippiFilterRowBorderPipelineGetBufferSize_32f_C1R(srcSize, Nr, &sizerow)) { goto cleanup; } // allocate temporary buffer if (!(pBufferRow = ippsMalloc_8u(sizerow))) { sts = ippStsNoMemErr; goto cleanup; } // organize dst buffer for (int ii = 0; ii < dstSize.height; ii++) { ppDst[ii] = pDst + ii * (dstStep / sizeof(Ipp32f*)); //ppDst[ii] = pDst + ii * dstStep; // / sizeof(Ipp32f*)); printf("ppDst[%d] = %llx , %lf \n", ii, ppDst[ii], *ppDst[ii]); } // perform the actual convolution if (sts = ippiFilterRowBorderPipeline_32f_C1R((const Ipp32f*)pSrc, srcStep, ppDst, srcSize, hr_flipped, Nr, 0, ippBorderRepl, 0, pBufferRow)) { goto cleanup; } cleanup: if (ppDst) { ippsFree(ppDst); ppDst = NULL; } if (pBufferRow) { ippsFree(pBufferRow); pBufferRow = NULL; } if (hr_flipped) { ippsFree(hr_flipped); hr_flipped = NULL; } return sts; } int main(int argc, char* argv[]) { //ippInit(); const IppLibraryVersion* lib = ippsGetLibVersion(); printf("%s %s %d.%d.%d.%d\n", lib->Name, lib->Version, lib->major, lib->minor, lib->majorBuild, lib->build); Ipp32f kernel[11]= {0.0216149f, 0.0439554f, 0.0778778, 0.118718f, 0.153857f, 0.167953f, 0.153857f, 0.118718f, 
0.0778778f, 0.0439554f, 0.0216149f}; int kernel_length = 11; const int w = 150; const int h = 116; const int channels = 3; IppiSize srcSize = { w, h }; int srcStep = srcSize.width * sizeof(Ipp32f); IppiSize dstSize = { (int)(w - kernel_length + 1), (int)h }; int dstStep = dstSize.width * sizeof(Ipp32f); Ipp32f* pSrc = NULL, *pDst = NULL; IppStatus stat; //*pres = int in_stride = 0; int srcBytes; int dstBytes; Ipp32f *img = (Ipp32f *)malloc(srcSize.width*srcSize.height*channels*sizeof(Ipp32f)); Ipp32f *pres = (Ipp32f *) malloc(dstSize.width*dstSize.height*channels*sizeof(Ipp32f)); //Ipp32f *img = ippiMalloc_32f_C3(srcSize.width,srcSize.height,&srcBytes); img[0]=1.0; //assert( in != NULL ); // printf("ppDst[%d] = %llx \n", ii, ppDst[ii]); // Image_newAllocated_size(img, dstSize.width, dstSize.height, channels); Ipp32f **ppSrc = (Ipp32f**)ippsMalloc_32f(srcSize.height*sizeof(Ipp32f)); // organize dst buffer for (int ii = 0; ii < srcSize.height; ii++) { ppSrc[ii] = img + ii * (srcStep / sizeof(Ipp32f)); } printf("ppSrc[0] = %llx value *ppSrc[0]=%lf \n", ppSrc[0],*ppSrc[0] ); // printf("ppSrc[1] = %llx value *ppSrc[0]=%lf \n", ppSrc[1],*ppSrc[1] ); //printf("ppSrc[150] = %llx value *ppSrc[0]=%lf \n", ppSrc[150],*ppSrc[150] ); //if (*pres == NOT_AN_IMAGE) { // return FALSE; //} for (int c = 0; c < channels; c++) { pSrc = img+ c*srcSize.height* (srcStep / sizeof(Ipp32f));//ImageSliceElemPointer(img, raw_t_real32, 0, 0, 0, c); pDst = pres+ c*dstSize.height* (dstStep / sizeof(Ipp32f));//ImageSliceElemPointer(*pres, raw_t_real32, 0, 0, 0, c); printf("pDst = %llx value pDst[0]=%lf \n", pDst,pDst[0] ); // / stat = IppRowFilter(pDst, dstStep, pSrc, srcStep, dstSize, kernel, kernel_length); if (stat != ippStsNoErr) { goto cleanup; } } //ippFree(img); free(img); free(pres); cleanup: // if (stat != ippStsNoErr) { // ippFree(pres); // return -1; // } return 0; }
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Ying, your code does not crash, but it also does not work correctly because you call
ppDst[ii] = pDst + ii * (dstStep / sizeof(Ipp32f*));
instead of
ppDst[ii] = pDst + ii * (dstStep / sizeof(Ipp32f));
I fixed the problem with allocation of ppDst, but it didn't help.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi Piotr,
Your code will work correctly if you set Anchor to 10.
sts = ippiFilterRowBorderPipeline_32f_C1R((const Ipp32f*)pSrc, srcStep, ppDst, srcSize, hr_flipped, Nr, 10, ippBorderRepl, 0, pBufferRow);
BR,
Alexander
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Alexander, I don't understand your comment. Setting anchor to 10 does not fix the crash and obviously it generates an incorrect result.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi Piotr,
I see two bugs in your code:
1. The anchor needs to be set to 10 to work with the border correctly. (You will see pDst[0] = 0.999999 as the result in Ying's test case.)
2. You set
IppiSize dstSize = { (
int
)(w - kernel_length + 1), (
int
)h };
to allocate pDst in ImageRowFilter
and call IppRowFilter(pDst, dstStep, pSrc, srcStep, dstSize, kernel, kernel_length);.
But in IppRowFilter you set
IppiSize srcSize = { dstSize.width + Nr - 1, dstSize.height }; and call
sts = ippiFilterRowBorderPipeline_32f_C1R((
const
Ipp32f*)pSrc, srcStep, ppDst, srcSize, hr_flipped, Nr, 0, ippBorderRepl, 0, pBufferRow)); This is the reason for the incorrect result and the crash.
BR,
Alexander
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Alexander, please provide a full code listing that performs row filtering on an image without doing any padding, i.e. the resulting image is of size {w - kernel_length + 1, h - kernel_length + 1}. I'm performing image padding in a separate function. Please explain why you think an anchor equal to 10 is the correct setting.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi Piotr,
I suggested setting the anchor to 10 (kernel_length - 1) in Ying's test to check that the IPP function works correctly, but for your sample an anchor of 0 is correct.
Your mistake is the incorrect use of the roiSize parameter when calling the ippiFilterRowBorderPipeline_32f_C1R function.
roiSize is the size of the source and destination ROI in pixels (see https://software.intel.com/en-us/node/504163 ).
If you set roiSize equal to dstSize = { (int)(w - kernel_length + 1), (int)h }, the resulting image will have that same size, and to calculate the rightmost kernel_length pixels the function will use a border built by replication (ippBorderRepl).
If you want the border pixels to be taken from the image itself, you need to set the border type to ippBorderInMem. The sample I used for testing is below.
#include "stdio.h"
#include "stdlib.h"
#include "memory.h"
#include "math.h"
#include "ipp.h"
// Filters every row of a single-channel 32-bit float image with a 1-D kernel
// by calling ippiFilterRowBorderPipeline_32f_C1R with anchor 0 and
// ippBorderRepl.
//
// pDst    - destination image; dstSize.height rows spaced dstStep bytes apart
// dstStep - destination row step in bytes
// pSrc    - source image
// srcStep - source row step in bytes
// dstSize - ROI size handed to IPP (the same size is used for the buffer-size
//           query and for the filter call)
// hr      - row filter coefficients (Nr values); a reversed copy is what is
//           actually passed to IPP
// Nr      - number of coefficients in hr
//
// Returns the first non-zero IppStatus reported by an IPP call,
// ippStsNoMemErr when a temporary allocation fails, and otherwise the status
// of the filter call. All temporaries are released before returning.
static inline IppStatus IppRowFilter(
    Ipp32f* pDst,            // Destination Image
    const int dstStep,       // Destination step
    const Ipp32f* pSrc,      // Source Image
    const int srcStep,       // Source step
    const IppiSize dstSize,  // Destination size
    const Ipp32f* hr,        // Row filter
    const int Nr)            // Row filter size
{
    IppStatus sts;
    int sizerow = 0;
    Ipp8u *pBufferRow = NULL;
    Ipp32f *hr_flipped = NULL;
    Ipp32f **ppDst = NULL;
    size_t rowStride;

    // Query the work-buffer size first, before any allocation size is derived
    // from Nr or dstSize.
    // NOTE(review): this presumably rejects non-positive ROI/kernel sizes --
    // confirm against the IPP reference. If it does not, the NULL checks below
    // still stop the function before any buffer is touched.
    sts = ippiFilterRowBorderPipelineGetBufferSize_32f_C1R(dstSize, Nr, &sizerow);
    if (sts != 0) {
        goto cleanup;
    }

    // Temporaries: reversed kernel, table of destination row pointers, and the
    // IPP work buffer. ippsMalloc_8u takes a byte count, so sizes are in bytes.
    hr_flipped = (Ipp32f*)ippsMalloc_8u((int)(Nr * sizeof(Ipp32f)));
    ppDst = (Ipp32f**)ippsMalloc_8u((int)(dstSize.height * sizeof(Ipp32f*)));
    pBufferRow = ippsMalloc_8u(sizerow);
    if (!hr_flipped || !ppDst || !pBufferRow) {
        sts = ippStsNoMemErr;
        goto cleanup;
    }

    // Reverse the kernel; the status was previously ignored.
    sts = ippsFlip_32f(hr, hr_flipped, Nr);
    if (sts != 0) {
        goto cleanup;
    }

    // Build the row-pointer table: dstStep is in bytes, pointer arithmetic on
    // pDst is in Ipp32f elements.
    rowStride = dstStep / sizeof(Ipp32f);
    for (int ii = 0; ii < dstSize.height; ii++) {
        ppDst[ii] = pDst + ii * rowStride;
    }

    // Perform the actual convolution.
    sts = ippiFilterRowBorderPipeline_32f_C1R(pSrc, srcStep, ppDst, dstSize,
                                              hr_flipped, Nr, 0, ippBorderRepl,
                                              0, pBufferRow);

cleanup:
    if (ppDst) { ippsFree(ppDst); ppDst = NULL; }
    if (pBufferRow) { ippsFree(pBufferRow); pBufferRow = NULL; }
    if (hr_flipped) { ippsFree(hr_flipped); hr_flipped = NULL; }
    return sts;
}
int main(int argc, char* argv[])
{
//ippInit();
const IppLibraryVersion* lib = ippsGetLibVersion();
printf("%s %s %d.%d.%d.%d\n", lib->Name, lib->Version, lib->major, lib->minor, lib->majorBuild, lib->build);
Ipp32f kernel[11]= {0.0216149f, 0.0439554f, 0.0778778, 0.118718f, 0.153857f, 0.167953f, 0.153857f, 0.118718f, 0.0778778f, 0.0439554f, 0.0216149f};
int kernel_length = 11;
const int w = 150;
const int h = 116;
const int channels = 3;
IppiSize srcSize = { w, h };
int srcStep = srcSize.width * sizeof(Ipp32f);
IppiSize dstSize = { (int)(w - kernel_length + 1), (int)h };
int dstStep = dstSize.width * sizeof(Ipp32f);
Ipp32f* pSrc = NULL, *pDst = NULL;
IppStatus stat;
Ipp32f *img = (Ipp32f *)malloc(srcSize.width*srcSize.height*channels*sizeof(Ipp32f));
Ipp32f *pres = (Ipp32f *) malloc(dstSize.width*dstSize.height*channels*sizeof(Ipp32f));
img[0]=1.0;
// Image_newAllocated_size(img, dstSize.width, dstSize.height, channels);
// Ipp32f **ppSrc = (Ipp32f**)ippsMalloc_32f(srcSize.height*sizeof(Ipp32f));
Ipp32f **ppSrc = (Ipp32f**)malloc(srcSize.height*sizeof(Ipp32f*));
// organize dst buffer
for (int ii = 0; ii < srcSize.height; ii++) {
ppSrc[ii] = img + ii * (srcStep / sizeof(Ipp32f));
}
printf("ppSrc[0] = %llx value *ppSrc[0]=%lf \n", ppSrc[0],*ppSrc[0] );
// printf("ppSrc[1] = %llx value *ppSrc[0]=%lf \n", ppSrc[1],*ppSrc[1] );
//printf("ppSrc[150] = %llx value *ppSrc[0]=%lf \n", ppSrc[150],*ppSrc[150] );
//if (*pres == NOT_AN_IMAGE) {
// return FALSE;
//}
for (int c = 0; c < channels; c++) {
pSrc = img+ c*srcSize.height* (srcStep / sizeof(Ipp32f));//ImageSliceElemPointer(img, raw_t_real32, 0, 0, 0, c);
pDst = pres+ c*dstSize.height* (dstStep / sizeof(Ipp32f));//ImageSliceElemPointer(*pres, raw_t_real32, 0, 0, 0, c);
printf("pDst = %llx value pDst[0]=%lf \n", pDst,pDst[0] );
stat = IppRowFilter(pDst, dstStep, pSrc, srcStep, dstSize, kernel, kernel_length);
if (stat != ippStsNoErr) {
goto cleanup;
}
}
cleanup:
free(img);
free(pres);
free(ppSrc);
// if (stat != ippStsNoErr) {
// ippFree(pres);
// return -1;
// }
return 0;
}
BR,
Alexander
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
It works now. Thank you.
- Subscribe to RSS Feed
- Mark Topic as New
- Mark Topic as Read
- Float this Topic for Current User
- Bookmark
- Subscribe
- Printer Friendly Page
- « Previous
-
- 1
- 2
- Next »