- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
I am testing the convolution using IPP.
I also tested MKL, but it was not as fast as I had expected. However, my simple IPP implementation below produces incorrect results.
I would like to get the result shown at the link below; please review the code and tell me what is wrong.
/*
 * Snippet: convolve a 3x5 16-bit image with itself via ippiConv_16s_C1R using
 * the ippiROIFull setting and print a 5x9 result
 * (5 = 3 + 3 - 1 columns, 9 = 5 + 5 - 1 rows).
 *
 * NOTE(review): the fill code and the print loop below index the IPP buffers
 * as if rows were densely packed, while the convolution call is given the
 * row steps returned by ippiMalloc_16s_C1. If those steps are larger than the
 * packed row size, the data is neither written nor read where the library
 * expects it -- confirm against the srcStep1/srcStep2/dstStep values.
 */
const int out_width = 5;
const int out_height = 9;
IppStatus status = ippStsNoErr;
Ipp16s* pSrc1 = NULL, *pSrc2 = NULL, *pDst = NULL; /* Pointers to source/destination images */
int srcStep1 = 0, srcStep2 = 0, dstStep = 0; /* Steps, in bytes, through the source/destination images */
IppiSize dstSize = { out_width, out_height }; /* Size of destination ROI in pixels */
IppiSize src1Size = { 3, 5 }; /* Size of first source ROI in pixels */
IppiSize src2Size = { 3, 5 }; /* Size of second source ROI in pixels */
int divisor = 1; /* The integer value by which the computed result is divided */
Ipp8u *pBuffer = NULL; /* Pointer to the work buffer */
int iTmpBufSize = 0; /* Common work buffer size */
int numChannels = 1;
IppEnum funCfgFull = (IppEnum)(ippAlgAuto | ippiROIFull | ippiNormNone);
/* Each allocation also reports its row step through the last argument. */
/* NOTE(review): none of the returned pointers is checked for NULL. */
pSrc2 = ippiMalloc_16s_C1(src2Size.width, src2Size.height, &srcStep2);
pSrc1 = ippiMalloc_16s_C1(src1Size.width, src1Size.height, &srcStep1);
pDst = ippiMalloc_16s_C1(dstSize.width, dstSize.height, &dstStep);
/* Single-pass do/while so that `break` can jump straight to the cleanup below. */
do {
status = ippiConvGetBufferSize(src1Size, src2Size, ipp16s, numChannels, funCfgFull, &iTmpBufSize);
if (ippStsNoErr != status)
break;
pBuffer = ippsMalloc_8u(iTmpBufSize);
/*
 * Fill both sources with the same 15 values using flat indices 0..14,
 * i.e. element (row, col) is assumed to live at index row * 3 + col.
 * NOTE(review): this ignores srcStep1/srcStep2 -- see the note at the top.
 */
pSrc2[0] = pSrc1[0] = 1;
pSrc2[1] = pSrc1[1] = 1;
pSrc2[2] = pSrc1[2] = 1;
pSrc2[3] = pSrc1[3] = 1;
pSrc2[4] = pSrc1[4] = 0;
pSrc2[5] = pSrc1[5] = 0;
pSrc2[6] = pSrc1[6] = 1;
pSrc2[7] = pSrc1[7] = 1;
pSrc2[8] = pSrc1[8] = 1;
pSrc2[9] = pSrc1[9] = 0;
pSrc2[10] = pSrc1[10] = 0;
pSrc2[11] = pSrc1[11] = 1;
pSrc2[12] = pSrc1[12] = 1;
pSrc2[13] = pSrc1[13] = 1;
pSrc2[14] = pSrc1[14] = 1;
status = ippiConv_16s_C1R(pSrc1, srcStep1, src1Size, pSrc2, srcStep2, src2Size, pDst, dstStep, divisor, funCfgFull, pBuffer);
if (ippStsNoErr != status)
break;
/*
 * Print the destination row by row.
 * NOTE(review): rows are addressed with a pitch of out_width elements,
 * not with dstStep, which is what the convolution call was given.
 */
for (int j = 0; j < out_height; j++) {
for (int i = 0; i < out_width; i++) {
cout << pDst[i + j*out_width] << " ";
}
cout << endl;
}
} while (false);
/* Cleanup runs on every path, including the early `break`s above. */
ippsFree(pBuffer);
ippiFree(pSrc1);
ippiFree(pSrc2);
ippiFree(pDst);
- Tags:
- Development Tools
- General Support
- Intel® Integrated Performance Primitives
- Parallel Computing
- Vectorization
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi KIM, ILTAEK.
The IPP library uses a so-called "step" (the distance in bytes between the starts of consecutive image rows) because the processed region can be part of a larger image. I am attaching a modified version that shows how to work with steps to get the expected result.
#include "ipp.h"
#include <stdio.h>
void main()
{
const int out_width = 5;
const int out_height = 9;
IppStatus status = ippStsNoErr;
Ipp16s* pSrc1 = NULL, *pSrc2 = NULL, *pDst = NULL; /* Pointers to source/destination images */
int srcStep1 = 0, srcStep2 = 0, dstStep = 0; /* Steps, in bytes, through the source/destination images */
IppiSize dstSize = { out_width, out_height }; /* Size of destination ROI in pixels */
IppiSize src1Size = { 3, 5 }; /* Size of destination ROI in pixels */
IppiSize src2Size = { 3, 5 }; /* Size of destination ROI in pixels */
int divisor = 1; /* The integer value by which the computed result is divided */
Ipp8u *pBuffer = NULL; /* Pointer to the work buffer */
int iTmpBufSize = 0; /* Common work buffer size */
int numChannels = 1;
IppEnum funCfgFull = (IppEnum)(ippAlgAuto | ippiROIFull | ippiNormNone);
pSrc2 = ippiMalloc_16s_C1(src2Size.width, src2Size.height, &srcStep2);
pSrc1 = ippiMalloc_16s_C1(src1Size.width, src1Size.height, &srcStep1);
pDst = ippiMalloc_16s_C1(dstSize.width, dstSize.height, &dstStep);
Ipp16s img[5][3] = { {1, 1, 1},
{1, 0, 0},
{1, 1, 1},
{0, 0, 1},
{1, 1, 1} };
do {
status = ippiConvGetBufferSize(src1Size, src2Size, ipp16s, numChannels, funCfgFull, &iTmpBufSize);
if (ippStsNoErr != status)
break;
pBuffer = ippsMalloc_8u(iTmpBufSize);
int w, h;
for (h = 0; h < 5; h++) {
for (w = 0; w < 3; w++) {
pSrc1[h*(srcStep1 / 2) + w] = img;
pSrc2[h*(srcStep2/2)+w] = img;
}
}
status = ippiConv_16s_C1R(pSrc1, srcStep1, src1Size, pSrc2, srcStep2, src2Size, pDst, dstStep, divisor, funCfgFull, pBuffer);
if (ippStsNoErr != status)
break;
for (int j = 0; j < out_height; j++) {
for (int i = 0; i < out_width; i++) {
printf("%d ", pDst[i + j * (dstStep/2)]);
}
printf("\n");
}
} while (false);
ippsFree(pBuffer);
ippiFree(pSrc1);
ippiFree(pSrc2);
ippiFree(pDst);
}
Thanks.
Link Copied
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi KIM, ILTAEK.
The IPP library uses a so-called "step" (the distance in bytes between the starts of consecutive image rows) because the processed region can be part of a larger image. I am attaching a modified version that shows how to work with steps to get the expected result.
#include "ipp.h"
#include <stdio.h>
void main()
{
const int out_width = 5;
const int out_height = 9;
IppStatus status = ippStsNoErr;
Ipp16s* pSrc1 = NULL, *pSrc2 = NULL, *pDst = NULL; /* Pointers to source/destination images */
int srcStep1 = 0, srcStep2 = 0, dstStep = 0; /* Steps, in bytes, through the source/destination images */
IppiSize dstSize = { out_width, out_height }; /* Size of destination ROI in pixels */
IppiSize src1Size = { 3, 5 }; /* Size of destination ROI in pixels */
IppiSize src2Size = { 3, 5 }; /* Size of destination ROI in pixels */
int divisor = 1; /* The integer value by which the computed result is divided */
Ipp8u *pBuffer = NULL; /* Pointer to the work buffer */
int iTmpBufSize = 0; /* Common work buffer size */
int numChannels = 1;
IppEnum funCfgFull = (IppEnum)(ippAlgAuto | ippiROIFull | ippiNormNone);
pSrc2 = ippiMalloc_16s_C1(src2Size.width, src2Size.height, &srcStep2);
pSrc1 = ippiMalloc_16s_C1(src1Size.width, src1Size.height, &srcStep1);
pDst = ippiMalloc_16s_C1(dstSize.width, dstSize.height, &dstStep);
Ipp16s img[5][3] = { {1, 1, 1},
{1, 0, 0},
{1, 1, 1},
{0, 0, 1},
{1, 1, 1} };
do {
status = ippiConvGetBufferSize(src1Size, src2Size, ipp16s, numChannels, funCfgFull, &iTmpBufSize);
if (ippStsNoErr != status)
break;
pBuffer = ippsMalloc_8u(iTmpBufSize);
int w, h;
for (h = 0; h < 5; h++) {
for (w = 0; w < 3; w++) {
pSrc1[h*(srcStep1 / 2) + w] = img;
pSrc2[h*(srcStep2/2)+w] = img;
}
}
status = ippiConv_16s_C1R(pSrc1, srcStep1, src1Size, pSrc2, srcStep2, src2Size, pDst, dstStep, divisor, funCfgFull, pBuffer);
if (ippStsNoErr != status)
break;
for (int j = 0; j < out_height; j++) {
for (int i = 0; i < out_width; i++) {
printf("%d ", pDst[i + j * (dstStep/2)]);
}
printf("\n");
}
} while (false);
ippsFree(pBuffer);
ippiFree(pSrc1);
ippiFree(pSrc2);
ippiFree(pDst);
}
Thanks.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Kim,
please keep us informed of the performance results you will obtain.
Gennady
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi, Andrey Bakshaev
Thank you for sharing this information. Your advice has been a great help.
Let me ask you one more question.
Actually, I want to use the ippiROISame option, but when ippiROISame is used, ippiConvGetBufferSize returns an error. The problem appears when I change the code you provided above as shown below. Is there a solution?
//const int out_width = 5; //const int out_height = 9; const int out_width = 3; const int out_height = 5; //IppEnum funCfgFull = (IppEnum)(ippAlgAuto | ippiROIFull | ippiNormNone); IppEnum funCfgFull = (IppEnum)(ippAlgAuto | ippiROISame | ippiNormNone);
- Subscribe to RSS Feed
- Mark Topic as New
- Mark Topic as Read
- Float this Topic for Current User
- Bookmark
- Subscribe
- Printer Friendly Page