Intel® Integrated Performance Primitives
Deliberate problems developing high-performance vision, signal, security, and storage applications.

Segfault in l9_cmn_dft_avx2_ippsDFTFwd_CToC_32fc

silkskier
Beginner
431 Views

As in the topic: after upgrading ICPX to 2024.1.0, I've started to encounter a segfault in the IPP forward FFT on Debian 13.

I'm using it for a Gaussian blur kernel, in a loop a few billion iterations long, on vectors a few hundred elements long.

The error is relatively uncommon, and happens only once in a few billion operations.

The wrapper around IPP I'm using in my code is attached to the message - sadly I have no idea what may be causing it. The address sanitizer report is also pasted below. If it would be helpful in any way I am willing to provide a working example of the code causing segfaults.


Here is the AddressSanitizer report:
==74099==ERROR: AddressSanitizer: SEGV on unknown address 0xfffffffda6c95e68 (pc 0x000000e0bfc3 bp 0x5240000f27c0 sp 0x7ffd7ebe05d0 T0)
==74099==The signal is caused by a READ memory access.
0.88% complete, 11.34 seconds left.    #0 0xe0bfc3 in l9_cmn_dft_avx2_ippsDFTFwd_CToC_32fc (/home/kacper/Pulpit/ECL_test/src/Gaussian_blur/build/GB_b+0xe0bfc3)
   #1 0xb5a0fe in l9_ownscDft_Conv_32fc (/home/kacper/Pulpit/ECL_test/src/Gaussian_blur/build/GB_b+0xb5a0fe)
   #2 0xb58860 in l9_cmn_dft_avx2_ippsDFTFwd_RToPack_32f (/home/kacper/Pulpit/ECL_test/src/Gaussian_blur/build/GB_b+0xb58860)
   #3 0xb0c10a in l9_ippsDFTFwd_RToPack_32f (/home/kacper/Pulpit/ECL_test/src/Gaussian_blur/build/GB_b+0xb0c10a)
   #4 0x51b362 in gb_b(star const&, Grid const&) (/home/kacper/Pulpit/ECL_test/src/Gaussian_blur/build/GB_b+0x51b362)
   #5 0x51d48d in periodogram(Grid&, std::filesystem::__cxx11::path) (/home/kacper/Pulpit/ECL_test/src/Gaussian_blur/build/GB_b+0x51d48d)
   #6 0x520af6 in main (/home/kacper/Pulpit/ECL_test/src/Gaussian_blur/build/GB_b+0x520af6)
   #7 0x7f4be27396c9 in __libc_start_call_main csu/../sysdeps/nptl/libc_start_call_main.h:58:16
   #8 0x7f4be2739784 in __libc_start_main csu/../csu/libc-start.c:360:3
   #9 0x4304b0 in _start (/home/kacper/Pulpit/ECL_test/src/Gaussian_blur/build/GB_b+0x4304b0)

AddressSanitizer can not provide additional info.
SUMMARY: AddressSanitizer: SEGV (/home/kacper/Pulpit/ECL_test/src/Gaussian_blur/build/GB_b+0xe0bfc3) in l9_cmn_dft_avx2_ippsDFTFwd_CToC_32fc
==74099==ABORTING




And here is the wrapper I'm using around IPP:


#include <ipps.h>
#include <cmath>
#include <iostream>
#include <iomanip>

struct FFT {

int n; 

IppsDFTSpec_R_32f *pDFTSpec;

Ipp8u* pDFTInitBuf;
Ipp8u* pDFTWorkBuf;

Ipp32f* pSrc;
Ipp32f* pDst;

int sizeDFTSpec;
int sizeDFTInitBuf;
int sizeDFTWorkBuf;

Ipp32f* kernel;


void init(int size) {

n = size;

pDFTSpec=0;
// Allocate buffers
psrc=ippsMalloc_32f(n);
pDst = ippsMalloc_32f(n);

// Query to get buffer sizes
ippsDFTGetSize_C_32f(n, IPP_FFT_DIV_INV_BY_N, ippAlgHintNone, &sizeDFTSpec, &sizeDFTInitBuf, &sizeDFTWorkBuf);

// Alloc DFT buffers
pDFTSpec = (IppsDFTSpec_R_32f*)ippsMalloc_8u(sizeDFTSpec);
pDFTInitBuf = ippsMalloc_8u(sizeDFTInitBuf);
pDFTWorkBuf = ippsMalloc_8u(sizeDFTWorkBuf);

// Initialize DFT
ippsDFTInit_R_32f(n, IPP_FFT_DIV_INV_BY_N, ippAlgHintNone, pDFTSpec, pDFTInitBuf);
if (pDFTInitBuf) ippsFree(pDFTInitBuf);
}

void generate_kernel(float a) {

kernel = ippsMalloc_32f(n); // allocate memory for kernel
double wsum;
for (int i = 0; i <= n/2; i++) {kernel[i] = exp(float(-16) * float(i * i)/(a * a * float(n)));}
for (int i = n-1; i > n/2; i--) {kernel[i] = kernel[n-i];}
//for (int i = 0; i < n; i++) {std::cout << std::fixed << std::setprecision(3) << kernel[i] << "\t";} //print the weights
for (int i = 0; i < n; i++) {wsum += kernel[i];}
for (int i = 0; i < n; i++) {kernel[i] /= wsum;}
//for (int i = 0; i < n; i++) {std::cout << std::fixed << std::setprecision(3) << kernel[i] << "\t";} //print normalized weights
ippsDFTFwd_RToPack_32f(kernel, kernel, pDFTSpec, pDFTWorkBuf); //transform the kernel
//ippsDFTInv_PackToR_32f(kernel, kernel, pDFTSpec, pDFTWorkBuf); //retrieve weights
//for (int i = 0; i < n; i++) {std::cout << std::fixed << std::setprecision(3) << kernel[i] << "\t";} //print the kernel
}

void generate_kernel(){generate_kernel(1);}

    /*
    void convolve(float* in, float* out) {

        ippsDFTFwd_RToPack_32f(reinterpret_cast<Ipp32f*>(in), reinterpret_cast<Ipp32f*>(out), pDFTSpec, pDFTWorkBuf);
        ippsMulPack_32f_I(kernel, reinterpret_cast<Ipp32f*>(out), n);
        ippsDFTInv_PackToR_32f(reinterpret_cast<Ipp32f*>(out), reinterpret_cast<Ipp32f*>(out), pDFTSpec, pDFTWorkBuf);
    }
    */

    void convolve(float* in, float* out) {
        // Allocate memory for temporary arrays
        float* temp_in = ippsMalloc_32f(n);  // static_cast<float*>(malloc(sizeof(float) * n));
        float* temp_out = ippsMalloc_32f(n); // static_cast<float*>(malloc(sizeof(float) * n));

        // Copy elements from input to temporary input array
        for (int i = 0; i < n; ++i) {
            temp_in[i] = in[i];
        }

        // Perform the operations on temporary arrays
        ippsDFTFwd_RToPack_32f(temp_in, temp_out, pDFTSpec, pDFTWorkBuf);
        ippsMulPack_32f_I(kernel, temp_out, n);
        ippsDFTInv_PackToR_32f(temp_out, temp_out, pDFTSpec, pDFTWorkBuf);

        // Copy elements from temporary output array to output array
        for (int i = 0; i < n; ++i) {
            out[i] = temp_out[i];
        }

        // Free memory for temporary arrays
        ippsFree(temp_in);
        ippsFree(temp_out);
}

        void convolve(float* y) {convolve(y, y);}

void convolve(float* y) {convolve(y, y);}


void free() {
if (pDFTSpec) ippsFree(pDFTSpec);
if (pDFTWorkBuf) ippsFree(pDFTWorkBuf);
if (pSrc) ippsFree(pSrc);
if (pDst) ippsFree(pDst);
if (kernel) ippsFree(kernel);
}

};


What's even more interesting is that once the source of the error is pinpointed (e.g. the single array value causing the segfault is identified), it's not reproducible on that single example - the segfaults happen only in long loops, and running the code on a single array doesn't reproduce anything.

Btw I've tested both versions (with and without reinterpret casting), and both tend to fail in the same way, so it's not the source of that issue.

 
Labels (1)
0 Kudos
2 Replies
silkskier
Beginner
381 Views

The issue seems to be on my side, sorry for the error notice.

I've just realised that my wrapper contains a typo:

ippsDFTGetSize_C_32f(n, IPP_FFT_DIV_INV_BY_N, ippAlgHintNone, &sizeDFTSpec, &sizeDFTInitBuf, &sizeDFTWorkBuf);

Instead of

ippsDFTGetSize_R_32f(n, IPP_FFT_DIV_INV_BY_N, ippAlgHintNone, &sizeDFTSpec, &sizeDFTInitBuf, &sizeDFTWorkBuf);


However, because the code worked without issue in previous ICPX versions (and in reasonably sized loops with the new ICPX), I assumed that the code was correct.

Sorry again for the false error report - I hope that nobody has put any work into trying to resolve the issue yet.

0 Kudos
Ruqiu_C_Intel
Moderator
371 Views

Hello Silkskier,

Thank you for letting us know that you found the typo on your side.

 

Regards,

Ruqiu

0 Kudos
Reply