Bold statement, but when I profile IPP code, a large portion of the time is spent just checking whether parameters are NULL rather than doing actual work.
This level of parameter checking is fine for debugging and for high-level OS functions, but it is unacceptable in a release build.
Case in point: Huffman decoding. A full decoder spends 8% of its time dispatching instead of doing actual work.
And here is a copy of the dispatch code. It does nothing but call y8_ownpj_DecodeHuffman8x8_JPEG_1u16s_C1() after validating each of the 9 parameters (eight pointers are tested for NULL and one integer is tested for a negative value).
Is there any reason why IPP includes debug code in release builds? This is really slowing performance down.
y8_ippiDecodeHuffman8x8_JPEG_1u16s_C1 << this code just calls the internal version (y8_ownpj_DecodeHuffman8x8_JPEG_1u16s_C1), which we don't have access to.
"0x106B70" "" "y8_ippiDecodeHuffman8x8_JPEG_1u16s_C1: push rbx" "" "21" "31" ""
"0x106B71" "" " push rsi" "" "42" "12" ""
"0x106B72" "" " push rdi" "" "" "" ""
"0x106B73" "" " push r12" "" "4" "1" ""
"0x106B75" "" " push r13" "" "7" "24" ""
"0x106B77" "" " push r14" "" "39" "7" ""
"0x106B79" "" " push r15" "" "2" "" ""
"0x106B7B" "" " push rbp" "" "7" "7" ""
"0x106B7C" "" " sub rsp, 0x58h" "" "14" "15" ""
"0x106B80" "" " mov rbp, rcx" "" "" "" ""
"0x106B83" "" " mov r12, r9" "" "" "" ""
"0x106B86" "" " mov rdi, r8" "" "10" "13" ""
"0x106B89" "" " mov esi, edx" "" "" "" ""
"0x106B8B" "" " test rbp, rbp" "" "" "1" ""
"0x106B8E" "" " je y8_ippiDecodeHuffman8x8_JPEG_1u16s_C1+0x31b" "" "" "" ""
"0x106B94" "" "y8_ippiDecodeHuffman8x8_JPEG_1u16s_C1+0x24: test esi, esi" "" "6" "20" ""
"0x106B96" "" " jnge y8_ippiDecodeHuffman8x8_JPEG_1u16s_C1+0x331" "" "" "" ""
"0x106B9C" "" "y8_ippiDecodeHuffman8x8_JPEG_1u16s_C1+0x2c: test rdi, rdi" "" "5" "6" ""
"0x106B9F" "" " je y8_ippiDecodeHuffman8x8_JPEG_1u16s_C1+0x31b" "" "" "" ""
"0x106BA5" "" "y8_ippiDecodeHuffman8x8_JPEG_1u16s_C1+0x35: test r12, r12" "" "" "" ""
"0x106BA8" "" " je y8_ippiDecodeHuffman8x8_JPEG_1u16s_C1+0x31b" "" "" "" ""
"0x106BAE" "" "y8_ippiDecodeHuffman8x8_JPEG_1u16s_C1+0x3e: mov r14, QWORD PTR [rsp+0c0h]" "" "" "1" ""
"0x106BB6" "" " test r14, r14" "" "15" "26" ""
"0x106BB9" "" " je y8_ippiDecodeHuffman8x8_JPEG_1u16s_C1+0x31b" "" "" "" ""
"0x106BBF" "" "y8_ippiDecodeHuffman8x8_JPEG_1u16s_C1+0x4f: mov rax, QWORD PTR [rsp+0c8h]" "" "1" "1" ""
"0x106BC7" "" " test rax, rax" "" "2" "" ""
"0x106BCA" "" " je y8_ippiDecodeHuffman8x8_JPEG_1u16s_C1+0x31b" "" "" "" ""
"0x106BD0" "" "y8_ippiDecodeHuffman8x8_JPEG_1u16s_C1+0x60: mov r15, QWORD PTR [rsp+0d0h]" "" "1" "1" ""
"0x106BD8" "" " test r15, r15" "" "8" "21" ""
"0x106BDB" "" " je y8_ippiDecodeHuffman8x8_JPEG_1u16s_C1+0x31b" "" "" "" ""
"0x106BE1" "" "y8_ippiDecodeHuffman8x8_JPEG_1u16s_C1+0x71: mov rax, QWORD PTR [rsp+0d8h]" "" "2" "8" ""
"0x106BE9" "" " test rax, rax" "" "" "" ""
"0x106BEC" "" " je y8_ippiDecodeHuffman8x8_JPEG_1u16s_C1+0x31b" "" "" "" ""
"0x106BF2" "" "y8_ippiDecodeHuffman8x8_JPEG_1u16s_C1+0x82: mov rbx, QWORD PTR [rsp+0e0h]" "" "11" "21" ""
"0x106BFA" "" " test rbx, rbx" "" "2" "2" ""
"0x106BFD" "" " je y8_ippiDecodeHuffman8x8_JPEG_1u16s_C1+0x31b" "" "" "1" ""
"0x106C03" "" "y8_ippiDecodeHuffman8x8_JPEG_1u16s_C1+0x93: mov rax, QWORD PTR [rsp+0c8h]" "" "2" "4" ""
"0x106C0B" "" " mov rcx, rbp" "" "5" "21" ""
"0x106C0E" "" " mov r10, QWORD PTR [rsp+0d8h]" "" "" "3" ""
"0x106C16" "" " mov edx, esi" "" "" "" ""
"0x106C18" "" " mov r8, rdi" "" "1" "" ""
"0x106C1B" "" " mov r9, r12" "" "9" "20" ""
"0x106C1E" "" " mov QWORD PTR [rsp+020h], r14" "" "3" "1" ""
"0x106C23" "" " mov r13d, 0x40h" "" "4" "8" ""
"0x106C29" "" " mov QWORD PTR [rsp+028h], rax" "" "29" "5" ""
"0x106C2E" "" " mov QWORD PTR [rsp+030h], r15" "" "12" "18" ""
"0x106C33" "" " mov QWORD PTR [rsp+038h], r10" "" "3" "2" ""
"0x106C38" "" " mov QWORD PTR [rsp+040h], rbx" "" "5" "2" ""
"0x106C3D" "" " call y8_ownpj_DecodeHuffman8x8_JPEG_1u16s_C1" "" "37" "15" ""
"0x106C42" "" "y8_ippiDecodeHuffman8x8_JPEG_1u16s_C1+0xd2: test eax, eax" "" "13" "13" ""
"0x106C44" "" " jnz y8_ippiDecodeHuffman8x8_JPEG_1u16s_C1+0xe9" "" "" "" ""
"0x106C46" "" "y8_ippiDecodeHuffman8x8_JPEG_1u16s_C1+0xd6: xor eax, eax" "" "" "" ""
"0x106C48" "" " add rsp, 0x58h" "" "" "" ""
"0x106C4C" "" " pop rbp" "" "15" "11" ""
"0x106C4D" "" " pop r15" "" "" "" ""
"0x106C4F" "" " pop r14" "" "1" "" ""
"0x106C51" "" " pop r13" "" "6" "3" ""
"0x106C53" "" " pop r12" "" "5" "6" ""
"0x106C55" "" " pop rdi" "" "11" "3" ""
"0x106C56" "" " pop rsi" "" "12" "4" ""
"0x106C57" "" " pop rbx" "" "8" "3" ""
"0x106C58" "" " ret " "" "7" "8" ""