Intel® Integrated Performance Primitives
Discuss problems encountered while developing high-performance vision, signal, security, and storage applications.

Performance in IPP Crypto vs. OpenSSL with static library

zhu__xuewu
Beginner
1,533 Views

I've compared the SM3/SM4 performance of ippcp and OpenSSL. Here are the test results:

 

                                      sm3 (TPS)                      sm4 (TPS)
message size (bytes)              64      1024     65536          64      1024     65536
ippcp2019_with_dynamic_lib   1628664    208877      3528      113385      7141       111
ippcp2019_with_static_lib    1619249    212247      3530      504006     35565       562
openssl_1.1.1                1084010    141426      2300      371751     38036       621

I expected IPP Crypto to do much better than OpenSSL, but this is all I got. I hope someone can help me with this. My SM3/SM4 test code for IPP Crypto (l_ippcp_2019.0.117) is pasted below:

gcc -O3 ippcp_SM3_test.c -o ippcp_SM3_test -I/opt/intel/compilers_and_libraries_2019.0.117/linux/ipp/include -I/opt/intel/compilers_and_libraries_2019.0.117/linux/ippcp/include /opt/intel/compilers_and_libraries_2019.0.117/linux/ippcp/lib/intel64_lin/libippcp.a

#include "ipp.h"
#include "ippcp.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>

int SM3_ContextSize = 0;

int main(int argc, char *argv[])
{
	char mess0[65536] = { 0 };
	unsigned char mres[64] = { 0 };
	int byte, i;
	float time_use = 0;
	struct timeval start;
	struct timeval end;
	long my_tps = 0;
	long repeattime = 0;
	if (argc != 3)
	{
		printf("Usage: %s byte(s) repeattime\n", argv[0]);
		printf("byte(s) only can be like :64 1024 65536\n");
		exit(1);
	}

	byte = atoi(argv[1]);
	repeattime = atol(argv[2]);

	ippsSM3GetSize(&SM3_ContextSize);

	gettimeofday(&start, NULL);
	IppsSM3State *sm3_context = (IppsSM3State*)malloc(SM3_ContextSize);
	for (i = 0; i < repeattime; i++)
	{
		

		memset(mess0, 49, byte);

		ippsSM3Init(sm3_context);

		ippsSM3Update((unsigned char*)mess0, byte, sm3_context);

		ippsSM3Final(mres, sm3_context);
		/*int j;
		for (j = 0; j < 32; j++)
		{
			printf("%02x", mres);
		}
		printf("\n");*/
		memset(mres, 0, 64);
	}

	gettimeofday(&end, NULL);
	time_use = (end.tv_sec - start.tv_sec) * 1000000 + (end.tv_usec - start.tv_usec);
	printf("time_use is %f\n", time_use);
	my_tps = repeattime * 1000000L / time_use;
	printf("TPS: %ld\n", my_tps);
	free(sm3_context);
	return 0;
}

gcc -O3 ippcp_SM4_CBC_test.c -o ippcp_SM4_CBC_test -I/opt/intel/compilers_and_libraries_2019.0.117/linux/ipp/include -I/opt/intel/compilers_and_libraries_2019.0.117/linux/ippcp/include /opt/intel/compilers_and_libraries_2019.0.117/linux/ippcp/lib/intel64_lin/libippcp.a

#include "ipp.h"
#include "ippcp.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>

int SM4_CBC_ContextSize = 0;

/*
 * SM4-CBC throughput benchmark built on Intel IPP Cryptography.
 *
 * Usage: <program> byte(s) repeattime
 *   byte(s)     message length in bytes; a non-zero multiple of 16,
 *               at most 65536
 *   repeattime  number of encrypt+decrypt round trips, must be > 0
 *
 * One "transaction" is one ippsSMS4EncryptCBC call followed by one
 * ippsSMS4DecryptCBC call on the result. Context allocation and key setup
 * are done before the first timestamp, so only the cipher calls are timed.
 *
 * Prints the elapsed wall-clock time in microseconds followed by the number
 * of transactions per second (TPS). Returns 0 on success and 1 on bad
 * arguments, allocation failure, an IPP error, a round-trip mismatch, or a
 * run too short to measure.
 */
int main(int argc, char* argv[])
{
	enum { MAX_MSG_LEN = 65536, SM4_BLOCK_LEN = 16, SM4_KEY_LEN = 16 };
	static const unsigned char pkey[SM4_KEY_LEN] = {
		0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef,
		0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10,
	};
	static const unsigned char pIV[SM4_BLOCK_LEN] = { 0 };
	/* Three 64 KiB buffers: static storage keeps them off the stack. */
	static unsigned char a[MAX_MSG_LEN];   /* plaintext */
	static unsigned char b[MAX_MSG_LEN];   /* ciphertext */
	static unsigned char c[MAX_MSG_LEN];   /* decrypted text */
	struct timeval start;
	struct timeval end;
	int exit_code = 1;

	if (argc != 3)
	{
		printf("Usage: %s byte(s) repeattime\n", argv[0]);
		printf("byte(s) only can be like :64 1024 65536\n");
		return 1;
	}

	/* Validate instead of trusting atoi: the length must fit the buffers
	 * and be a whole number of 16-byte cipher blocks. */
	char *parse_end = NULL;
	long byte_arg = strtol(argv[1], &parse_end, 10);
	if (parse_end == argv[1] || *parse_end != '\0' ||
	    byte_arg <= 0 || byte_arg > MAX_MSG_LEN ||
	    byte_arg % SM4_BLOCK_LEN != 0)
	{
		fprintf(stderr,
		        "byte(s) must be a multiple of %d in the range %d..%d\n",
		        (int)SM4_BLOCK_LEN, (int)SM4_BLOCK_LEN, (int)MAX_MSG_LEN);
		return 1;
	}
	int byte = (int)byte_arg;

	long repeattime = strtol(argv[2], &parse_end, 10);
	if (parse_end == argv[2] || *parse_end != '\0' || repeattime <= 0)
	{
		fprintf(stderr, "repeattime must be a positive integer\n");
		return 1;
	}

	IppStatus istate = ippsSMS4GetSize(&SM4_CBC_ContextSize);
	if (istate != ippStsNoErr || SM4_CBC_ContextSize <= 0)
	{
		fprintf(stderr, "ippsSMS4GetSize err %d\n", (int)istate);
		return 1;
	}

	IppsSMS4Spec *sm4_cbc_context = malloc((size_t)SM4_CBC_ContextSize);
	IppsSMS4Spec *sm4_cbc_context2 = malloc((size_t)SM4_CBC_ContextSize);
	if (sm4_cbc_context == NULL || sm4_cbc_context2 == NULL)
	{
		fprintf(stderr, "cannot allocate %d bytes for an SM4 context\n",
		        SM4_CBC_ContextSize);
		goto cleanup;
	}

	/* Key setup is finished before the clock starts so it is not counted.
	 * ippsSMS4Init is given the key directly, so no separate
	 * ippsSMS4SetKey call is made. */
	istate = ippsSMS4Init(pkey, SM4_KEY_LEN, sm4_cbc_context, SM4_CBC_ContextSize);
	if (istate == ippStsNoErr)
	{
		istate = ippsSMS4Init(pkey, SM4_KEY_LEN, sm4_cbc_context2, SM4_CBC_ContextSize);
	}
	if (istate != ippStsNoErr)
	{
		fprintf(stderr, "ippsSMS4Init err %d\n", (int)istate);
		goto cleanup;
	}

	memset(a, 49, (size_t)byte);   /* 49 is ASCII '1' */

	gettimeofday(&start, NULL);
	/* The counter has the same type as repeattime so large repeat counts
	 * cannot overflow it. */
	for (long done = 0; done < repeattime; done++)
	{
		/* encryption */
		istate = ippsSMS4EncryptCBC(a, b, byte, sm4_cbc_context, pIV);
		if (ippStsNoErr != istate)
		{
			printf("ippsSMS4EncryptCBC err %d\n", istate);
			break;
		}

		/* decryption */
		istate = ippsSMS4DecryptCBC(b, c, byte, sm4_cbc_context2, pIV);
		if (ippStsNoErr != istate)
		{
			printf("ippsSMS4DecryptCBC err %d\n", istate);
			break;
		}
	}
	gettimeofday(&end, NULL);

	/* A failed iteration means the loop ended early; a TPS figure computed
	 * from repeattime would be wrong, so report failure instead. */
	if (istate != ippStsNoErr)
	{
		goto cleanup;
	}

	/* Sanity check outside the timed region: decrypting the ciphertext
	 * must give back the plaintext. */
	if (memcmp(a, c, (size_t)byte) != 0)
	{
		fprintf(stderr, "round-trip mismatch: decrypted data differs from input\n");
		goto cleanup;
	}

	/* double keeps microsecond resolution for long runs; float starts
	 * rounding above roughly 16.7 seconds' worth of microseconds. */
	double time_use = (double)(end.tv_sec - start.tv_sec) * 1000000.0
	                + (double)(end.tv_usec - start.tv_usec);
	printf("time_use is %f\n", time_use);
	if (time_use <= 0.0)
	{
		fprintf(stderr, "run too short to measure; increase repeattime\n");
		goto cleanup;
	}

	long my_tps = (long)((double)repeattime * 1000000.0 / time_use);
	printf("TPS: %ld\n", my_tps);
	exit_code = 0;

cleanup:
	free(sm4_cbc_context);
	free(sm4_cbc_context2);
	return exit_code;
}

 

0 Kudos
6 Replies
Chao_Y_Intel
Moderator
1,533 Views

Hi, Xuewu,

Thanks for test code. We need to reproduce your issue here. 
Could you submit the test code through a support ticket? Here are the steps you can follow:
https://software.intel.com/sites/default/files/managed/d5/ce/SubmittingSupportIssue.pdf

Our support experts will check this code.

regards,
Chao

0 Kudos
Gennady_F_Intel
Moderator
1,533 Views

It would also be very useful if you could 1/ tell us the CPU type you are running this application on, and 2/ exclude the initialization part of the computation, such as ippsSMS4Init, from the measurement.

0 Kudos
zhu__xuewu
Beginner
1,533 Views

Gennady F. (Intel) (Blackbelt) wrote:

it would be also very useful if you will provide 1/ the CPU type you are running this application and 2/ and don't measure the initialization part of this computations like ippsSMS4Init .

1)  I ran my test code on an Alibaba Cloud virtual machine. The CPU is an Intel(R) Xeon(R) CPU E5-2682 v4 @ 2.50GHz, and the VM has a single CPU core.

2)  I did not measure the initialization part of the computation, such as ippsSMS4Init; you can check the code above, where it has been commented out.

Thank you!

Zhu Xuewu

0 Kudos
zhu__xuewu
Beginner
1,533 Views

Chao Y (Intel) (Intel) wrote:

Hi, Xuexu, 

Thanks for test code. We need to reproduce your issue here. 
could you submit the test code into our support ticket. Here are some steps you can follow: 
https://software.intel.com/sites/default/files/managed/d5/ce/SubmittingS...

Our support export will check this code. 

regards,
Chao

Hi, Chao,

I have submitted the test code and some information about my test environment. I am looking forward to receiving your reply.

Zhu Xuewu

0 Kudos
Gennady_F_Intel
Moderator
1,533 Views

Xuewu, we have assigned an owner to this ticket and will try to reproduce this case on our side.

0 Kudos
Chao_Y_Intel
Moderator
1,533 Views

Hi, Xuewu, 

We investigated the code's performance further:

The SM4-CBC-DEC operation in IPP is much faster, but SM4-CBC-ENC is a bit slower. The difference arises because CBC-ENC has a dependency on the input data (each block is chained to the previous ciphertext block, so blocks cannot be processed in parallel), whereas CBC-DEC has no such dependency. For the SM4 CBC-ENC part, IPP chooses a more secure implementation, which comes at a performance cost.

Thanks,
Chao  
 

0 Kudos
Reply