Community
cancel
Showing results for 
Search instead for 
Did you mean: 
lohinyeung
Beginner
168 Views

-ip causes invalid result for i386 (32_64 universal package)

Hi there

We have compiled our application with the Intel C++ Compiler 11.1.088 with -ip set (Enable Interprocedural Optimization for Single File Compilation), on Mac OS X 10.6.3 with Xcode 3.2.1, for i386 and x86_64 (32_64 universal binary package).

We have verified that the i386 release build generates an incorrect result, while the x86_64 build still outputs the correct result. After removing the -ip option, both builds output the correct result.

To show you the issue, we have extracted a portion of our code to form a test bed. We think the compiler is performing an invalid optimisation; please advise.

[cpp]#include <iostream>
#include <stdint.h>
#include <string.h>

// Short project-local names for the fixed-width integer types from <stdint.h>.
namespace datatypes
{
	// Signed, 8 to 64 bits wide.
	typedef int8_t     int8;
	typedef int16_t    int16;
	typedef int32_t    int32;
	typedef int64_t    int64;

	// Unsigned, 8 to 64 bits wide.
	typedef uint8_t    uint8;
	typedef uint16_t   uint16;
	typedef uint32_t   uint32;
	typedef uint64_t   uint64;
}  // namespace datatypes (no trailing ';': that would be an empty declaration)

// Te4: 256-entry lookup table of 32-bit words, indexed by a byte value.
// Every entry holds a single byte value replicated into all four byte lanes
// (e.g. 0x63636363), so a caller can select the lane it needs with a mask
// (0xff000000, 0x00ff0000, 0x0000ff00, 0x000000ff) instead of shifting the
// looked-up byte into position; func() uses it exactly that way.
// NOTE(review): the byte values (0x63, 0x7c, 0x77, 0x7b, ...) look like the
// AES S-box, as in the Te4 table of the public Rijndael reference code --
// confirm against FIPS-197 before relying on that.
static const datatypes::uint32 Te4[256] =
{
	0x63636363U, 0x7c7c7c7cU, 0x77777777U, 0x7b7b7b7bU,
	0xf2f2f2f2U, 0x6b6b6b6bU, 0x6f6f6f6fU, 0xc5c5c5c5U,
	0x30303030U, 0x01010101U, 0x67676767U, 0x2b2b2b2bU,
	0xfefefefeU, 0xd7d7d7d7U, 0xababababU, 0x76767676U,
	0xcacacacaU, 0x82828282U, 0xc9c9c9c9U, 0x7d7d7d7dU,
	0xfafafafaU, 0x59595959U, 0x47474747U, 0xf0f0f0f0U,
	0xadadadadU, 0xd4d4d4d4U, 0xa2a2a2a2U, 0xafafafafU,
	0x9c9c9c9cU, 0xa4a4a4a4U, 0x72727272U, 0xc0c0c0c0U,
	0xb7b7b7b7U, 0xfdfdfdfdU, 0x93939393U, 0x26262626U,
	0x36363636U, 0x3f3f3f3fU, 0xf7f7f7f7U, 0xccccccccU,
	0x34343434U, 0xa5a5a5a5U, 0xe5e5e5e5U, 0xf1f1f1f1U,
	0x71717171U, 0xd8d8d8d8U, 0x31313131U, 0x15151515U,
	0x04040404U, 0xc7c7c7c7U, 0x23232323U, 0xc3c3c3c3U,
	0x18181818U, 0x96969696U, 0x05050505U, 0x9a9a9a9aU,
	0x07070707U, 0x12121212U, 0x80808080U, 0xe2e2e2e2U,
	0xebebebebU, 0x27272727U, 0xb2b2b2b2U, 0x75757575U,
	0x09090909U, 0x83838383U, 0x2c2c2c2cU, 0x1a1a1a1aU,
	0x1b1b1b1bU, 0x6e6e6e6eU, 0x5a5a5a5aU, 0xa0a0a0a0U,
	0x52525252U, 0x3b3b3b3bU, 0xd6d6d6d6U, 0xb3b3b3b3U,
	0x29292929U, 0xe3e3e3e3U, 0x2f2f2f2fU, 0x84848484U,
	0x53535353U, 0xd1d1d1d1U, 0x00000000U, 0xededededU,
	0x20202020U, 0xfcfcfcfcU, 0xb1b1b1b1U, 0x5b5b5b5bU,
	0x6a6a6a6aU, 0xcbcbcbcbU, 0xbebebebeU, 0x39393939U,
	0x4a4a4a4aU, 0x4c4c4c4cU, 0x58585858U, 0xcfcfcfcfU,
	0xd0d0d0d0U, 0xefefefefU, 0xaaaaaaaaU, 0xfbfbfbfbU,
	0x43434343U, 0x4d4d4d4dU, 0x33333333U, 0x85858585U,
	0x45454545U, 0xf9f9f9f9U, 0x02020202U, 0x7f7f7f7fU,
	0x50505050U, 0x3c3c3c3cU, 0x9f9f9f9fU, 0xa8a8a8a8U,
	0x51515151U, 0xa3a3a3a3U, 0x40404040U, 0x8f8f8f8fU,
	0x92929292U, 0x9d9d9d9dU, 0x38383838U, 0xf5f5f5f5U,
	0xbcbcbcbcU, 0xb6b6b6b6U, 0xdadadadaU, 0x21212121U,
	0x10101010U, 0xffffffffU, 0xf3f3f3f3U, 0xd2d2d2d2U,
	0xcdcdcdcdU, 0x0c0c0c0cU, 0x13131313U, 0xececececU,
	0x5f5f5f5fU, 0x97979797U, 0x44444444U, 0x17171717U,
	0xc4c4c4c4U, 0xa7a7a7a7U, 0x7e7e7e7eU, 0x3d3d3d3dU,
	0x64646464U, 0x5d5d5d5dU, 0x19191919U, 0x73737373U,
	0x60606060U, 0x81818181U, 0x4f4f4f4fU, 0xdcdcdcdcU,
	0x22222222U, 0x2a2a2a2aU, 0x90909090U, 0x88888888U,
	0x46464646U, 0xeeeeeeeeU, 0xb8b8b8b8U, 0x14141414U,
	0xdedededeU, 0x5e5e5e5eU, 0x0b0b0b0bU, 0xdbdbdbdbU,
	0xe0e0e0e0U, 0x32323232U, 0x3a3a3a3aU, 0x0a0a0a0aU,
	0x49494949U, 0x06060606U, 0x24242424U, 0x5c5c5c5cU,
	0xc2c2c2c2U, 0xd3d3d3d3U, 0xacacacacU, 0x62626262U,
	0x91919191U, 0x95959595U, 0xe4e4e4e4U, 0x79797979U,
	0xe7e7e7e7U, 0xc8c8c8c8U, 0x37373737U, 0x6d6d6d6dU,
	0x8d8d8d8dU, 0xd5d5d5d5U, 0x4e4e4e4eU, 0xa9a9a9a9U,
	0x6c6c6c6cU, 0x56565656U, 0xf4f4f4f4U, 0xeaeaeaeaU,
	0x65656565U, 0x7a7a7a7aU, 0xaeaeaeaeU, 0x08080808U,
	0xbabababaU, 0x78787878U, 0x25252525U, 0x2e2e2e2eU,
	0x1c1c1c1cU, 0xa6a6a6a6U, 0xb4b4b4b4U, 0xc6c6c6c6U,
	0xe8e8e8e8U, 0xddddddddU, 0x74747474U, 0x1f1f1f1fU,
	0x4b4b4b4bU, 0xbdbdbdbdU, 0x8b8b8b8bU, 0x8a8a8a8aU,
	0x70707070U, 0x3e3e3e3eU, 0xb5b5b5b5U, 0x66666666U,
	0x48484848U, 0x03030303U, 0xf6f6f6f6U, 0x0e0e0e0eU,
	0x61616161U, 0x35353535U, 0x57575757U, 0xb9b9b9b9U,
	0x86868686U, 0xc1c1c1c1U, 0x1d1d1d1dU, 0x9e9e9e9eU,
	0xe1e1e1e1U, 0xf8f8f8f8U, 0x98989898U, 0x11111111U,
	0x69696969U, 0xd9d9d9d9U, 0x8e8e8e8eU, 0x94949494U,
	0x9b9b9b9bU, 0x1e1e1e1eU, 0x87878787U, 0xe9e9e9e9U,
	0xcecececeU, 0x55555555U, 0x28282828U, 0xdfdfdfdfU,
	0x8c8c8c8cU, 0xa1a1a1a1U, 0x89898989U, 0x0d0d0d0dU,
	0xbfbfbfbfU, 0xe6e6e6e6U, 0x42424242U, 0x68686868U,
	0x41414141U, 0x99999999U, 0x2d2d2d2dU, 0x0f0f0f0fU,
	0xb0b0b0b0U, 0x54545454U, 0xbbbbbbbbU, 0x16161616U,
};

// Ten 32-bit constants; only the most significant byte of each is non-zero.
// NOTE(review): presumably the per-round constants of the key schedule in
// func() (one entry per round) -- confirm against the cipher specification.
static const datatypes::uint32 rcon[] =
{
	0x01000000U, 0x02000000U, 0x04000000U, 0x08000000U, 0x10000000U,
	0x20000000U, 0x40000000U, 0x80000000U, 0x1B000000U, 0x36000000U,
};

// GETU32(p): combine the four consecutive elements p[0..3] into one 32-bit
// word, with p[0] placed in the most significant byte and p[3] in the least.
// Each element is converted to datatypes::uint32 before it is shifted.
#define GETU32(p) \
	( ((datatypes::uint32)(p)[0] << 24) \
	^ ((datatypes::uint32)(p)[1] << 16) \
	^ ((datatypes::uint32)(p)[2] <<  8) \
	^ ((datatypes::uint32)(p)[3]      ) )

// PUTU32(out, word): store the 32-bit value `word` into out[0..3], most
// significant byte first; each stored element is truncated to datatypes::uint8.
// Expands to a braced compound statement, not an expression.
#define PUTU32(out, word) { \
	(out)[0] = (datatypes::uint8)((word) >> 24); \
	(out)[1] = (datatypes::uint8)((word) >> 16); \
	(out)[2] = (datatypes::uint8)((word) >>  8); \
	(out)[3] = (datatypes::uint8)(word); }

// NOTE(review): file-scope using-directive. The stream calls below are already
// written with explicit std:: qualification, while memset/memcmp are called
// unqualified -- confirm nothing relies on this line before removing it.
using namespace std;

/**
 * Expands a 16-byte key into 44 32-bit round-key words (compiler test bed).
 *
 * The first four words are the key itself, packed most-significant-byte
 * first. Ten rounds follow; each derives four new words from the previous
 * four using the Te4 lookup table and the round constant rcon[round].
 * NOTE(review): this has the shape of the AES-128 key schedule (FIPS-197) --
 * confirm before reusing it as such.
 *
 * @param rk   Output buffer; words rk[0] .. rk[43] are written.
 * @param key  Input key; bytes key[0] .. key[15] are read.
 *
 * Side effect: after the tenth round the first 128 entries of Te4, all of
 * rcon and the 44 produced words are printed to std::cout in hexadecimal
 * (std::hex is left set on the stream).
 */
void func(datatypes::uint32 *rk, const datatypes::uint8 *key)
{
	datatypes::int32 i = 0;   // round counter; also the index into rcon
	datatypes::uint32 temp;

	// Remember the start of the buffer: rk is advanced by four words per round.
	datatypes::uint32 * org_rk = rk;

	rk[0] = GETU32(key     );
	rk[1] = GETU32(key +  4);
	rk[2] = GETU32(key +  8);
	rk[3] = GETU32(key + 12);

	for (;;)
	{
		temp  = rk[3];
		// Take the bytes of temp rotated left by one position, map each one
		// through Te4 (the mask keeps only the wanted byte lane), then mix in
		// this round's constant. The index on rcon is required: the bare array
		// name cannot be XORed with an integer.
		rk[4] = rk[0] ^
			(Te4[(temp >> 16) & 0xff] & 0xff000000) ^
			(Te4[(temp >>  8) & 0xff] & 0x00ff0000) ^
			(Te4[(temp      ) & 0xff] & 0x0000ff00) ^
			(Te4[(temp >> 24)       ] & 0x000000ff) ^
			rcon[i];
		rk[5] = rk[1] ^ rk[4];
		rk[6] = rk[2] ^ rk[5];
		rk[7] = rk[3] ^ rk[6];

		if (++i == 10)
		{
			// Note:
			// the wrong result only appears when the following debug output
			// is present, so it is deliberately kept inside the loop.

			std::cout << "Te4 :\n";
			for (int idx=0; idx<128; idx++)
			{
				std::cout << std::hex << Te4[idx] << " ";
			}
			std::cout << std::endl;

			std::cout << "rcon :\n";
			for (int idx=0; idx<10; idx++)
			{
				std::cout << std::hex << rcon[idx] << " ";
			}
			std::cout << std::endl;

			std::cout << "rk :\n";
			for (int idx=0; idx<44; idx++)
			{
				std::cout << std::hex << org_rk[idx] << " ";
			}
			std::cout << std::endl;

			break;
		}
		// Slide the four-word window forward to the words just written.
		rk += 4;
	}
}

/**
 * Test driver: runs func() on a fixed 16-byte key and compares the 44 words
 * it produces with a hard-coded reference table.
 *
 * Prints "Data matched" or "Data NOT match!!!" to std::cout and always
 * returns 0. The command-line arguments are not used.
 */
int main (int argc, char * const argv[])
{
	// Reference output for the key below, one group of four words per row.
	const datatypes::uint32 expected[] =
	{
		0x2547fb5b, 0x51954669, 0xa1a3f6a9, 0x9f664bd7,
		0x17f4f580, 0x4661b3e9, 0xe7c24540, 0x78a40e97,
		0x5c5f7d3c, 0x1a3eced5, 0xfdfc8b95, 0x85588502,
		0x32c80aab, 0x28f6c47e, 0xd50a4feb, 0x5052cae9,
		0x3abc14f8, 0x124ad086, 0xc7409f6d, 0x97125584,
		0xe3404b70, 0xf10a9bf6, 0x364a049b, 0xa158511f,
		0xa9918b42, 0x589b10b4, 0x6ed1142f, 0xcf894530,
		0x4eff8fc8, 0x16649f7c, 0x78b58b53, 0xb73cce63,
		0x25747461, 0x3310eb1d, 0x4ba5604e, 0xfc99ae2d,
		0xd090acd1, 0xe38047cc, 0xa8252782, 0x54bc89af,
		0x8337d5f1, 0x60b7923d, 0xc892b5bf, 0x9c2e3c10
	};

	const unsigned char key[] =
	{
		0x25, 0x47, 0xfb, 0x5b,
		0x51, 0x95, 0x46, 0x69,
		0xa1, 0xa3, 0xf6, 0xa9,
		0x9f, 0x66, 0x4b, 0xd7
	};

	// Start from an all-zero buffer so stale stack contents cannot match by luck.
	datatypes::uint32 rk[44];
	memset(rk, 0, sizeof(rk));

	func(rk, key);

	const bool matched = (memcmp(expected, rk, sizeof(expected)) == 0);
	std::cout << (matched ? "Data matched\n" : "Data NOT match!!!\n");

	return 0;
}
[/cpp]

Best Regards

Simon

0 Kudos
4 Replies
lohinyeung
Beginner
168 Views

Hi there
We have just rebuilt the test on Leopard (OS X 10.5.7) with Xcode 3.1.2 and Intel Compiler 11.1.088. The -ip option works fine for both binaries.
So the problem lies in the combination of Snow Leopard (OS X 10.6.3) + Xcode 3.2.1 + Intel Compiler 11.1.088 + (-ip).
We hope the above information gives you a better idea of how to identify the problem.
Best Regards
Simon
mecej4
Black Belt
168 Views

See this related thread in the Intel Fortran compiler forum
Quoc-An_L_Intel
Moderator
168 Views

There is a known bug with the combination of -O3 and -ip optimization (system config: OS X 10.6.3, Xcode 3.2.x, icc 11.1.088) that results in the data mismatch. The data matches when the options are set to -O2 and -ip, or when -ip is removed while using -O3.

I will provide an update when a fix for this issue is available.

Quoc-An_L_Intel
Moderator
168 Views

This issue is resolved with the latest update of 11.1, or with the Intel Parallel Composer 2011 or Composer XE products.
Reply