OpenCL* for CPU
Ask questions and share information on Intel® SDK for OpenCL™ Applications and OpenCL™ implementations for Intel® CPU.
Announcements
This forum covers OpenCL* for CPU only. OpenCL* for GPU questions can be asked in the GPU Compute Software forum. Intel® FPGA SDK for OpenCL™ questions can be asked in the FPGA Intel® High Level Design forum.
1721 Discussions

I found that the result of multiplying 64-bit numbers is wrong.

jianzhong_wang
Beginner
621 Views

OpenCL1.2/OpenCL2.0
OS:Win10
Intel(R) HD Graphics 4600

 

 

// Minimal reproduction kernel: runs the same 64-bit limb computation twice and
// prints the final product `c` after each pass.
//
//   Pass 1: the ten limbs are stored to `buff` and then read back from it.
//   Pass 2: the same ten limb values are assigned to the local array directly.
//
// Both passes perform identical arithmetic on identical values, so both should
// print c=0x269b5a0b80 (0x0a1db38 * 0x3D10). The author reports that on
// Intel(R) HD Graphics 4600 / Win10, pass 1 prints c=0x09b5a0b80 instead, i.e.
// the upper 32 bits of the product come out as zero.
// NOTE(review): the only difference between the passes is where the limbs are
// loaded from, which points at a compiler/driver problem rather than at the
// arithmetic itself -- unconfirmed.
//
// buff: global buffer with room for at least 10 64-bit elements; elements 0..9
//       are overwritten with the test limbs.
//
// NOTE(review): the OpenCL C specification lists `unsigned long long` among
// its reserved data types; `ulong` is the standard 64-bit unsigned type. The
// types are left exactly as posted so the reproduction is not disturbed --
// consider switching to `ulong` and re-testing.
// NOTE(review): the constants and limb layout resemble 10x26-bit field
// arithmetic -- unconfirmed.
__kernel void miniTest(__global unsigned long long * buff )
{
    const unsigned int M = 0x3FFFFFFUL;  // mask selecting the low 26 bits
    const unsigned int R = 0x3D10UL;     // multiplier applied to the masked value

    unsigned long long u = 0;
    unsigned long long c = 0;
    unsigned long long d = 0;

    // ---- Pass 1: limbs travel through global memory ----
    buff[0] = 0x32974b6;
    buff[1] = 0x6695f8a;
    buff[2] = 0x3cc840c;
    buff[3] = 0xc8998;
    buff[4] = 0x4f06406;
    buff[5] = 0x438d148;
    buff[6] = 0x29e81a2;
    buff[7] = 0x784c484;
    buff[8] = 0x6ad6038;
    buff[9] = 0x1383ee;

    unsigned long long aA[10] = { 0 };
    aA[0] = (unsigned long long)buff[0];
    aA[1] = (unsigned long long)buff[1];
    aA[2] = (unsigned long long)buff[2];
    aA[3] = (unsigned long long)buff[3];
    aA[4] = (unsigned long long)buff[4];
    aA[5] = (unsigned long long)buff[5];
    aA[6] = (unsigned long long)buff[6];
    aA[7] = (unsigned long long)buff[7];
    aA[8] = (unsigned long long)buff[8];
    aA[9] = (unsigned long long)buff[9];

    // Every limb is below 2^27, so each (2*a)*b term is below 2^55 and the
    // five-term sums (plus the carried-in value) fit in 64 bits.
    d = (aA[0] * 2) * aA[9]
      + (aA[1] * 2) * aA[8]
      + (aA[2] * 2) * aA[7]
      + (aA[3] * 2) * aA[6]
      + (aA[4] * 2) * aA[5];

    // Drop the low 26 bits and carry the remainder into the next sum.
    d >>= 26;

    d += (aA[1] * 2) * aA[9]
       + (aA[2] * 2) * aA[8]
       + (aA[3] * 2) * aA[7]
       + (aA[4] * 2) * aA[6]
       + (aA[5]    ) * aA[5];

    u = d & (unsigned long long)M;       // reported by the author: u == 0x0a1db38 here
    c = u * (unsigned long long)R;       // reported: c == 0x09b5a0b80 here (wrong; expected 0x269b5a0b80)

    // Print c as two 32-bit halves. The low half is zero-padded to 8 hex
    // digits so the concatenation always reads as the correct 64-bit value.
    unsigned int iCLow = (unsigned int)c;
    unsigned int iCHig = (unsigned int)(c >> 32);
    printf( "c=0x%x%08x;\r\n" , iCHig , iCLow );

//===================================================
    // ---- Pass 2: same limb values, assigned directly to the local array ----

    aA[0] = 0x32974b6;
    aA[1] = 0x6695f8a;
    aA[2] = 0x3cc840c;
    aA[3] = 0xc8998;
    aA[4] = 0x4f06406;
    aA[5] = 0x438d148;
    aA[6] = 0x29e81a2;
    aA[7] = 0x784c484;
    aA[8] = 0x6ad6038;
    aA[9] = 0x1383ee;

    d = (aA[0] * 2) * aA[9]
      + (aA[1] * 2) * aA[8]
      + (aA[2] * 2) * aA[7]
      + (aA[3] * 2) * aA[6]
      + (aA[4] * 2) * aA[5];

    d >>= 26;
    d += (aA[1] * 2) * aA[9]
       + (aA[2] * 2) * aA[8]
       + (aA[3] * 2) * aA[7]
       + (aA[4] * 2) * aA[6]
       + (aA[5]    ) * aA[5];

    u = d & (unsigned long long)M;        // reported by the author: u == 0x0a1db38 here
    c = u * (unsigned long long)R;        // reported: c == 0x269b5a0b80 here (correct)

    iCLow = (unsigned int)c;
    iCHig = (unsigned int)(c >> 32);
    printf( "c=0x%x%08x;\r\n" , iCHig , iCLow );
}

0 Kudos
0 Replies
Reply