/*BEGIN_LEGAL 
INTEL CONFIDENTIAL

Copyright (c) 2004-2011, Intel Corporation. All rights reserved.

The source code contained or described herein and all documents
related to the source code ("Material") are owned by Intel Corporation
or its suppliers or licensors. Title to the Material remains with
Intel Corporation or its suppliers and licensors. The Material
contains trade secrets and proprietary and confidential information of
Intel or its suppliers and licensors. The Material is protected by
worldwide copyright and trade secret laws and treaty provisions. No
part of the Material may be used, copied, reproduced, modified,
published, uploaded, posted, transmitted, distributed, or disclosed in
any way without Intel's prior express written permission.

No license under any patent, copyright, trade secret or other
intellectual property right is granted to or conferred upon you by
disclosure or delivery of the Materials, either expressly, by
implication, inducement, estoppel or otherwise. Any license under such
intellectual property rights must be express and approved by Intel in
writing.

END_LEGAL */

#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Flag a 64-bit x86 build: SDE_TEST_64BIT is defined when either of the
   macros _M_X64 or __x86_64__ is defined. */
#if defined(_M_X64) || defined(__x86_64__)
# define SDE_TEST_64BIT
#endif

/* REG is the general-purpose register used by the inline assembly in main()
   to hold the address of the gather base array: rdx when SDE_TEST_64BIT is
   defined, edx otherwise. */
#ifdef SDE_TEST_64BIT
#define REG rdx
#else
#define REG edx
#endif

/* Eight 32-bit lanes that can be viewed either as raw integers or as floats.
   main() uses objects of this type as the memory source/destination of
   256-bit ymm register moves (indices, gathered values, gather mask).
   NOTE(review): the two views only overlay exactly if float is 32 bits wide. */
typedef union {
    uint32_t i[8];  /* integer view of the eight lanes */
    float f[8];     /* float view of the same storage */
} data_t;

/*
 * Compare two floats for exact bit-pattern equality.
 *
 * The object representation of each float is copied into a uint32_t and the
 * two integers are compared.  Consequently a NaN compares equal to a NaN with
 * the same bit pattern, while +0.0 and -0.0 compare different.
 *
 * A plain (uint32_t) cast must not be used here: it converts the numeric
 * value (truncating the fraction, and undefined for NaN or out-of-range
 * values) instead of reinterpreting the bits.
 *
 * v1, v2: pointers to the floats to compare; must not be NULL.
 * Returns 1 when the bit patterns are identical, 0 otherwise.
 *
 * Assumes float occupies 32 bits, as the data_t union in this file does.
 */
int check_val_flt(float *v1, float *v2) {
    uint32_t bits1;
    uint32_t bits2;

    /* memcpy is the well-defined way to read a float's raw bits */
    memcpy(&bits1, v1, sizeof bits1);
    memcpy(&bits2, v2, sizeof bits2);

    return bits1 == bits2 ? 1 : 0;
}

/*
 * Test VGATHERDPS inside RTM transactions.
 *
 * Phase 1 (commit): gathers base[0..7] inside a transaction that is ended
 * with _xend(), then expects output to hold 5..12 and the stored gather mask
 * to be all zeros.
 *
 * Phase 2 (rollback): gathers from a shifted base inside a transaction that
 * is aborted with _xabort(), then expects output to still hold the phase 1
 * values and mask_result to still hold the 0xff fill written before the
 * transaction.
 *
 * Returns the number of errors detected (0 on success).
 */
int main() {
    unsigned int status;
    int errors = 0;
    int i = 0;
    unsigned int mask = 0xffffffff;
    data_t indices,output,mask_result;
    float base[1024];

    // Gather indices 0..7 and source data base[i] == i + 5
    for(i=0;i<8;i++)  {
        indices.i[i] = i;
    }
    for(i=0;i<1024;i++)  {
        base[i]=i + 5;
    }

    // Give the result buffers defined contents that cannot match the
    // expected results (values 5..12, mask 0).  If the first transaction
    // does not commit, the checks below then report errors deterministically
    // instead of reading indeterminate memory.
    memset(&output,0,sizeof(output));
    memset(&mask_result,0xff,sizeof(mask_result));

    // Test that commit works inside transaction
    status = _xbegin();
    if (status == _XBEGIN_STARTED) {
        // ymm3 = all-ones gather mask, REG = &base[0], ymm2 = indices.
        // The gathered lanes (ymm1) and the mask register after the gather
        // (ymm3) are stored to memory so they can be checked after commit.
        __asm {
            vpbroadcastd ymm3, mask
            lea REG, base
            vmovups  ymm2, indices
            vgatherdps  ymm1, [REG+ymm2*4], ymm3
            vmovups  output, ymm1
            vmovups  mask_result, ymm3
        }

        _xend();
    }
    else {
        // Transaction did not start or was aborted
        errors++;
    }

    // Check values
    for(i=0;i<8;i++) {
        float expected = i + 5;

        if (!check_val_flt(&expected,&output.f[i])) {
            printf("Commit %d. error! expected %f, result: %f\n",
                   i,expected,output.f[i]);
            errors++;
        }
    }

    // Check success mask - all zeros
    for(i=0;i<8;i++) {

        if (mask_result.i[i] != 0) {
            printf("Commit mask %d. error! expected 0, result: %x\n",
                i,(unsigned)mask_result.i[i]);
            errors++;
        }
    }

    // Refill mask_result BEFORE reloading the mask register: memset is an
    // ordinary function call and ymm3 is not preserved across calls, so
    // calling it after the vpbroadcastd could clobber the mask.
    memset(&mask_result,0xff,sizeof(mask_result));

    // Set mask register again
    // NOTE(review): ymm3 is still expected to survive until the __asm block
    // inside the transaction below; only the _xbegin() intrinsic and the
    // status test lie in between.
    __asm {
        vpbroadcastd ymm3, mask
     }

    // Test that rollback works in aborted transaction
    status = _xbegin();
    if (status == _XBEGIN_STARTED) {
        // Same gather as above but from base+4 bytes, so a store that
        // escaped the rollback would show up as i + 6 instead of i + 5.
        __asm {
            lea REG, base
            add REG,4 // Corrupt data
            vmovups  ymm2, indices
            vgatherdps  ymm1, [REG+ymm2*4], ymm3
            vmovups  output, ymm1
            vmovups  mask_result, ymm3
        }

        _xabort(_XABORT_DEBUG); // We abort transaction here

        // Only reached if the abort did not take effect
        _xend();
        errors++;
    }

    // Check values - output must still hold the committed results
    for(i=0;i<8;i++) {
        float expected = i + 5;

        if (!check_val_flt(&expected,&output.f[i])) {
            printf("Rollback %d. error! expected %f, result: %f\n",
                   i,expected,output.f[i]);
            errors++;
        }
    }

    // Check rollback mask - all ones
    for(i=0;i<8;i++) {

        if (mask_result.i[i] != 0xffffffff) {
            printf("Rollback mask %d. error! expected ffffffff, result: %x\n",
                i,(unsigned)mask_result.i[i]);
            errors++;
        }
    }

    if (errors > 0) {
        printf("rtm_vgatherdps test failed with %d errors\n",
            errors);
    }
    return errors;
}
