#include "stdlib.h" #include "stdio.h" #include "stdint.h" #include "stdbool.h" #include "cilk/cilk.h" #include "cilk/reducer.h" #include "math.h" #define max(a,b) ({ __typeof__ (a) _a = (a); __typeof__ (b) _b = (b); _a > _b ? _a : _b; }) #define min(a,b) ({ __typeof__ (a) _a = (a); __typeof__ (b) _b = (b); _a < _b ? _a : _b; }) __declspec(vector) void kernelFun_evt81(int64_t flatidx16, double* tmp_128, double* tmp_129, double* tmp_130, int64_t tmp_1_shape31); __declspec(vector) void kernelFun_evt81(int64_t flatidx16, double* tmp_128, double* tmp_129, double* tmp_130, int64_t tmp_1_shape31) { double gensym_58; double gensym_59; double gensym_60; int64_t gensym_21; int64_t gensym_22; int64_t gensym_23; int64_t gensym_37; int64_t gensym_38; int64_t gensym_39; int64_t gensym_40; int64_t v269; double e3710; double gensym_41; double gensym_42; double gensym_43; int64_t gensym_44; int64_t gensym_45; int64_t v21114; double e31215; double gensym_46; double gensym_47; double gensym_48; double e7; double e8; double e9; double e10; double e11; double e12; double e13; double e14; double e15; double e16; double e17; double e18; gensym_21 = (int64_t)(flatidx16 / tmp_1_shape31); gensym_22 = (int64_t)(flatidx16 % tmp_1_shape31); gensym_23 = (int64_t)(0); gensym_37 = gensym_22; gensym_38 = gensym_21; gensym_39 = gensym_37; gensym_40 = gensym_38; v269 = gensym_40; e3710 = (double)(v269); gensym_41 = e3710; gensym_42 = e3710; gensym_43 = e3710; gensym_44 = gensym_37; gensym_45 = gensym_38; v21114 = gensym_44; e31215 = (double)(v21114); gensym_46 = e31215; gensym_47 = e31215; gensym_48 = e31215; e7 = gensym_47; e8 = gensym_42; e9 = gensym_48; e10 = gensym_43; e11 = gensym_41; e12 = gensym_46; // SWITCH THESE TWO LINES AND OBSERVE VECTORIZATION FAILURE: // LINE 1: // if ((bool)(!((bool)((bool)(e11 == e12) && (bool)((bool)(e8 == e7) && (bool)(e10 == e9)))))) // LINE 2: if ((!(((e11 == e12) && ((e8 == e7) && (e10 == e9)))))) { e13 = (double)(e9 - e10); e14 = (double)(e12 - e11); e15 = 
(double)(e7 - e8); e16 = (double)((double)((double)(e14 * e14) + (double)(e15 * e15)) + (double)(e13 * e13)); e17 = (double)(sqrt(e16)); e18 = (double)((double)((double)(1.0) * (double)(1.0)) / e16); gensym_58 = (double)((double)(e18 * e14) / e17); gensym_59 = (double)((double)(e18 * e15) / e17); gensym_60 = (double)((double)(e18 * e13) / e17); } else { gensym_58 = (double)(0.0); gensym_59 = (double)(0.0); gensym_60 = (double)(0.0); } tmp_128[flatidx16] = gensym_58; tmp_129[flatidx16] = gensym_59; tmp_130[flatidx16] = gensym_60; } void build_evt81(int64_t sizeArg, double* tmp_128, double* tmp_129 , double* tmp_130, int64_t tmp_1_shape31) { #pragma vector always #pragma ivdep _Cilk_for (int i0 = 0; (i0 < sizeArg); i0 += 1) { kernelFun_evt81(i0, tmp_128, tmp_129, tmp_130, tmp_1_shape31); } } void build_evt84(int64_t inSize, int64_t inStride, double* tmp_025 , double* tmp_026, double* tmp_027, double* tmp_128, double* tmp_129, double* tmp_130, double v066, double v067, double v068) { // Fold loop, reduction variable(s): [(v066,Default,TDouble),(v067,Default,TDouble),(v068,Default,TDouble)] // First, some temporaries to back up the inital state // (we're going to stomp on the reduction vars / formal params): double eetmp0 = v066; double eetmp1 = v067; double eetmp2 = v068; for (int i3 = 0; (i3 < inSize); i3 = (i3 + inStride)) { // Fresh round, restore the state of the accumulator to the initial/identity: v066 = eetmp0; v067 = eetmp1; v068 = eetmp2; for (int i4 = i3; (i4 < (i3 + inStride)); i4 = (i4 + 1)) { double v169 = tmp_128[i4]; double v170 = tmp_129[i4]; double v171 = tmp_130[i4]; double gensym_72; double gensym_73; double gensym_74; gensym_72 = (double)(v066 + v169); gensym_73 = (double)(v067 + v170); gensym_74 = (double)(v068 + v171); v066 = gensym_72; v067 = gensym_73; v068 = gensym_74; } // Write the single reduction result to each output array: tmp_025[(i3 / inStride)] = v066; tmp_026[(i3 / inStride)] = v067; tmp_027[(i3 / inStride)] = v068; } } struct 
ArgRecord { // These are all the Use arrays gathered from the Acc computation: }; struct ArgRecord* CreateArgRecord() { return malloc(sizeof(struct ArgRecord)); } void DestroyArgRecord(struct ArgRecord* arg0) { free(arg0); } struct ResultRecord { // These are all the progResults arrays output from the Acc computation: double* tmp_025; int tmp_025_size; double* tmp_026; int tmp_026_size; double* tmp_027; int tmp_027_size; }; struct ResultRecord* CreateResultRecord() { return malloc(sizeof(struct ResultRecord)); } void DestroyResultRecord(struct ResultRecord* arg1) { // In the CURRENT protocol, we free all results SIMULTANEOUSLY, here: free(arg1); } double* GetResult_tmp_025(struct ResultRecord* arg2) { return arg2->tmp_025; } int GetResultSize_tmp_025(struct ResultRecord* arg3) { return arg3->tmp_025_size; } double* GetResult_tmp_026(struct ResultRecord* arg4) { return arg4->tmp_026; } int GetResultSize_tmp_026(struct ResultRecord* arg5) { return arg5->tmp_026_size; } double* GetResult_tmp_027(struct ResultRecord* arg6) { return arg6->tmp_027; } int GetResultSize_tmp_027(struct ResultRecord* arg7) { return arg7->tmp_027_size; } void MainProg(struct ArgRecord* argsRec, struct ResultRecord* resultsRec) { // First we EXECUTE the program by executing each array op in order: int64_t gensym_0; { int64_t gensym_49; gensym_49 = (int64_t)(1000); gensym_0 = gensym_49; } int64_t tmp_2_shape33; int64_t tmp_2_shape34; { int64_t gensym_50; int64_t gensym_51; gensym_50 = gensym_0; gensym_51 = (int64_t)(1000); tmp_2_shape33 = gensym_50; tmp_2_shape34 = gensym_51; } int64_t gensym_1; { int64_t gensym_52; gensym_52 = (int64_t)(1000); gensym_1 = gensym_52; } int64_t tmp_3_shape35; int64_t tmp_3_shape36; { int64_t gensym_53; int64_t gensym_54; gensym_53 = (int64_t)(1000); gensym_54 = gensym_1; tmp_3_shape35 = gensym_53; tmp_3_shape36 = gensym_54; } int64_t tmp_1_shape31; int64_t tmp_1_shape32; { int64_t gensym_55; int64_t gensym_56; gensym_55 = 
(int64_t)(min(tmp_2_shape33,tmp_3_shape35)); gensym_56 = (int64_t)(min(tmp_2_shape34,tmp_3_shape36)); tmp_1_shape31 = gensym_55; tmp_1_shape32 = gensym_56; } int64_t tmp_1_size; { int64_t gensym_57; gensym_57 = (int64_t)(tmp_1_shape31 * tmp_1_shape32); tmp_1_size = gensym_57; } double* tmp_128 = malloc(((sizeof(double)) * tmp_1_size)); double* tmp_129 = malloc(((sizeof(double)) * tmp_1_size)); double* tmp_130 = malloc(((sizeof(double)) * tmp_1_size)); build_evt81(tmp_1_size, tmp_128, tmp_129, tmp_130, tmp_1_shape31); int64_t tmp_0_shape; { int64_t gensym_61; gensym_61 = tmp_1_shape32; tmp_0_shape = gensym_61; } int64_t tmp_0_size; { int64_t gensym_62; gensym_62 = tmp_0_shape; tmp_0_size = gensym_62; } double gensym_63; double gensym_64; double gensym_65; gensym_63 = (double)(0.0); gensym_64 = (double)(0.0); gensym_65 = (double)(0.0); // Allocate all ouput space for the reduction operation: double* tmp_025 = malloc(((sizeof(double)) * (tmp_1_size / tmp_1_shape32))); double* tmp_026 = malloc(((sizeof(double)) * (tmp_1_size / tmp_1_shape32))); double* tmp_027 = malloc(((sizeof(double)) * (tmp_1_size / tmp_1_shape32))); build_evt84(tmp_1_size, tmp_1_shape32, tmp_025, tmp_026, tmp_027, tmp_128, tmp_129, tmp_130, gensym_63, gensym_64, gensym_65); // We write the final output to the results record: resultsRec->tmp_025 = tmp_025; resultsRec->tmp_025_size = tmp_0_size; resultsRec->tmp_026 = tmp_026; resultsRec->tmp_026_size = tmp_0_size; resultsRec->tmp_027 = tmp_027; resultsRec->tmp_027_size = tmp_0_size; // Finally, we free all arrays that are NOT either input or outputs: free(tmp_128); free(tmp_129); free(tmp_130); } // As a bonus, we produce a normal main function when there are no Use AST nodes. 
/*
 * Prints `count` (x, y, z) tuples as " [ (x, y, z), (x, y, z), ... ] "
 * followed by a newline.  The first tuple is printed unconditionally,
 * so `count` must be at least 1.
 */
static void print_result_tuples(const double* xs, const double* ys, const double* zs, int64_t count)
{
    printf(" [ ");
    printf("(");
    printf("%lf", xs[0]);
    printf(", ");
    printf("%lf", ys[0]);
    printf(", ");
    printf("%lf", zs[0]);
    printf(")");
    for (int64_t i = 1; i < count; i = (i + 1))
    {
        printf(", ");
        printf("(");
        printf("%lf", xs[i]);
        printf(", ");
        printf("%lf", ys[i]);
        printf(", ");
        printf("%lf", zs[i]);
        printf(")");
    }
    printf(" ] ");
    printf("\n");
}

/*
 * Stand-alone driver: runs the same array program as MainProg, prints
 * the result tuples to stdout, and releases every array it allocated.
 * Returns EXIT_FAILURE (after a message on stderr) if memory cannot be
 * allocated, 0 otherwise.
 */
int main(void)
{
    // First we EXECUTE the program by executing each array op in order:
    const int64_t tmp_2_shape33 = (int64_t)(1000);
    const int64_t tmp_2_shape34 = (int64_t)(1000);
    const int64_t tmp_3_shape35 = (int64_t)(1000);
    const int64_t tmp_3_shape36 = (int64_t)(1000);

    const int64_t tmp_1_shape31 = (int64_t)(min(tmp_2_shape33, tmp_3_shape35));
    const int64_t tmp_1_shape32 = (int64_t)(min(tmp_2_shape34, tmp_3_shape36));
    const int64_t tmp_1_size = tmp_1_shape31 * tmp_1_shape32;
    const int64_t tmp_0_size = tmp_1_shape32;

    // Allocate the intermediate arrays and all output space for the reduction:
    const size_t full_count = (size_t)tmp_1_size;
    const size_t out_count = (size_t)(tmp_1_size / tmp_1_shape32);
    double* tmp_128 = malloc(sizeof *tmp_128 * full_count);
    double* tmp_129 = malloc(sizeof *tmp_129 * full_count);
    double* tmp_130 = malloc(sizeof *tmp_130 * full_count);
    double* tmp_025 = malloc(sizeof *tmp_025 * out_count);
    double* tmp_026 = malloc(sizeof *tmp_026 * out_count);
    double* tmp_027 = malloc(sizeof *tmp_027 * out_count);

    int status = 0;
    if (!tmp_128 || !tmp_129 || !tmp_130 || !tmp_025 || !tmp_026 || !tmp_027)
    {
        fprintf(stderr, "out of memory\n");
        status = EXIT_FAILURE;
    }
    else
    {
        build_evt81(tmp_1_size, tmp_128, tmp_129, tmp_130, tmp_1_shape31);

        /* The reduction starts from 0.0 for each of the three accumulators. */
        build_evt84(tmp_1_size, tmp_1_shape32, tmp_025, tmp_026, tmp_027, tmp_128, tmp_129, tmp_130, (double)(0.0), (double)(0.0), (double)(0.0));

        // This code prints the final result(s):
        /* The generated program emits the identical listing three times
           (presumably once per result array); the output is kept as is. */
        for (int pass = 0; pass < 3; pass++)
        {
            print_result_tuples(tmp_025, tmp_026, tmp_027, tmp_0_size);
        }
    }

    /* Release everything, including the result arrays; free(NULL) is a no-op. */
    free(tmp_128);
    free(tmp_129);
    free(tmp_130);
    free(tmp_025);
    free(tmp_026);
    free(tmp_027);
    return status;
}