/*
 * Class - 16-byte-or-smaller record laid out as one 64-bit slot followed by
 * a pointer back into global memory.
 *
 * NOTE(review): the field names suggest an object header (vtable slot plus a
 * self reference); only the layout below is established by this declaration.
 */
typedef struct _Class {
	/* 64-bit unsigned slot at offset 0. */
	ulong vtable;
	/* Pointer to a Class object residing in the global address space. */
	__global struct _Class *self;
} Class;

/*
 * prepareNodes - experiment kernel that addresses an array of Class records
 * through a pointer loaded back from global memory.
 *
 * src:  global buffer; every work-item reads its first four uints (16 bytes)
 *       via vload4 at offset 0, so it must be at least 16 bytes long and
 *       uint-aligned.
 * dest: global buffer treated as an array of Class indexed by
 *       get_global_id(0); element 0 additionally has its `self` field set to
 *       the address of dest itself.
 *
 * Per work-item the kernel:
 *   1. (local id 0 only) stores dest into ((Class*) dest)->self,
 *   2. waits on a work-group barrier with a global memory fence,
 *   3. computes its element address as dest->self + global id,
 *   4. stores 12345 into that element's vtable,
 *   5. overwrites the first 16 bytes of that element with the 16 bytes loaded
 *      from src,
 *   6. (local id 0 only) prints the element's vtable as read back afterwards.
 *
 * NOTE(review): hazards that are inherent to this experiment and were left
 * untouched so its observable behaviour does not change:
 *   - The vstore4 in step 5 covers bytes 0..15 of the element, i.e. the
 *     vtable written in step 4 and the `self` field that follows it. For the
 *     work-item with global id 0 the element is dest itself, so that store
 *     replaces dest->self while other work-items may still be reading it in
 *     step 3; nothing in the kernel orders those accesses.
 *   - barrier() only synchronizes work-items of one work-group, so work-items
 *     of different groups are not ordered with respect to each other here.
 *   - The same bytes are written as ulong (step 4) and as uint (step 5) and
 *     then read as ulong (step 6); confirm whether the type-based aliasing
 *     rules permit the compiler to reorder these accesses.
 */
__kernel void prepareNodes(__global uchar* src, __global void* dest) {
	// Publish the base address of dest in element 0's self field. Every
	// work-group's first work-item writes the same value.
	if (get_local_id(0) == 0)
		((__global Class*) dest)->self = (__global Class*) dest;
	// Make the store above visible to the rest of this work-group.
	barrier(CLK_GLOBAL_MEM_FENCE);

	// Two ways of forming the per-item element address. The CORRECT/WRONG
	// labels are the original author's observations; the active variant goes
	// through the self pointer that was just stored to global memory.
//	__global Class* dest_for_item = (__global Class*) (((__global Class*) dest) + get_global_id(0)); //CORRECT
	__global Class* dest_for_item = (__global Class*) (((__global Class*) dest)->self + get_global_id(0)); //WRONG
	dest_for_item->vtable = 12345;
	// Optional barrier the author experimented with between the two stores.
//	barrier(CLK_GLOBAL_MEM_FENCE);
	// Load the first 16 bytes of src and write them over the start of the
	// element; this overlaps the vtable value stored just above.
	uint4 ui4 = vload4(0, (__global uint*) src);
	vstore4(ui4, 0, (__global uint*) dest_for_item);

	if (get_local_id(0) == 0) {
		// vtable is a ulong, so it is printed with the unsigned conversion
		// (%lu); %ld would interpret the 64-bit value as signed.
		// cl_nv_pragma_unroll is used as the vendor switch: implementations
		// that do not define it take the "Intel" branch.
#ifdef cl_nv_pragma_unroll
		printf("NVIDIA vtable=%lu\n", dest_for_item->vtable);
#else
		printf("Intel vtable=%lu\n", dest_for_item->vtable);
#endif
	}
}
