The code is simple:
/* Record that the snippets below copy: two ulong fields. */
typedef struct _Class Class;

struct _Class {
    ulong vtable; /* first field; the one printed after the copy */
    ulong id;     /* second field */
};
// The statements below run inside a kernel function.
// param1 and param2 are any valid __global kernel pointer parameters,
// each at least sizeof(Class) bytes long.
__global Class* psrc = (__global Class*) param1;
__global Class* pdest = (__global Class*) param2;
// Copy the struct as one uint4: load four uints from psrc, store them to pdest.
uint4 ui4 = vload4(0, (__global uint*) psrc);
vstore4(ui4, 0, (__global uint*) pdest);
// NOTE(review): the store above goes through a uint pointer while the read
// below goes through the ulong field; a compiler might treat the two accesses
// as non-aliasing -- unconfirmed, verify against the generated code.
// vtable is a ulong, so it is printed with the unsigned conversion (%lu).
printf("%#v4hlX vtable=%lu\n", ui4, pdest->vtable);
The result shows that on the HD4600 GPU the memory pointed to by pdest (param2) is not updated with the data loaded from psrc (param1), while the same code works fine on AMD and Nvidia GPUs.
I also found that the code below, which removes the __global address space qualifier from the source and destination objects, works on the HD4600:
// Same copy as the __global version, but between two variables declared
// without an address space qualifier.
Class src = {3333, 200};
Class dest = {2222, 100};
// Assuming these are function-scope variables (and therefore __private):
// OpenCL C does not allow casting a pointer from one address space to
// another, so their addresses are cast to __private uint*, not __global uint*.
uint4 ui4 = vload4(0, (__private uint*) &src);
vstore4(ui4, 0, (__private uint*) &dest);
// vtable is a ulong, so it is printed with the unsigned conversion (%lu).
printf("%#v4hlX vtable=%lu\n", ui4, dest.vtable);
Is it a bug? Any comments are appreciated.