struct RGB16 { ushort R, G, B; }; __kernel void scale2D_3u16(__global ushort* restrict in, __global ushort* restrict out) { int xPos = get_global_id(0); int yPos = get_global_id(1); int p = yPos*get_global_size(0)+xPos; __global struct RGB16* pRGBIn = (__global struct RGB16*)in; __global struct RGB16* pRGBOut = (__global struct RGB16*)out; pRGBOut[p] = pRGBIn[p]; }
Input data (memory view) is:
0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4 ...
Using NVidia and AMD I get the same result as output. But using Intel OCL on HD4600 I get:
0, 0, 0, 0, 0, 1, 2, 2, 2, 0, 0, 3, 4, 4, 4, ...
I'm totally confused!