Quantcast
Channel: Intel® Software - OpenCL*
Viewing all articles
Browse latest Browse all 1182

Relatively simple kernel (included) fails to compile on GPU, compiles on CPU & other platforms

$
0
0

I was able to simplify the kernel to a very small state in order to make it easier to track down the bug. Logically, this kernel may not be correct; however, the syntax seems OK to me, and it compiles on the CPU as well as on other platforms (AMD CPU and GPU).

This is on an Intel Core i3 32xx CPU; the IGP is an Intel HD 2500. I'm using the latest driver build (3345), Windows 7 x64, and the Kernel Builder x64 application.

/* Please Write the OpenCL Kernel(s) code here*/
/*
 * Stores the component-wise hyperbolic sine of 16 consecutive float16
 * vectors read from src into the matching 16 positions of dst.
 *
 * src: first of the 16 input vectors (read only).
 * dst: first of the 16 output vectors.
 *
 * Both pointers are restrict-qualified, so the 16-vector regions passed by
 * the caller must not overlap.
 */
void ntn_snh(__global float16 const *restrict src, __global float16 *restrict dst)
{
    /*
     * Four iterations, each handling four vectors. src and dst are
     * by-value copies of the caller's pointers, so advancing them here
     * is not visible to the caller.
     */
    for (__private int j = 0; j < 16; j+=4, src+=4, dst+=4)
    {
        dst[0] = sinh(src[0]);
        dst[1] = sinh(src[1]);
        dst[2] = sinh(src[2]);
        dst[3] = sinh(src[3]);
    }
}
 
/*
 * Stores the component-wise hyperbolic cosine of 16 consecutive float16
 * vectors read from src into the matching 16 positions of dst.
 *
 * src: first of the 16 input vectors (read only).
 * dst: first of the 16 output vectors.
 *
 * Both pointers are restrict-qualified, so the 16-vector regions passed by
 * the caller must not overlap.
 */
void ntn_csh(__global float16 const *restrict src, __global float16 *restrict dst)
{
    /*
     * Four iterations, each handling four vectors. src and dst are
     * by-value copies of the caller's pointers, so advancing them here
     * is not visible to the caller.
     */
    for (__private int j = 0; j < 16; j+=4, src+=4, dst+=4)
    {
        dst[0] = cosh(src[0]);
        dst[1] = cosh(src[1]);
        dst[2] = cosh(src[2]);
        dst[3] = cosh(src[3]);
    }
}
 
/*
 * Stores the component-wise hyperbolic tangent of 16 consecutive float16
 * vectors read from src into the matching 16 positions of dst.
 *
 * src: first of the 16 input vectors (read only).
 * dst: first of the 16 output vectors.
 *
 * Both pointers are restrict-qualified, so the 16-vector regions passed by
 * the caller must not overlap.
 */
void ntn_tnh(__global float16 const *restrict src, __global float16 *restrict dst)
{
    /*
     * Four iterations, each handling four vectors. src and dst are
     * by-value copies of the caller's pointers, so advancing them here
     * is not visible to the caller.
     */
    for (__private int j = 0; j < 16; j+=4, src+=4, dst+=4)
    {
        dst[0] = tanh(src[0]);
        dst[1] = tanh(src[1]);
        dst[2] = tanh(src[2]);
        dst[3] = tanh(src[3]);
    }
}
 
/*
 * Kernel entry point. Walks a fixed, descending sequence of slot indices
 * and, for each one, applies the function selected by gcom[j] to a block
 * of vectors, writing the result into slot j of gdata_out.
 *
 * gcom:      byte array. gcom[j] is the operation selector:
 *            1 = ntn_snh, 2 = ntn_csh, 3 = ntn_tnh, any other value = no-op.
 *            In the first loop gcom[k] is additionally used as the index of
 *            the source slot inside gdata_in.
 * gdata_in:  read-only input, addressed in slots of 64 float16 vectors.
 * gdata_out: output, addressed in slots of 64 float16 vectors; it is also
 *            read back as the source in the second loop.
 *
 * The kernel never queries a work-item ID, so the indices used below do not
 * vary between work-items.
 *
 * NOTE(review): slots are spaced 64 vectors apart, but each ntn_* helper in
 * this file processes only 16 vectors per call -- confirm whether the
 * remaining 48 vectors of a slot are meant to stay untouched.
 */
__kernel void eval(__global uchar  const *restrict const gcom,
                          __global float16 const *restrict const gdata_in,
                          __global float16          *restrict const gdata_out)
{
    /* j is the destination slot / selector index, k the source index. */
    __private int j = 6, k = 9;
    /*
     * First loop: (j, k) = (6, 9), (5, 8), (4, 7).
     * The source is slot gcom[k] of gdata_in; the destination is slot j of
     * gdata_out.
     */
    for (; k >= 7; --j, --k)
    {
        switch(gcom[j])
        {
        case 0: break;
        case 1: ntn_snh(gdata_in + gcom[k]*64, gdata_out + j*64); break;
        case 2: ntn_csh(gdata_in + gcom[k]*64, gdata_out + j*64); break;
        case 3: ntn_tnh(gdata_in + gcom[k]*64, gdata_out + j*64); break;
        case 4: break;
        default:{}
        }
}
 
    /*
     * Second loop: continues with (j, k) = (3, 6), (2, 5), (1, 4), (0, 3).
     * Both source and destination are now slots of gdata_out, and the source
     * slot is k itself rather than gcom[k]. Because k == j + 3, the source
     * and destination slots are always distinct. Slots 6..4 may have been
     * written by the first loop, and slot 3 may be written at j == 3 before
     * it is read at j == 0.
     *
     * NOTE(review): the first loop indexes the source through gcom[k] while
     * this loop uses k directly -- confirm the asymmetry is intended.
     */
    for (; j >= 0; --j, --k)
    {
        switch(gcom[j])
        {
        case 0: break;
        case 1: ntn_snh(gdata_out + k*64, gdata_out + j*64); break;
        case 2: ntn_csh(gdata_out + k*64, gdata_out + j*64); break;
        case 3: ntn_tnh(gdata_out + k*64, gdata_out + j*64); break;
        case 4: break;
        default:{}
        }
    }
}

Viewing all articles
Browse latest Browse all 1182

Trending Articles



<script src="https://jsc.adskeeper.com/r/s/rssing.com.1596347.js" async> </script>