Relatively simple kernel (included) fails to compile on GPU, compiles on CPU & other platforms

I was able to reduce the kernel to a very small form to make it easier to track down the bug. Logically, this kernel may not be correct; however, the syntax seems OK to me, and it compiles on the CPU as well as on other platforms (AMD CPU and GPU).

This is on an Intel Core i3 32xx CPU; the IGP is an Intel HD 2500. I'm using the latest driver build (3345), Windows 7 x64, and the Kernel Builder x64 application.

/* Please Write the OpenCL Kernel(s) code here*/
/*
 * Applies sinh() to 16 consecutive float16 vectors.
 *
 * src: first of 16 float16 inputs in global memory (read only).
 * dst: first of 16 float16 outputs in global memory; dst[i] receives
 *      sinh(src[i]) for i = 0..15, written in ascending index order.
 *
 * Both pointers are restrict-qualified, so the two 16-element ranges are
 * expected not to overlap.
 */
void ntn_snh(__global float16 const *restrict src, __global float16 *restrict dst)
{
    for (__private int i = 0; i < 16; ++i)
    {
        dst[i] = sinh(src[i]);
    }
}
 
/*
 * Applies cosh() to 16 consecutive float16 vectors.
 *
 * src: first of 16 float16 inputs in global memory (read only).
 * dst: first of 16 float16 outputs in global memory; dst[i] receives
 *      cosh(src[i]) for i = 0..15, written in ascending index order.
 *
 * Both pointers are restrict-qualified, so the two 16-element ranges are
 * expected not to overlap.
 */
void ntn_csh(__global float16 const *restrict src, __global float16 *restrict dst)
{
    for (__private int i = 0; i < 16; ++i)
    {
        dst[i] = cosh(src[i]);
    }
}
 
/*
 * Applies tanh() to 16 consecutive float16 vectors.
 *
 * src: first of 16 float16 inputs in global memory (read only).
 * dst: first of 16 float16 outputs in global memory; dst[i] receives
 *      tanh(src[i]) for i = 0..15, written in ascending index order.
 *
 * Both pointers are restrict-qualified, so the two 16-element ranges are
 * expected not to overlap.
 */
void ntn_tnh(__global float16 const *restrict src, __global float16 *restrict dst)
{
    for (__private int i = 0; i < 16; ++i)
    {
        dst[i] = tanh(src[i]);
    }
}
 
/*
 * Walks the byte array gcom backwards and, for each output slot, applies the
 * hyperbolic function selected by the opcode stored at gcom[slot]:
 *   1 -> ntn_snh, 2 -> ntn_csh, 3 -> ntn_tnh; any other value does nothing.
 *
 * Slots are addressed in strides of 64 float16 elements.
 *
 * Phase 1 (output slots 6, 5, 4): the input is taken from gdata_in at the
 *   slot number stored in gcom[9], gcom[8], gcom[7] respectively.
 * Phase 2 (output slots 3, 2, 1, 0): the input is taken from gdata_out
 *   itself, at slots 6, 5, 4, 3 respectively.
 *
 * No work-item ID is consulted, so every work-item performs the same reads
 * and writes.
 */
__kernel void eval(__global uchar  const *restrict const gcom,
                          __global float16 const *restrict const gdata_in,
                          __global float16          *restrict const gdata_out)
{
    __private int out_slot = 6;
    __private int in_slot = 9;

    /* Phase 1: sources come from gdata_in, indexed indirectly through gcom. */
    while (in_slot >= 7)
    {
        __private uchar const op = gcom[out_slot];

        /* gcom[in_slot] is only read when the opcode actually needs it. */
        switch (op)
        {
        case 1:
            ntn_snh(gdata_in + gcom[in_slot]*64, gdata_out + out_slot*64);
            break;
        case 2:
            ntn_csh(gdata_in + gcom[in_slot]*64, gdata_out + out_slot*64);
            break;
        case 3:
            ntn_tnh(gdata_in + gcom[in_slot]*64, gdata_out + out_slot*64);
            break;
        default:
            /* Opcodes 0, 4 and anything unrecognised are no-ops. */
            break;
        }

        --out_slot;
        --in_slot;
    }

    /* Phase 2: sources are earlier results already stored in gdata_out. */
    while (out_slot >= 0)
    {
        __private uchar const op = gcom[out_slot];

        switch (op)
        {
        case 1:
            ntn_snh(gdata_out + in_slot*64, gdata_out + out_slot*64);
            break;
        case 2:
            ntn_csh(gdata_out + in_slot*64, gdata_out + out_slot*64);
            break;
        case 3:
            ntn_tnh(gdata_out + in_slot*64, gdata_out + out_slot*64);
            break;
        default:
            /* Opcodes 0, 4 and anything unrecognised are no-ops. */
            break;
        }

        --out_slot;
        --in_slot;
    }
}

Relatively simple kernel (included) fails to compile on GPU, compiles on CPU & other platforms

Trending Articles

RAMAYAMPET Mandal Sarpanch | Upa-Sarpanch | Ward member Mobile Numbers Medak...

लड़कियां सेक्स के दौरान क्यों करती है उह! आह!लड़कियां सेक्स के दौरान क्यों करती...

Neem Baba Extra Questions Answer Class 6 English Poorvi

Throw Back: 4×4 — Sikilitele (Ft Castro) Prod by JQ

Rajasthan Board 10th Result 2016 Roll No wise & Name Wise

Lowe faces four theft charges

Practice Sheet of Right form of verbs for HSC Students

Mafia, Murder & Mayhem In The Motor City: Detroit Mob Hit Timeline (1937-2007)

The 10 Tennessee Cities With The Largest Black Population For 2021

Materials Around Us Class 6 Worksheet Science Chapter 6

デスクトップヒープの枯渇

Best Suvichar in Hindi |बेस्ट सुविचार |शुभ विचार हिंदी में

Kanulanu Thaake Lyrics and translation | Manam (2014)

Korean Sex Porn Videos: XXX Videos & Free Porn Movies

Teen Shot In Miami Drive-By Dies From Injuries

Download: IQ Muzatasha feat Shy D & Pmj – Ulesi NiFertilizer Yamavuto

Mahakal Attitude Status

Property developer set up cannabis factory to help pay off debts...

♡

KB: How to troubleshoot issues when adding a Hyper-V host in System Center...