Quantcast
Channel: Intel® Software - OpenCL*
Viewing all articles
Browse latest Browse all 1182

clEnqueueNDRangeKernel may fail when using 2D local arrays

$
0
0

clEnqueueNDRangeKernel may fail on a kernel that uses 2D local arrays, yet succeed when the same kernel uses 1D local arrays with manually computed indices.

For example, the following matrix multiplication kernel fails with CL_OUT_OF_RESOURCES if USE_2D is defined, and succeeds otherwise.

The matrices are [24, 72] * [24, 72]^T = [24, 24], and BLOCK_SIZE = 24.

/* Edge length of the square tiles held in local memory; also the required
 * work-group size in dimensions 0 and 1 of the kernel below. */
#define BLOCK_SIZE 24
/* Row stride, in elements, used when indexing the output matrix C. */
#define C_WIDTH 24
/* Row stride, in elements, of both A and B; this is the dimension that is
 * summed over, processed BLOCK_SIZE columns at a time. */
#define AB_COMMON 72

/*
 * Tiled matrix multiplication computing C = A * B^T.
 *
 * A and B are both read row-major with a row stride of AB_COMMON elements;
 * C is written row-major with a row stride of C_WIDTH elements. Each
 * work-item produces one element of C: the dot product of one row of A with
 * one row of B.
 *
 * The kernel must be launched with a BLOCK_SIZE x BLOCK_SIZE x 1 work-group
 * (enforced by reqd_work_group_size). The loop below runs
 * AB_COMMON / BLOCK_SIZE times using integer division, so any remainder
 * columns would be skipped if AB_COMMON were not a multiple of BLOCK_SIZE.
 *
 * Compile-time switch:
 *   USE_2D - when defined, the local tiles are declared as 2D arrays;
 *            otherwise they are flat 1D arrays indexed as
 *            row * BLOCK_SIZE + col. Both variants address the same
 *            elements in the same order.
 *
 * Parameters:
 *   A - input matrix, read-only.
 *   B - input matrix, read-only (used transposed).
 *   C - output matrix.
 */
__kernel __attribute__((reqd_work_group_size(BLOCK_SIZE, BLOCK_SIZE, 1)))
void mx_mul(__global const float *A,
            __global const float *B,
            __global float *C) {

  /* Per-work-group tiles of A and B in local memory. */
#ifdef USE_2D
  __local float AS[BLOCK_SIZE][BLOCK_SIZE];
  __local float BS[BLOCK_SIZE][BLOCK_SIZE];
#else
  __local float AS[BLOCK_SIZE * BLOCK_SIZE];
  __local float BS[BLOCK_SIZE * BLOCK_SIZE];
#endif

  /* Work-group coordinates: bx selects the tile of B rows, by the tile of
   * A rows. */
  int bx = get_group_id(0);
  int by = get_group_id(1);

  /* Position of this work-item inside its work-group. */
  int tx = get_local_id(0);
  int ty = get_local_id(1);

  /* Element this work-item loads on the first iteration: row
   * (group * BLOCK_SIZE + ty), column tx, of A and of B respectively. */
  int a_offs = (by * BLOCK_SIZE + ty) * AB_COMMON + tx;
  int b_offs = (bx * BLOCK_SIZE + ty) * AB_COMMON + tx;

  float sum = 0;
  /* Walk along the shared dimension one BLOCK_SIZE-wide tile at a time;
   * both offsets advance by BLOCK_SIZE columns per iteration. */
  for (int i = 0; i < AB_COMMON / BLOCK_SIZE; i++, a_offs += BLOCK_SIZE, b_offs += BLOCK_SIZE) {
    /* Each work-item copies one element of A and one of B into the tiles. */
#ifdef USE_2D
    AS[ty][tx] = A[a_offs];
    BS[ty][tx] = B[b_offs];
#else
    AS[ty * BLOCK_SIZE + tx] = A[a_offs];
    BS[ty * BLOCK_SIZE + tx] = B[b_offs];
#endif

    /* Wait until the whole work-group has finished filling both tiles
     * before any work-item reads from them. */
    barrier(CLK_LOCAL_MEM_FENCE);

    /* Partial dot product: row ty of the A tile with row tx of the B tile.
     * Reading BS by row tx (not column tx) is what makes this A * B^T. */
    #pragma unroll
    for (int k = 0; k < BLOCK_SIZE; k++) {
#ifdef USE_2D
      sum += AS[ty][k] * BS[tx][k];
#else
      sum += AS[ty * BLOCK_SIZE + k] * BS[tx * BLOCK_SIZE + k];
#endif
    }

    /* Wait until every work-item is done reading the tiles before the next
     * iteration overwrites them. */
    barrier(CLK_LOCAL_MEM_FENCE);
  }

  /* Store the result at (row = global id 1, column = global id 0).
   * NOTE(review): this matches the tile coordinates used above only if the
   * kernel is enqueued without a global work offset - confirm at the call
   * site. */
  C[get_global_id(1) * C_WIDTH + get_global_id(0)] = sum;
}

 

Tested on Ubuntu 14.10 and Core i7-3770 with intel_sdk_for_ocl_applications_xe_2013_r3_sdk_3.2.1.16712_x64.

Attachment: 24x72.c (download), size: 5.35 KB

Viewing all articles
Browse latest Browse all 1182

Trending Articles



<script src="https://jsc.adskeeper.com/r/s/rssing.com.1596347.js" async> </script>