DPCT1086

Message

__activemask() is migrated to 0xffffffff. You may need to adjust the code.

Detailed Help

There is currently no functional equivalent of __activemask() in SYCL*. If flow control in your code can make some threads inactive before the call is reached, you need to rewrite the thread logic.

For example, this original CUDA* code:

 1  __device__ inline int SHFL_SYNC(unsigned mask, int val, unsigned offset, unsigned w = warpSize) {
 2    return __shfl_down_sync(mask, val, offset, w);
 3  }
 4
 5  __global__ void kernel(int *array) {
 6    unsigned int tid = threadIdx.x;
 7    if (tid >= 8)
 8      return;
 9    unsigned mask = __activemask();
10    array[tid] = SHFL_SYNC(mask, array[tid], 4);
11  }

results in the following migrated SYCL code:

 1  inline int SHFL_SYNC(unsigned mask, int val, unsigned offset,
 2                    sycl::nd_item<3> item_ct1, unsigned w = 0) {
 3    if (!w) w = item_ct1.get_sub_group().get_local_range().get(0);
 4    // This call will wait for all work-items to arrive which will never happen since only work-items with tid < 8 will encounter this call.
 5    return sycl::shift_group_left(item_ct1.get_sub_group(), val, offset);
 6  }
 7
 8  void kernel(int *array, sycl::nd_item<3> item_ct1) {
 9    unsigned int tid = item_ct1.get_local_id(2);
10    if (tid >= 8)
11      return;
12
13    /*
14    DPCT1086
15    */
16    unsigned mask = 0xffffffff;
17    array[tid] = SHFL_SYNC(mask, array[tid], 4, item_ct1);
18  }

which can be manually rewritten to:

 1  // remove mask parameter, as it is not used
 2  inline int SHFL_SYNC(int val, unsigned offset,
 3                    sycl::nd_item<3> item_ct1, unsigned w = 0) {
 4    if (!w) w = item_ct1.get_sub_group().get_local_range().get(0);
 5    unsigned int tid = item_ct1.get_local_id(2);
 6    // Use a temporary variable to save the result of sycl::shift_group_left() to make sure all work-items can encounter this call.
 7    int v_tmp = sycl::shift_group_left(item_ct1.get_sub_group(), val, offset);
 8    return (tid < 8) ? v_tmp : val;
 9  }
10
11  void kernel(int *array, sycl::nd_item<3> item_ct1) {
12    unsigned int tid = item_ct1.get_local_id(2);
13    // remove mask parameter, as it is not used
14    array[tid] = SHFL_SYNC(array[tid], 4, item_ct1);
15  }

Suggestions to Fix

Check whether 0xffffffff can be used instead of __activemask(), that is, whether every work-item in the sub-group reaches the call. If it cannot be used, redesign the thread logic.