Intel® VTune™ Profiler uses the OpenCL™ API to collect profiling information about OpenCL kernels. According to the OpenCL Specification, completion callbacks must be thread-safe and can be called from different threads. As a result, a completion callback may be received while the collection is being stopped.
Use the OpenCL API to set callbacks on the events returned by clEnqueue* functions, and wait for those callbacks to be received before the application exits. For example:
#include <atomic>
#include <thread>
...
#include <CL/cl2.hpp>
// Number of registered completion callbacks that have not been received yet.
// Atomic because the callbacks can be called from different threads.
// Brace-initialized: std::atomic is not copyable, so `= 0` only compiles
// from C++17 onwards.
std::atomic_uint32_t number_of_uncompleted_callbacks{0};
// Completion callback registered on OpenCL events.
// Marks one outstanding callback as received by atomically decrementing the
// global counter; the event, status and user-data arguments are unused.
void CL_CALLBACK completion_callback(cl_event, cl_int, void*)
{
    number_of_uncompleted_callbacks.fetch_sub(1);
}
int main()
{
...
cl::Program prog(context,
std::string((std::istreambuf_iterator<char>(programSourceFile)),std::istreambuf_iterator<char>()));
...
auto kernelFunc = cl::KernelFunctor<cl::Buffer, cl_int>(prog, "sin_cos");
cl::Event event = kernelFunc(cl::EnqueueArgs(cl::NDRange(dataBuf.size())), clDataBuf, 0);
++ number_of_uncompleted_callbacks;
event.setCallback(CL_COMPLETE, completion_callback);
...
while (number_of_uncompleted_callbacks.load())
{
std::this_thread::yield();
}
return EXIT_SUCCESS;
}