A typical usage model for device routines is the same as described in RNG Usage Model:
Create and initialize the object for basic random number generator.
Create and initialize the object for distribution generator.
Call the generate routine to get random numbers with appropriate statistical distribution.
#include<iostream> #include <CL/sycl.hpp> #include "mkl_rng_sycl_device.hpp" int main() { sycl::queue queue; const int n = 1000; const int seed = 1; // Prepare an array for random numbers std::vector<float> r(n); sycl::buffer<float, 1> r_buf(r.data(), r.size()); // Submit a kernel to generate on device queue.submit([&](sycl::handler& cgh) { auto r_acc = r_buf.template get_access<sycl::access::mode::write>(cgh); cgh.parallel_for(sycl::range<1>(n), [=](sycl::item<1> item) { // Create an engine object mkl::rng::device::philox4x32x10<> engine(seed, item.get_id(0)); // Create a distribution object mkl::rng::device::uniform<> distr; // Call generate function to obtain scalar random number float res = mkl::rng::device::generate(distr, engine); r_acc[item.get_id(0)] = res; }); }); auto r_acc = r_buf.template get_access<sycl::access::mode::read>(); std::cout << "Samples of uniform distribution" << std::endl; for(int i = 0; i < 10; i++) { std::cout << r_acc[i] << std::endl; } return 0; }
#include<iostream> #include <CL/sycl.hpp> #include "mkl_rng_sycl_device.hpp" int main() { sycl::queue queue; const int n = 1000; const int seed = 1; const int vec_size = 4; // Prepare an array for random numbers std::vector<float> r(n); sycl::buffer<float, 1> r_buf(r.data(), r.size()); // Submit a kernel to generate on device sycl::queue{}.submit([&](sycl::handler& cgh) { auto r_acc = r_buf.template get_access<sycl::access::mode::write>(cgh); cgh.parallel_for(sycl::range<1>(n / vec_size), [=](sycl::item<1> item) { // Create an engine object mkl::rng::device::philox4x32x10<vec_size> engine(seed, item.get_id(0) * vec_size); // Create a distribution object mkl::rng::device::uniform<> distr; // Call generate function to obtain sycl::vec<float, 4> with random numbers auto res = mkl::rng::device::generate(distr, engine); res.store(ite.get_id(0), r_acc); }); }); auto r_acc = r_buf.template get_access<sycl::access::mode::read>(); std::cout << "Samples of uniform distribution" << std::endl; for(int i = 0; i < 10; i++) { std::cout << r_acc[i] << std::endl; } return 0; }
There is an opportunity to store engines between kernels manually via sycl::buffer / USM pointers or by using a specific host-side helper class called engine descriptor.
Engines are initialized in the first kernel. Random number generation is performed in the second kernel.
#include<iostream> #include <CL/sycl.hpp> #include "mkl_rng_sycl_device.hpp" int main() { sycl::queue queue; const int n = 1000; const int seed = 1; const int vec_size = 4; // Prepare an array for random numbers std::vector<float> r(n); sycl::buffer<float, 1> r_buf(r.data(), r.size()); sycl::range<1> range(n / vec_size); sycl::buffer<mkl::rng::device::mrg32k3a<vec_size>, 1> engine_buf(range); sycl::queue queue; // Kernel with initialization of engines queue.submit([&](sycl::handler& cgh) { // Create an accessor to sycl::buffer with engines to write initialized states auto engine_acc = engine_buf.template get_access<sycl::access::mode::write>(cgh); cgh.parallel_for(range, [=](sycl::item<1> item) { size_t id = item.get_id(0); // Create an engine object with offset id * 2^64 mkl::rng::device::mrg32k3a<vec_size> engine(seed, {0, id}); engine_acc[id] = engine; }); }); // Kernel for random numbers generation queue.submit([&](sycl::handler& cgh) { auto r_acc = r_buf.template get_access<sycl::access::mode::write>(cgh); // Create an accessor to sycl::buffer with engines to read initialized states auto engine_acc = engine_buf.template get_access<sycl::access::mode::read>(cgh); cgh.parallel_for(range, [=](sycl::item<1> item) { size_t id = item.get_id(0); auto engine = engine_acc[id]; mkl::rng::device::uniform distr; auto res = mkl::rng::device::generate(distr, engine); res.store(id, r_acc); }); }); auto r_acc = r_buf.template get_access<sycl::access::mode::read>(); std::cout << "Samples of uniform distribution" << std::endl; for(int i = 0; i < 10; i++) { std::cout << r_acc[i] << std::endl; } return 0; }
#include<iostream> #include <CL/sycl.hpp> #include "mkl_rng_sycl_device.hpp" int main() { sycl::queue queue; const int n = 1000; const int seed = 1; const int vec_size = 4; // prepare array for random numbers std::vector<float> r(n); sycl::buffer<float, 1> r_buf(r.data(), r.size()); sycl::range<1> range(n / vec_size); // offset of each engine in engine_descriptor int offset = vec_size; // each engine would be created in enqueued task as of specified range // as mkl::rng::device::mrg32k3a<vec_size>(seed, id * offset); mkl::rng::device::engine_descriptor<mkl::rng::device::mrg32k3a<vec_size>> descr(queue, range, seed, offset); queue.submit([&](sycl::handler& cgh) { auto r_acc = r_buf.template get_access<sycl::access::mode::write>(cgh); // create engine_accessor auto engine_acc = descr.get_access(cgh); cgh.parallel_for(range, [=](sycl::item<1> item) { size_t id = item.get_id(0); // load engine from engine_accessor auto engine = engine_acc.load(id); mkl::rng::device::uniform<Type> distr; auto res = mkl::rng::device::generate(distr, engine); res.store(id, r_acc); // store engine for furter calculations if needed engine_acc.store(engine, id); }); }); auto r_acc = r_buf.template get_access<sycl::access::mode::read>(); std::cout << "Samples of uniform distribution" << std::endl; for(int i = 0; i < 10; i++) { std::cout << r_acc[i] << std::endl; } return 0; }
Additionally, examples that demonstrate usage of random number generator device routines are available in:
${MKL}/examples/sycl_device/rng