oneMKL RNG Device Usage Model#

A typical usage model for device routines is the same as described in oneMKL RNG Usage Model:

  1. Create and initialize the object for basic random number generator.

  2. Create and initialize the object for distribution generator.

  3. Call the generate routine to get random numbers with appropriate statistical distribution.

Example of Scalar Random Numbers Generation#

 1#include <iostream>
 2#include <sycl/sycl.hpp>
 3#include "oneapi/mkl/rng/device.hpp"
 4
 5
 6int main() {
 7    sycl::queue queue;
 8    const int n = 1000;
 9    const int seed = 1;
10    // Prepare an array for random numbers
11    std::vector<float> r(n);
12
13
14    sycl::buffer<float, 1> r_buf(r.data(), r.size());
15    // Submit a kernel to generate on device
16    queue.submit([&](sycl::handler& cgh) {
17        auto r_acc = r_buf.template get_access<sycl::access::mode::write>(cgh);
18        cgh.parallel_for(sycl::range<1>(n), [=](sycl::item<1> item) {
19            // Create an engine object
20            oneapi::mkl::rng::device::philox4x32x10<> engine(seed, item.get_id(0));
21            // Create a distribution object
22            oneapi::mkl::rng::device::uniform<> distr;
23            // Call generate function to obtain scalar random number
24            float res = oneapi::mkl::rng::device::generate(distr, engine);
25
26
27            r_acc[item.get_id(0)] = res;
28        });
29    });
30
31
32    auto r_acc = r_buf.template get_access<sycl::access::mode::read>();
33    std::cout << "Samples of uniform distribution" << std::endl;
34    for(int i = 0; i < 10; i++) {
35        std::cout << r_acc[i] << std::endl;
36    }
37
38
39    return 0;
40}

Example of Vector Random Numbers Generation#

 1#include <iostream>
 2#include <sycl/sycl.hpp>
 3
 4
 5#include "oneapi/mkl/rng/device.hpp"
 6
 7
 8int main() {
 9    sycl::queue queue;
10    const int n = 1000;
11    const int seed = 1;
12    const int vec_size = 4;
13    // Prepare an array for random numbers
14    std::vector<float> r(n);
15
16
17    sycl::buffer<float, 1> r_buf(r.data(), r.size());
18    // Submit a kernel to generate on device
19    sycl::queue{}.submit([&](sycl::handler& cgh) {
20        auto r_acc = r_buf.template get_access<sycl::access::mode::write>(cgh);
21        cgh.parallel_for(sycl::range<1>(n / vec_size), [=](sycl::item<1> item) {
22            // Create an engine object
23            oneapi::mkl::rng::device::philox4x32x10<vec_size> engine(seed, item.get_id(0) * vec_size);
24            // Create a distribution object
25            oneapi::mkl::rng::device::uniform<> distr;
26            // Call generate function to obtain sycl::vec<float, 4> with random numbers
27            auto res = oneapi::mkl::rng::device::generate(distr, engine);
28
29
30            res.store(ite.get_id(0), r_acc);
31        });
32    });
33
34
35    auto r_acc = r_buf.template get_access<sycl::access::mode::read>();
36    std::cout << "Samples of uniform distribution" << std::endl;
37    for(int i = 0; i < 10; i++) {
38        std::cout << r_acc[i] << std::endl;
39    }
40
41
42    return 0;
43}

There is an opportunity to store engines between kernels manually via sycl::buffer / USM pointers or by using a specific host-side helper class called, engine descriptor.

Example of Random Numbers Generation by Engines Stored in sycl::buffer#

Engines are initialized in the first kernel. Random number generation is performed in the second kernel.

 1#include <iostream>
 2#include <sycl/sycl.hpp>
 3
 4
 5#include "oneapi/mkl/rng/device.hpp"
 6
 7
 8int main() {
 9    sycl::queue queue;
10    const int n = 1000;
11    const int seed = 1;
12    const int vec_size = 4;
13    // Prepare an array for random numbers
14    std::vector<float> r(n);
15    sycl::buffer<float, 1> r_buf(r.data(), r.size());
16    sycl::range<1> range(n / vec_size);
17    sycl::buffer<oneapi::mkl::rng::device::mrg32k3a<vec_size>, 1> engine_buf(range);
18
19
20    sycl::queue queue;
21
22
23    // Kernel with initialization of engines
24    queue.submit([&](sycl::handler& cgh) {
25        // Create an accessor to sycl::buffer with engines to write initialized states
26        auto engine_acc = engine_buf.template get_access<sycl::access::mode::write>(cgh);
27        cgh.parallel_for(range, [=](sycl::item<1> item) {
28            size_t id = item.get_id(0);
29            // Create an engine object with offset id * 2^64
30            oneapi::mkl::rng::device::mrg32k3a<vec_size> engine(seed, {0, id});
31            engine_acc[id] = engine;
32        });
33    });
34    // Kernel for random numbers generation
35    queue.submit([&](sycl::handler& cgh) {
36        auto r_acc = r_buf.template get_access<sycl::access::mode::write>(cgh);
37        // Create an accessor to sycl::buffer with engines to read initialized states
38        auto engine_acc = engine_buf.template get_access<sycl::access::mode::read>(cgh);
39        cgh.parallel_for(range, [=](sycl::item<1> item) {
40            size_t id = item.get_id(0);
41
42
43            auto engine = engine_acc[id];
44            oneapi::mkl::rng::device::uniform distr;
45            auto res = oneapi::mkl::rng::device::generate(distr, engine);
46
47
48            res.store(id, r_acc);
49        });
50    });
51
52
53    auto r_acc = r_buf.template get_access<sycl::access::mode::read>();
54    std::cout << "Samples of uniform distribution" << std::endl;
55    for(int i = 0; i < 10; i++) {
56        std::cout << r_acc[i] << std::endl;
57    }
58
59
60    return 0;
61}

Example of Random Numbers Generation with Host-side Helpers Usage#

 1#include <iostream>
 2#include <sycl/sycl.hpp>
 3
 4
 5#include "oneapi/mkl/rng/device.hpp"
 6
 7
 8int main() {
 9    sycl::queue queue;
10    const int n = 1000;
11    const int seed = 1;
12    const int vec_size = 4;
13    // prepare array for random numbers
14    std::vector<float> r(n);
15    sycl::buffer<float, 1> r_buf(r.data(), r.size());
16    sycl::range<1> range(n / vec_size);
17
18
19    // offset of each engine in engine_descriptor
20    int offset = vec_size;
21    // each engine would be created in enqueued task as of specified range
22    // as oneapi::mkl::rng::device::mrg32k3a<vec_size>(seed, id * offset);
23    oneapi::mkl::rng::device::engine_descriptor<oneapi::mkl::rng::device::mrg32k3a<vec_size>>
24    descr(queue, range, seed, offset);
25    queue.submit([&](sycl::handler& cgh) {
26        auto r_acc = r_buf.template get_access<sycl::access::mode::write>(cgh);
27        // create engine_accessor
28        auto engine_acc = descr.get_access(cgh);
29        cgh.parallel_for(range, [=](sycl::item<1> item) {
30            size_t id = item.get_id(0);
31            // load engine from engine_accessor
32            auto engine = engine_acc.load(id);
33            oneapi::mkl::rng::device::uniform<Type> distr;
34
35
36            auto res = oneapi::mkl::rng::device::generate(distr, engine);
37
38
39            res.store(id, r_acc);
40            // store engine for furter calculations if needed
41            engine_acc.store(engine, id);
42        });
43    });
44
45
46    auto r_acc = r_buf.template get_access<sycl::access::mode::read>();
47    std::cout << "Samples of uniform distribution" << std::endl;
48    for(int i = 0; i < 10; i++) {
49        std::cout << r_acc[i] << std::endl;
50    }
51
52
53    return 0;
54}

Example of Random Numbers Generation Using Gamma Distribution#

 1#include <iostream>
 2#include <vector>
 3#include <sycl/sycl.hpp>
 4#include "oneapi/mkl/rng/device.hpp"
 5
 6constexpr size_t seed = 42;
 7
 8int main() {
 9  sycl::queue queue;
10  const size_t n = 10'000;
11
12  // create USM allocator
13  sycl::usm_allocator<double, sycl::usm::alloc::shared> allocator(queue);
14
15  // create vector with USM allocator
16  std::vector<double, decltype(allocator)> r_vec(n, allocator);
17  double* r = r_vec.data();
18
19  size_t reduced_val = 0;
20  // create a reductor to calculate the sum of rejected numbers across all work-items
21  auto reductor = sycl::reduction(&reduced_val, size_t(0), std::plus<size_t>{});
22
23  queue.parallel_for(n, reductor, [=](size_t id, auto& sum) {
24      // create basic random number generator object
25      // 2^76 numbers for each work-item are skipped to allow acceptance rejection scheme ro reject as much numbers as it needs
26      oneapi::mkl::rng::device::mrg32k3a<1> engine({ seed, seed, seed, seed, seed, seed }, { 0, (4096 * id) });
27      // create distribution object
28      oneapi::mkl::rng::device::gamma<double> distr(0.7, 0.0, 1.0);
29      // perform generation
30      auto res = oneapi::mkl::rng::device::generate(distr, engine);
31      r[id] = res;
32
33      // calculate how much random numbers are rejected in this work-item
34      size_t rej = distr.count_rejected_numbers();
35      sum += rej;
36  }).wait();  // synchronization can be also done by queue.wait()
37
38  std::cout << "First 10 gamma distributed numbers are following:"<< std::endl;
39  for(int i = 0; i < 10; i++) {
40      std::cout << r[i] << " ";
41  }
42  std::cout << std::endl;
43
44  std::cout << "rejected " << reduced_val <<" random numbers" << std::endl;
45
46  return 0;
47}

Additionally, examples that demonstrate usage of random number generators device routines are available in:

${MKL}/examples/dpcpp_device/rng/source