Here’s a simple vector addition example parallelized with OpenMP.
#include <vector>
// Asks the cling interpreter to load the OpenMP runtime library.
// A preprocessor directive extends to the end of its line, so this pragma must
// sit on a line of its own; anything that follows it on the same line (such as
// a `template <...>` header) becomes part of the directive and is lost.
#pragma cling load("libomp.so.5")

/// @brief Serial element-wise sum: out[i] = a[i] + b[i] for every i in [0, n).
///
/// @tparam OutputIt Indexable (operator[]) handle to the destination.
/// @tparam InputIt  Indexable (operator[]) handle to both inputs; `a` and `b`
///                  must have the same type.
/// @tparam Size     Type of the element count and of the loop counter.
/// @param out Destination; must provide at least `n` writable elements.
/// @param a   First operand; must provide at least `n` readable elements.
/// @param b   Second operand; must provide at least `n` readable elements.
/// @param n   Number of elements to process; nothing is written when n == 0.
template <typename OutputIt, typename InputIt, typename Size>
void vector_add(OutputIt out, InputIt a, InputIt b, Size n)
{
    for (Size i = 0; i < n; ++i)
        out[i] = a[i] + b[i];
}

/// @brief Element-wise sum with the loop annotated for OpenMP:
///        out[i] = a[i] + b[i] for every i in [0, n).
///
/// Same contract as vector_add(). Every iteration touches only index `i`, so
/// the iterations do not depend on one another. When the translation unit is
/// built without OpenMP support the pragma is ignored and the loop runs
/// serially, producing the same result.
///
/// @tparam OutputIt Indexable (operator[]) handle to the destination.
/// @tparam InputIt  Indexable (operator[]) handle to both inputs; `a` and `b`
///                  must have the same type.
/// @tparam Size     Type of the element count and of the loop counter.
///                  NOTE(review): OpenMP's canonical loop form expects an
///                  integer (or random-access iterator) loop variable —
///                  confirm callers only pass integer counts.
/// @param out Destination; must provide at least `n` writable elements.
/// @param a   First operand; must provide at least `n` readable elements.
/// @param b   Second operand; must provide at least `n` readable elements.
/// @param n   Number of elements to process; nothing is written when n == 0.
template <typename OutputIt, typename InputIt, typename Size>
void vector_add_omp(OutputIt out, InputIt a, InputIt b, Size n)
{
    #pragma omp parallel for
    for (Size i = 0; i < n; ++i)
        out[i] = a[i] + b[i];
}

// Element count used for the example vectors that follow.
const size_t N{10'000'000};
std::vector<float> a(N, 1.0f), b(N, 2.0f), out(N);
%%timeit
vector_add(out.begin(), a.cbegin(), b.cbegin(), N);
Output
79.1 ms +- 17.2 ms per loop (mean +- std. dev. of 7 runs 10 loops each)
out[0]
Output
3.00000f
%%timeit
vector_add_omp(out.begin(), a.cbegin(), b.cbegin(), N);
Output
59.8 ms +- 3.77 ms per loop (mean +- std. dev. of 7 runs 10 loops each)
out[0]
Output
3.00000f