Skip to article frontmatter | Skip to article content

Working with OpenMP

Here’s a simple vector addition example parallelized with OpenMP.

#include <vector>

#pragma cling load("libomp.so.5")
// Serial element-wise addition: out[k] = a[k] + b[k] for every k in [0, n).
//
// out, a and b are indexed with operator[], so they must be random-access
// iterators or pointers. The caller must ensure that each range holds at
// least n elements; nothing is bounds-checked here.
template <typename OutputIt, typename InputIt, typename Size>
void vector_add(OutputIt out, InputIt a, InputIt b, Size n)
{
    Size idx = 0;
    while (idx < n)
    {
        out[idx] = a[idx] + b[idx];
        ++idx;
    }
}
// OpenMP variant of vector_add: out[k] = a[k] + b[k] for every k in [0, n),
// with the iterations divided among the threads of a parallel region.
//
// out, a and b are indexed with operator[], so they must be random-access
// iterators or pointers, and each range must hold at least n elements.
// Each iteration writes only its own out[k].
template <typename OutputIt, typename InputIt, typename Size>
void vector_add_omp(OutputIt out, InputIt a, InputIt b, Size n)
{
    // A `parallel` region enclosing a single `for` worksharing loop is the
    // expanded spelling of the combined `parallel for` construct.
    #pragma omp parallel
    {
        // Keep the loop in OpenMP canonical form (`<` test, `++` step).
        #pragma omp for
        for (Size idx = 0; idx < n; ++idx)
        {
            out[idx] = a[idx] + b[idx];
        }
    }
}
// Number of elements in each benchmark vector.
const size_t N = 10'000'000;

// Inputs filled with constants, so every element of the sum should be 3.0f.
std::vector<float> a(N, 1.0f);
std::vector<float> b(N, 2.0f);
// Destination buffer; value-initialized to 0.0f until an add fills it.
std::vector<float> out(N);
%%timeit
vector_add(out.begin(), a.cbegin(), b.cbegin(), N);
Output
104 ms +- 4.76 ms per loop (mean +- std. dev. of 7 runs 10 loops each)
out[0]
Output
3.00000f
%%timeit
vector_add_omp(out.begin(), a.cbegin(), b.cbegin(), N);
Output
55.4 ms +- 2.31 ms per loop (mean +- std. dev. of 7 runs 10 loops each)
out[0]
Output
3.00000f