Better_Software_Header_Mobile Better_Software_Header_Web

Find what you need - explore our website and developer resources

Synchronization Primitives in C++20

std::latch and std::barrier

#include <latch>
#include <thread>
#include <iostream>
#include <vector>
#include <syncstream>

std::latch latch(3);

void worker(int id) {
    // Simulating some work
    std::this_thread::sleep_for(std::chrono::milliseconds(id * 100));
    std::osyncstream(std::cout) << "Worker " << id << " reached the latch.\n";
    latch.count_down();
}

/// Starts workers 1..3, blocks until the latch has been counted down to
/// zero, then prints the summary line.
int main() {
    std::vector<std::jthread> threads;
    for (int id : {1, 2, 3}) {
        threads.emplace_back(worker, id);
    }

    // Returns once every worker has called count_down().
    latch.wait();
    std::cout << "All workers reached the latch.\n";

    // The jthreads join automatically when `threads` goes out of scope.
}
Worker 1 reached the latch.
Worker 2 reached the latch.
Worker 3 reached the latch.
All workers reached the latch.
#include <barrier>
#include <thread>
#include <iostream>
#include <vector>
#include <syncstream>

std::barrier barrier(3);

void worker(int id) {
    // Simulating some work
    std::this_thread::sleep_for(std::chrono::milliseconds(id * 100));
    std::osyncstream(std::cout) << "Worker " << id << " reached the barrier.\n";
    barrier.arrive_and_wait();
    std::osyncstream(std::cout) << "Worker " << id << " passed the barrier.\n";
}

/// Starts workers 1..3 and lets them synchronize on the barrier among
/// themselves; main() does not take part in the rendezvous.
int main() {
    std::vector<std::jthread> threads;
    for (int id : {1, 2, 3}) {
        threads.emplace_back(worker, id);
    }

    // The jthreads join automatically when `threads` goes out of scope.
}
Worker 1 reached the barrier.
Worker 2 reached the barrier.
Worker 3 reached the barrier.
Worker 1 passed the barrier.
Worker 2 passed the barrier.
Worker 3 passed the barrier.

Tags:

c++

About KDAB

1 Comment

12 - Aug - 2025

Andrew Polar

    thread_count.fetch_add(1, std::memory_order_acq_rel);
    //here the control is passed to main thread for serial phase

    while (thread_count.load(std::memory_order_acquire) > 0) {
        std::this_thread::yield();
    }
    //this is second concurrent block in each thread

    printf("*");

    //printf("b%d-%d ", id, local_step);
    //std::this_thread::sleep_for(std::chrono::milliseconds(100));

    //thread termination counter
    local_step++;
}
// Driver side of a hand-rolled, spin-wait phase barrier.
// Relies on names declared outside this fragment: the atomics `thread_count`
// and `loop_count`, the thread entry point `WorkerTest`, and the clock values
// `current_time` / `start_application`.
int nThreads = 16;
int nLoops = 10;

std::vector<std::thread> threads;
threads.reserve(nThreads);

// Launch the workers; each receives its index and the iteration count.
for (int i = 0; i < nThreads; ++i) threads.emplace_back(WorkerTest, i, nLoops);
for (int s = 0; s < nLoops; ++s) {
    //first concurrent blocks are running in each thread
    // Spin until the counter reaches nThreads
    // (presumably one increment per worker - confirm against WorkerTest).
    while (thread_count.load(std::memory_order_acquire) < nThreads) {
        std::this_thread::yield();
    }
    //serial phase
    printf("\n");

    //trigger second concurrent block
    // Resetting the counter to 0 is what workers spinning on
    // `thread_count > 0` would be waiting for.
    thread_count.store(0, std::memory_order_release);
    // NOTE(review): no reader of loop_count is visible in this fragment;
    // its purpose should be confirmed against the full program.
    loop_count.fetch_add(1, std::memory_order_acq_rel);
}
for (auto& t : threads) t.join();

current_time = clock();
printf("\nElapsed time %2.3f\n", static_cast<double>(current_time - start_application) / CLOCKS_PER_SEC);
ShivamKunwar

Shivam Kunwar

Software Engineer

Sign up for the KDAB Newsletter

Learn Modern C++

Learn more