Graphite  Version 3
An experimental 3D geometry processing program
thread_sync.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2000-2022 Inria
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * * Redistributions of source code must retain the above copyright notice,
9  * this list of conditions and the following disclaimer.
10  * * Redistributions in binary form must reproduce the above copyright notice,
11  * this list of conditions and the following disclaimer in the documentation
12  * and/or other materials provided with the distribution.
13  * * Neither the name of the ALICE Project-Team nor the names of its
14  * contributors may be used to endorse or promote products derived from this
15  * software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
21  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27  * POSSIBILITY OF SUCH DAMAGE.
28  *
29  * Contact: Bruno Levy
30  *
31  * https://www.inria.fr/fr/bruno-levy
32  *
33  * Inria,
34  * Domaine de Voluceau,
35  * 78150 Le Chesnay - Rocquencourt
36  * FRANCE
37  *
38  */
39 
40 #ifndef GEOGRAM_BASIC_THREAD_SYNC
41 #define GEOGRAM_BASIC_THREAD_SYNC
42 
48 #include <geogram/basic/common.h>
49 #include <geogram/basic/numeric.h>
50 #include <geogram/basic/assert.h>
51 #include <geogram/basic/argused.h>
52 #include <vector>
53 #include <atomic>
54 
55 // On Windows/MSCV, we need to use a special implementation
56 // of spinlocks because std::atomic_flag in MSVC's stl does
57 // not fully implement the norm (lacks a constructor).
58 #ifdef GEO_OS_WINDOWS
59 #include <windows.h>
60 #include <intrin.h>
61 #pragma intrinsic(_InterlockedCompareExchange16)
62 #pragma intrinsic(_WriteBarrier)
63 #endif
64 
65 // On MacOS, I get many warnings with atomic_flag initialization,
66 // such as std::atomic_flag f = ATOMIC_FLAG_INIT
67 #if defined(__clang__)
68 #pragma GCC diagnostic ignored "-Wbraced-scalar-init"
69 #endif
70 
75 inline void geo_pause() {
76 #ifdef GEO_OS_WINDOWS
77  YieldProcessor();
78 #else
79 # ifdef GEO_PROCESSOR_X86
80 # ifdef __ICC
81  _mm_pause();
82 # else
83  __builtin_ia32_pause();
84 # endif
85 # endif
86 #endif
87 }
88 
89 /*******************************************************************************/
90 
91 #ifdef GEO_OS_WINDOWS
92 
93 // Windows-specific spinlock implementation.
94 // I'd have prefered to use std::atomic_flag for everybody,
95 // unfortunately atomic_flag's constructor is not implemented in MSCV's stl,
96 // so we reimplement them using atomic compare-exchange functions...
97 
98 namespace GEO {
99  namespace Process {
101  typedef short spinlock;
102 
104 # define GEOGRAM_SPINLOCK_INIT 0
105  inline void acquire_spinlock(volatile spinlock& x) {
106  while(_InterlockedCompareExchange16(&x, 1, 0) == 1) {
107  // Intel recommends to have a PAUSE asm instruction
108  // in the spinlock loop. Under MSVC/Windows,
109  // YieldProcessor() is a macro that calls the
110  // (undocumented) _mm_pause() intrinsic function
111  // that generates a PAUSE opcode.
112  YieldProcessor();
113  }
114  // We do not need _ReadBarrier() here since
115  // _InterlockedCompareExchange16
116  // "acts as a full barrier in VC2005" according to the doc
117  }
118 
119  inline void release_spinlock(volatile spinlock& x) {
120  _WriteBarrier(); // prevents compiler reordering
121  x = 0;
122  }
123 
124  }
125 }
126 
127 /*******************************************************************************/
128 
129 #else
130 
131 namespace GEO {
132  namespace Process {
133 
135  // Note: C++20 does not need it anymore, in C++20
136  // std::atomic_flag's constructor initializes it,
137  // we keep it because
138  // - we are using C++17
139  // - the Windows implementation that uses integers rather than
140  // std::atomic_flag needs an initialization value.
141 #define GEOGRAM_SPINLOCK_INIT ATOMIC_FLAG_INIT
142 
149  typedef std::atomic_flag spinlock;
150 
155  inline void acquire_spinlock(volatile spinlock& x) {
156  for (;;) {
157  if (!x.test_and_set(std::memory_order_acquire)) {
158  break;
159  }
160 // If compiling in C++20 we can be slightly more efficient when spinning
161 // (avoid unrequired atomic operations, just "peek" the flag)
162 #if defined(__cpp_lib_atomic_flag_test)
163  while (x.test(std::memory_order_relaxed))
164 #endif
165  geo_pause();
166  }
167  }
168 
173  inline void release_spinlock(volatile spinlock& x) {
174  x.clear(std::memory_order_release);
175  }
176 
177  }
178 }
179 #endif
180 
181 /****************************************************************************/
182 
183 namespace GEO {
184  namespace Process {
185 
198  public:
202  BasicSpinLockArray() : spinlocks_(nullptr), size_(0) {
203  }
204 
209  BasicSpinLockArray(index_t size_in) : spinlocks_(nullptr), size_(0) {
210  resize(size_in);
211  }
212 
217 
222  const BasicSpinLockArray& rhs
223  ) = delete;
224 
230  void resize(index_t size_in) {
231  delete[] spinlocks_;
232  spinlocks_ = new spinlock[size_in];
233  size_ = size_in;
234  // Need to initialize the spinlocks to false (dirty !)
235  // (maybe use placement new on each item..., to be tested)
236  for(index_t i=0; i<size_; ++i) {
237  Process::release_spinlock(spinlocks_[i]);
238  }
239  }
240 
244  void clear() {
245  delete[] spinlocks_;
246  spinlocks_ = nullptr;
247  }
248 
252  index_t size() const {
253  return size_;
254  }
255 
263  geo_debug_assert(i < size());
264  GEO::Process::acquire_spinlock(spinlocks_[i]);
265  }
266 
273  geo_debug_assert(i < size());
274  GEO::Process::release_spinlock(spinlocks_[i]);
275  }
276 
277  private:
278  // Cannot use a std::vector because std::atomic_flag does not
279  // have copy ctor nor assignment operator.
280  spinlock* spinlocks_;
281  index_t size_;
282  };
283  }
284 }
285 
286 /*******************************************************************************/
287 
288 namespace GEO {
289  namespace Process {
290 
302  public:
306  CompactSpinLockArray() : spinlocks_(nullptr), size_(0) {
307  }
308 
313  CompactSpinLockArray(index_t size_in) : spinlocks_(nullptr),size_(0){
314  resize(size_in);
315  }
316 
321  clear();
322  }
323 
328 
333  const CompactSpinLockArray& rhs
334  ) = delete;
335 
341  void resize(index_t size_in) {
342  if(size_ != size_in) {
343  size_ = size_in;
344  index_t nb_words = (size_ >> 5) + 1;
345  delete[] spinlocks_;
346  spinlocks_ = new std::atomic<uint32_t>[nb_words];
347  for(index_t i=0; i<nb_words; ++i) {
348  // Note: std::atomic_init() is deprecated in C++20
349  // that can initialize std::atomic through its
350  // non-default constructor. We'll need to do something
351  // else when we'll switch to C++20 (placement new...)
352  std::atomic_init(&spinlocks_[i],0u);
353  }
354  }
355 // Test at compile time that we are using atomic uint32_t operations (and not
356 // using an additional lock which would be catastrophic in terms of performance)
357 #ifdef __cpp_lib_atomic_is_always_lock_free
358  static_assert(std::atomic<uint32_t>::is_always_lock_free);
359 #else
360 // If we cannot test that at compile time, we test that at runtime in debug
361 // mode (so that we will be notified in the non-regression test if one of
362 // the platforms has the problem, which is very unlikely though...)
363  geo_debug_assert(size_ == 0 || spinlocks_[0].is_lock_free());
364 #endif
365  }
366 
370  index_t size() const {
371  return size_;
372  }
373 
377  void clear() {
378  delete[] spinlocks_;
379  size_ = 0;
380  }
381 
389  geo_debug_assert(i < size());
390  index_t w = i >> 5;
391  uint32_t b = uint32_t(i & 31);
392  uint32_t mask = (1u << b);
393  while(
394  (spinlocks_[w].fetch_or(
395  mask, std::memory_order_acquire
396  ) & mask) != 0
397  ) {
398  geo_pause();
399  }
400  }
401 
408  geo_debug_assert(i < size());
409  index_t w = i >> 5;
410  uint32_t b = uint32_t(i & 31);
411  uint32_t mask = ~(1u << b);
412  spinlocks_[w].fetch_and(mask, std::memory_order_release);
413  }
414 
415  private:
416  // Cannot use a std::vector because std::atomic<> does not
417  // have copy ctor nor assignment operator.
418  std::atomic<uint32_t>* spinlocks_;
419  index_t size_;
420  };
421 
422  }
423 }
424 
425 /*******************************************************************************/
426 
427 namespace GEO {
428  namespace Process {
429  typedef CompactSpinLockArray SpinLockArray;
430  }
431 }
432 
433 /*******************************************************************************/
434 
435 #endif
A function to suppress unused parameters compilation warnings.
Assertion checking mechanism.
#define geo_debug_assert(x)
Verifies that a condition is met.
Definition: assert.h:196
An array of light-weight synchronisation primitives (spinlocks).
Definition: thread_sync.h:197
void clear()
Resets size to 0 and clears all the memory.
Definition: thread_sync.h:244
void resize(index_t size_in)
Resizes a BasicSpinLockArray.
Definition: thread_sync.h:230
void acquire_spinlock(index_t i)
Acquires a spinlock at a given index.
Definition: thread_sync.h:262
BasicSpinLockArray(index_t size_in)
Constructs a new BasicSpinLockArray of size size_in.
Definition: thread_sync.h:209
BasicSpinLockArray & operator=(const BasicSpinLockArray &rhs)=delete
Forbids copy.
BasicSpinLockArray(const BasicSpinLockArray &rhs)=delete
Forbids copy.
index_t size() const
Gets the number of spinlocks in this array.
Definition: thread_sync.h:252
void release_spinlock(index_t i)
Releases a spinlock at a given index.
Definition: thread_sync.h:272
BasicSpinLockArray()
Constructs a new BasicSpinLockArray of size 0.
Definition: thread_sync.h:202
An array of light-weight synchronisation primitives (spinlocks).
Definition: thread_sync.h:301
void resize(index_t size_in)
Resizes a CompactSpinLockArray.
Definition: thread_sync.h:341
void clear()
Resets size to 0 and clears all the memory.
Definition: thread_sync.h:377
CompactSpinLockArray(index_t size_in)
Constructs a new CompactSpinLockArray of size size_in.
Definition: thread_sync.h:313
CompactSpinLockArray(const CompactSpinLockArray &rhs)=delete
Forbids copy.
~CompactSpinLockArray()
CompactSpinLockArray destructor.
Definition: thread_sync.h:320
void release_spinlock(index_t i)
Releases a spinlock at a given index.
Definition: thread_sync.h:407
CompactSpinLockArray & operator=(const CompactSpinLockArray &rhs)=delete
Forbids copy.
void acquire_spinlock(index_t i)
Acquires a spinlock at a given index.
Definition: thread_sync.h:388
index_t size() const
Gets the number of spinlocks in this array.
Definition: thread_sync.h:370
CompactSpinLockArray()
Constructs a new SpinLockArray of size 0.
Definition: thread_sync.h:306
Common include file, providing basic definitions. Should be included before anything else by all head...
std::atomic_flag spinlock
A lightweight synchronization structure.
Definition: thread_sync.h:149
void release_spinlock(volatile spinlock &x)
Makes x available to other threads.
Definition: thread_sync.h:173
void acquire_spinlock(volatile spinlock &x)
Loops until x is available then reserves it.
Definition: thread_sync.h:155
Global Vorpaline namespace.
geo_index_t index_t
The type for storing and manipulating indices.
Definition: numeric.h:329
Types and functions for numbers manipulation.
void geo_pause()
executes the pause instruction
Definition: thread_sync.h:75