//
// Copyright (c) 2018-2020 Manuel Pöter.
// Licensed under the MIT License. See LICENSE file in the project root for full license information.
//

#ifndef LOCK_FREE_REF_COUNT_IMPL
#error "This is an impl file and must not be included directly!"
#endif

#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable: 4127) // conditional expression is constant
#endif

namespace xenium { namespace reclamation {

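  // The global free list keeps nodes whose memory is never handed back to ::operator delete
  // but kept around for later allocations of T. A node may only be pushed here after its
  // reference count has dropped to zero and the pushing thread has atomically set the claim
  // bit (the LSB of ref_count, see decrement_refcnt()). Because node memory stays valid,
  // guard_ptr instances can safely touch the ref_count of a node even if it has concurrently
  // been removed from its data structure and recycled.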
  template<class Traits>
  template<class T, std::size_t N, class Deleter>
  class lock_free_ref_count<Traits>::enable_concurrent_ptr<T, N, Deleter>::free_list {
  public:
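    // Pops a node for reuse: the thread-local cache is tried first, then the global
    // lock-free stack. While a candidate from the global list is examined, the guard_ptr
    // keeps its ref_count elevated; since a node can only be pushed back onto the free
    // list once its count drops to zero again, the same node cannot reappear as head
    // while we hold the guard, which rules out the ABA problem on the CAS below.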
    T *pop() {
      if (max_local_elements > 0)
        if (auto result = local_free_list().pop())
          return result;

      guard_ptr guard;

      while (true) {
        // (1) - this acquire-load synchronizes-with the release-CAS (3)
        guard = acquire_guard(head, std::memory_order_acquire);
        if (guard.get() == nullptr)
          return nullptr;

        // Note: ref_count can be anything here since multiple threads
        // could have gotten a reference to the node on the free list.
        marked_ptr expected(guard);
        auto next = guard->next_free().load(std::memory_order_relaxed);
        // since head is only changed via CAS operations it is sufficient to use relaxed order
        // for this operation as it is always part of a release-sequence headed by (3)
        if (head.compare_exchange_weak(expected, next, std::memory_order_relaxed)) {
          assert((guard->ref_count().load(std::memory_order_relaxed) & RefCountClaimBit) != 0 &&
                 "ClaimBit must be set for a node on the free list");

          auto ptr = guard.get();
          ptr->ref_count().fetch_sub(RefCountClaimBit, std::memory_order_relaxed); // clear claim bit
          ptr->next_free().store(nullptr, std::memory_order_relaxed);
          guard.ptr.reset(); // reset guard_ptr to prevent decrement of ref_count
          return ptr;
        }
      }
    }

    void push(T *node) {
      assert(node->ref_count().load(std::memory_order_relaxed) & RefCountClaimBit &&
             "ClaimBit must be set for a node to be put on the free list");
      if (max_local_elements > 0 && local_free_list().push(node))
        return;

      add_nodes(node, node);
    }

  private:
    void add_nodes(T *first, T *last) {
      // (2) - this acquire-load synchronizes-with the release-CAS (3)
      auto old = head.load(std::memory_order_acquire);
      do {
        last->next_free().store(old, std::memory_order_relaxed);
        // (3) - if this release-CAS succeeds, it synchronizes-with the acquire-loads (1, 2);
        //       if it fails, the acquire on the reload synchronizes-with a prior release-CAS (3)
      } while (!head.compare_exchange_weak(old, first, std::memory_order_release, std::memory_order_acquire));
    }

    // the free list is implemented as a LIFO singly linked list;
    // the LSB of a node's ref_count acts as the claim bit, so for every node on the free list this bit has to be set
    concurrent_ptr<T, N> head;

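    // Small per-thread cache of free nodes, bounded by max_local_elements. The list head is
    // a plain thread-local pointer, so push() and pop() need no CAS loops; nodes that do not
    // fit, as well as any nodes still cached when the thread exits, are handed over to the
    // global free list.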
    class thread_local_free_list {
    public:
      ~thread_local_free_list() noexcept {
        if (head == nullptr)
          return;
        auto first = head;
        auto last = head;
        auto next = last->next_free().load(std::memory_order_relaxed);
        while (next) {
          last = next.get();
          next = next->next_free().load(std::memory_order_relaxed);
        }
        global_free_list.add_nodes(first, last);
      }

      bool push(T *node) {
        if (number_of_elements >= max_local_elements)
          return false;
        node->next_free().store(head, std::memory_order_relaxed);
        head = node;
        ++number_of_elements;
        return true;
      }

      T *pop() {
        auto result = head;
        if (result) {
          assert(number_of_elements > 0);
          head = result->next_free().load(std::memory_order_relaxed).get();
          --number_of_elements;
          // clear claim bit and increment ref_count
          result->ref_count().fetch_add(RefCountInc - RefCountClaimBit, std::memory_order_relaxed);
          result->next_free().store(nullptr, std::memory_order_relaxed);
        }
        return result;
      }

    private:
      size_t number_of_elements = 0;
      T *head = nullptr;
    };

    static constexpr size_t max_local_elements = Traits::thread_local_free_list_size;

    static thread_local_free_list &local_free_list() {
      // workaround for gcc issue causing redefinition of __tls_guard when
      // defining this as static thread_local member of free_list.
      alignas(64) static thread_local thread_local_free_list local_free_list;
      return local_free_list;
    }
  };

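  // Allocation first tries to reuse a node from the global free list; only if that fails is
  // fresh memory obtained. Each allocation is laid out as [header | T], where the header
  // holds the atomic ref_count; h + 1 is the address handed out as the T object.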
  template<class Traits>
  template<class T, std::size_t N, class Deleter>
  void* lock_free_ref_count<Traits>::
    enable_concurrent_ptr<T, N, Deleter>::operator new(size_t sz) {
    assert(sz == sizeof(T) && "Cannot handle allocations of anything other than T instances");
    T *result = global_free_list.pop();
    if (result == nullptr) {
      auto h = static_cast<header *>(::operator new(sz + sizeof(header)));
      h->ref_count.store(RefCountInc, std::memory_order_release);
      result = static_cast<T *>(static_cast<void *>(h + 1));
    }

    return result;
  }

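  // operator delete does not release any memory. It merely drops the reference that
  // operator new established; if this turns out to be the last reference and this thread
  // wins the claim bit, the node is pushed onto the free list for reuse.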
  template<class Traits>
  template<class T, std::size_t N, class Deleter>
  void lock_free_ref_count<Traits>::
    enable_concurrent_ptr<T, N, Deleter>::operator delete(void *p) {
    auto node = static_cast<T *>(p);
    assert((node->ref_count().load(std::memory_order_relaxed) & RefCountClaimBit) == 0);

    if (node->decrement_refcnt())
      node->push_to_free_list();
  }

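  // Drops one reference. If the count would reach zero, the CAS instead installs the claim
  // bit; the return value signals whether this call performed that transition, i.e. whether
  // the calling thread has exclusive responsibility for destroying the object and recycling
  // the node.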
  template<class Traits>
  template<class T, std::size_t N, class Deleter>
  bool lock_free_ref_count<Traits>::
    enable_concurrent_ptr<T, N, Deleter>::decrement_refcnt() {
    unsigned old_refcnt, new_refcnt;
    do {
      old_refcnt = ref_count().load(std::memory_order_relaxed);
      new_refcnt = old_refcnt - RefCountInc;
      if (new_refcnt == 0)
        new_refcnt = RefCountClaimBit;
      // (4) - this release/acquire CAS synchronizes-with itself
    } while (!ref_count().compare_exchange_weak(old_refcnt, new_refcnt,
                                                new_refcnt == RefCountClaimBit
                                                  ? std::memory_order_acquire
                                                  : std::memory_order_release,
                                                std::memory_order_relaxed));

    // free node iff ref_count is zero AND we're the first thread to "claim" this node for reclamation.
    return (old_refcnt - new_refcnt) & RefCountClaimBit;
  }

  template<class Traits>
  template<class T, class MarkedPtr>
  lock_free_ref_count<Traits>::
    guard_ptr<T, MarkedPtr>::guard_ptr(const MarkedPtr &p) noexcept :
      base(p) {
    if (this->ptr.get() != nullptr)
      this->ptr->ref_count().fetch_add(RefCountInc, std::memory_order_relaxed);
  }

  template<class Traits>
  template<class T, class MarkedPtr>
  lock_free_ref_count<Traits>::
    guard_ptr<T, MarkedPtr>::guard_ptr(const guard_ptr &p) noexcept :
      guard_ptr(p.ptr) {}

  template<class Traits>
  template<class T, class MarkedPtr>
  lock_free_ref_count<Traits>::
    guard_ptr<T, MarkedPtr>::guard_ptr(guard_ptr &&p) noexcept :
      base(p.ptr) {
    p.ptr.reset();
  }

  template<class Traits>
  template<class T, class MarkedPtr>
  auto lock_free_ref_count<Traits>::
    guard_ptr<T, MarkedPtr>::operator=(const guard_ptr &p)
    -> guard_ptr & {
    if (&p == this)
      return *this;

    reset();
    this->ptr = p.ptr;
    if (this->ptr.get() != nullptr)
      this->ptr->ref_count().fetch_add(RefCountInc, std::memory_order_relaxed);
    return *this;
  }

  template<class Traits>
  template<class T, class MarkedPtr>
  auto lock_free_ref_count<Traits>::
    guard_ptr<T, MarkedPtr>::operator=(guard_ptr &&p) noexcept
    -> guard_ptr & {
    if (&p == this)
      return *this;

    reset();
    this->ptr = std::move(p.ptr);
    p.ptr.reset();
    return *this;
  }

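  // acquire() establishes a guard via load / increment / re-check: p is loaded, the
  // ref_count of the referenced node is incremented, and p is loaded again. Only if p is
  // unchanged is the guard kept; otherwise the speculative increment is dropped via reset()
  // and the whole sequence retries. The speculative increment is safe because node memory is
  // only ever recycled through the free list, never freed.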
  template<class Traits>
  template<class T, class MarkedPtr>
  void lock_free_ref_count<Traits>::
    guard_ptr<T, MarkedPtr>::acquire(const concurrent_ptr<T> &p, std::memory_order order) noexcept {
    for (;;) {
      reset();
      // FIXME: If this load is relaxed, TSan reports a data race between the following
      // fetch-add and the initialization of the ref_count field. I tend to disagree, as
      // the fetch_add should be ordered after the initial store (operator new) in the
      // modification order of ref_count. Therefore the acquire-fetch-add should
      // synchronize-with the release store.
      // I created a GitHub issue:
      // But for now, let's make this an acquire-load to make TSan happy.
      auto q = p.load(std::memory_order_acquire);
      this->ptr = q;
      if (q.get() == nullptr)
        return;

      // (5) - this acquire-fetch_add synchronizes-with the release-fetch_sub (7)
      //       this ensures that a change to p becomes visible
      q->ref_count().fetch_add(RefCountInc, std::memory_order_acquire);

      if (q == p.load(order))
        return;
    }
  }

  template<class Traits>
  template<class T, class MarkedPtr>
  bool lock_free_ref_count<Traits>::
    guard_ptr<T, MarkedPtr>::acquire_if_equal(
      const concurrent_ptr<T> &p, const MarkedPtr &expected, std::memory_order order) noexcept {
    reset();
    // FIXME: same issue with TSan as in acquire (see above).
    auto q = p.load(std::memory_order_acquire);
    if (q != expected)
      return false;

    this->ptr = q;
    if (q.get() == nullptr)
      return true;

    // (6) - this acquire-fetch_add synchronizes-with the release-fetch_sub (7)
    //       this ensures that a change to p becomes visible
    q->ref_count().fetch_add(RefCountInc, std::memory_order_acquire);

    if (q == p.load(order))
      return true;

    reset();
    return false;
  }

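  // reset() releases this guard's reference. If it was the last one and this thread won the
  // claim bit, the object is destroyed (unless it has already been destroyed) and the node
  // is returned to the free list.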
  template<class Traits>
  template<class T, class MarkedPtr>
  void lock_free_ref_count<Traits>::
    guard_ptr<T, MarkedPtr>::reset() noexcept {
    auto p = this->ptr.get();
    this->ptr.reset();
    if (p == nullptr)
      return;

    if (p->decrement_refcnt()) {
      if (!p->is_destroyed())
        p->~T();

      p->push_to_free_list();
    }
  }

  template<class Traits>
  template<class T, class MarkedPtr>
  void lock_free_ref_count<Traits>::
    guard_ptr<T, MarkedPtr>::reclaim(Deleter) noexcept {
    if (this->ptr.get() != nullptr) {
      assert(this->ptr->refs() > 1);
      // ref_count was initialized with "1", so we need an additional
      // decrement to ensure that the node gets reclaimed.
      // ref_count cannot drop to zero here -> no check required.
      // (7) - this release-fetch-sub synchronizes-with the acquire-fetch-add (5, 6)
      this->ptr->ref_count().fetch_sub(RefCountInc, std::memory_order_release);
    }
    reset();
  }

  template<class Traits>
  template<class T, std::size_t N, class Deleter>
  typename lock_free_ref_count<Traits>::
    template enable_concurrent_ptr<T, N, Deleter>::free_list
    lock_free_ref_count<Traits>::
    enable_concurrent_ptr<T, N, Deleter>::global_free_list;

#ifdef TRACK_ALLOCATIONS
  template <class Traits>
  inline detail::allocation_counter& lock_free_ref_count<Traits>::allocation_counter()
  { return allocation_counter_; }

  template <class Traits>
  inline void lock_free_ref_count<Traits>::count_allocation()
  { allocation_counter().count_allocation(); }

  template <class Traits>
  inline void lock_free_ref_count<Traits>::count_reclamation()
  { allocation_counter().count_reclamation(); }
#endif
}}

#ifdef _MSC_VER
#pragma warning(pop)
#endif