portable_atomic/imp/fallback/mod.rs

// SPDX-License-Identifier: Apache-2.0 OR MIT

/*
Fallback implementation using global locks.

This implementation uses a seqlock for the global locks.

This is based on the global locks in crossbeam-utils's `AtomicCell`,
but the seqlock is implemented in a way that does not depend on UB
(see the comments in the `optimistic_read` method in the `atomic!` macro for details).

Note that we cannot use a lock per atomic type, since the in-memory representation of the atomic
type and the value type must be the same.
*/

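// Illustrative usage (a minimal sketch; these `pub(crate)` types are normally reached through
// the crate's public atomic wrappers, and the methods shown are the ones defined by the
// `atomic!` macro below):
//
//     let a = AtomicU128::new(1);
//     let prev = a.fetch_add(2, Ordering::Relaxed); // takes the global seqlock keyed by `a`'s address
//     assert_eq!(prev, 1);
//     assert_eq!(a.load(Ordering::Acquire), 3); // optimistic read, validated against the lock's stamp
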
#![cfg_attr(
    any(
        all(
            target_arch = "x86_64",
            not(portable_atomic_no_outline_atomics),
            not(any(target_env = "sgx", miri)),
        ),
        all(
            target_arch = "powerpc64",
            feature = "fallback",
            not(portable_atomic_no_outline_atomics),
            any(
                all(
                    target_os = "linux",
                    any(
                        all(
                            target_env = "gnu",
                            any(target_endian = "little", not(target_feature = "crt-static")),
                        ),
                        all(
                            any(target_env = "musl", target_env = "ohos", target_env = "uclibc"),
                            not(target_feature = "crt-static"),
                        ),
                        portable_atomic_outline_atomics,
                    ),
                ),
                target_os = "android",
                target_os = "freebsd",
                target_os = "openbsd",
                all(
                    target_os = "aix",
                    not(portable_atomic_pre_llvm_20),
                    portable_atomic_outline_atomics, // TODO(aix): currently disabled by default
                ),
            ),
            not(any(miri, portable_atomic_sanitize_thread)),
        ),
        all(
            target_arch = "riscv32",
            not(any(miri, portable_atomic_sanitize_thread)),
            any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
            any(
                target_feature = "zacas",
                portable_atomic_target_feature = "zacas",
                all(
                    feature = "fallback",
                    not(portable_atomic_no_outline_atomics),
                    any(target_os = "linux", target_os = "android"),
                ),
            ),
        ),
        all(
            target_arch = "riscv64",
            not(any(miri, portable_atomic_sanitize_thread)),
            any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
            any(
                target_feature = "zacas",
                portable_atomic_target_feature = "zacas",
                all(
                    feature = "fallback",
                    not(portable_atomic_no_outline_atomics),
                    any(target_os = "linux", target_os = "android"),
                ),
            ),
        ),
        all(
            target_arch = "arm",
            any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
            any(target_os = "linux", target_os = "android"),
            not(portable_atomic_no_outline_atomics),
        ),
    ),
    allow(dead_code)
)]

#[macro_use]
pub(crate) mod utils;

// Use the "wide" sequence lock if the pointer width is <= 32, to prevent its counter from
// wrapping around.
//
// On narrow architectures (pointer width <= 16), the counter is still <= 32-bit and may be
// vulnerable to wrap-around. But this is mostly okay, since on such primitive hardware the
// counter will not be incremented that fast. (See the rough arithmetic after the module
// selection below.)
//
// Some 64-bit architectures have an ABI with a 32-bit pointer width (e.g., the x86_64 X32 ABI,
// AArch64 ILP32 ABI, and mips64 N32 ABI). On those targets, AtomicU64 is available and fast,
// so use it to implement the normal sequence lock.
cfg_has_fast_atomic_64! {
    mod seq_lock;
}
cfg_no_fast_atomic_64! {
    #[path = "seq_lock_wide.rs"]
    mod seq_lock;
}
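// Rough, back-of-the-envelope arithmetic for the wrap-around concern above (illustrative):
// a seqlock reader is only fooled if the counter wraps all the way around to the exact same
// stamp between the reader's two stamp reads. At a billion counter increments per second,
// a 32-bit counter wraps in about 2^32 ns ≈ 4.3 s, which a descheduled reader could plausibly
// sleep through, whereas a 64-bit counter wraps in about 2^64 ns ≈ 580 years.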

use core::{cell::UnsafeCell, mem, sync::atomic::Ordering};

use self::{
    seq_lock::{SeqLock, SeqLockWriteGuard},
    utils::CachePadded,
};
#[cfg(portable_atomic_no_strict_provenance)]
use crate::utils::ptr::PtrExt as _;

// Some 64-bit architectures have an ABI with a 32-bit pointer width (e.g., the x86_64 X32 ABI,
// AArch64 ILP32 ABI, and mips64 N32 ABI). On those targets, AtomicU64 is fast,
// so use it to reduce the number of chunks in the byte-wise atomic memcpy.
use self::seq_lock::{AtomicChunk, Chunk};

// Adapted from https://github.com/crossbeam-rs/crossbeam/blob/crossbeam-utils-0.8.7/crossbeam-utils/src/atomic/atomic_cell.rs#L969-L1016.
#[inline]
#[must_use]
fn lock(addr: usize) -> &'static SeqLock {
    // The number of locks is a prime number because we want to make sure `addr % LEN` gets
    // dispersed across all locks.
    //
    // crossbeam-utils 0.8.7 uses 97 here but does not use CachePadded,
    // so the actual concurrency level will be smaller.
    const LEN: usize = 67;
    const L: CachePadded<SeqLock> = CachePadded::new(SeqLock::new());
    static LOCKS: [CachePadded<SeqLock>; LEN] = [
        L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L,
        L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L,
        L, L, L, L, L, L, L,
    ];

    // If the modulus is a constant number, the compiler will use crazy math to transform this into
    // a sequence of cheap arithmetic operations rather than using the slow modulo instruction.
    &LOCKS[addr % LEN]
}
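// Illustrative note (sketch): distinct atomics may map to the same lock; such collisions are
// harmless and only add contention. For example:
//
//     let a = AtomicU128::new(0);
//     let b = AtomicU128::new(0);
//     // `a` and `b` contend on the same seqlock iff
//     // (a.as_ptr() as usize) % 67 == (b.as_ptr() as usize) % 67.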

macro_rules! atomic {
    ($atomic_type:ident, $int_type:ident, $align:literal) => {
        #[repr(C, align($align))]
        pub(crate) struct $atomic_type {
            v: UnsafeCell<$int_type>,
        }

        impl $atomic_type {
            const LEN: usize = mem::size_of::<$int_type>() / mem::size_of::<Chunk>();

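            // Safety (inferred from the call sites): the returned chunks alias `self.v`, so
            // callers must ensure that, while the chunks are in use, the value is not accessed
            // by concurrent non-atomic operations (see the SAFETY comments in `optimistic_read`
            // and `write`).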
            #[inline]
            unsafe fn chunks(&self) -> &[AtomicChunk; Self::LEN] {
                static_assert!($atomic_type::LEN > 1);
                static_assert!(mem::size_of::<$int_type>() % mem::size_of::<Chunk>() == 0);

                // SAFETY: the caller must uphold the safety contract for `chunks`.
                unsafe { &*(self.v.get() as *const $int_type as *const [AtomicChunk; Self::LEN]) }
            }

            #[inline]
            fn optimistic_read(&self) -> $int_type {
                // Using `MaybeUninit<[usize; Self::LEN]>` here doesn't change codegen: https://godbolt.org/z/86f8s733M
                let mut dst: [Chunk; Self::LEN] = [0; Self::LEN];
                // SAFETY:
                // - There are no threads that perform non-atomic concurrent write operations.
                // - There is no writer that updates the value using atomic operations of different granularity.
                //
                // If the atomic operation is not used here, it will cause a data race
                // when `write` performs a concurrent write operation.
                // Such a data race is sometimes considered virtually unproblematic
                // in SeqLock implementations:
                //
                // - https://github.com/Amanieu/seqlock/issues/2
                // - https://github.com/crossbeam-rs/crossbeam/blob/crossbeam-utils-0.8.7/crossbeam-utils/src/atomic/atomic_cell.rs#L1111-L1116
                // - https://rust-lang.zulipchat.com/#narrow/stream/136281-t-lang.2Fwg-unsafe-code-guidelines/topic/avoiding.20UB.20due.20to.20races.20by.20discarding.20result.3F
                //
                // However, in our use case, the implementation that loads/stores the value as
                // chunks of usize is fast enough and sound, so we use that implementation.
                //
                // See also the atomic-memcpy crate, a generic implementation of this pattern:
                // https://github.com/taiki-e/atomic-memcpy
                let chunks = unsafe { self.chunks() };
                for i in 0..Self::LEN {
                    dst[i] = chunks[i].load(Ordering::Relaxed);
                }
                // SAFETY: integers are plain old data types so we can always transmute to them.
                unsafe { mem::transmute::<[Chunk; Self::LEN], $int_type>(dst) }
            }

            #[inline]
            fn read(&self, _guard: &SeqLockWriteGuard<'static>) -> $int_type {
                // This calls `optimistic_read`, which can return a torn value, but the resulting
                // value is guaranteed not to be torn because we hold the write lock.
                self.optimistic_read()
            }

            #[inline]
            fn write(&self, val: $int_type, _guard: &SeqLockWriteGuard<'static>) {
                // SAFETY: integers are plain old data types so we can always transmute them to arrays of integers.
                let val = unsafe { mem::transmute::<$int_type, [Chunk; Self::LEN]>(val) };
                // SAFETY:
                // - The guard guarantees that we hold the write lock.
                // - There are no threads that perform non-atomic concurrent read or write operations.
                //
                // See `optimistic_read` for the reason that atomic operations are used here.
                let chunks = unsafe { self.chunks() };
                for i in 0..Self::LEN {
                    chunks[i].store(val[i], Ordering::Relaxed);
                }
            }
        }

        // Send is implicitly implemented.
        // SAFETY: any data races are prevented by the lock and atomic operation.
        unsafe impl Sync for $atomic_type {}

        impl_default_no_fetch_ops!($atomic_type, $int_type);
        impl_default_bit_opts!($atomic_type, $int_type);
        impl $atomic_type {
            #[inline]
            pub(crate) const fn new(v: $int_type) -> Self {
                Self { v: UnsafeCell::new(v) }
            }

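            // The fallback implementation always takes a lock, so it is never lock-free.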
            #[inline]
            pub(crate) fn is_lock_free() -> bool {
                Self::IS_ALWAYS_LOCK_FREE
            }
            pub(crate) const IS_ALWAYS_LOCK_FREE: bool = false;

            #[inline]
            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
            pub(crate) fn load(&self, order: Ordering) -> $int_type {
                crate::utils::assert_load_ordering(order);
                let lock = lock(self.v.get().addr());

                // Try doing an optimistic read first.
                if let Some(stamp) = lock.optimistic_read() {
                    let val = self.optimistic_read();

                    if lock.validate_read(stamp) {
                        return val;
                    }
                }

                // Grab a regular write lock so that writers don't starve this load.
                let guard = lock.write();
                let val = self.read(&guard);
                // The value hasn't been changed. Drop the guard without incrementing the stamp.
                guard.abort();
                val
            }

            #[inline]
            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
            pub(crate) fn store(&self, val: $int_type, order: Ordering) {
                crate::utils::assert_store_ordering(order);
                let guard = lock(self.v.get().addr()).write();
                self.write(val, &guard)
            }

            #[inline]
            pub(crate) fn swap(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get().addr()).write();
                let prev = self.read(&guard);
                self.write(val, &guard);
                prev
            }

            #[inline]
            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
            pub(crate) fn compare_exchange(
                &self,
                current: $int_type,
                new: $int_type,
                success: Ordering,
                failure: Ordering,
            ) -> Result<$int_type, $int_type> {
                crate::utils::assert_compare_exchange_ordering(success, failure);
                let guard = lock(self.v.get().addr()).write();
                let prev = self.read(&guard);
                if prev == current {
                    self.write(new, &guard);
                    Ok(prev)
                } else {
                    // The value hasn't been changed. Drop the guard without incrementing the stamp.
                    guard.abort();
                    Err(prev)
                }
            }

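            // The lock-based implementation cannot fail spuriously, so the weak variant simply
            // forwards to `compare_exchange`.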
            #[inline]
            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
            pub(crate) fn compare_exchange_weak(
                &self,
                current: $int_type,
                new: $int_type,
                success: Ordering,
                failure: Ordering,
            ) -> Result<$int_type, $int_type> {
                self.compare_exchange(current, new, success, failure)
            }

            #[inline]
            pub(crate) fn fetch_add(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get().addr()).write();
                let prev = self.read(&guard);
                self.write(prev.wrapping_add(val), &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_sub(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get().addr()).write();
                let prev = self.read(&guard);
                self.write(prev.wrapping_sub(val), &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_and(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get().addr()).write();
                let prev = self.read(&guard);
                self.write(prev & val, &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_nand(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get().addr()).write();
                let prev = self.read(&guard);
                self.write(!(prev & val), &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_or(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get().addr()).write();
                let prev = self.read(&guard);
                self.write(prev | val, &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_xor(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get().addr()).write();
                let prev = self.read(&guard);
                self.write(prev ^ val, &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_max(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get().addr()).write();
                let prev = self.read(&guard);
                self.write(core::cmp::max(prev, val), &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_min(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get().addr()).write();
                let prev = self.read(&guard);
                self.write(core::cmp::min(prev, val), &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_not(&self, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get().addr()).write();
                let prev = self.read(&guard);
                self.write(!prev, &guard);
                prev
            }
            #[inline]
            pub(crate) fn not(&self, order: Ordering) {
                self.fetch_not(order);
            }

            #[inline]
            pub(crate) fn fetch_neg(&self, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get().addr()).write();
                let prev = self.read(&guard);
                self.write(prev.wrapping_neg(), &guard);
                prev
            }
            #[inline]
            pub(crate) fn neg(&self, order: Ordering) {
                self.fetch_neg(order);
            }

            #[inline]
            pub(crate) const fn as_ptr(&self) -> *mut $int_type {
                self.v.get()
            }
        }
    };
}

#[cfg_attr(
    portable_atomic_no_cfg_target_has_atomic,
    cfg(any(
        test,
        not(any(
            not(portable_atomic_no_atomic_64),
            all(
                target_arch = "riscv32",
                not(any(miri, portable_atomic_sanitize_thread)),
                any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
                any(target_feature = "zacas", portable_atomic_target_feature = "zacas"),
            ),
        ))
    ))
)]
#[cfg_attr(
    not(portable_atomic_no_cfg_target_has_atomic),
    cfg(any(
        test,
        not(any(
            target_has_atomic = "64",
            all(
                target_arch = "riscv32",
                not(any(miri, portable_atomic_sanitize_thread)),
                any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
                any(target_feature = "zacas", portable_atomic_target_feature = "zacas"),
            ),
        ))
    ))
)]
cfg_no_fast_atomic_64! {
    atomic!(AtomicI64, i64, 8);
    atomic!(AtomicU64, u64, 8);
}

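// 128-bit fallback types. For example, `atomic!(AtomicU128, u128, 16)` expands (via the macro
// above) to a 16-byte-aligned `AtomicU128` backed by `UnsafeCell<u128>`, whose value is read and
// written as `16 / mem::size_of::<Chunk>()` relaxed atomic chunks under the address-keyed global
// seqlock.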
atomic!(AtomicI128, i128, 16);
atomic!(AtomicU128, u128, 16);

#[cfg(test)]
mod tests {
    use super::*;

    cfg_no_fast_atomic_64! {
        test_atomic_int!(i64);
        test_atomic_int!(u64);
    }
    test_atomic_int!(i128);
    test_atomic_int!(u128);

    // load/store/swap implementation is not affected by signedness, so it is
    // enough to test only unsigned types.
    cfg_no_fast_atomic_64! {
        stress_test!(u64);
    }
    stress_test!(u128);
}