reth_libmdbx/
flags.rs

1use bitflags::bitflags;
2use ffi::*;
3
/// MDBX sync mode.
///
/// Selects what is flushed to disk when a write transaction commits. The variants are listed
/// from the most durable ([`SyncMode::Durable`]) to the least ([`SyncMode::UtterlyNoSync`]).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum SyncMode {
    /// Default robust and durable sync mode.
    ///
    /// Metadata is written and flushed to disk after the data has been written and flushed,
    /// which guarantees the integrity of the database in the event of a crash at any time.
    Durable,

    /// Don't sync the meta-page after commit.
    ///
    /// Flush system buffers to disk only once per transaction commit and omit the metadata
    /// flush. The metadata flush is deferred until the system flushes files to disk, until the
    /// next non-read-only commit, or until [`Environment::sync()`](crate::Environment::sync)
    /// is called. Depending on the platform and hardware, with [`SyncMode::NoMetaSync`] you may
    /// get a doubling of write performance.
    ///
    /// This trade-off maintains database integrity, but a system crash may undo the last
    /// committed transaction. I.e. it preserves the ACI (atomicity, consistency, isolation)
    /// but not the D (durability) database property.
    NoMetaSync,

    /// Don't sync anything but keep previous steady commits.
    ///
    /// Like [`SyncMode::UtterlyNoSync`], the [`SyncMode::SafeNoSync`] flag disables flushing
    /// system buffers to disk when committing a transaction. But there is a huge difference in
    /// how the MVCC snapshots corresponding to previous "steady" transactions are recycled
    /// (see below).
    ///
    /// With [`crate::EnvironmentKind::WriteMap`], [`SyncMode::SafeNoSync`] instructs MDBX to
    /// use asynchronous mmap-flushes to disk. Asynchronous mmap-flushes mean that all writes
    /// are actually scheduled and performed by the operating system in its own manner, i.e.
    /// unordered. MDBX itself just notifies the operating system that it would be nice to
    /// write the data to disk, but nothing more.
    ///
    /// Depending on the platform and hardware, with [`SyncMode::SafeNoSync`] you may get a
    /// multiple increase of write performance, even 10 times or more.
    ///
    /// In contrast to [`SyncMode::UtterlyNoSync`] mode, with the [`SyncMode::SafeNoSync`] flag
    /// MDBX keeps untouched the pages within the B-tree of the last "steady" transaction, i.e.
    /// the one which was synced to disk completely. This has big implications for both data
    /// durability and (unfortunately) performance:
    /// - A system crash can't corrupt the database, but you will lose the last transactions,
    ///   because MDBX will roll back to the last steady commit since it is kept explicitly.
    /// - The last steady transaction has an effect similar to a "long-lived" read transaction
    ///   since it prevents reuse of pages freed by newer write transactions, thus any data
    ///   changes will be placed in newly allocated pages.
    /// - To avoid rapid database growth, the system will sync data and issue a steady
    ///   commit-point to resume reusing pages each time there is insufficient space, before
    ///   increasing the size of the file on disk.
    ///
    /// In other words, with the [`SyncMode::SafeNoSync`] flag MDBX protects you from whole
    /// database corruption, at the cost of increasing the database size and/or the number of
    /// disk IOPs. So, the [`SyncMode::SafeNoSync`] flag could be used with
    /// [`Environment::sync()`](crate::Environment::sync) as an alternative to batch committing
    /// or nested transactions (in some cases).
    ///
    /// The number and volume of disk IOPs with the [`SyncMode::SafeNoSync`] flag will be
    /// exactly the same as without any no-sync flags. However, you should expect a larger
    /// process working set and significantly worse locality of reference, due to the more
    /// intensive allocation of previously unused pages and the increased size of the database.
    SafeNoSync,

    /// Don't sync anything and wipe previous steady commits.
    ///
    /// Don't flush system buffers to disk when committing a transaction.
    /// This optimization means a system crash can corrupt the database if buffers are not yet
    /// flushed to disk. Depending on the platform and hardware, with
    /// [`SyncMode::UtterlyNoSync`] you may get a multiple increase of write performance, even
    /// 100 times or more.
    ///
    /// If the filesystem preserves write order (which is rare and never provided unless
    /// explicitly noted) and the [`WriteMap`](crate::EnvironmentKind::WriteMap) and
    /// [`EnvironmentFlags::liforeclaim`] flags are not used, then a system crash can't corrupt
    /// the database, but you can lose the last transactions if at least one buffer is not yet
    /// flushed to disk. The risk is governed by how often the system flushes dirty buffers to
    /// disk and how often [`Environment::sync()`](crate::Environment::sync) is called. So,
    /// transactions exhibit ACI (atomicity, consistency, isolation) properties and only lose D
    /// (durability). I.e. database integrity is maintained, but a system crash may undo the
    /// final transactions.
    ///
    /// Otherwise, if the filesystem does not preserve write order (which is typical) or the
    /// [`WriteMap`](crate::EnvironmentKind::WriteMap) or [`EnvironmentFlags::liforeclaim`]
    /// flags are used, you should expect a corrupted database after a system crash.
    ///
    /// So, the most important things about [`SyncMode::UtterlyNoSync`]:
    /// - A system crash immediately after committing a write transaction will highly likely
    ///   lead to database corruption.
    /// - Successful completion of
    ///   [`Environment::sync(force=true)`](crate::Environment::sync) after one or more
    ///   committed transactions guarantees consistency and durability.
    /// - BUT by committing two or more transactions you put the database back into a weak
    ///   state, in which a system crash may lead to database corruption! In the case of a
    ///   single transaction after [`Environment::sync()`](crate::Environment::sync), you may
    ///   lose the transaction itself, but not the whole database.
    ///
    /// Nevertheless, [`SyncMode::UtterlyNoSync`] provides "weak" durability in case of an
    /// application crash (but no durability on system failure), and therefore may be very
    /// useful in scenarios where data durability is not required over a system failure
    /// (e.g. for short-lived data), or if you can take such a risk.
    UtterlyNoSync,
}
102
103impl Default for SyncMode {
104    fn default() -> Self {
105        Self::Durable
106    }
107}
108
109#[derive(Clone, Copy, Debug)]
110pub enum Mode {
111    ReadOnly,
112    ReadWrite { sync_mode: SyncMode },
113}
114
115impl Default for Mode {
116    fn default() -> Self {
117        Self::ReadWrite { sync_mode: SyncMode::default() }
118    }
119}
120
121impl From<Mode> for EnvironmentFlags {
122    fn from(mode: Mode) -> Self {
123        Self { mode, ..Default::default() }
124    }
125}
126
127#[derive(Clone, Copy, Debug, Default)]
128pub struct EnvironmentFlags {
129    pub no_sub_dir: bool,
130    pub exclusive: bool,
131    /// Flag is intended to open an existing sub-database which was created with unknown flags
132    /// In such cases, instead of returning the `MDBX_INCOMPATIBLE` error, the sub-database will be
133    /// opened with flags which it was created, and then an application could determine the actual
134    /// flags.
135    pub accede: bool,
136    pub mode: Mode,
137    pub no_rdahead: bool,
138    pub no_meminit: bool,
139    pub coalesce: bool,
140    pub liforeclaim: bool,
141}
142
143impl EnvironmentFlags {
144    /// Configures the mdbx flags to use when opening the environment.
145    pub(crate) const fn make_flags(&self) -> ffi::MDBX_env_flags_t {
146        let mut flags = 0;
147
148        if self.no_sub_dir {
149            flags |= ffi::MDBX_NOSUBDIR;
150        }
151
152        if self.exclusive {
153            flags |= ffi::MDBX_EXCLUSIVE;
154        }
155
156        if self.accede {
157            flags |= ffi::MDBX_ACCEDE;
158        }
159
160        match self.mode {
161            Mode::ReadOnly => {
162                flags |= ffi::MDBX_RDONLY;
163            }
164            Mode::ReadWrite { sync_mode } => {
165                flags |= match sync_mode {
166                    SyncMode::Durable => ffi::MDBX_SYNC_DURABLE,
167                    SyncMode::NoMetaSync => ffi::MDBX_NOMETASYNC,
168                    SyncMode::SafeNoSync => ffi::MDBX_SAFE_NOSYNC,
169                    SyncMode::UtterlyNoSync => ffi::MDBX_UTTERLY_NOSYNC,
170                };
171            }
172        }
173
174        if self.no_rdahead {
175            flags |= ffi::MDBX_NORDAHEAD;
176        }
177
178        if self.no_meminit {
179            flags |= ffi::MDBX_NOMEMINIT;
180        }
181
182        if self.coalesce {
183            flags |= ffi::MDBX_COALESCE;
184        }
185
186        if self.liforeclaim {
187            flags |= ffi::MDBX_LIFORECLAIM;
188        }
189
190        flags |= ffi::MDBX_NOTLS;
191
192        flags
193    }
194}
195
196bitflags! {
197    #[doc="Database options."]
198    #[derive(Default)]
199    pub struct DatabaseFlags: MDBX_env_flags_t {
200        const REVERSE_KEY = MDBX_REVERSEKEY;
201        const DUP_SORT = MDBX_DUPSORT;
202        const INTEGER_KEY = MDBX_INTEGERKEY;
203        const DUP_FIXED = MDBX_DUPFIXED;
204        const INTEGER_DUP = MDBX_INTEGERDUP;
205        const REVERSE_DUP = MDBX_REVERSEDUP;
206        const CREATE = MDBX_CREATE;
207        const ACCEDE = MDBX_DB_ACCEDE;
208    }
209}
210
211bitflags! {
212    #[doc="Write options."]
213    #[derive(Default)]
214    pub struct WriteFlags: MDBX_env_flags_t {
215        const UPSERT = MDBX_UPSERT;
216        const NO_OVERWRITE = MDBX_NOOVERWRITE;
217        const NO_DUP_DATA = MDBX_NODUPDATA;
218        const CURRENT = MDBX_CURRENT;
219        const ALLDUPS = MDBX_ALLDUPS;
220        const RESERVE = MDBX_RESERVE;
221        const APPEND = MDBX_APPEND;
222        const APPEND_DUP = MDBX_APPENDDUP;
223        const MULTIPLE = MDBX_MULTIPLE;
224    }
225}