reth_libmdbx/flags.rs
1use std::str::FromStr;
2
3use bitflags::bitflags;
4use ffi::*;
5
/// Flush and durability strategy MDBX applies when a write transaction commits.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub enum SyncMode {
    /// Robust, fully durable sync mode; this is the default.
    ///
    /// Data is written and flushed to disk first, and only afterwards is the metadata written
    /// and flushed. This guarantees the integrity of the database whenever a crash happens.
    #[default]
    Durable,

    /// Don't sync the meta-page after commit.
    ///
    /// System buffers are flushed to disk only once per transaction commit and the metadata
    /// flush is omitted. That flush is deferred until the system flushes files to disk, until
    /// the next non-read-only commit, or until
    /// [`Environment::sync()`](crate::Environment::sync) is called. Depending on the platform
    /// and hardware, [`SyncMode::NoMetaSync`] may double write performance.
    ///
    /// This trade-off keeps the database intact, but a system crash may undo the last
    /// committed transaction. In other words, the ACI properties (atomicity, consistency,
    /// isolation) are preserved while D (durability) is not.
    NoMetaSync,

    /// Don't sync anything but keep previous steady commits.
    ///
    /// Like [`SyncMode::UtterlyNoSync`], [`SyncMode::SafeNoSync`] disables flushing system
    /// buffers to disk when a transaction is committed. The two modes differ hugely, however,
    /// in how the MVCC snapshots that belong to previous "steady" transactions are recycled
    /// (see below).
    ///
    /// Combined with [`crate::EnvironmentKind::WriteMap`], [`SyncMode::SafeNoSync`] instructs
    /// MDBX to use asynchronous mmap-flushes to disk. Asynchronous mmap-flushes mean that all
    /// writes are scheduled and performed by the operating system in its own manner, i.e.
    /// unordered. MDBX merely notifies the operating system that it would be nice to write the
    /// data to disk, and nothing more.
    ///
    /// Depending on the platform and hardware, [`SyncMode::SafeNoSync`] may multiply write
    /// performance, even by 10 times or more.
    ///
    /// In contrast to [`SyncMode::UtterlyNoSync`], with [`SyncMode::SafeNoSync`] MDBX leaves
    /// untouched the pages within the B-tree of the last "steady" transaction, i.e. the last
    /// one that was synced to disk completely. This has big implications for both data
    /// durability and (unfortunately) performance:
    /// - A system crash can't corrupt the database, but the last transactions will be lost,
    ///   because MDBX rolls back to the last steady commit, which it kept explicitly.
    /// - The last steady transaction acts much like a "long-lived" read transaction: it
    ///   prevents reuse of pages freed by newer write transactions, so any data changes are
    ///   placed in newly allocated pages.
    /// - To avoid rapid database growth, the system syncs data and issues a steady commit-point
    ///   to resume page reuse each time there is insufficient space, before increasing the size
    ///   of the file on disk.
    ///
    /// In other words, [`SyncMode::SafeNoSync`] protects against corruption of the whole
    /// database at the cost of a larger database and/or more disk IOPs. It can therefore be
    /// used together with [`Environment::sync()`](crate::Environment::sync) as an alternative
    /// to batch committing or nested transactions (in some cases).
    ///
    /// The number and volume of disk IOPs with [`SyncMode::SafeNoSync`] will be exactly the
    /// same as without any no-sync flags. However, expect a larger process working set and
    /// significantly worse locality of reference, due to the more intensive allocation of
    /// previously unused pages and the increased size of the database.
    SafeNoSync,

    /// Don't sync anything and wipe previous steady commits.
    ///
    /// System buffers are not flushed to disk when a transaction is committed. With this
    /// optimization a system crash can corrupt the database if buffers have not yet been
    /// flushed to disk. Depending on the platform and hardware, [`SyncMode::UtterlyNoSync`]
    /// may multiply write performance, even by 100 times or more.
    ///
    /// If the filesystem preserves write order (which is rare and never provided unless
    /// explicitly noted) and neither [`WriteMap`](crate::EnvironmentKind::WriteMap) nor
    /// [`EnvironmentFlags::liforeclaim`] is used, then a system crash can't corrupt the
    /// database, but the last transactions can be lost if at least one buffer has not yet been
    /// flushed to disk. The risk is governed by how often the system flushes dirty buffers to
    /// disk and how often [`Environment::sync()`](crate::Environment::sync) is called. So
    /// transactions exhibit the ACI properties (atomicity, consistency, isolation) and lose
    /// only D (durability). That is, database integrity is maintained, but a system crash may
    /// undo the final transactions.
    ///
    /// Otherwise, if the filesystem does not preserve write order (which is typical), or
    /// [`WriteMap`](crate::EnvironmentKind::WriteMap) or [`EnvironmentFlags::liforeclaim`] is
    /// used, expect a corrupted database after a system crash.
    ///
    /// So, the most important things about [`SyncMode::UtterlyNoSync`]:
    /// - A system crash immediately after committing a write transaction is highly likely to
    ///   lead to database corruption.
    /// - Successful completion of
    ///   [`Environment::sync(force=true)`](crate::Environment::sync) after one or more
    ///   committed transactions guarantees consistency and durability.
    /// - BUT committing two or more transactions puts the database back into a weak state, in
    ///   which a system crash may lead to database corruption! With only a single transaction
    ///   after [`Environment::sync()`](crate::Environment::sync), you may lose that transaction
    ///   itself, but not the whole database.
    ///
    /// Nevertheless, [`SyncMode::UtterlyNoSync`] provides "weak" durability in case of an
    /// application crash (but no durability on system failure), and may therefore be very
    /// useful in scenarios where data durability across a system failure is not required
    /// (e.g. for short-lived data), or if you can take such a risk.
    UtterlyNoSync,
}
105
106#[derive(Clone, Copy, Debug)]
107pub enum Mode {
108 ReadOnly,
109 ReadWrite { sync_mode: SyncMode },
110}
111
112impl Default for Mode {
113 fn default() -> Self {
114 Self::ReadWrite { sync_mode: SyncMode::default() }
115 }
116}
117
118impl From<Mode> for EnvironmentFlags {
119 fn from(mode: Mode) -> Self {
120 Self { mode, ..Default::default() }
121 }
122}
123
124impl FromStr for SyncMode {
125 type Err = String;
126
127 fn from_str(s: &str) -> Result<Self, Self::Err> {
128 let val = s.trim().to_ascii_lowercase();
129 match val.as_str() {
130 "durable" => Ok(Self::Durable),
131 "safe-no-sync" | "safenosync" | "safe_no_sync" => Ok(Self::SafeNoSync),
132 _ => Err(format!(
133 "invalid value '{s}' for sync mode. valid values: durable, safe-no-sync"
134 )),
135 }
136 }
137}
138
139#[derive(Clone, Copy, Debug, Default)]
140pub struct EnvironmentFlags {
141 pub no_sub_dir: bool,
142 pub exclusive: bool,
143 /// Flag is intended to open an existing sub-database which was created with unknown flags
144 /// In such cases, instead of returning the `MDBX_INCOMPATIBLE` error, the sub-database will be
145 /// opened with flags which it was created, and then an application could determine the actual
146 /// flags.
147 pub accede: bool,
148 pub mode: Mode,
149 pub no_rdahead: bool,
150 pub no_meminit: bool,
151 pub coalesce: bool,
152 pub liforeclaim: bool,
153}
154
155impl EnvironmentFlags {
156 /// Configures the mdbx flags to use when opening the environment.
157 pub(crate) const fn make_flags(&self) -> ffi::MDBX_env_flags_t {
158 let mut flags = 0;
159
160 if self.no_sub_dir {
161 flags |= ffi::MDBX_NOSUBDIR;
162 }
163
164 if self.exclusive {
165 flags |= ffi::MDBX_EXCLUSIVE;
166 }
167
168 if self.accede {
169 flags |= ffi::MDBX_ACCEDE;
170 }
171
172 match self.mode {
173 Mode::ReadOnly => {
174 flags |= ffi::MDBX_RDONLY;
175 }
176 Mode::ReadWrite { sync_mode } => {
177 flags |= match sync_mode {
178 SyncMode::Durable => ffi::MDBX_SYNC_DURABLE,
179 SyncMode::NoMetaSync => ffi::MDBX_NOMETASYNC,
180 SyncMode::SafeNoSync => ffi::MDBX_SAFE_NOSYNC,
181 SyncMode::UtterlyNoSync => ffi::MDBX_UTTERLY_NOSYNC,
182 };
183 }
184 }
185
186 if self.no_rdahead {
187 flags |= ffi::MDBX_NORDAHEAD;
188 }
189
190 if self.no_meminit {
191 flags |= ffi::MDBX_NOMEMINIT;
192 }
193
194 if self.coalesce {
195 flags |= ffi::MDBX_COALESCE;
196 }
197
198 if self.liforeclaim {
199 flags |= ffi::MDBX_LIFORECLAIM;
200 }
201
202 flags |= ffi::MDBX_NOTLS;
203
204 flags
205 }
206}
207
208bitflags! {
209 #[doc="Database options."]
210 #[derive(Default)]
211 pub struct DatabaseFlags: MDBX_env_flags_t {
212 const REVERSE_KEY = MDBX_REVERSEKEY;
213 const DUP_SORT = MDBX_DUPSORT;
214 const INTEGER_KEY = MDBX_INTEGERKEY;
215 const DUP_FIXED = MDBX_DUPFIXED;
216 const INTEGER_DUP = MDBX_INTEGERDUP;
217 const REVERSE_DUP = MDBX_REVERSEDUP;
218 const CREATE = MDBX_CREATE;
219 const ACCEDE = MDBX_DB_ACCEDE;
220 }
221}
222
223bitflags! {
224 #[doc="Write options."]
225 #[derive(Default)]
226 pub struct WriteFlags: MDBX_env_flags_t {
227 const UPSERT = MDBX_UPSERT;
228 const NO_OVERWRITE = MDBX_NOOVERWRITE;
229 const NO_DUP_DATA = MDBX_NODUPDATA;
230 const CURRENT = MDBX_CURRENT;
231 const ALLDUPS = MDBX_ALLDUPS;
232 const RESERVE = MDBX_RESERVE;
233 const APPEND = MDBX_APPEND;
234 const APPEND_DUP = MDBX_APPENDDUP;
235 const MULTIPLE = MDBX_MULTIPLE;
236 }
237}