kumo_server_common/
disk_space.rs

1use anyhow::Context;
2use human_bytes::human_bytes;
3use num_format::{Locale, ToFormattedString};
4use prometheus::IntGaugeVec;
5use serde::Deserialize;
6use serde_json::Value;
7use std::collections::HashSet;
8use std::path::PathBuf;
9use std::sync::atomic::{AtomicBool, Ordering};
10use std::sync::{LazyLock, Mutex, Once};
11use std::time::Duration;
12
13static OVER_LIMIT: AtomicBool = AtomicBool::new(false);
14static PATHS: LazyLock<Mutex<Vec<MonitoredPath>>> = LazyLock::new(Default::default);
15static MONITOR: Once = Once::new();
16static FREE_INODES: LazyLock<IntGaugeVec> = LazyLock::new(|| {
17    prometheus::register_int_gauge_vec!(
18        "disk_free_inodes",
19        "number of available inodes in a monitored location",
20        &["name"]
21    )
22    .unwrap()
23});
24static FREE_INODES_PCT: LazyLock<IntGaugeVec> = LazyLock::new(|| {
25    prometheus::register_int_gauge_vec!(
26        "disk_free_inodes_percent",
27        "percentage of available inodes in a monitored location",
28        &["name"]
29    )
30    .unwrap()
31});
32static FREE_SPACE: LazyLock<IntGaugeVec> = LazyLock::new(|| {
33    prometheus::register_int_gauge_vec!(
34        "disk_free_bytes",
35        "number of available bytes in a monitored location",
36        &["name"]
37    )
38    .unwrap()
39});
40static FREE_SPACE_PCT: LazyLock<IntGaugeVec> = LazyLock::new(|| {
41    prometheus::register_int_gauge_vec!(
42        "disk_free_percent",
43        "percentage of available bytes in a monitored location",
44        &["name"]
45    )
46    .unwrap()
47});
48
49#[derive(Hash, PartialEq, Eq, Debug, Clone, Copy, Deserialize)]
50#[serde(try_from = "serde_json::Value")]
51pub enum MinFree {
52    Percent(u8),
53    Value(u64),
54}
55
56impl TryFrom<Value> for MinFree {
57    type Error = String;
58    fn try_from(v: Value) -> Result<Self, String> {
59        match v {
60            Value::String(s) => {
61                match s.strip_suffix("%") {
62                    Some(n) => n
63                        .parse::<u8>()
64                        .map_err(|err| format!("invalid MinFree percentage specifier. {err:#}"))
65                        .map(MinFree::Percent),
66                    None => s
67                        .parse::<u64>()
68                        .map_err(|err| format!("invalid MinFree size specifier. {err:#}"))
69                        .map(MinFree::Value),
70                }
71            }
72            Value::Number(n) => {
73                match n.as_u64() {
74                    Some(n) => Ok(MinFree::Value(n)),
75                    None => Err(format!("invalid MinFree size specifier. {n} could not be converted to u64"))
76                }
77            }
78            v => Err(format!("invalid MinFree specifier {v:?}. Value must be either a percentage string like '10%' or the size expressed as an integer"))
79        }
80    }
81}
82
83impl TryFrom<String> for MinFree {
84    type Error = String;
85    fn try_from(s: String) -> Result<Self, String> {
86        Self::try_from(s.as_str())
87    }
88}
89
90impl TryFrom<&str> for MinFree {
91    type Error = String;
92    fn try_from(s: &str) -> Result<Self, String> {
93        match s.strip_suffix("%") {
94            Some(n) => n
95                .parse::<u8>()
96                .map_err(|err| format!("invalid MinFree percentage specifier. {err:#}"))
97                .map(MinFree::Percent),
98            None => s
99                .parse::<u64>()
100                .map_err(|err| format!("invalid MinFree size specifier. {err:#}"))
101                .map(MinFree::Value),
102        }
103    }
104}
105
106impl Default for MinFree {
107    fn default() -> Self {
108        Self::Percent(10)
109    }
110}
111
112#[derive(Hash, PartialEq, Eq, Debug, Clone)]
113pub struct MonitoredPath {
114    pub name: String,
115    pub path: PathBuf,
116    pub min_free_space: MinFree,
117    pub min_free_inodes: MinFree,
118}
119
/// A point-in-time sample of what is still available at a monitored path,
/// as produced by `MonitoredPath::get_usage`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct AvailableSpace {
    /// Available space in bytes.
    pub space_avail: u64,
    /// Available space as a whole percentage of the total, rounded down.
    pub space_avail_percent: u8,
    /// Number of available inodes.
    pub inodes_avail: u64,
    /// Available inodes as a whole percentage of the total, rounded down.
    pub inodes_avail_percent: u8,
}
126
127impl MonitoredPath {
128    pub fn register(self) {
129        PATHS.lock().unwrap().push(self);
130
131        MONITOR.call_once(|| {
132            std::thread::Builder::new()
133                .name("disk-space-monitor".to_string())
134                .spawn(monitor_thread)
135                .expect("failed to spawn disk-space-monitor thread");
136        });
137    }
138
139    pub fn get_usage(&self) -> anyhow::Result<AvailableSpace> {
140        let info = nix::sys::statvfs::statvfs(&self.path)
141            .with_context(|| format!("statvfs({}) failed", self.path.display()))?;
142
143        let blocks_avail = info.blocks_available() as u64;
144        let blocks_total = info.blocks() as u64;
145
146        let space_avail_percent =
147            ((blocks_avail as f64 / blocks_total as f64) * 100.0).floor() as u8;
148        let space_avail = blocks_avail * info.block_size();
149        FREE_SPACE
150            .get_metric_with_label_values(&[&self.name])
151            .unwrap()
152            .set(space_avail as i64);
153        FREE_SPACE_PCT
154            .get_metric_with_label_values(&[&self.name])
155            .unwrap()
156            .set(space_avail_percent as i64);
157
158        let inodes_avail = info.files_available() as u64;
159        let inodes_total = info.files();
160        let inodes_avail_percent =
161            ((inodes_avail as f64 / inodes_total as f64) * 100.0).floor() as u8;
162        FREE_INODES
163            .get_metric_with_label_values(&[&self.name])
164            .unwrap()
165            .set(inodes_avail as i64);
166        FREE_INODES_PCT
167            .get_metric_with_label_values(&[&self.name])
168            .unwrap()
169            .set(inodes_avail_percent as i64);
170
171        Ok(AvailableSpace {
172            space_avail,
173            space_avail_percent,
174            inodes_avail,
175            inodes_avail_percent,
176        })
177    }
178
179    pub fn check_usage(&self, avail: &AvailableSpace) -> anyhow::Result<()> {
180        let mut reason = vec![];
181
182        match self.min_free_space {
183            MinFree::Percent(p) if avail.space_avail_percent < p => {
184                reason.push(format!(
185                    "{}% space available but minimum is {p}%",
186                    avail.space_avail_percent
187                ));
188            }
189            MinFree::Value(n) if avail.space_avail < n => {
190                reason.push(format!(
191                    "{} ({}) space available but minimum is {} ({})",
192                    avail.space_avail.to_formatted_string(&Locale::en),
193                    human_bytes(avail.space_avail as f64),
194                    n.to_formatted_string(&Locale::en),
195                    human_bytes(n as f64),
196                ));
197            }
198            _ => {}
199        }
200        match self.min_free_inodes {
201            MinFree::Percent(p) if avail.inodes_avail_percent < p => {
202                reason.push(format!(
203                    "{}% inodes available but minimum is {p}%",
204                    avail.inodes_avail_percent
205                ));
206            }
207            MinFree::Value(n) if avail.inodes_avail < n => {
208                reason.push(format!(
209                    "{} inodes available but minimum is {}",
210                    avail.space_avail.to_formatted_string(&Locale::en),
211                    n.to_formatted_string(&Locale::en),
212                ));
213            }
214            _ => {}
215        }
216
217        if reason.is_empty() {
218            Ok(())
219        } else {
220            anyhow::bail!(
221                "{} path {} has issue(s): {}",
222                self.name,
223                self.path.display(),
224                reason.join(", ")
225            );
226        }
227    }
228}
229
230pub fn is_over_limit() -> bool {
231    OVER_LIMIT.load(Ordering::SeqCst)
232}
233
234fn copy_paths() -> Vec<MonitoredPath> {
235    PATHS.lock().unwrap().clone()
236}
237
238fn monitor_thread() {
239    let mut bad_monitors = HashSet::new();
240    loop {
241        let paths = copy_paths();
242
243        for p in paths {
244            match p.get_usage() {
245                Ok(avail) => match p.check_usage(&avail) {
246                    Ok(()) => {
247                        if bad_monitors.remove(&p) {
248                            tracing::error!("{} path {} has recovered", p.name, p.path.display());
249                        }
250                    }
251                    Err(err) => {
252                        if bad_monitors.insert(p.clone()) {
253                            tracing::error!("{err:#}");
254                        }
255                    }
256                },
257                Err(err) => {
258                    tracing::error!("{err:#}");
259                }
260            }
261        }
262
263        OVER_LIMIT.store(!bad_monitors.is_empty(), Ordering::SeqCst);
264        std::thread::sleep(Duration::from_secs(5));
265    }
266}