kumo_server_common/
disk_space.rs1use anyhow::Context;
2use human_bytes::human_bytes;
3use num_format::{Locale, ToFormattedString};
4use prometheus::IntGaugeVec;
5use serde::Deserialize;
6use serde_json::Value;
7use std::collections::HashSet;
8use std::path::PathBuf;
9use std::sync::atomic::{AtomicBool, Ordering};
10use std::sync::{LazyLock, Mutex, Once};
11use std::time::Duration;
12
13static OVER_LIMIT: AtomicBool = AtomicBool::new(false);
14static PATHS: LazyLock<Mutex<Vec<MonitoredPath>>> = LazyLock::new(Default::default);
15static MONITOR: Once = Once::new();
16static FREE_INODES: LazyLock<IntGaugeVec> = LazyLock::new(|| {
17 prometheus::register_int_gauge_vec!(
18 "disk_free_inodes",
19 "number of available inodes in a monitored location",
20 &["name"]
21 )
22 .unwrap()
23});
24static FREE_INODES_PCT: LazyLock<IntGaugeVec> = LazyLock::new(|| {
25 prometheus::register_int_gauge_vec!(
26 "disk_free_inodes_percent",
27 "percentage of available inodes in a monitored location",
28 &["name"]
29 )
30 .unwrap()
31});
32static FREE_SPACE: LazyLock<IntGaugeVec> = LazyLock::new(|| {
33 prometheus::register_int_gauge_vec!(
34 "disk_free_bytes",
35 "number of available bytes in a monitored location",
36 &["name"]
37 )
38 .unwrap()
39});
40static FREE_SPACE_PCT: LazyLock<IntGaugeVec> = LazyLock::new(|| {
41 prometheus::register_int_gauge_vec!(
42 "disk_free_percent",
43 "percentage of available bytes in a monitored location",
44 &["name"]
45 )
46 .unwrap()
47});
48
49#[derive(Hash, PartialEq, Eq, Debug, Clone, Copy, Deserialize)]
50#[serde(try_from = "serde_json::Value")]
51pub enum MinFree {
52 Percent(u8),
53 Value(u64),
54}
55
56impl TryFrom<Value> for MinFree {
57 type Error = String;
58 fn try_from(v: Value) -> Result<Self, String> {
59 match v {
60 Value::String(s) => {
61 match s.strip_suffix("%") {
62 Some(n) => n
63 .parse::<u8>()
64 .map_err(|err| format!("invalid MinFree percentage specifier. {err:#}"))
65 .map(MinFree::Percent),
66 None => s
67 .parse::<u64>()
68 .map_err(|err| format!("invalid MinFree size specifier. {err:#}"))
69 .map(MinFree::Value),
70 }
71 }
72 Value::Number(n) => {
73 match n.as_u64() {
74 Some(n) => Ok(MinFree::Value(n)),
75 None => Err(format!("invalid MinFree size specifier. {n} could not be converted to u64"))
76 }
77 }
78 v => Err(format!("invalid MinFree specifier {v:?}. Value must be either a percentage string like '10%' or the size expressed as an integer"))
79 }
80 }
81}
82
83impl TryFrom<String> for MinFree {
84 type Error = String;
85 fn try_from(s: String) -> Result<Self, String> {
86 Self::try_from(s.as_str())
87 }
88}
89
90impl TryFrom<&str> for MinFree {
91 type Error = String;
92 fn try_from(s: &str) -> Result<Self, String> {
93 match s.strip_suffix("%") {
94 Some(n) => n
95 .parse::<u8>()
96 .map_err(|err| format!("invalid MinFree percentage specifier. {err:#}"))
97 .map(MinFree::Percent),
98 None => s
99 .parse::<u64>()
100 .map_err(|err| format!("invalid MinFree size specifier. {err:#}"))
101 .map(MinFree::Value),
102 }
103 }
104}
105
106impl Default for MinFree {
107 fn default() -> Self {
108 Self::Percent(10)
109 }
110}
111
112#[derive(Hash, PartialEq, Eq, Debug, Clone)]
113pub struct MonitoredPath {
114 pub name: String,
115 pub path: PathBuf,
116 pub min_free_space: MinFree,
117 pub min_free_inodes: MinFree,
118}
119
/// A point-in-time sample of what is available on a monitored filesystem.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct AvailableSpace {
    /// Available space, in bytes.
    pub space_avail: u64,
    /// Available space as a whole-number percentage of the total.
    pub space_avail_percent: u8,
    /// Number of available inodes.
    pub inodes_avail: u64,
    /// Available inodes as a whole-number percentage of the total.
    pub inodes_avail_percent: u8,
}
126
127impl MonitoredPath {
128 pub fn register(self) {
129 PATHS.lock().unwrap().push(self);
130
131 MONITOR.call_once(|| {
132 std::thread::Builder::new()
133 .name("disk-space-monitor".to_string())
134 .spawn(monitor_thread)
135 .expect("failed to spawn disk-space-monitor thread");
136 });
137 }
138
139 pub fn get_usage(&self) -> anyhow::Result<AvailableSpace> {
140 let info = nix::sys::statvfs::statvfs(&self.path)
141 .with_context(|| format!("statvfs({}) failed", self.path.display()))?;
142
143 let blocks_avail = info.blocks_available() as u64;
144 let blocks_total = info.blocks() as u64;
145
146 let space_avail_percent =
147 ((blocks_avail as f64 / blocks_total as f64) * 100.0).floor() as u8;
148 let space_avail = blocks_avail * info.block_size();
149 FREE_SPACE
150 .get_metric_with_label_values(&[&self.name])
151 .unwrap()
152 .set(space_avail as i64);
153 FREE_SPACE_PCT
154 .get_metric_with_label_values(&[&self.name])
155 .unwrap()
156 .set(space_avail_percent as i64);
157
158 let inodes_avail = info.files_available() as u64;
159 let inodes_total = info.files();
160 let inodes_avail_percent =
161 ((inodes_avail as f64 / inodes_total as f64) * 100.0).floor() as u8;
162 FREE_INODES
163 .get_metric_with_label_values(&[&self.name])
164 .unwrap()
165 .set(inodes_avail as i64);
166 FREE_INODES_PCT
167 .get_metric_with_label_values(&[&self.name])
168 .unwrap()
169 .set(inodes_avail_percent as i64);
170
171 Ok(AvailableSpace {
172 space_avail,
173 space_avail_percent,
174 inodes_avail,
175 inodes_avail_percent,
176 })
177 }
178
179 pub fn check_usage(&self, avail: &AvailableSpace) -> anyhow::Result<()> {
180 let mut reason = vec![];
181
182 match self.min_free_space {
183 MinFree::Percent(p) if avail.space_avail_percent < p => {
184 reason.push(format!(
185 "{}% space available but minimum is {p}%",
186 avail.space_avail_percent
187 ));
188 }
189 MinFree::Value(n) if avail.space_avail < n => {
190 reason.push(format!(
191 "{} ({}) space available but minimum is {} ({})",
192 avail.space_avail.to_formatted_string(&Locale::en),
193 human_bytes(avail.space_avail as f64),
194 n.to_formatted_string(&Locale::en),
195 human_bytes(n as f64),
196 ));
197 }
198 _ => {}
199 }
200 match self.min_free_inodes {
201 MinFree::Percent(p) if avail.inodes_avail_percent < p => {
202 reason.push(format!(
203 "{}% inodes available but minimum is {p}%",
204 avail.inodes_avail_percent
205 ));
206 }
207 MinFree::Value(n) if avail.inodes_avail < n => {
208 reason.push(format!(
209 "{} inodes available but minimum is {}",
210 avail.space_avail.to_formatted_string(&Locale::en),
211 n.to_formatted_string(&Locale::en),
212 ));
213 }
214 _ => {}
215 }
216
217 if reason.is_empty() {
218 Ok(())
219 } else {
220 anyhow::bail!(
221 "{} path {} has issue(s): {}",
222 self.name,
223 self.path.display(),
224 reason.join(", ")
225 );
226 }
227 }
228}
229
230pub fn is_over_limit() -> bool {
231 OVER_LIMIT.load(Ordering::SeqCst)
232}
233
234fn copy_paths() -> Vec<MonitoredPath> {
235 PATHS.lock().unwrap().clone()
236}
237
238fn monitor_thread() {
239 let mut bad_monitors = HashSet::new();
240 loop {
241 let paths = copy_paths();
242
243 for p in paths {
244 match p.get_usage() {
245 Ok(avail) => match p.check_usage(&avail) {
246 Ok(()) => {
247 if bad_monitors.remove(&p) {
248 tracing::error!("{} path {} has recovered", p.name, p.path.display());
249 }
250 }
251 Err(err) => {
252 if bad_monitors.insert(p.clone()) {
253 tracing::error!("{err:#}");
254 }
255 }
256 },
257 Err(err) => {
258 tracing::error!("{err:#}");
259 }
260 }
261 }
262
263 OVER_LIMIT.store(!bad_monitors.is_empty(), Ordering::SeqCst);
264 std::thread::sleep(Duration::from_secs(5));
265 }
266}