rusty_bin/monitor/
health.rs1use crate::monitor::config::MonitorConfig;
6use crate::monitor::utils::time;
7use parking_lot::RwLock;
8use rusty_common::collections::FxHashMap;
9use serde::{Deserialize, Serialize};
10use std::sync::Arc;
11use std::time::Duration;
12use tokio::time::interval;
13
14#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
16pub enum HealthStatus {
17 Healthy,
19 Warning,
21 Critical,
23 Unknown,
25}
26
27#[derive(Debug, Clone, Serialize, Deserialize)]
29pub struct HealthCheck {
30 pub name: String,
32 pub status: HealthStatus,
34 pub message: String,
36 pub timestamp: u64,
38 pub details: FxHashMap<String, String>,
40}
41
42#[derive(Debug, Clone)]
44pub struct HealthChecker {
45 config: MonitorConfig,
47 checks: Arc<RwLock<Vec<HealthCheck>>>,
49}
50
51impl HealthChecker {
52 #[must_use]
54 pub fn new(config: MonitorConfig) -> Self {
55 Self {
56 config,
57 checks: Arc::new(RwLock::new(Vec::new())),
58 }
59 }
60
61 pub async fn start_monitoring(&self) {
63 log::info!("Starting health monitoring");
64
65 let mut interval = interval(Duration::from_secs(
66 self.config.monitoring.health_check_interval_seconds,
67 ));
68 let checks = self.checks.clone();
69 let config = self.config.clone();
70
71 loop {
72 interval.tick().await;
73
74 let health_checks = Self::perform_health_checks_static(&config).await;
75
76 {
78 let mut stored_checks = checks.write();
79 *stored_checks = health_checks;
80 }
81 }
82 }
83
84 pub async fn perform_health_checks(&self) -> Vec<HealthCheck> {
86 Self::perform_health_checks_static(&self.config).await
87 }
88
89 async fn perform_health_checks_static(config: &MonitorConfig) -> Vec<HealthCheck> {
91 let mut checks = Vec::new();
92
93 checks.push(Self::check_disk_space_static(config).await);
95
96 checks.push(Self::check_memory_usage_static().await);
98
99 checks.push(Self::check_data_directories_static(config).await);
101
102 checks.push(Self::check_file_permissions_static(config).await);
104
105 checks
106 }
107
108 async fn check_disk_space_static(_config: &MonitorConfig) -> HealthCheck {
110 use sysinfo::Disks;
111
112 let disks = Disks::new_with_refreshed_list();
113
114 let mut status = HealthStatus::Healthy;
115 let mut message = "Disk space is adequate".to_string();
116 let mut details = FxHashMap::default();
117
118 for disk in &disks {
119 let total = disk.total_space();
120 let available = disk.available_space();
121 let usage_percent = ((total - available) as f64 / total as f64) * 100.0;
122
123 let mount_point = disk.mount_point().to_string_lossy().to_string();
124 details.insert(
125 format!("disk_{}", mount_point.replace('/', "_")),
126 format!("{usage_percent:.1}% used"),
127 );
128
129 if usage_percent > 90.0 {
130 status = HealthStatus::Critical;
131 message = format!("Disk space critical: {mount_point} is {usage_percent:.1}% full");
132 } else if usage_percent > 80.0 && status == HealthStatus::Healthy {
133 status = HealthStatus::Warning;
134 message = format!("Disk space warning: {mount_point} is {usage_percent:.1}% full");
135 }
136 }
137
138 HealthCheck {
139 name: "disk_space".to_string(),
140 status,
141 message,
142 timestamp: time::now_nanos(),
143 details,
144 }
145 }
146
147 async fn check_memory_usage_static() -> HealthCheck {
149 use sysinfo::System;
150
151 let mut system = System::new_all();
152 system.refresh_memory();
153
154 let memory_total = system.total_memory();
155 let memory_used = system.used_memory();
156 let usage_percent = (memory_used as f64 / memory_total as f64) * 100.0;
157
158 let (status, message) = if usage_percent > 90.0 {
159 (
160 HealthStatus::Critical,
161 format!("Memory usage critical: {usage_percent:.1}%"),
162 )
163 } else if usage_percent > 80.0 {
164 (
165 HealthStatus::Warning,
166 format!("Memory usage high: {usage_percent:.1}%"),
167 )
168 } else {
169 (
170 HealthStatus::Healthy,
171 format!("Memory usage normal: {usage_percent:.1}%"),
172 )
173 };
174
175 let mut details = FxHashMap::default();
176 details.insert("usage_percent".to_string(), format!("{usage_percent:.1}%"));
177 details.insert(
178 "used_mb".to_string(),
179 format!("{}", memory_used / 1024 / 1024),
180 );
181 details.insert(
182 "total_mb".to_string(),
183 format!("{}", memory_total / 1024 / 1024),
184 );
185
186 HealthCheck {
187 name: "memory_usage".to_string(),
188 status,
189 message,
190 timestamp: time::now_nanos(),
191 details,
192 }
193 }
194
195 async fn check_data_directories_static(config: &MonitorConfig) -> HealthCheck {
197 let directories = [&config.storage.market_data_path];
198
199 let mut status = HealthStatus::Healthy;
200 let mut message = "All data directories accessible".to_string();
201 let mut details = FxHashMap::default();
202
203 for (i, dir) in directories.iter().enumerate() {
204 let dir_name = format!("directory_{i}");
205
206 if !dir.exists() {
207 status = HealthStatus::Critical;
208 message = format!("Data directory missing: {}", dir.display());
209 details.insert(dir_name, "missing".to_string());
210 } else if !dir.is_dir() {
211 status = HealthStatus::Critical;
212 message = format!("Data path is not a directory: {}", dir.display());
213 details.insert(dir_name, "not_directory".to_string());
214 } else {
215 details.insert(dir_name, "ok".to_string());
216 }
217 }
218
219 HealthCheck {
220 name: "data_directories".to_string(),
221 status,
222 message,
223 timestamp: time::now_nanos(),
224 details,
225 }
226 }
227
228 async fn check_file_permissions_static(config: &MonitorConfig) -> HealthCheck {
230 use std::fs::OpenOptions;
231
232 let test_file = config.storage.market_data_path.join(".permission_test");
233
234 let (status, message) = match OpenOptions::new()
235 .create(true)
236 .write(true)
237 .truncate(true)
238 .open(&test_file)
239 {
240 Ok(_) => {
241 let _ = std::fs::remove_file(&test_file);
243 (
244 HealthStatus::Healthy,
245 "File permissions are adequate".to_string(),
246 )
247 }
248 Err(e) => (
249 HealthStatus::Critical,
250 format!("Cannot write to data directory: {e}"),
251 ),
252 };
253
254 HealthCheck {
255 name: "file_permissions".to_string(),
256 status,
257 message,
258 timestamp: time::now_nanos(),
259 details: FxHashMap::default(),
260 }
261 }
262
263 pub fn get_health_status(&self) -> Vec<HealthCheck> {
265 self.checks.read().clone()
266 }
267
268 pub fn get_overall_status(&self) -> HealthStatus {
270 let checks = self.checks.read();
271
272 if checks
273 .iter()
274 .any(|c| matches!(c.status, HealthStatus::Critical))
275 {
276 HealthStatus::Critical
277 } else if checks
278 .iter()
279 .any(|c| matches!(c.status, HealthStatus::Warning))
280 {
281 HealthStatus::Warning
282 } else if checks.is_empty() {
283 HealthStatus::Unknown
284 } else {
285 HealthStatus::Healthy
286 }
287 }
288}