rusty_bin/monitor/
health.rs

1//! Health checking implementation
2//!
3//! This module provides the actual implementation of health monitoring.
4
5use crate::monitor::config::MonitorConfig;
6use crate::monitor::utils::time;
7use parking_lot::RwLock;
8use rusty_common::collections::FxHashMap;
9use serde::{Deserialize, Serialize};
10use std::sync::Arc;
11use std::time::Duration;
12use tokio::time::interval;
13
14/// Health status enum
15#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
16pub enum HealthStatus {
17    /// System is healthy
18    Healthy,
19    /// System has warnings
20    Warning,
21    /// System is in critical state
22    Critical,
23    /// Health status is unknown
24    Unknown,
25}
26
27/// Health check structure
28#[derive(Debug, Clone, Serialize, Deserialize)]
29pub struct HealthCheck {
30    /// Name of the health check
31    pub name: String,
32    /// Current health status
33    pub status: HealthStatus,
34    /// Status message
35    pub message: String,
36    /// Timestamp in nanoseconds
37    pub timestamp: u64,
38    /// Additional details about the health check
39    pub details: FxHashMap<String, String>,
40}
41
42/// Health checker implementation
43#[derive(Debug, Clone)]
44pub struct HealthChecker {
45    /// Monitor configuration
46    config: MonitorConfig,
47    /// Cached health checks
48    checks: Arc<RwLock<Vec<HealthCheck>>>,
49}
50
51impl HealthChecker {
52    /// Create a new health checker
53    #[must_use]
54    pub fn new(config: MonitorConfig) -> Self {
55        Self {
56            config,
57            checks: Arc::new(RwLock::new(Vec::new())),
58        }
59    }
60
61    /// Start health monitoring
62    pub async fn start_monitoring(&self) {
63        log::info!("Starting health monitoring");
64
65        let mut interval = interval(Duration::from_secs(
66            self.config.monitoring.health_check_interval_seconds,
67        ));
68        let checks = self.checks.clone();
69        let config = self.config.clone();
70
71        loop {
72            interval.tick().await;
73
74            let health_checks = Self::perform_health_checks_static(&config).await;
75
76            // Update stored checks
77            {
78                let mut stored_checks = checks.write();
79                *stored_checks = health_checks;
80            }
81        }
82    }
83
84    /// Perform all health checks
85    pub async fn perform_health_checks(&self) -> Vec<HealthCheck> {
86        Self::perform_health_checks_static(&self.config).await
87    }
88
89    /// Perform health checks (static version)
90    async fn perform_health_checks_static(config: &MonitorConfig) -> Vec<HealthCheck> {
91        let mut checks = Vec::new();
92
93        // Check disk space
94        checks.push(Self::check_disk_space_static(config).await);
95
96        // Check memory usage
97        checks.push(Self::check_memory_usage_static().await);
98
99        // Check data directories
100        checks.push(Self::check_data_directories_static(config).await);
101
102        // Check file permissions
103        checks.push(Self::check_file_permissions_static(config).await);
104
105        checks
106    }
107
108    /// Check disk space (static version)
109    async fn check_disk_space_static(_config: &MonitorConfig) -> HealthCheck {
110        use sysinfo::Disks;
111
112        let disks = Disks::new_with_refreshed_list();
113
114        let mut status = HealthStatus::Healthy;
115        let mut message = "Disk space is adequate".to_string();
116        let mut details = FxHashMap::default();
117
118        for disk in &disks {
119            let total = disk.total_space();
120            let available = disk.available_space();
121            let usage_percent = ((total - available) as f64 / total as f64) * 100.0;
122
123            let mount_point = disk.mount_point().to_string_lossy().to_string();
124            details.insert(
125                format!("disk_{}", mount_point.replace('/', "_")),
126                format!("{usage_percent:.1}% used"),
127            );
128
129            if usage_percent > 90.0 {
130                status = HealthStatus::Critical;
131                message = format!("Disk space critical: {mount_point} is {usage_percent:.1}% full");
132            } else if usage_percent > 80.0 && status == HealthStatus::Healthy {
133                status = HealthStatus::Warning;
134                message = format!("Disk space warning: {mount_point} is {usage_percent:.1}% full");
135            }
136        }
137
138        HealthCheck {
139            name: "disk_space".to_string(),
140            status,
141            message,
142            timestamp: time::now_nanos(),
143            details,
144        }
145    }
146
147    /// Check memory usage (static version)
148    async fn check_memory_usage_static() -> HealthCheck {
149        use sysinfo::System;
150
151        let mut system = System::new_all();
152        system.refresh_memory();
153
154        let memory_total = system.total_memory();
155        let memory_used = system.used_memory();
156        let usage_percent = (memory_used as f64 / memory_total as f64) * 100.0;
157
158        let (status, message) = if usage_percent > 90.0 {
159            (
160                HealthStatus::Critical,
161                format!("Memory usage critical: {usage_percent:.1}%"),
162            )
163        } else if usage_percent > 80.0 {
164            (
165                HealthStatus::Warning,
166                format!("Memory usage high: {usage_percent:.1}%"),
167            )
168        } else {
169            (
170                HealthStatus::Healthy,
171                format!("Memory usage normal: {usage_percent:.1}%"),
172            )
173        };
174
175        let mut details = FxHashMap::default();
176        details.insert("usage_percent".to_string(), format!("{usage_percent:.1}%"));
177        details.insert(
178            "used_mb".to_string(),
179            format!("{}", memory_used / 1024 / 1024),
180        );
181        details.insert(
182            "total_mb".to_string(),
183            format!("{}", memory_total / 1024 / 1024),
184        );
185
186        HealthCheck {
187            name: "memory_usage".to_string(),
188            status,
189            message,
190            timestamp: time::now_nanos(),
191            details,
192        }
193    }
194
195    /// Check data directories (static version)
196    async fn check_data_directories_static(config: &MonitorConfig) -> HealthCheck {
197        let directories = [&config.storage.market_data_path];
198
199        let mut status = HealthStatus::Healthy;
200        let mut message = "All data directories accessible".to_string();
201        let mut details = FxHashMap::default();
202
203        for (i, dir) in directories.iter().enumerate() {
204            let dir_name = format!("directory_{i}");
205
206            if !dir.exists() {
207                status = HealthStatus::Critical;
208                message = format!("Data directory missing: {}", dir.display());
209                details.insert(dir_name, "missing".to_string());
210            } else if !dir.is_dir() {
211                status = HealthStatus::Critical;
212                message = format!("Data path is not a directory: {}", dir.display());
213                details.insert(dir_name, "not_directory".to_string());
214            } else {
215                details.insert(dir_name, "ok".to_string());
216            }
217        }
218
219        HealthCheck {
220            name: "data_directories".to_string(),
221            status,
222            message,
223            timestamp: time::now_nanos(),
224            details,
225        }
226    }
227
228    /// Check file permissions (static version)
229    async fn check_file_permissions_static(config: &MonitorConfig) -> HealthCheck {
230        use std::fs::OpenOptions;
231
232        let test_file = config.storage.market_data_path.join(".permission_test");
233
234        let (status, message) = match OpenOptions::new()
235            .create(true)
236            .write(true)
237            .truncate(true)
238            .open(&test_file)
239        {
240            Ok(_) => {
241                // Clean up test file
242                let _ = std::fs::remove_file(&test_file);
243                (
244                    HealthStatus::Healthy,
245                    "File permissions are adequate".to_string(),
246                )
247            }
248            Err(e) => (
249                HealthStatus::Critical,
250                format!("Cannot write to data directory: {e}"),
251            ),
252        };
253
254        HealthCheck {
255            name: "file_permissions".to_string(),
256            status,
257            message,
258            timestamp: time::now_nanos(),
259            details: FxHashMap::default(),
260        }
261    }
262
263    /// Get current health status
264    pub fn get_health_status(&self) -> Vec<HealthCheck> {
265        self.checks.read().clone()
266    }
267
268    /// Get overall health status
269    pub fn get_overall_status(&self) -> HealthStatus {
270        let checks = self.checks.read();
271
272        if checks
273            .iter()
274            .any(|c| matches!(c.status, HealthStatus::Critical))
275        {
276            HealthStatus::Critical
277        } else if checks
278            .iter()
279            .any(|c| matches!(c.status, HealthStatus::Warning))
280        {
281            HealthStatus::Warning
282        } else if checks.is_empty() {
283            HealthStatus::Unknown
284        } else {
285            HealthStatus::Healthy
286        }
287    }
288}