rusty_feeder/provider/ext/health.rs
1//! Health monitoring and diagnostics for the HFT provider system
2//!
3//! This module contains structures and types for monitoring system health,
4//! component status, and issue tracking across the trading infrastructure.
5
6/// System health status with detailed component diagnostics
7///
8/// Comprehensive health check information including overall status,
9/// component-level health, active issues, and performance grading.
10#[derive(Debug, Clone)]
11pub struct HealthStatus {
12 /// Overall system health state
13 pub overall_status: HealthState,
14 /// Health status of individual system components
15 pub component_health: Vec<ComponentHealth>,
16 /// List of currently active health issues
17 pub active_issues: Vec<HealthIssue>,
18 /// Performance grade from 'A' (excellent) to 'F' (failing)
19 pub performance_grade: char,
20 /// System uptime in seconds since last restart
21 pub uptime_seconds: u64,
22 /// Timestamp of this health check in nanoseconds
23 pub last_check_time: u64,
24}
25
/// Health condition of the whole system or of a single component.
///
/// This is a plain fieldless enum, so it is `Copy` (no `.clone()` needed to
/// pass it around) and implements `Eq` + `Hash`, which allows it to be used
/// as a key in `HashMap`/`HashSet`, e.g. for counting components per state.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum HealthState {
    /// Operating normally within expected parameters.
    Healthy,
    /// Operational, but with degraded performance.
    Degraded,
    /// Experiencing critical issues that require attention.
    Critical,
    /// The system or component is not operational.
    Offline,
}
41
42/// Health information for individual system components
43///
44/// Tracks the health status of specific components like connections,
45/// parsers, or processing pipelines.
46#[derive(Debug, Clone)]
47pub struct ComponentHealth {
48 /// Name of the component being monitored
49 pub component_name: String,
50 /// Current health state of this component
51 pub status: HealthState,
52 /// Last successful heartbeat timestamp in nanoseconds
53 pub last_heartbeat: u64,
54 /// Error rate as a fraction (0.0 to 1.0)
55 pub error_rate: f64,
56 /// 99th percentile latency for this component in nanoseconds
57 pub latency_p99_ns: u64,
58}
59
60/// Description of an active health issue
61///
62/// Captures details about problems detected in the system including
63/// severity, affected component, and occurrence tracking.
64#[derive(Debug, Clone)]
65pub struct HealthIssue {
66 /// Severity level of this issue
67 pub severity: IssueSeverity,
68 /// Component affected by this issue
69 pub component: String,
70 /// Human-readable description of the issue
71 pub description: String,
72 /// Timestamp when issue was first detected in nanoseconds
73 pub first_seen: u64,
74 /// Timestamp of most recent occurrence in nanoseconds
75 pub last_seen: u64,
76 /// Number of times this issue has occurred
77 pub count: u32,
78}
79
/// Severity levels for health issues and alerts.
///
/// Used to categorize the importance and urgency of detected problems.
///
/// Variants are declared from least to most severe, so the derived ordering
/// is `Info < Warning < Error < Critical`. This allows threshold checks
/// (`severity >= IssueSeverity::Error`) and picking the worst issue with
/// `Iterator::max`. Keep the declaration order when adding variants, because
/// the derived `Ord` follows it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum IssueSeverity {
    /// Informational message, no action required.
    Info,
    /// Warning condition that may require attention.
    Warning,
    /// Error condition affecting functionality.
    Error,
    /// Critical issue requiring immediate attention.
    Critical,
}