rusty_backtest/features/
mod.rs

1//! Advanced Microstructural Feature Engine
2//!
3//! High-performance implementation of microstructural features for HFT/MFT strategies.
4//! Based on patterns from KRX A3B6G7 and Binance Tardis reference implementations.
5
6pub mod asymmetry;
7pub mod liquidity;
8pub mod market_impact;
9pub mod order_flow;
10pub mod queue;
11pub mod tardis_advanced;
12pub mod tardis_features;
13pub mod volatility;
14
15#[cfg(target_arch = "x86_64")]
16pub mod order_flow_simd;
17
18#[cfg(target_arch = "x86_64")]
19pub mod volatility_simd;
20
21#[cfg(test)]
22mod proptest_tests;
23
24// Re-export key types
25pub use asymmetry::{
26    AsymmetryIndexCalculator, AsymmetryMetrics, MultiTimeframeAsymmetry, calculate_asymmetry_index,
27};
28pub use liquidity::{LiquidityAnalyzer, calculate_liquidity_shocks};
29pub use market_impact::{KylesLambdaCalculator, MarketImpactAnalyzer, MarketImpactMetrics};
30pub use order_flow::{OrderFlowAnalyzer, calculate_ofi, calculate_vpin};
31pub use queue::{QueueAnalyzer, calculate_queue_imbalance};
32pub use tardis_advanced::{
33    HarmonicOscillator, HarmonicResult, OrderType, OrderTypeTracker, TardisAdvancedFeatures,
34    TardisConfig, TardisFeatureVector,
35};
36pub use tardis_features::{
37    AdvancedVPINCalculator, ExponentialDecayCalculator, PriceEntropyCalculator, PriceRunCalculator,
38    RelativeTickVolumeCalculator, RollingPriceImpactCalculator, TradingBurstDetector,
39    VolumePriceSensitivityCalculator, calculate_depth_weighted_ofi,
40    calculate_liquidity_shock_ratio, calculate_multi_level_ofi_detailed,
41    calculate_multi_level_queue_imbalance, calculate_order_book_depth, calculate_order_book_slope,
42    calculate_order_cancel_rate, calculate_relative_spread, calculate_volume_weighted_ofi,
43    calculate_weighted_order_imbalance,
44};
45pub use volatility::{VolatilityEstimator, calculate_realized_volatility};
46
47use rust_decimal::Decimal;
48use rust_decimal::prelude::ToPrimitive;
49use rusty_common::collections::FxHashMap;
50use smallvec::SmallVec;
51
/// Errors that can arise while computing microstructural features.
#[derive(Clone, Debug)]
pub enum FeatureError {
    /// A `Decimal` value could not be converted; the payload describes the failure.
    DecimalConversion(&'static str),
    /// The supplied input was rejected; the payload describes why.
    InvalidInput(&'static str),
    /// Too little data was available to perform the calculation.
    InsufficientData,
}

impl std::fmt::Display for FeatureError {
    /// Writes a human-readable message for the variant, including its payload if any.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match *self {
            Self::InsufficientData => f.write_str("Insufficient data for calculation"),
            Self::InvalidInput(msg) => write!(f, "Invalid input: {msg}"),
            Self::DecimalConversion(msg) => write!(f, "Decimal conversion error: {msg}"),
        }
    }
}

impl std::error::Error for FeatureError {}

/// Shorthand for a `Result` whose error type is [`FeatureError`].
pub type FeatureResult<T> = Result<T, FeatureError>;
77
78/// Convert Decimal to f64 for statistical calculations
79///
80/// # Safety
81/// This function returns NaN if the conversion fails, which should only happen
82/// for extreme values that don't occur in normal market data. NaN will propagate
83/// through calculations, making any errors visible rather than hidden.
84#[inline]
85pub(crate) fn decimal_to_f64_or_nan(d: Decimal) -> f64 {
86    d.to_f64().unwrap_or_else(|| {
87        #[cfg(debug_assertions)]
88        eprintln!("Warning: Decimal to f64 conversion failed for value: {d}");
89        f64::NAN
90    })
91}
92
93/// L2 Level data for feature calculation
94#[derive(Debug, Clone, Copy)]
95pub struct Level {
96    /// Price at this level
97    pub price: Decimal,
98    /// Total quantity available at this level
99    pub quantity: Decimal,
100    /// Number of orders at this level (if available from exchange)
101    pub order_count: u32,
102}
103
104/// Multi-level order book snapshot
105///
106/// Cache-aligned for optimal memory access patterns
107#[repr(align(64))]
108#[derive(Debug, Clone)]
109pub struct OrderBookSnapshot {
110    /// Timestamp of this snapshot in nanoseconds
111    pub timestamp_ns: u64,
112    /// Symbol/instrument identifier
113    pub symbol: String,
114    /// Bid levels (best to worst, price descending)
115    pub bids: SmallVec<[Level; 25]>,
116    /// Ask levels (best to worst, price ascending)
117    pub asks: SmallVec<[Level; 25]>,
118}
119
120impl OrderBookSnapshot {
121    /// Calculate mid price from best bid and ask
122    #[must_use]
123    pub fn mid_price(&self) -> Decimal {
124        if self.bids.is_empty() || self.asks.is_empty() {
125            return Decimal::ZERO;
126        }
127        (self.bids[0].price + self.asks[0].price) / Decimal::TWO
128    }
129}
130
131/// Trade tick for feature calculation
132///
133/// Cache-aligned for efficient processing in hot paths
134#[repr(align(64))]
135#[derive(Debug, Clone)]
136pub struct TradeTick {
137    /// Timestamp of the trade in nanoseconds
138    pub timestamp_ns: u64,
139    /// Symbol/instrument identifier
140    pub symbol: String,
141    /// Side of the aggressor/taker (buy or sell)
142    pub side: TradeSide,
143    /// Execution price of the trade
144    pub price: Decimal,
145    /// Trade quantity/volume
146    pub quantity: Decimal,
147}
148
/// Aggressor (taker) side of a trade.
///
/// The explicit discriminants give each side a sign: `+1` for buys, `-1` for sells.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum TradeSide {
    /// The buyer was the aggressor.
    Buy = 1,
    /// The seller was the aggressor.
    Sell = -1,
}
157
158/// Combined microstructural features
159///
160/// Cache-aligned for efficient batch processing
161#[repr(align(64))]
162#[derive(Debug, Clone, Default)]
163pub struct MicrostructuralFeatures {
164    /// Timestamp when these features were calculated in nanoseconds
165    pub timestamp_ns: u64,
166
167    // Order Flow Features
168    /// Basic order flow imbalance (level 1 only)
169    pub ofi_basic: f64,
170    /// Volume-weighted order flow imbalance across multiple levels
171    pub ofi_weighted: f64,
172    /// Volume-Synchronized Probability of Informed Trading
173    pub vpin: f64,
174
175    // Queue Features
176    /// Imbalance between best bid and ask quantities
177    pub queue_imbalance: f64,
178    /// Queue imbalance weighted by depth levels
179    pub weighted_queue_imbalance: f64,
180
181    // Liquidity Features
182    /// Total bid-side liquidity across tracked levels
183    pub bid_liquidity: Decimal,
184    /// Total ask-side liquidity across tracked levels
185    pub ask_liquidity: Decimal,
186    /// Ratio of bid to ask liquidity
187    pub liquidity_ratio: f64,
188
189    // Volatility Features
190    /// Realized volatility over recent time window
191    pub realized_volatility: f64,
192    /// Estimated permanent price impact of trades
193    pub price_impact: f64,
194
195    // Market Structure
196    /// Current bid-ask spread
197    pub spread: Decimal,
198    /// Current mid price (average of best bid and ask)
199    pub mid_price: Decimal,
200    /// Spread normalized by mid price (basis points)
201    pub relative_spread: f64,
202}
203
/// Flat record of every feature value at one point in time.
///
/// `FeatureCalculator::get_features` builds this by flattening the trade features
/// and the order book features into one struct. The "advanced" fields at the
/// bottom, other than `ofi`, are currently filled with `0.0` by that method.
#[derive(Clone, Debug, Default)]
pub struct FeatureSnapshot {
    /// Snapshot time in nanoseconds. `get_features` uses the latest order book
    /// timestamp, falls back to the latest trade timestamp, and otherwise uses 0.
    pub timestamp: u64,

    // ---- Trade-derived ----
    /// Volume-weighted average price over the trade window.
    pub vwap: f64,
    /// Trade activity measure (the calculator reports the number of trades in the window).
    pub trade_intensity: f64,
    /// Number of buy trades relative to the number of sell trades.
    pub buy_sell_ratio: f64,
    /// Share of volume attributed to large trades.
    pub large_trade_ratio: f64,
    /// Normalized net order flow: buy volume minus sell volume over their sum.
    pub order_flow_imbalance: f64,

    // ---- Order-book-derived ----
    /// Summed quantity on the bid side.
    pub bid_liquidity: f64,
    /// Summed quantity on the ask side.
    pub ask_liquidity: f64,
    /// Bid-ask spread expressed in basis points of the mid price.
    pub spread_bps: f64,
    /// Current mid price.
    pub mid_price: f64,
    /// Imbalance between the best bid and best ask quantities.
    pub queue_imbalance: f64,
    /// Bid depth divided by ask depth.
    pub book_depth_ratio: f64,
    /// Slope of the bid side of the book.
    pub bid_slope: f64,
    /// Slope of the ask side of the book.
    pub ask_slope: f64,
    /// Asymmetry of the order book shape.
    pub orderbook_skew: f64,
    /// Overall imbalance between the bid and ask sides.
    pub orderbook_imbalance: f64,
    /// Mid price weighted by the quantities at the best levels.
    pub weighted_mid_price: f64,

    // ---- Advanced ----
    /// Volume-Synchronized Probability of Informed Trading (VPIN).
    pub vpin: f64,
    /// Queue imbalance weighted across several levels.
    pub weighted_queue_imbalance: f64,
    /// Order flow imbalance (`get_features` copies the basic order book OFI here).
    pub ofi: f64,
    /// Volume-weighted order flow imbalance.
    pub weighted_ofi: f64,
    /// Estimate of realized volatility.
    pub realized_volatility: f64,
    /// Kyle's lambda, a price impact coefficient.
    pub kyles_lambda: f64,
    /// Market asymmetry index.
    pub asymmetry_index: f64,
}
262
/// Features derived from the most recent window of trades.
///
/// `FeatureCalculator` produces this from its trade buffer; each field notes how
/// that calculator fills it. `Default` yields every field as `0.0`.
#[derive(Clone, Debug, Default)]
pub struct TradeFeatures {
    /// Volume-weighted average price: summed `price * quantity` divided by summed
    /// quantity, or `0.0` when the summed quantity is not positive.
    pub vwap: f64,
    /// Trade intensity, reported as the number of trades in the window.
    pub trade_intensity: f64,
    /// Number of buy trades divided by number of sell trades; when the window has
    /// no sell trades this is the buy count itself.
    pub buy_sell_ratio: f64,
    /// Share of volume from large trades. Currently a placeholder that is always `0.0`.
    pub large_trade_ratio: f64,
    /// Normalized net flow: `(buy volume - sell volume) / (buy volume + sell volume)`,
    /// or `0.0` when the total volume is not positive.
    pub order_flow_imbalance: f64,
}
280
/// Features derived from the latest limit order book snapshot.
///
/// `FeatureCalculator` produces this from its order book buffer; each field notes
/// how that calculator fills it. `Default` yields every field as `0.0`.
#[derive(Clone, Debug, Default)]
pub struct OrderBookFeatures {
    /// Summed quantity over the first 10 bid levels.
    pub bid_liquidity: f64,
    /// Summed quantity over the first 10 ask levels.
    pub ask_liquidity: f64,
    /// Best-level spread divided by the mid price, scaled by 10000 (basis points).
    pub spread_bps: f64,
    /// Mean of the best bid price and the best ask price.
    pub mid_price: f64,
    /// Queue imbalance as returned by `calculate_queue_imbalance` for the current book.
    pub queue_imbalance: f64,
    /// `bid_liquidity / ask_liquidity`, or `0.0` when ask liquidity is not positive.
    pub book_depth_ratio: f64,
    /// Slope of the bid side. Currently a placeholder that is always `0.0`.
    pub bid_slope: f64,
    /// Slope of the ask side. Currently a placeholder that is always `0.0`.
    pub ask_slope: f64,
    /// `(bid_liquidity - ask_liquidity) / (bid_liquidity + ask_liquidity)`, or `0.0`
    /// when the summed liquidity is not positive.
    pub orderbook_skew: f64,
    /// Overall bid/ask imbalance; currently set to the same value as `orderbook_skew`.
    pub orderbook_imbalance: f64,
    /// Top-of-book mid price in which the bid price is weighted by the ask quantity
    /// and the ask price by the bid quantity.
    pub weighted_mid_price: f64,
    /// Order flow imbalance from `calculate_ofi` over the previous and current
    /// snapshots; stays `0.0` until two snapshots are inside the window.
    pub ofi_basic: f64,
}
313
314/// High-performance feature calculator
315pub struct FeatureCalculator {
316    window_size: usize,
317    trade_buffer: Vec<TradeTick>,
318    orderbook_buffer: Vec<OrderBookSnapshot>,
319    #[allow(dead_code)]
320    feature_cache: FxHashMap<u64, MicrostructuralFeatures>,
321}
322
323impl FeatureCalculator {
324    /// Create new feature calculator
325    #[must_use]
326    pub fn new(window_size: usize) -> Self {
327        Self {
328            window_size,
329            trade_buffer: Vec::with_capacity(window_size * 2),
330            orderbook_buffer: Vec::with_capacity(window_size * 2),
331            feature_cache: FxHashMap::default(),
332        }
333    }
334
335    /// Add a trade tick to the internal buffer for feature calculation.
336    ///
337    /// This method maintains a sliding window of trades by:
338    /// 1. Adding the new trade to the buffer
339    /// 2. Keeping the buffer size at most 2x the window size
340    /// 3. When the buffer exceeds 2x window size, it drains the oldest trades
341    ///    (removes `window_size` trades from the beginning)
342    ///
343    /// This approach ensures we always have enough historical data for feature
344    /// calculations while limiting memory usage. The 2x window size allows for
345    /// efficient batch removal instead of removing one trade at a time.
346    ///
347    /// # Arguments
348    /// * `trade` - The trade tick to add to the buffer
349    pub fn add_trade(&mut self, trade: &TradeTick) {
350        self.trade_buffer.push(trade.clone());
351
352        // Maintain window size
353        if self.trade_buffer.len() > self.window_size * 2 {
354            let drain_count = self.window_size;
355            self.trade_buffer.drain(0..drain_count);
356        }
357    }
358
359    /// Add order book snapshot and calculate features
360    pub fn add_orderbook(&mut self, snapshot: &OrderBookSnapshot) {
361        self.orderbook_buffer.push(snapshot.clone());
362
363        // Maintain window size
364        if self.orderbook_buffer.len() > self.window_size * 2 {
365            let drain_count = self.window_size;
366            self.orderbook_buffer.drain(0..drain_count);
367        }
368    }
369
370    /// Calculate trade-based features
371    fn calculate_trade_features(&self) -> TradeFeatures {
372        let recent_trades =
373            &self.trade_buffer[self.trade_buffer.len().saturating_sub(self.window_size)..];
374
375        let mut features = TradeFeatures::default();
376
377        // Calculate VWAP
378        if !recent_trades.is_empty() {
379            let total_value: f64 = recent_trades
380                .iter()
381                .map(|t| decimal_to_f64_or_nan(t.price * t.quantity))
382                .sum();
383            let total_volume: f64 = recent_trades
384                .iter()
385                .map(|t| decimal_to_f64_or_nan(t.quantity))
386                .sum();
387            features.vwap = if total_volume > 0.0 {
388                total_value / total_volume
389            } else {
390                0.0
391            };
392
393            // Trade intensity
394            features.trade_intensity = recent_trades.len() as f64;
395
396            // Buy/sell ratio
397            let buy_count = recent_trades
398                .iter()
399                .filter(|t| t.side == TradeSide::Buy)
400                .count() as f64;
401            let sell_count = recent_trades
402                .iter()
403                .filter(|t| t.side == TradeSide::Sell)
404                .count() as f64;
405            features.buy_sell_ratio = if sell_count > 0.0 {
406                buy_count / sell_count
407            } else {
408                buy_count
409            };
410
411            // Large trade ratio (placeholder - you can define "large" based on your criteria)
412            features.large_trade_ratio = 0.0;
413
414            // Order flow imbalance
415            let buy_volume: f64 = recent_trades
416                .iter()
417                .filter(|t| t.side == TradeSide::Buy)
418                .map(|t| decimal_to_f64_or_nan(t.quantity))
419                .sum();
420            let sell_volume: f64 = recent_trades
421                .iter()
422                .filter(|t| t.side == TradeSide::Sell)
423                .map(|t| decimal_to_f64_or_nan(t.quantity))
424                .sum();
425            let total = buy_volume + sell_volume;
426            features.order_flow_imbalance = if total > 0.0 {
427                (buy_volume - sell_volume) / total
428            } else {
429                0.0
430            };
431        }
432
433        features
434    }
435
436    /// Calculate order book based features
437    fn calculate_orderbook_features(&self) -> OrderBookFeatures {
438        let recent_books =
439            &self.orderbook_buffer[self.orderbook_buffer.len().saturating_sub(self.window_size)..];
440
441        let mut features = OrderBookFeatures::default();
442
443        if let Some(current_book) = recent_books.last() {
444            // Basic spread and mid price
445            if let (Some(best_bid), Some(best_ask)) =
446                (current_book.bids.first(), current_book.asks.first())
447            {
448                let spread = best_ask.price - best_bid.price;
449                features.mid_price =
450                    decimal_to_f64_or_nan((best_bid.price + best_ask.price) / Decimal::from(2));
451                features.spread_bps = decimal_to_f64_or_nan(
452                    spread / ((best_bid.price + best_ask.price) / Decimal::from(2))
453                        * Decimal::from(10000),
454                );
455            }
456
457            // Queue imbalance
458            features.queue_imbalance =
459                calculate_queue_imbalance(&current_book.bids, &current_book.asks);
460
461            // OFI calculation
462            if recent_books.len() >= 2 {
463                let prev_book = &recent_books[recent_books.len() - 2];
464                features.ofi_basic = calculate_ofi(prev_book, current_book);
465            }
466
467            // Calculate liquidity
468            features.bid_liquidity = current_book
469                .bids
470                .iter()
471                .take(10)
472                .map(|l| decimal_to_f64_or_nan(l.quantity))
473                .sum();
474            features.ask_liquidity = current_book
475                .asks
476                .iter()
477                .take(10)
478                .map(|l| decimal_to_f64_or_nan(l.quantity))
479                .sum();
480
481            // Book depth ratio
482            features.book_depth_ratio = if features.ask_liquidity > 0.0 {
483                features.bid_liquidity / features.ask_liquidity
484            } else {
485                0.0
486            };
487
488            // Calculate slopes (simplified - you can implement more sophisticated slope calculation)
489            features.bid_slope = 0.0;
490            features.ask_slope = 0.0;
491
492            // Orderbook skew
493            features.orderbook_skew = if features.bid_liquidity + features.ask_liquidity > 0.0 {
494                (features.bid_liquidity - features.ask_liquidity)
495                    / (features.bid_liquidity + features.ask_liquidity)
496            } else {
497                0.0
498            };
499
500            // Orderbook imbalance (same as skew in this simplified version)
501            features.orderbook_imbalance = features.orderbook_skew;
502
503            // Weighted mid price (simplified - using top level only)
504            if let (Some(best_bid), Some(best_ask)) =
505                (current_book.bids.first(), current_book.asks.first())
506            {
507                let bid_weight = decimal_to_f64_or_nan(best_bid.quantity);
508                let ask_weight = decimal_to_f64_or_nan(best_ask.quantity);
509                let total_weight = bid_weight + ask_weight;
510                if total_weight > 0.0 {
511                    features.weighted_mid_price = (decimal_to_f64_or_nan(best_bid.price)
512                        * ask_weight
513                        + decimal_to_f64_or_nan(best_ask.price) * bid_weight)
514                        / total_weight;
515                } else {
516                    features.weighted_mid_price = features.mid_price;
517                }
518            }
519        }
520
521        features
522    }
523
524    /// Alias for add_orderbook() to maintain backward compatibility
525    #[must_use]
526    pub fn on_orderbook(&mut self, snapshot: &OrderBookSnapshot) -> OrderBookFeatures {
527        self.add_orderbook(snapshot);
528        self.calculate_orderbook_features()
529    }
530
531    /// Alias for add_trade() to maintain backward compatibility
532    #[must_use]
533    pub fn on_trade(&mut self, trade: &TradeTick) -> Option<TradeFeatures> {
534        self.add_trade(trade);
535        if !self.trade_buffer.is_empty() {
536            Some(self.calculate_trade_features())
537        } else {
538            None
539        }
540    }
541
542    /// Get current features based on accumulated data
543    pub fn get_features(&self) -> FeatureSnapshot {
544        // Get latest features from both trade and orderbook data
545        let trade_features = if !self.trade_buffer.is_empty() {
546            self.calculate_trade_features()
547        } else {
548            TradeFeatures::default()
549        };
550
551        let orderbook_features = if !self.orderbook_buffer.is_empty() {
552            self.calculate_orderbook_features()
553        } else {
554            OrderBookFeatures::default()
555        };
556
557        FeatureSnapshot {
558            timestamp: self
559                .orderbook_buffer
560                .last()
561                .map(|ob| ob.timestamp_ns)
562                .or_else(|| self.trade_buffer.last().map(|t| t.timestamp_ns))
563                .unwrap_or(0),
564            // Flatten all features into the snapshot
565            vwap: trade_features.vwap,
566            trade_intensity: trade_features.trade_intensity,
567            buy_sell_ratio: trade_features.buy_sell_ratio,
568            large_trade_ratio: trade_features.large_trade_ratio,
569            order_flow_imbalance: trade_features.order_flow_imbalance,
570            bid_liquidity: orderbook_features.bid_liquidity,
571            ask_liquidity: orderbook_features.ask_liquidity,
572            spread_bps: orderbook_features.spread_bps,
573            mid_price: orderbook_features.mid_price,
574            queue_imbalance: orderbook_features.queue_imbalance,
575            book_depth_ratio: orderbook_features.book_depth_ratio,
576            bid_slope: orderbook_features.bid_slope,
577            ask_slope: orderbook_features.ask_slope,
578            orderbook_skew: orderbook_features.orderbook_skew,
579            orderbook_imbalance: orderbook_features.orderbook_imbalance,
580            weighted_mid_price: orderbook_features.weighted_mid_price,
581
582            // Additional advanced features (set to default/0 for now)
583            vpin: 0.0,
584            weighted_queue_imbalance: 0.0,
585            ofi: orderbook_features.ofi_basic,
586            weighted_ofi: 0.0,
587            realized_volatility: 0.0,
588            kyles_lambda: 0.0,
589            asymmetry_index: 0.0,
590        }
591    }
592}