1#[cfg(test)]
11use std::hint::black_box;
12use wide::f64x4;
13
14#[inline(always)]
16fn prefetch_data(ptr: *const f64) {
17 #[cfg(target_arch = "x86_64")]
18 {
19 unsafe {
20 std::arch::x86_64::_mm_prefetch(ptr as *const i8, std::arch::x86_64::_MM_HINT_T0);
21 }
22 }
23 #[cfg(target_arch = "aarch64")]
24 {
25 unsafe {
26 std::arch::aarch64::_prefetch(
27 ptr as *const i8,
28 std::arch::aarch64::_PREFETCH_READ,
29 std::arch::aarch64::_PREFETCH_LOCALITY3,
30 );
31 }
32 }
33 #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
36 {
37 let _ = ptr; }
39}
40
41pub use rusty_common::AlignedBuffer64 as SimdAlignedBuffer;
46
47pub struct EnhancedSimdOps;
49
50impl EnhancedSimdOps {
51 #[inline]
53 pub fn sum_f64_optimized(values: &[f64]) -> f64 {
54 if values.is_empty() {
55 return 0.0;
56 }
57
58 if values.len() == 1 {
59 return values[0];
60 }
61
62 if values.len() < 4 {
63 return values.iter().sum();
64 }
65
66 if values.len() > 1024 {
68 Self::sum_f64_with_prefetch(values)
69 } else {
70 Self::sum_f64_fast(values)
71 }
72 }
73
74 #[inline]
76 fn sum_f64_fast(values: &[f64]) -> f64 {
77 let mut i = 0;
78 let len = values.len();
79
80 let mut sum1 = f64x4::ZERO;
82 let mut sum2 = f64x4::ZERO;
83
84 while i + 8 <= len {
86 let v1 = f64x4::from([values[i], values[i + 1], values[i + 2], values[i + 3]]);
87 let v2 = f64x4::from([values[i + 4], values[i + 5], values[i + 6], values[i + 7]]);
88
89 sum1 += v1;
90 sum2 += v2;
91
92 i += 8;
93 }
94
95 while i + 4 <= len {
97 let v = f64x4::from([values[i], values[i + 1], values[i + 2], values[i + 3]]);
98 sum1 += v;
99 i += 4;
100 }
101
102 let combined = sum1 + sum2;
104 let combined_array = combined.as_array_ref();
105 let mut total =
106 combined_array[0] + combined_array[1] + combined_array[2] + combined_array[3];
107
108 while i < len {
110 total += values[i];
111 i += 1;
112 }
113
114 total
115 }
116
117 #[inline]
119 fn sum_f64_with_prefetch(values: &[f64]) -> f64 {
120 let mut i = 0;
121 let len = values.len();
122
123 let mut sum1 = f64x4::ZERO;
125 let mut sum2 = f64x4::ZERO;
126 let mut sum3 = f64x4::ZERO;
127 let mut sum4 = f64x4::ZERO;
128
129 while i + 16 <= len {
131 if i + 24 < len {
133 unsafe {
134 prefetch_data(values.as_ptr().add(i + 16));
135 }
136 }
137
138 let v1 = f64x4::from([values[i], values[i + 1], values[i + 2], values[i + 3]]);
139 let v2 = f64x4::from([values[i + 4], values[i + 5], values[i + 6], values[i + 7]]);
140 let v3 = f64x4::from([values[i + 8], values[i + 9], values[i + 10], values[i + 11]]);
141 let v4 = f64x4::from([
142 values[i + 12],
143 values[i + 13],
144 values[i + 14],
145 values[i + 15],
146 ]);
147
148 sum1 += v1;
149 sum2 += v2;
150 sum3 += v3;
151 sum4 += v4;
152
153 i += 16;
154 }
155
156 let combined = (sum1 + sum2) + (sum3 + sum4);
158 let combined_array = combined.as_array_ref();
159 let mut total =
160 combined_array[0] + combined_array[1] + combined_array[2] + combined_array[3];
161
162 while i < len {
164 total += values[i];
165 i += 1;
166 }
167
168 total
169 }
170
171 #[inline]
174 pub fn variance_welford_scalar(values: &[f64]) -> f64 {
175 if values.len() < 2 {
176 return 0.0;
177 }
178
179 let mut count = 0f64;
181 let mut mean = 0.0;
182 let mut m2 = 0.0;
183
184 for &value in values {
186 count += 1.0;
187 let delta = value - mean;
188 mean += delta / count;
189 let delta2 = value - mean;
190 m2 += delta * delta2;
191 }
192
193 m2 / count
194 }
195
196 #[inline]
199 pub fn variance_two_pass_simd(values: &[f64]) -> f64 {
200 if values.len() < 2 {
201 return 0.0;
202 }
203
204 let len = values.len();
205 let len_f64 = len as f64;
206
207 if len <= 4 {
209 return Self::variance_welford_scalar(values);
210 }
211
212 let complete_chunks = len / 4;
214 let mut sum = f64x4::splat(0.0);
215
216 for i in 0..complete_chunks {
218 let start_idx = i * 4;
219 let chunk = f64x4::from([
220 values[start_idx],
221 values[start_idx + 1],
222 values[start_idx + 2],
223 values[start_idx + 3],
224 ]);
225 sum += chunk;
226 }
227
228 let sum_array = sum.as_array_ref();
230 let mut total_sum = sum_array[0] + sum_array[1] + sum_array[2] + sum_array[3];
231
232 for i in (complete_chunks * 4)..len {
234 total_sum += values[i];
235 }
236
237 let mean = total_sum / len_f64;
238
239 let mean_vec = f64x4::splat(mean);
241 let mut variance_sum = f64x4::splat(0.0);
242
243 for i in 0..complete_chunks {
245 let start_idx = i * 4;
246 let chunk = f64x4::from([
247 values[start_idx],
248 values[start_idx + 1],
249 values[start_idx + 2],
250 values[start_idx + 3],
251 ]);
252 let diff = chunk - mean_vec;
253 variance_sum += diff * diff;
254 }
255
256 let variance_array = variance_sum.as_array_ref();
258 let mut total_variance =
259 variance_array[0] + variance_array[1] + variance_array[2] + variance_array[3];
260
261 for i in (complete_chunks * 4)..len {
263 let diff = values[i] - mean;
264 total_variance += diff * diff;
265 }
266
267 total_variance / len_f64
268 }
269
270 #[inline]
273 pub fn running_average_simd(values: &[f64], window_size: usize) -> Vec<f64> {
274 if values.len() < window_size {
275 return vec![];
276 }
277
278 let mut results = Vec::with_capacity(values.len() - window_size + 1);
279
280 let mut window_sum = Self::sum_f64_fast(&values[0..window_size]);
282 results.push(window_sum / window_size as f64);
283
284 for i in window_size..values.len() {
286 window_sum = window_sum - values[i - window_size] + values[i];
287 results.push(window_sum / window_size as f64);
288 }
289
290 results
291 }
292
293 #[inline]
296 pub fn correlation_simd(x: &[f64], y: &[f64]) -> f64 {
297 if x.len() != y.len() || x.len() < 2 {
298 return 0.0;
299 }
300
301 let n = x.len() as f64;
302 let sum_x = Self::sum_f64_optimized(x);
303 let sum_y = Self::sum_f64_optimized(y);
304 let mean_x = sum_x / n;
305 let mean_y = sum_y / n;
306
307 let mut sum_xy = 0.0;
308 let mut sum_x2 = 0.0;
309 let mut sum_y2 = 0.0;
310
311 let mut i = 0;
312 let len = x.len();
313
314 while i + 4 <= len {
316 let x_vec = f64x4::from([x[i], x[i + 1], x[i + 2], x[i + 3]]);
317 let y_vec = f64x4::from([y[i], y[i + 1], y[i + 2], y[i + 3]]);
318 let mean_x_vec = f64x4::splat(mean_x);
319 let mean_y_vec = f64x4::splat(mean_y);
320
321 let dx = x_vec - mean_x_vec;
322 let dy = y_vec - mean_y_vec;
323
324 let xy = dx * dy;
325 let x2 = dx * dx;
326 let y2 = dy * dy;
327
328 let xy_array = xy.as_array_ref();
329 let x2_array = x2.as_array_ref();
330 let y2_array = y2.as_array_ref();
331
332 sum_xy += xy_array[0] + xy_array[1] + xy_array[2] + xy_array[3];
333 sum_x2 += x2_array[0] + x2_array[1] + x2_array[2] + x2_array[3];
334 sum_y2 += y2_array[0] + y2_array[1] + y2_array[2] + y2_array[3];
335
336 i += 4;
337 }
338
339 while i < len {
341 let dx = x[i] - mean_x;
342 let dy = y[i] - mean_y;
343 sum_xy += dx * dy;
344 sum_x2 += dx * dx;
345 sum_y2 += dy * dy;
346 i += 1;
347 }
348
349 let denominator = (sum_x2 * sum_y2).sqrt();
350 if denominator > f64::EPSILON {
351 sum_xy / denominator
352 } else {
353 0.0
354 }
355 }
356
357 #[inline]
360 pub fn log_returns_simd(prices: &[f64]) -> Vec<f64> {
361 if prices.len() < 2 {
362 return vec![];
363 }
364
365 let mut returns = Vec::with_capacity(prices.len() - 1);
366
367 let mut i = 1;
368 let len = prices.len();
369
370 while i + 4 <= len {
372 let curr_prices = f64x4::from([prices[i], prices[i + 1], prices[i + 2], prices[i + 3]]);
373 let prev_prices = f64x4::from([prices[i - 1], prices[i], prices[i + 1], prices[i + 2]]);
374
375 let ratios = curr_prices / prev_prices;
377 let ratios_array = ratios.as_array_ref();
378
379 returns.push(ratios_array[0].ln());
380 returns.push(ratios_array[1].ln());
381 returns.push(ratios_array[2].ln());
382 returns.push(ratios_array[3].ln());
383
384 i += 4;
385 }
386
387 while i < len {
389 let log_return = (prices[i] / prices[i - 1]).ln();
390 returns.push(log_return);
391 i += 1;
392 }
393
394 returns
395 }
396
397 #[inline]
400 pub fn process_order_book_levels_soa(
401 prices: &[f64],
402 quantities: &[f64],
403 operation: impl Fn(f64, f64) -> f64,
404 ) -> Vec<f64> {
405 if prices.len() != quantities.len() {
406 return vec![];
407 }
408
409 let mut results = Vec::with_capacity(prices.len());
410 let mut i = 0;
411 let len = prices.len();
412
413 while i + 4 <= len {
415 let price_vec = f64x4::from([prices[i], prices[i + 1], prices[i + 2], prices[i + 3]]);
416 let qty_vec = f64x4::from([
417 quantities[i],
418 quantities[i + 1],
419 quantities[i + 2],
420 quantities[i + 3],
421 ]);
422
423 let price_array = price_vec.as_array_ref();
424 let qty_array = qty_vec.as_array_ref();
425
426 for j in 0..4 {
427 #[cfg(test)]
428 let result = black_box(operation(price_array[j], qty_array[j]));
429 #[cfg(not(test))]
430 let result = operation(price_array[j], qty_array[j]);
431 results.push(result);
432 }
433
434 i += 4;
435 }
436
437 while i < len {
439 results.push(operation(prices[i], quantities[i]));
440 i += 1;
441 }
442
443 results
444 }
445}
446
447#[cfg(test)]
448mod tests {
449 use super::*;
450
451 #[test]
452 fn test_enhanced_sum_small_array() {
453 let values = [1.0, 2.0, 3.0];
454 let sum = EnhancedSimdOps::sum_f64_optimized(&values);
455 assert_eq!(sum, 6.0);
456 }
457
458 #[test]
459 fn test_enhanced_sum_medium_array() {
460 let values: Vec<f64> = (1..=100).map(|i| i as f64).collect();
461 let sum = EnhancedSimdOps::sum_f64_optimized(&values);
462 assert_eq!(sum, 5050.0);
463 }
464
465 #[test]
466 fn test_enhanced_sum_large_array() {
467 let values: Vec<f64> = (1..=10000).map(|i| i as f64).collect();
468 let sum = EnhancedSimdOps::sum_f64_optimized(&values);
469 assert_eq!(sum, 50005000.0);
470 }
471
472 #[test]
473 fn test_variance_welford() {
474 let values = [2.0, 4.0, 6.0, 8.0];
475 let variance = EnhancedSimdOps::variance_welford_scalar(&values);
476 assert!((variance - 5.0).abs() < 1e-10);
477 }
478
479 #[test]
480 fn test_running_average() {
481 let values = [1.0, 2.0, 3.0, 4.0, 5.0];
482 let averages = EnhancedSimdOps::running_average_simd(&values, 3);
483 assert_eq!(averages.len(), 3);
484 assert_eq!(averages[0], 2.0); assert_eq!(averages[1], 3.0); assert_eq!(averages[2], 4.0); }
488
489 #[test]
490 fn test_correlation() {
491 let x = [1.0, 2.0, 3.0, 4.0, 5.0];
492 let y = [2.0, 4.0, 6.0, 8.0, 10.0];
493 let corr = EnhancedSimdOps::correlation_simd(&x, &y);
494 assert!((corr - 1.0).abs() < 1e-10); }
496
497 #[test]
498 fn test_log_returns() {
499 let prices = [100.0, 101.0, 99.0, 102.0];
500 let returns = EnhancedSimdOps::log_returns_simd(&prices);
501 assert_eq!(returns.len(), 3);
502 assert!((returns[0] - (101.0f64 / 100.0f64).ln()).abs() < 1e-10);
503 assert!((returns[1] - (99.0f64 / 101.0f64).ln()).abs() < 1e-10);
504 assert!((returns[2] - (102.0f64 / 99.0f64).ln()).abs() < 1e-10);
505 }
506
507 #[test]
508 fn test_soa_processing() {
509 let prices = [100.0, 101.0, 102.0, 103.0];
510 let quantities = [10.0, 20.0, 30.0, 40.0];
511
512 let notionals =
514 EnhancedSimdOps::process_order_book_levels_soa(&prices, &quantities, |price, qty| {
515 price * qty
516 });
517
518 assert_eq!(notionals.len(), 4);
519 assert_eq!(notionals[0], 1000.0);
520 assert_eq!(notionals[1], 2020.0);
521 assert_eq!(notionals[2], 3060.0);
522 assert_eq!(notionals[3], 4120.0);
523 }
524
525 #[test]
526 fn test_simd_aligned_buffer() {
527 let mut buffer = SimdAlignedBuffer::<f64, 8>::new();
528 let slice = buffer.as_mut_slice();
529 slice[0] = 1.0;
530 slice[7] = 8.0;
531
532 assert_eq!(buffer.len(), 8);
533 assert_eq!(buffer.as_slice()[0], 1.0);
534 assert_eq!(buffer.as_slice()[7], 8.0);
535 }
536
537 #[test]
538 fn test_variance_two_pass_simd_correctness() {
539 let values = [2.0, 4.0, 6.0, 8.0];
541 let result = EnhancedSimdOps::variance_two_pass_simd(&values);
542
543 let expected = 5.0;
547 assert!(
548 (result - expected).abs() < 1e-10,
549 "Expected {expected}, got {result}"
550 );
551 }
552
553 #[test]
554 fn test_variance_two_pass_simd_aligned_vs_padded() {
555 let aligned_values = [1.0, 2.0, 3.0, 4.0]; let aligned_result = EnhancedSimdOps::variance_two_pass_simd(&aligned_values);
558
559 let padded_values = [1.0, 2.0, 3.0, 4.0, 5.0]; let padded_result = EnhancedSimdOps::variance_two_pass_simd(&padded_values);
562
563 assert!((aligned_result - 1.25).abs() < 1e-10);
566
567 assert!((padded_result - 2.0).abs() < 1e-10);
569 }
570
571 #[test]
572 fn test_variance_two_pass_simd_edge_cases() {
573 let empty: &[f64] = &[];
575 assert_eq!(EnhancedSimdOps::variance_two_pass_simd(empty), 0.0);
576
577 let single = [42.0];
579 assert_eq!(EnhancedSimdOps::variance_two_pass_simd(&single), 0.0);
580
581 let identical = [5.0, 5.0];
583 assert_eq!(EnhancedSimdOps::variance_two_pass_simd(&identical), 0.0);
584
585 let two_diff = [1.0, 3.0];
587 assert!((EnhancedSimdOps::variance_two_pass_simd(&two_diff) - 1.0).abs() < 1e-10);
589 }
590
591 #[test]
592 fn test_variance_two_pass_simd_large_arrays() {
593 let large_values: Vec<f64> = (1..=100).map(|i| i as f64).collect();
595 let result = EnhancedSimdOps::variance_two_pass_simd(&large_values);
596
597 let expected = 833.25;
599 assert!(
600 (result - expected).abs() < 1e-8,
601 "Expected {expected}, got {result}"
602 );
603
604 let odd_values: Vec<f64> = (1..=99).map(|i| i as f64).collect();
606 let odd_result = EnhancedSimdOps::variance_two_pass_simd(&odd_values);
607
608 let expected_odd = 816.6666666666666;
610 assert!(
611 (odd_result - expected_odd).abs() < 1e-8,
612 "Expected {expected_odd}, got {odd_result}"
613 );
614 }
615
616 #[test]
617 fn test_variance_two_pass_simd_precision() {
618 let precise_values = [1.000000001, 1.000000002, 1.000000003, 1.000000004];
620 let result = EnhancedSimdOps::variance_two_pass_simd(&precise_values);
621
622 assert!(result > 0.0, "Should detect small variance");
624 assert!(result < 1e-15, "Small variance should be very small");
625
626 let price_data = [100.25, 100.50, 99.75, 101.00, 100.125];
628 let price_variance = EnhancedSimdOps::variance_two_pass_simd(&price_data);
629 assert!(
630 price_variance > 0.0,
631 "Should calculate variance for realistic price data"
632 );
633 }
634
635 #[test]
636 fn test_variance_two_pass_simd_vs_welford() {
637 let test_data = [2.5, 4.7, 1.3, 8.9, 6.1, 3.4, 7.8, 5.2];
639
640 let simd_result = EnhancedSimdOps::variance_two_pass_simd(&test_data);
641 let welford_result = EnhancedSimdOps::variance_welford_scalar(&test_data);
642
643 assert!(
645 (simd_result - welford_result).abs() < 1e-12,
646 "SIMD two-pass ({simd_result}) and Welford ({welford_result}) results should be nearly identical"
647 );
648 }
649}