// ============================================================================ // gmp_engine.rs -- Core GMP & Footprint GMP Algorithms // ============================================================================ // Direct port of generate_profiles.py (mBA-GMP.v3). // All computation is pure; no GUI or IO dependencies. // ============================================================================ use std::collections::BTreeMap; // ─── Data Structures ───────────────────────────────────────────────────────── /// A single raw tick used as input to the profile builders. #[derive(Clone, Debug)] pub struct RawTick { pub price: f64, pub time: i64, // unix timestamp (seconds) pub index: usize, // sequential trade index } /// One bin in a GMP profile. #[derive(Clone, Debug, Default)] pub struct GmpBin { pub count: u32, } /// One bin in a Footprint profile (GMP + directional classification). #[derive(Clone, Debug, Default)] pub struct FootprintBin { pub count: u32, pub up: u32, pub down: u32, } impl FootprintBin { #[inline] pub fn delta(&self) -> i32 { self.up as i32 - self.down as i32 } } /// A complete GMP profile for one interval. #[derive(Clone, Debug)] pub struct GmpProfile { pub bins: BTreeMap, pub beta: f64, pub min_bin: i64, pub max_bin: i64, } /// A complete Footprint profile for one interval. #[derive(Clone, Debug)] pub struct FootprintProfile { pub bins: BTreeMap, pub beta: f64, pub min_bin: i64, pub max_bin: i64, } // ─── Core Helpers ──────────────────────────────────────────────────────────── /// Compute bin index: floor(price / beta). /// Matches the Python: `int(math.floor(price / beta))`. #[inline] pub fn bin_index(price: f64, beta: f64) -> i64 { (price / beta).floor() as i64 } /// Price range for a given bin: [b*beta, (b+1)*beta). #[inline] pub fn bin_price_range(b: i64, beta: f64) -> (f64, f64) { (b as f64 * beta, (b + 1) as f64 * beta) } // ─── GMP Builder ───────────────────────────────────────────────────────────── /// Build a Gap-filled Market Profile from a slice of ticks. /// /// Algorithm (from main.tex Section IV): /// 1. CMP placement: each tick fills its own bin. /// 2. Gap-fill: for each consecutive pair, fill every bin strictly /// between the two endpoints. pub fn build_gmp(ticks: &[RawTick], beta: f64) -> GmpProfile { let mut bins: BTreeMap = BTreeMap::new(); if ticks.is_empty() { return GmpProfile { bins, beta, min_bin: 0, max_bin: 0 }; } // Phase 1 -- CMP placement for t in ticks { let b = bin_index(t.price, beta); bins.entry(b).or_default().count += 1; } // Phase 2 -- Gap-fill intermediate bins for pair in ticks.windows(2) { let b_from = bin_index(pair[0].price, beta); let b_to = bin_index(pair[1].price, beta); if (b_to - b_from).abs() <= 1 { continue; // adjacent or same bin } let dir: i64 = if b_to > b_from { 1 } else { -1 }; let mut b = b_from + dir; while b != b_to { bins.entry(b).or_default().count += 1; b += dir; } } let min_bin = *bins.keys().next().unwrap_or(&0); let max_bin = *bins.keys().next_back().unwrap_or(&0); GmpProfile { bins, beta, min_bin, max_bin } } // ─── Footprint Builder ─────────────────────────────────────────────────────── /// Build a Footprint (Up/Down-Bin) Profile from a slice of ticks. /// /// Algorithm (from main.tex Section IV, "Up/Down-Bin Footprint Profile"): /// 1. Compute GMP group labels (same logic as build_gmp). /// 2. For each consecutive pair, classify bins on the path as up or down. pub fn build_footprint(ticks: &[RawTick], beta: f64) -> FootprintProfile { let mut bins: BTreeMap = BTreeMap::new(); if ticks.is_empty() { return FootprintProfile { bins, beta, min_bin: 0, max_bin: 0 }; } // Phase 1 + 2 -- GMP placement (CMP + gap-fill) for the count field for t in ticks { let b = bin_index(t.price, beta); bins.entry(b).or_default().count += 1; } for pair in ticks.windows(2) { let b_from = bin_index(pair[0].price, beta); let b_to = bin_index(pair[1].price, beta); if (b_to - b_from).abs() <= 1 { continue; } let dir: i64 = if b_to > b_from { 1 } else { -1 }; let mut b = b_from + dir; while b != b_to { bins.entry(b).or_default().count += 1; b += dir; } } // Phase 3 -- Directional classification for pair in ticks.windows(2) { let src_price = pair[0].price; let dst_price = pair[1].price; let b_from = bin_index(src_price, beta); let b_to = bin_index(dst_price, beta); if b_from == b_to { // Same bin -- classify by micro price movement let entry = bins.entry(b_from).or_default(); if dst_price > src_price { entry.up += 1; } else if dst_price < src_price { entry.down += 1; } continue; } let is_up = b_to > b_from; let dir: i64 = if is_up { 1 } else { -1 }; // Every bin on the path AFTER source (exclusive), up to and // including destination, gets a directional count. let mut b = b_from + dir; loop { let entry = bins.entry(b).or_default(); if is_up { entry.up += 1; } else { entry.down += 1; } if b == b_to { break; } b += dir; } } let min_bin = *bins.keys().next().unwrap_or(&0); let max_bin = *bins.keys().next_back().unwrap_or(&0); FootprintProfile { bins, beta, min_bin, max_bin } } // ─── Interval Aggregator ───────────────────────────────────────────────────── /// A computed interval containing both profile types and metadata. #[derive(Clone, Debug)] pub struct IntervalProfile { pub gmp: GmpProfile, pub footprint: FootprintProfile, /// Label for the x-axis (time string or trade range). pub label: String, /// Midpoint price of this interval (for alignment). pub mid_price: f64, /// Start index in the original tick buffer (for scrolling). pub start_idx: usize, } /// Aggregate ticks into time-based intervals and compute profiles. /// /// `interval_secs` -- duration of each interval in seconds. pub fn aggregate_by_time( ticks: &[RawTick], interval_secs: u64, beta: f64, ) -> Vec { if ticks.is_empty() || interval_secs == 0 { return Vec::new(); } let mut profiles = Vec::new(); let mut start = 0usize; let base_time = ticks[0].time; while start < ticks.len() { let bucket = ((ticks[start].time - base_time) as u64) / interval_secs; let bucket_end_time = base_time + ((bucket + 1) * interval_secs) as i64; // Find the end of this bucket let mut end = start; while end < ticks.len() && ticks[end].time < bucket_end_time { end += 1; } let slice = &ticks[start..end]; if !slice.is_empty() { let gmp = build_gmp(slice, beta); let footprint = build_footprint(slice, beta); // Label: time range let t0 = slice.first().unwrap().time; let t1 = slice.last().unwrap().time; let label = format_time_range(t0, t1); // Midpoint price let sum: f64 = slice.iter().map(|t| t.price).sum(); let mid_price = sum / slice.len() as f64; profiles.push(IntervalProfile { gmp, footprint, label, mid_price, start_idx: start, }); } start = end; } profiles } /// Aggregate ticks into trade-count-based intervals and compute profiles. /// /// `trade_interval` -- number of ticks per group. pub fn aggregate_by_trades( ticks: &[RawTick], trade_interval: usize, beta: f64, ) -> Vec { if ticks.is_empty() || trade_interval == 0 { return Vec::new(); } let mut profiles = Vec::new(); for (chunk_idx, chunk) in ticks.chunks(trade_interval).enumerate() { let gmp = build_gmp(chunk, beta); let footprint = build_footprint(chunk, beta); let first_idx = chunk_idx * trade_interval; let last_idx = first_idx + chunk.len() - 1; let label = format!("T{}-T{}", first_idx + 1, last_idx + 1); let sum: f64 = chunk.iter().map(|t| t.price).sum(); let mid_price = sum / chunk.len() as f64; profiles.push(IntervalProfile { gmp, footprint, label, mid_price, start_idx: first_idx, }); } profiles } // ─── Helpers ───────────────────────────────────────────────────────────────── fn format_time_range(t0: i64, t1: i64) -> String { let fmt = |t: i64| -> String { let h = (t / 3600) % 24; let m = (t / 60) % 60; let s = t % 60; format!("{:02}:{:02}:{:02}", h, m, s) }; if t0 == t1 { fmt(t0) } else { format!("{}-{}", fmt(t0), fmt(t1)) } } // ─── Tests (parity with generate_profiles.py) ──────────────────────────────── #[cfg(test)] mod tests { use super::*; /// The exact 10 datapoints from generate_profiles.py. fn reference_datapoints() -> Vec { vec![ RawTick { price: 3000.914, time: 1, index: 0 }, RawTick { price: 3003.837, time: 2, index: 1 }, RawTick { price: 3002.432, time: 3, index: 2 }, RawTick { price: 3009.892, time: 4, index: 3 }, RawTick { price: 3007.698, time: 5, index: 4 }, RawTick { price: 3009.176, time: 6, index: 5 }, RawTick { price: 3003.381, time: 7, index: 6 }, RawTick { price: 3004.283, time: 8, index: 7 }, RawTick { price: 3003.512, time: 9, index: 8 }, RawTick { price: 3003.012, time: 10, index: 9 }, ] } #[test] fn test_bin_index() { // beta = 1: floor(3000.914 / 1) = 3000 assert_eq!(bin_index(3000.914, 1.0), 3000); assert_eq!(bin_index(3003.837, 1.0), 3003); assert_eq!(bin_index(3009.892, 1.0), 3009); } #[test] fn test_gmp_parity_with_python() { // From generate_profiles.py with beta=1, the GMP for the 10 // datapoints should produce stacks at bins 3000..=3009, and the // total stack count should be 25 (10 CMP + 15 gap-fills). let ticks = reference_datapoints(); let profile = build_gmp(&ticks, 1.0); // All 10 bins from 3000 to 3009 should be populated for b in 3000..=3009 { assert!( profile.bins.contains_key(&b), "GMP missing bin {}", b ); } // Total stacks = 25 (verified against Python output) let total: u32 = profile.bins.values().map(|b| b.count).sum(); assert_eq!(total, 25, "GMP total stacks mismatch"); } #[test] fn test_footprint_parity_with_python() { // The footprint for the same 10 datapoints should have up/down // counts that match the Python build_updown_profile output. let ticks = reference_datapoints(); let fp = build_footprint(&ticks, 1.0); // All 10 bins should exist for b in 3000..=3009 { assert!( fp.bins.contains_key(&b), "Footprint missing bin {}", b ); } // Total stacks should also be 25 let total: u32 = fp.bins.values().map(|b| b.count).sum(); assert_eq!(total, 25, "Footprint total stacks mismatch"); // The sum of (up + down) across all bins should be > 0 let total_up: u32 = fp.bins.values().map(|b| b.up).sum(); let total_down: u32 = fp.bins.values().map(|b| b.down).sum(); assert!(total_up > 0, "Expected some up counts"); assert!(total_down > 0, "Expected some down counts"); } #[test] fn test_aggregate_by_trades() { let ticks = reference_datapoints(); // Group by 5 trades => 2 intervals let intervals = aggregate_by_trades(&ticks, 5, 1.0); assert_eq!(intervals.len(), 2); assert_eq!(intervals[0].label, "T1-T5"); assert_eq!(intervals[1].label, "T6-T10"); } #[test] fn test_aggregate_by_time() { let ticks = reference_datapoints(); // 5-second intervals, ticks at time 1..10 => 2 intervals let intervals = aggregate_by_time(&ticks, 5, 1.0); assert_eq!(intervals.len(), 2); } }