File size: 3,352 Bytes
8ef2d83
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
//! # Latency Port
//!
//! Trait for runtime latency measurement and adaptation.
//!
//! This enables the model to know its actual retrieval constraints:
//! - How fast is the hot tier right now?
//! - How much budget do I have for retrieval?
//! - Should I use fewer, faster retrievals or more, slower ones?

use std::time::Duration;

/// The storage tiers data can live in, declared from fastest to slowest.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Tier {
    /// RAM-backed storage: the fastest tier.
    Hot,
    /// NVMe-backed storage: fast, but not the fastest tier.
    Warm,
    /// Archive storage: the slow tier.
    Cold,
}

impl Tier {
    /// Returns the nominal latency window of this tier as `(lower, upper)`.
    ///
    /// The bounds are fixed constants, not live measurements:
    ///
    /// | Tier   | Lower | Upper  |
    /// |--------|-------|--------|
    /// | `Hot`  | 1 µs  | 1 ms   |
    /// | `Warm` | 1 ms  | 10 ms  |
    /// | `Cold` | 10 ms | 100 ms |
    pub fn expected_latency(&self) -> (Duration, Duration) {
        // Every bound is expressed in microseconds so each tier is one table row.
        let (lower_us, upper_us): (u64, u64) = match *self {
            Self::Hot => (1, 1_000),
            Self::Warm => (1_000, 10_000),
            Self::Cold => (10_000, 100_000),
        };

        (
            Duration::from_micros(lower_us),
            Duration::from_micros(upper_us),
        )
    }
}

/// Result of measuring the latency of one storage tier.
///
/// This is the value type returned by [`Latency::probe`].
#[derive(Debug, Clone)]
pub struct LatencyMeasurement {
    /// The tier that was measured.
    pub tier: Tier,

    /// Measured latency for a single operation.
    pub latency: Duration,

    /// Throughput in operations per second, or `None` if throughput was not
    /// measured.
    pub throughput_ops: Option<f64>,

    /// When the measurement was taken.
    ///
    /// This is a [`std::time::Instant`], i.e. a monotonic clock reading: it is
    /// suited to computing the age of a measurement (`measured_at.elapsed()`),
    /// not to displaying a wall-clock time.
    pub measured_at: std::time::Instant,
}

/// Time and operation-count limits for one batch of retrieval operations.
///
/// Budgets are plain values: they can be cloned and compared for equality,
/// which makes it easy to check whether a newly proposed budget actually
/// differs from the one currently in effect.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LatencyBudget {
    /// Total time budget for this retrieval batch.
    pub total: Duration,

    /// Maximum time per individual retrieval.
    pub per_operation: Duration,

    /// Maximum number of operations in this budget.
    pub max_operations: usize,
}

impl Default for LatencyBudget {
    /// Returns the default budget: 50 ms in total, 5 ms per operation and at
    /// most 10 operations.
    ///
    /// The three defaults are mutually consistent: 10 operations at 5 ms each
    /// exactly fill the 50 ms total.
    fn default() -> Self {
        Self {
            total: Duration::from_millis(50),
            per_operation: Duration::from_millis(5),
            max_operations: 10,
        }
    }
}

/// Occupancy statistics for a single storage tier.
///
/// This is the value type returned by [`Latency::tier_stats`] and, one entry
/// per tier, by [`Latency::all_stats`].
#[derive(Debug, Clone)]
pub struct TierStats {
    /// The tier these statistics describe.
    pub tier: Tier,

    /// Number of points in this tier.
    // NOTE(review): "points" is not defined in this file; presumably the
    // stored items/records of the tier — confirm against the adapter.
    pub count: usize,

    /// Total size in bytes.
    pub size_bytes: usize,

    /// Capacity in bytes.
    pub capacity_bytes: usize,

    /// Usage ratio (0.0 to 1.0).
    // NOTE(review): presumably `size_bytes / capacity_bytes`; nothing in this
    // file computes or range-checks it, so the adapter that fills this struct
    // is responsible for keeping it in range — confirm.
    pub usage_ratio: f32,
}

/// Port through which the system measures storage latency and adapts to it.
///
/// System adapters provide the implementations. Implementors must be
/// `Send + Sync`.
pub trait Latency: Send + Sync {
    /// Measures the current latency of `tier`.
    ///
    /// Implementations are expected to run a small test operation against the
    /// tier and report the latency they actually observed.
    fn probe(&mut self, tier: Tier) -> LatencyMeasurement;

    /// Returns the latency budget currently in effect.
    fn budget(&self) -> LatencyBudget;

    /// Replaces the latency budget with `budget`.
    fn set_budget(&mut self, budget: LatencyBudget);

    /// Returns the capacity still available in `tier`.
    fn available_capacity(&self, tier: Tier) -> usize;

    /// Suggests the tier to use for a given access pattern.
    ///
    /// `expected_accesses` is how many times the data is expected to be
    /// accessed.
    fn recommend_tier(&self, expected_accesses: u32) -> Tier;

    /// Returns the statistics of a single `tier`.
    fn tier_stats(&self, tier: Tier) -> TierStats;

    /// Returns the statistics of every tier, in the order hot, warm, cold.
    ///
    /// The default implementation calls [`Latency::tier_stats`] once per tier.
    fn all_stats(&self) -> Vec<TierStats> {
        [Tier::Hot, Tier::Warm, Tier::Cold]
            .iter()
            .map(|&tier| self.tier_stats(tier))
            .collect()
    }
}