File size: 9,346 Bytes
8ef2d83
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
//! # Arms Engine
//!
//! The main ARMS orchestrator.
//!
//! This struct wires together:
//! - Storage (Place port)
//! - Index (Near port)
//! - Configuration
//!
//! And exposes a unified API for storing and retrieving points.

use crate::core::{Blob, Id, PlacedPoint, Point};
use crate::core::config::ArmsConfig;
use crate::ports::{Near, NearResult, Place, PlaceResult, SearchResult};
use crate::adapters::storage::MemoryStorage;
use crate::adapters::index::FlatIndex;

/// The main ARMS engine
///
/// Orchestrates storage and indexing with a unified API.
/// Both backends are held as boxed trait objects, so callers can supply their
/// own implementations through `Arms::with_adapters`.
pub struct Arms {
    /// Configuration supplied at construction; readable through `config()`.
    config: ArmsConfig,

    /// Storage backend (Place port); holds each placed point together with its blob.
    storage: Box<dyn Place>,

    /// Index backend (Near port); serves the `near` / `within` queries.
    index: Box<dyn Near>,
}

impl Arms {
    /// Create a new ARMS instance with default adapters
    ///
    /// Uses `MemoryStorage` and `FlatIndex`, both sized from
    /// `config.dimensionality`; the index also receives a clone of
    /// `config.proximity`.
    /// For production, use `Arms::with_adapters` with appropriate backends.
    pub fn new(config: ArmsConfig) -> Self {
        let storage = Box::new(MemoryStorage::new(config.dimensionality));
        let index = Box::new(FlatIndex::new(
            config.dimensionality,
            config.proximity.clone(),
            true, // Assuming cosine-like similarity by default
        ));

        Self {
            config,
            storage,
            index,
        }
    }

    /// Create with custom adapters
    ///
    /// The supplied backends are used as-is; nothing is validated against
    /// `config` here.
    pub fn with_adapters(
        config: ArmsConfig,
        storage: Box<dyn Place>,
        index: Box<dyn Near>,
    ) -> Self {
        Self {
            config,
            storage,
            index,
        }
    }

    /// Get the configuration
    pub fn config(&self) -> &ArmsConfig {
        &self.config
    }

    /// Get the dimensionality of this space (as recorded in the configuration)
    pub fn dimensionality(&self) -> usize {
        self.config.dimensionality
    }

    // ========================================================================
    // PLACE OPERATIONS
    // ========================================================================

    /// Place a point in the space
    ///
    /// The point is normalized first when `normalize_on_insert` is set, then
    /// written to storage and added to the index under the ID storage assigned.
    ///
    /// # Errors
    ///
    /// Propagates any error from the storage backend. If the index rejects the
    /// point, the freshly stored entry is removed again and the index error is
    /// reported as a `PlaceError::StorageError` carrying its debug text.
    pub fn place(&mut self, point: Point, blob: Blob) -> PlaceResult<Id> {
        // Normalize if configured
        let point = if self.config.normalize_on_insert {
            point.normalize()
        } else {
            point
        };

        // Storage takes the point by value; keep ours so the index can borrow it.
        let id = self.storage.place(point.clone(), blob)?;

        // Add to index
        if let Err(e) = self.index.add(id, &point) {
            // Best-effort rollback so storage does not keep an entry the index lacks.
            let _ = self.storage.remove(id);
            return Err(crate::ports::PlaceError::StorageError(format!(
                "Index error: {:?}",
                e
            )));
        }

        Ok(id)
    }

    /// Place multiple points at once
    ///
    /// Items are placed one by one, in order, through [`Arms::place`]. The
    /// returned vector holds one result per input item, so a failure on one
    /// item does not stop the items after it from being placed.
    pub fn place_batch(&mut self, items: Vec<(Point, Blob)>) -> Vec<PlaceResult<Id>> {
        items
            .into_iter()
            .map(|(point, blob)| self.place(point, blob))
            .collect()
    }

    /// Remove a point from the space
    ///
    /// Returns whatever the storage backend returns for the removal
    /// (`None` when it had nothing stored under `id`).
    pub fn remove(&mut self, id: Id) -> Option<PlacedPoint> {
        // Index first. Its result is deliberately ignored: the storage outcome
        // below is what the caller is told about.
        let _ = self.index.remove(id);

        // Then from storage
        self.storage.remove(id)
    }

    /// Get a point by ID
    pub fn get(&self, id: Id) -> Option<&PlacedPoint> {
        self.storage.get(id)
    }

    /// Check if a point exists (according to the storage backend)
    pub fn contains(&self, id: Id) -> bool {
        self.storage.contains(id)
    }

    /// Get the number of stored points
    ///
    /// This is the storage backend's count; see [`Arms::index_len`] for the
    /// index's own count.
    pub fn len(&self) -> usize {
        self.storage.len()
    }

    /// Check if the space is empty (according to the storage backend)
    pub fn is_empty(&self) -> bool {
        self.storage.is_empty()
    }

    /// Clear all points
    ///
    /// Empties the storage backend, then asks the index to rebuild. Any error
    /// from the rebuild is discarded.
    ///
    /// NOTE(review): whether `rebuild()` drops entries that were previously
    /// added depends on the `Near` implementation, which is not visible from
    /// this file — confirm that `index_len()` is 0 after a clear.
    pub fn clear(&mut self) {
        self.storage.clear();
        let _ = self.index.rebuild(); // Reset index
    }

    // ========================================================================
    // NEAR OPERATIONS
    // ========================================================================

    /// Give a query the same normalization treatment `place` gives stored points.
    ///
    /// When `normalize_on_insert` is off the caller's point is borrowed as-is,
    /// so searching does not copy the query.
    fn prepare_query<'q>(&self, query: &'q Point) -> std::borrow::Cow<'q, Point> {
        if self.config.normalize_on_insert {
            std::borrow::Cow::Owned(query.normalize())
        } else {
            std::borrow::Cow::Borrowed(query)
        }
    }

    /// Find k nearest points to query
    ///
    /// The query is normalized first when `normalize_on_insert` is set; the
    /// search itself is delegated to the index backend, whose result (or
    /// error) is returned unchanged.
    pub fn near(&self, query: &Point, k: usize) -> NearResult<Vec<SearchResult>> {
        let query = self.prepare_query(query);
        self.index.near(&query, k)
    }

    /// Find all points within threshold
    ///
    /// The query is normalized first when `normalize_on_insert` is set; the
    /// threshold is passed to the index backend untouched.
    pub fn within(&self, query: &Point, threshold: f32) -> NearResult<Vec<SearchResult>> {
        let query = self.prepare_query(query);
        self.index.within(&query, threshold)
    }

    /// Find and retrieve k nearest points (with full data)
    ///
    /// Runs [`Arms::near`] and pairs each hit's stored point with its score.
    /// Hits whose ID is not found in storage are skipped, so the result may
    /// hold fewer entries than the index returned.
    pub fn near_with_data(&self, query: &Point, k: usize) -> NearResult<Vec<(&PlacedPoint, f32)>> {
        let results = self.near(query, k)?;

        Ok(results
            .into_iter()
            .filter_map(|r| self.storage.get(r.id).map(|p| (p, r.score)))
            .collect())
    }

    // ========================================================================
    // MERGE OPERATIONS
    // ========================================================================

    /// Merge multiple points into one using the configured merge function
    pub fn merge(&self, points: &[Point]) -> Point {
        self.config.merge.merge(points)
    }

    /// Compute proximity between two points using the configured proximity function
    pub fn proximity(&self, a: &Point, b: &Point) -> f32 {
        self.config.proximity.proximity(a, b)
    }

    // ========================================================================
    // STATS
    // ========================================================================

    /// Get storage size in bytes, as reported by the storage backend
    pub fn size_bytes(&self) -> usize {
        self.storage.size_bytes()
    }

    /// Get the number of entries reported by the index backend
    pub fn index_len(&self) -> usize {
        self.index.len()
    }

    /// Check if index is ready, as reported by the index backend
    pub fn is_ready(&self) -> bool {
        self.index.is_ready()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Engine over a 3-dimensional space, built with the default adapters.
    fn create_test_arms() -> Arms {
        let config = ArmsConfig::new(3);
        Arms::new(config)
    }

    /// A blob can be read back through the ID that `place` returned.
    #[test]
    fn test_arms_place_and_get() {
        let mut engine = create_test_arms();

        let id = engine
            .place(Point::new(vec![1.0, 0.0, 0.0]), Blob::from_str("test data"))
            .unwrap();

        let stored = engine.get(id).unwrap();
        assert_eq!(stored.blob.as_str(), Some("test data"));
    }

    /// `near` honours `k` and returns the best match first.
    #[test]
    fn test_arms_near() {
        let mut engine = create_test_arms();

        // One point along each axis
        let axes = [
            (vec![1.0, 0.0, 0.0], "x"),
            (vec![0.0, 1.0, 0.0], "y"),
            (vec![0.0, 0.0, 1.0], "z"),
        ];
        for (dims, label) in axes {
            engine
                .place(Point::new(dims), Blob::from_str(label))
                .unwrap();
        }

        // Ask for the two closest to the x axis
        let hits = engine.near(&Point::new(vec![1.0, 0.0, 0.0]), 2).unwrap();

        assert_eq!(hits.len(), 2);
        // The leading hit must score strictly higher than the one after it
        let (best, runner_up) = (&hits[0], &hits[1]);
        assert!(best.score > runner_up.score);
    }

    /// `near_with_data` hands back the stored entry alongside the score.
    #[test]
    fn test_arms_near_with_data() {
        let mut engine = create_test_arms();

        engine
            .place(Point::new(vec![1.0, 0.0, 0.0]), Blob::from_str("x"))
            .unwrap();
        engine
            .place(Point::new(vec![0.0, 1.0, 0.0]), Blob::from_str("y"))
            .unwrap();

        let hits = engine
            .near_with_data(&Point::new(vec![1.0, 0.0, 0.0]), 1)
            .unwrap();

        assert_eq!(hits.len(), 1);
        let (nearest, _score) = hits[0];
        assert_eq!(nearest.blob.as_str(), Some("x"));
    }

    /// Removing a point makes it disappear from `contains` and `len`.
    #[test]
    fn test_arms_remove() {
        let mut engine = create_test_arms();

        let id = engine
            .place(Point::new(vec![1.0, 0.0, 0.0]), Blob::empty())
            .unwrap();

        assert!(engine.contains(id));
        assert_eq!(engine.len(), 1);

        engine.remove(id);

        assert!(!engine.contains(id));
        assert_eq!(engine.len(), 0);
    }

    /// The engine's `merge` yields the component-wise mean for the test config.
    #[test]
    fn test_arms_merge() {
        let engine = create_test_arms();

        let inputs = vec![
            Point::new(vec![1.0, 0.0, 0.0]),
            Point::new(vec![0.0, 1.0, 0.0]),
        ];

        let merged = engine.merge(&inputs);

        // Mean of [1,0,0] and [0,1,0] = [0.5, 0.5, 0]
        let dims = merged.dims();
        assert!((dims[0] - 0.5).abs() < 0.0001);
        assert!((dims[1] - 0.5).abs() < 0.0001);
        assert!((dims[2] - 0.0).abs() < 0.0001);
    }

    /// `clear` leaves the engine reporting zero stored points.
    #[test]
    fn test_arms_clear() {
        let mut engine = create_test_arms();

        for step in 0..10 {
            let along_x = Point::new(vec![step as f32, 0.0, 0.0]);
            engine.place(along_x, Blob::empty()).unwrap();
        }

        assert_eq!(engine.len(), 10);

        engine.clear();

        assert_eq!(engine.len(), 0);
        assert!(engine.is_empty());
    }

    /// With the test config, a stored point comes back normalized.
    #[test]
    fn test_arms_normalizes_on_insert() {
        let mut engine = create_test_arms();

        // A 3-4-5 triangle: magnitude = 5, so clearly not unit length
        let unnormalized = Point::new(vec![3.0, 4.0, 0.0]);
        let id = engine.place(unnormalized, Blob::empty()).unwrap();

        let stored = engine.get(id).unwrap();

        // Should be normalized
        assert!(stored.point.is_normalized());
    }
}