Andrew Young committed on
Commit
bdb278a
·
verified ·
1 Parent(s): eb107fd

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,10 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ paper/figures/fig01_architecture.jpg filter=lfs diff=lfs merge=lfs -text
37
+ paper/figures/fig02_hexagonal.jpg filter=lfs diff=lfs merge=lfs -text
38
+ paper/figures/fig03_primitives.jpg filter=lfs diff=lfs merge=lfs -text
39
+ paper/figures/fig04_position_relationship.jpg filter=lfs diff=lfs merge=lfs -text
40
+ paper/figures/fig05_hippocampus.jpg filter=lfs diff=lfs merge=lfs -text
41
+ paper/figures/fig06_traditional_vs_arms.jpg filter=lfs diff=lfs merge=lfs -text
42
+ paper/figures/fig07_ecosystem.jpg filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ /target/
2
+ Cargo.lock
3
+ .env
4
+ .venv/
5
+ .claude/
6
+ *.pyc
7
+ __pycache__/
Cargo.toml ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [package]
2
+ name = "arms-core"
3
+ version = "0.1.0"
4
+ edition = "2021"
5
+ authors = ["Automate Capture LLC <research@automate-capture.com>"]
6
+ description = "ARMS: Attention Reasoning Memory Store - A spatial memory fabric for AI. Position IS relationship."
7
+ license = "MIT"
8
+ repository = "https://github.com/automate-capture/arms"
9
+ homepage = "https://research.automate-capture.com/arms"
10
+ documentation = "https://docs.rs/arms"
11
+ readme = "README.md"
12
+ keywords = ["memory", "spatial-database", "ai", "embeddings", "vector-search"]
13
+ categories = ["database", "science", "algorithms"]
14
+ exclude = [
15
+ "target/",
16
+ ".venv/",
17
+ ".git/",
18
+ ".claude/",
19
+ "paper/",
20
+ "images/",
21
+ ".env",
22
+ ]
23
+
24
+ [lib]
25
+ name = "arms"
26
+ path = "src/lib.rs"
27
+
28
+ [dependencies]
29
+ thiserror = "1.0"
30
+
31
+ [dev-dependencies]
32
+ criterion = "0.5"
33
+
34
+ [features]
35
+ default = []
36
+
37
+ [profile.release]
38
+ lto = true
39
+ codegen-units = 1
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Andrew Young / Automate Capture LLC
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ tags:
4
+ - spatial-database
5
+ - memory
6
+ - embeddings
7
+ - ai
8
+ - vector-search
9
+ - rust
10
+ library_name: arms-core
11
+ pipeline_tag: feature-extraction
12
+ ---
13
+
14
+ # ARMS - Attention Reasoning Memory Store
15
+
16
+ > **Position IS Relationship** - A Spatial Memory Fabric for AI Systems
17
+
18
+ ARMS is a spatial memory fabric that enables AI systems to store and retrieve computed states by their native dimensional coordinates. Unlike traditional databases that require explicit relationships through foreign keys or learned topology through approximate nearest neighbor algorithms, ARMS operates on a fundamental principle: **proximity defines connection**.
19
+
20
+ ![ARMS Architecture](paper/figures/fig01_architecture.jpg)
21
+
22
+ ## The Problem
23
+
24
+ Current AI memory approaches all lose information:
25
+
26
+ - **Extended context**: Expensive, doesn't scale beyond training length
27
+ - **RAG retrieval**: Retrieves text, requires recomputation of attention
28
+ - **Vector databases**: Treat all data as unstructured point clouds
29
+ - **External memory**: Key-value stores with explicit indexing
30
+
31
+ ![Traditional vs ARMS](paper/figures/fig06_traditional_vs_arms.jpg)
32
+
33
+ ## The ARMS Insight
34
+
35
+ ```
36
+ Traditional: State → Project → Index → Retrieve → Reconstruct
37
+ (lossy at each step)
38
+
39
+ ARMS: State → Store AT coordinates → Retrieve → Inject directly
40
+ (native representation preserved)
41
+ ```
42
+
43
+ ## The Five Primitives
44
+
45
+ Everything in ARMS reduces to five operations:
46
+
47
+ ![Five Primitives](paper/figures/fig03_primitives.jpg)
48
+
49
+ | Primitive | Type | Purpose |
50
+ |-----------|------|---------|
51
+ | **Point** | `Vec<f32>` | Any dimensionality |
52
+ | **Proximity** | `fn(a, b) -> f32` | How related? |
53
+ | **Merge** | `fn(points) -> point` | Compose together |
54
+ | **Place** | `fn(point, data) -> id` | Exist in space |
55
+ | **Near** | `fn(point, k) -> ids` | What's related? |
56
+
57
+ ## Quick Start
58
+
59
+ ```rust
60
+ use arms_core::{Arms, ArmsConfig, Point};
61
+
62
+ // Create ARMS with default config
63
+ let mut arms = Arms::new(ArmsConfig::new(768));
64
+
65
+ // Place a point in the space
66
+ let point = Point::new(vec![0.1; 768]);
67
+ let id = arms.place(point, b"my data".to_vec()).unwrap();
68
+
69
+ // Find nearby points
70
+ let query = Point::new(vec![0.1; 768]);
71
+ let neighbors = arms.near(&query, 5).unwrap();
72
+ ```
73
+
74
+ ## Hexagonal Architecture
75
+
76
+ ARMS follows a hexagonal (ports-and-adapters) architecture. The core domain contains pure math with no I/O. Ports define trait contracts. Adapters provide swappable implementations.
77
+
78
+ ![Hexagonal Architecture](paper/figures/fig02_hexagonal.jpg)
79
+
80
+ ```
81
+ ┌─────────────────────────────────────────────────────────────┐
82
+ │ ARMS │
83
+ ├─────────────────────────────────────────────────────────────┤
84
+ │ CORE (pure math, no I/O) │
85
+ │ Point, Id, Blob, Proximity, Merge │
86
+ │ │
87
+ │ PORTS (trait contracts) │
88
+ │ Place, Near, Latency │
89
+ │ │
90
+ │ ADAPTERS (swappable implementations) │
91
+ │ Storage: Memory, NVMe (planned) │
92
+ │ Index: Flat, HAT (see arms-hat crate) │
93
+ │ │
94
+ │ ENGINE (orchestration) │
95
+ │ Arms - the main entry point │
96
+ └─────────────────────────────────────────────────────────────┘
97
+ ```
98
+
99
+ ## The Hippocampus Analogy
100
+
101
+ ARMS functions as an artificial hippocampus for AI systems:
102
+
103
+ ![Hippocampus Analogy](paper/figures/fig05_hippocampus.jpg)
104
+
105
+ | Hippocampus | ARMS |
106
+ |-------------|------|
107
+ | Encodes episodic memories | Stores attention states |
108
+ | Spatial navigation | High-dimensional proximity |
109
+ | Pattern completion | Near queries |
110
+ | Memory consolidation | Merge operations |
111
+ | Place cells | Points at coordinates |
112
+
113
+ ## Ecosystem
114
+
115
+ ![ARMS Ecosystem](paper/figures/fig07_ecosystem.jpg)
116
+
117
+ ### Related Crates
118
+
119
+ - [`arms-hat`](https://crates.io/crates/arms-hat) - Hierarchical Attention Tree index adapter (100% recall, 70x faster than HNSW)
120
+
121
+ ### Planned Adapters
122
+
123
+ - `arms-nvme` - Persistent storage via memory-mapped files
124
+ - `arms-distributed` - Sharded storage across machines
125
+ - `arms-gpu` - CUDA-accelerated similarity search
126
+ - `arms-py` - Python bindings
127
+
128
+ ## Proximity Functions
129
+
130
+ Built-in proximity measures:
131
+
132
+ - **Cosine** - Angle between vectors (semantic similarity)
133
+ - **Euclidean** - Straight-line distance
134
+ - **DotProduct** - Raw dot product
135
+ - **Manhattan** - L1 distance
136
+
137
+ ## Installation
138
+
139
+ ```toml
140
+ [dependencies]
141
+ arms-core = "0.1"
142
+ ```
143
+
144
+ ## Paper
145
+
146
+ The research paper is available in the [`paper/`](paper/) directory.
147
+
148
+ **ARMS: A Spatial Memory Fabric for AI Systems**
149
+ Andrew Young, 2026
150
+
151
+ ## License
152
+
153
+ MIT License - see [LICENSE](LICENSE)
154
+
155
+ ## Citation
156
+
157
+ If you use ARMS in research, please cite:
158
+
159
+ ```bibtex
160
+ @article{young2026arms,
161
+ author = {Young, Andrew},
162
+ title = {ARMS: A Spatial Memory Fabric for AI Systems},
163
+ journal = {arXiv preprint},
164
+ year = {2026},
165
+ url = {https://github.com/automate-capture/arms}
166
+ }
167
+ ```
168
+
169
+ ## Author
170
+
171
+ Andrew Young - [andrew@automate-capture.com](mailto:andrew@automate-capture.com)
paper/ARMS_Spatial_Memory_Young_2026.tex ADDED
@@ -0,0 +1,520 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ % ARMS: A Spatial Memory Fabric for AI Systems
2
+ % arXiv submission - January 2026
3
+
4
+ \documentclass[11pt,a4paper]{article}
5
+
6
+ % ============================================================================
7
+ % PACKAGES
8
+ % ============================================================================
9
+
10
+ \usepackage[utf8]{inputenc}
11
+ \usepackage[T1]{fontenc}
12
+ \usepackage{lmodern}
13
+
14
+ % Math
15
+ \usepackage{amsmath,amssymb,amsthm}
16
+ \usepackage{mathtools}
17
+
18
+ % Graphics
19
+ \usepackage{graphicx}
20
+ \usepackage{float}
21
+ \usepackage{subcaption}
22
+ \usepackage[dvipsnames]{xcolor}
23
+ \graphicspath{{figures/}{Diagrams/}}
24
+
25
+ % Tables
26
+ \usepackage{booktabs}
27
+ \usepackage{multirow}
28
+ \usepackage{array}
29
+
30
+ % Algorithms
31
+ \usepackage{algorithm}
32
+ \usepackage{algorithmic}
33
+
34
+ % Code
35
+ \usepackage{listings}
36
+ \lstset{
37
+ basicstyle=\ttfamily\small,
38
+ breaklines=true,
39
+ frame=single,
40
+ numbers=left,
41
+ numberstyle=\tiny,
42
+ language=Rust,
43
+ }
+
+ % The listings package ships no built-in Rust grammar, so the
+ % [language=Rust] listings below would fail to compile without this
+ % minimal definition.
+ \lstdefinelanguage{Rust}{
+ morekeywords={as,break,const,continue,crate,dyn,else,enum,extern,false,fn,for,if,impl,in,let,loop,match,mod,move,mut,pub,ref,return,self,Self,static,struct,super,trait,true,type,unsafe,use,where,while},
+ sensitive=true,
+ morecomment=[l]{//},
+ morecomment=[s]{/*}{*/},
+ morestring=[b]",
+ }
44
+
45
+ % Links
46
+ \usepackage[colorlinks=true,linkcolor=blue,citecolor=blue,urlcolor=blue]{hyperref}
47
+ \usepackage{cleveref}
48
+
49
+ % Layout
50
+ \usepackage[margin=1in]{geometry}
51
+
52
+ % Bibliography
53
+ \usepackage[numbers,sort&compress]{natbib}
54
+
55
+ % ============================================================================
56
+ % CUSTOM COMMANDS
57
+ % ============================================================================
58
+
59
+ \DeclareMathOperator*{\argmax}{arg\,max}
60
+ \DeclareMathOperator*{\argmin}{arg\,min}
61
+ \newcommand{\R}{\mathbb{R}}
62
+
63
+ % ============================================================================
64
+ % TITLE
65
+ % ============================================================================
66
+
67
+ \title{%
68
+ \textbf{ARMS: A Spatial Memory Fabric for AI Systems}\\[0.5em]
69
+ \large Position IS Relationship
70
+ }
71
+
72
+ \author{%
73
+ Andrew Young\\
74
+ \texttt{andrew@automate-capture.com}
75
+ }
76
+
77
+ \date{January 2026}
78
+
79
+ % ============================================================================
80
+ % DOCUMENT
81
+ % ============================================================================
82
+
83
+ \begin{document}
84
+
85
+ \maketitle
86
+
87
+ % ----------------------------------------------------------------------------
88
+ \begin{abstract}
89
+ \noindent
90
+ This paper introduces ARMS (Attention Reasoning Memory Store), a spatial memory fabric that enables AI systems to store and retrieve computed states by their native dimensional coordinates. Unlike traditional databases that require explicit relationships through foreign keys or learned topology through approximate nearest neighbor algorithms, ARMS operates on a fundamental principle: \textbf{position IS relationship}. Proximity in high-dimensional space defines semantic connection without explicit declaration.
91
+
92
+ ARMS reduces memory operations to five primitives: \textbf{Point} (any-dimensional vectors), \textbf{Proximity} (relationship measurement), \textbf{Merge} (composition), \textbf{Place} (existence in space), and \textbf{Near} (retrieval by similarity). This minimal abstraction enables a hexagonal architecture where storage backends, index algorithms, and APIs can be swapped without changing core logic.
93
+
94
+ The framework provides the foundation for specialized index adapters like HAT (Hierarchical Attention Tree), demonstrating that domain-specific structure can be exploited for superior performance. ARMS functions as an artificial hippocampus---enabling AI systems to form, consolidate, and retrieve episodic memories through spatial organization rather than explicit indexing.
95
+
96
+ \vspace{1em}
97
+ \noindent\textbf{Keywords:} spatial memory, AI memory systems, vector databases, hexagonal architecture, episodic memory
98
+ \end{abstract}
99
+
100
+ % ----------------------------------------------------------------------------
101
+ \section{Introduction}
102
+ \label{sec:intro}
103
+
104
+ \subsection{The Memory Problem in AI}
105
+
106
+ Large language models and AI agents face a fundamental limitation: they lack persistent, retrievable memory beyond their context window. Current approaches include:
107
+
108
+ \begin{itemize}
109
+ \item \textbf{Extended context}: Expensive, doesn't scale beyond training length
110
+ \item \textbf{RAG retrieval}: Retrieves text, requires recomputation of attention
111
+ \item \textbf{Vector databases}: Treat all data as unstructured point clouds
112
+ \item \textbf{External memory}: Key-value stores with explicit indexing
113
+ \end{itemize}
114
+
115
+ None of these approaches preserve the \emph{native representation} of computed states. When an LLM processes text, it produces attention states in high-dimensional space. Current systems project, compress, or discard these states rather than storing them directly.
116
+
117
+ \begin{figure}[H]
118
+ \centering
119
+ \includegraphics[width=0.9\textwidth]{fig06_traditional_vs_arms.jpg}
120
+ \caption{Traditional approaches vs ARMS: Traditional systems project, compress, and approximate states at each step, introducing cumulative error. ARMS stores states at their native coordinates and retrieves by proximity, preserving the original representation.}
121
+ \label{fig:traditional}
122
+ \end{figure}
123
+
124
+ \subsection{The ARMS Insight}
125
+
126
+ ARMS takes a different approach:
127
+
128
+ \begin{quote}
129
+ \textbf{Store states at their native coordinates. Retrieve by proximity. Position IS relationship.}
130
+ \end{quote}
131
+
132
+ This insight has three implications:
133
+
134
+ \begin{enumerate}
135
+ \item \textbf{No projection loss}: States are stored in their original dimensionality
136
+ \item \textbf{No explicit relationships}: Semantic similarity is spatial proximity
137
+ \item \textbf{No learned topology}: Structure can be known or exploited, not discovered
138
+ \end{enumerate}
139
+
140
+ \begin{figure}[H]
141
+ \centering
142
+ \includegraphics[width=0.9\textwidth]{fig01_architecture.jpg}
143
+ \caption{ARMS architecture overview. The five primitives (Point, Proximity, Merge, Place, Near) form the core, with swappable storage and index adapters.}
144
+ \label{fig:architecture}
145
+ \end{figure}
146
+
147
+ \subsection{Contributions}
148
+
149
+ This paper makes the following contributions:
150
+
151
+ \begin{enumerate}
152
+ \item A \textbf{five-primitive abstraction} for spatial memory (Point, Proximity, Merge, Place, Near)
153
+ \item A \textbf{hexagonal architecture} enabling swappable storage, index, and API adapters
154
+ \item The \textbf{``position is relationship''} principle for AI memory systems
155
+ \item A \textbf{foundation framework} demonstrated through the HAT index adapter
156
+ \end{enumerate}
157
+
158
+ % ----------------------------------------------------------------------------
159
+ \section{The Five Primitives}
160
+ \label{sec:primitives}
161
+
162
+ ARMS reduces all memory operations to five primitives. This minimal surface area enables maximum flexibility while maintaining semantic clarity.
163
+
164
+ \begin{figure}[H]
165
+ \centering
166
+ \includegraphics[width=0.85\textwidth]{fig03_primitives.jpg}
167
+ \caption{The five primitives of ARMS: Point (representation), Proximity (relationship), Merge (composition), Place (storage), and Near (retrieval). These operations form the complete interface for spatial memory.}
168
+ \label{fig:primitives}
169
+ \end{figure}
170
+
171
+ \begin{table}[H]
172
+ \centering
173
+ \caption{The five primitives of ARMS.}
174
+ \label{tab:primitives}
175
+ \begin{tabular}{llp{7cm}}
176
+ \toprule
177
+ \textbf{Primitive} & \textbf{Signature} & \textbf{Purpose} \\
178
+ \midrule
179
+ Point & \texttt{Vec<f32>} & Any-dimensional vector representation \\
180
+ Proximity & \texttt{fn(a, b) -> f32} & Measure how related two points are \\
181
+ Merge & \texttt{fn(points) -> point} & Compose multiple points into one \\
182
+ Place & \texttt{fn(point, data) -> id} & Store a point in the space \\
183
+ Near & \texttt{fn(point, k) -> ids} & Find k most related points \\
184
+ \bottomrule
185
+ \end{tabular}
186
+ \end{table}
187
+
188
+ \subsection{Point: The Universal Representation}
189
+
190
+ A Point is simply a vector of floating-point numbers:
191
+
192
+ \begin{lstlisting}[language=Rust,caption={Point definition.}]
193
+ pub struct Point {
194
+ dims: Vec<f32>,
195
+ }
196
+
197
+ impl Point {
198
+ pub fn new(dims: Vec<f32>) -> Self;
199
+ pub fn dimensionality(&self) -> usize;
200
+ pub fn magnitude(&self) -> f32;
201
+ pub fn normalize(&self) -> Point;
202
+ }
203
+ \end{lstlisting}
204
+
205
+ Points are dimensionality-agnostic. The same ARMS instance can store 768-dimensional BERT embeddings or 1536-dimensional OpenAI embeddings---the primitives don't change.
206
+
207
+ \subsection{Proximity: Relationship Without Declaration}
208
+
209
+ Proximity functions measure how related two points are:
210
+
211
+ \begin{table}[H]
212
+ \centering
213
+ \caption{Built-in proximity functions.}
214
+ \label{tab:proximity}
215
+ \begin{tabular}{llp{6cm}}
216
+ \toprule
217
+ \textbf{Function} & \textbf{Range} & \textbf{Use Case} \\
218
+ \midrule
219
+ Cosine & $[-1, 1]$ & Semantic similarity (direction matters) \\
220
+ Euclidean & $[0, \infty)$ & Spatial distance (magnitude matters) \\
221
+ DotProduct & $(-\infty, \infty)$ & Raw correlation \\
222
+ Manhattan & $[0, \infty)$ & L1 distance \\
223
+ \bottomrule
224
+ \end{tabular}
225
+ \end{table}
226
+
227
+ The key insight: \textbf{proximity replaces foreign keys}. In a relational database, you declare relationships explicitly. In ARMS, relationships emerge from spatial position.
228
+
229
+ \subsection{Merge: Composition Without Loss}
230
+
231
+ Merge combines multiple points into a single representative:
232
+
233
+ \begin{itemize}
234
+ \item \textbf{Mean}: Arithmetic average (default)
235
+ \item \textbf{WeightedMean}: Importance-weighted average
236
+ \item \textbf{MaxPool}: Element-wise maximum
237
+ \end{itemize}
238
+
239
+ Merge enables hierarchical summarization. A conversation can be represented by the merge of its messages; a session by the merge of its conversations.
240
+
241
+ \subsection{Place and Near: The Memory Interface}
242
+
243
+ Place stores a point with associated data:
244
+
245
+ \begin{lstlisting}[language=Rust,caption={Place and Near operations.}]
246
+ // Store
247
+ let id = arms.place(embedding, blob)?;
248
+
249
+ // Retrieve
250
+ let neighbors = arms.near(&query, k)?;
251
+ \end{lstlisting}
252
+
253
+ This is the complete memory interface. Everything else---storage backends, index algorithms, APIs---is implementation detail.
254
+
255
+ % ----------------------------------------------------------------------------
256
+ \section{Hexagonal Architecture}
257
+ \label{sec:architecture}
258
+
259
+ ARMS follows hexagonal (ports-and-adapters) architecture. The core domain contains pure math with no I/O. Ports define trait contracts. Adapters provide swappable implementations.
260
+
261
+ \begin{figure}[H]
262
+ \centering
263
+ \includegraphics[width=0.9\textwidth]{fig02_hexagonal.jpg}
264
+ \caption{Hexagonal architecture of ARMS. The core domain contains pure math with no I/O. Ports define trait contracts. Adapters provide swappable implementations for storage, indexing, and APIs.}
265
+ \label{fig:hexagonal}
266
+ \end{figure}
267
+
268
+ \subsection{Core Domain}
269
+
270
+ The core contains:
271
+
272
+ \begin{itemize}
273
+ \item \textbf{Point}: Vector representation
274
+ \item \textbf{Id}: Unique identifiers
275
+ \item \textbf{Blob}: Associated data
276
+ \item \textbf{Proximity}: Relationship measurement
277
+ \item \textbf{Merge}: Point composition
278
+ \end{itemize}
279
+
280
+ No I/O, no side effects, pure functions. This enables testing without mocks and reasoning without context.
281
+
282
+ \subsection{Ports}
283
+
284
+ Ports define what the system needs without specifying how:
285
+
286
+ \begin{lstlisting}[language=Rust,caption={Port definitions.}]
287
+ pub trait Place {
288
+ fn place(&mut self, point: Point, blob: Blob) -> Result<Id>;
289
+ fn get(&self, id: Id) -> Option<&PlacedPoint>;
290
+ fn remove(&mut self, id: Id) -> Option<PlacedPoint>;
291
+ }
292
+
293
+ pub trait Near {
294
+ fn near(&self, query: &Point, k: usize) -> Result<Vec<SearchResult>>;
295
+ fn add(&mut self, id: Id, point: &Point) -> Result<()>;
296
+ }
297
+ \end{lstlisting}
298
+
299
+ \subsection{Adapters}
300
+
301
+ Adapters implement ports for specific technologies:
302
+
303
+ \begin{table}[H]
304
+ \centering
305
+ \caption{Available adapters.}
306
+ \label{tab:adapters}
307
+ \begin{tabular}{lll}
308
+ \toprule
309
+ \textbf{Port} & \textbf{Adapter} & \textbf{Description} \\
310
+ \midrule
311
+ Place & MemoryStorage & In-memory hash map \\
312
+ Place & NVMeStorage & Memory-mapped files (planned) \\
313
+ Near & FlatIndex & Brute-force exact search \\
314
+ Near & HatIndex & Hierarchical Attention Tree \\
315
+ \bottomrule
316
+ \end{tabular}
317
+ \end{table}
318
+
319
+ The HAT index adapter (published separately as \texttt{arms-hat}) demonstrates how domain-specific knowledge can be exploited for superior performance on hierarchical data.
320
+
321
+ \begin{figure}[H]
322
+ \centering
323
+ \includegraphics[width=0.85\textwidth]{fig07_ecosystem.jpg}
324
+ \caption{The ARMS ecosystem: \texttt{arms-core} provides the foundational primitives, while specialized adapters like \texttt{arms-hat} exploit domain-specific structure. Future adapters will add persistence, distribution, and GPU acceleration.}
325
+ \label{fig:ecosystem}
326
+ \end{figure}
327
+
328
+ % ----------------------------------------------------------------------------
329
+ \section{Position IS Relationship}
330
+ \label{sec:philosophy}
331
+
332
+ The core philosophical innovation of ARMS is treating position as the fundamental relationship primitive.
333
+
334
+ \begin{figure}[H]
335
+ \centering
336
+ \includegraphics[width=0.9\textwidth]{fig04_position_relationship.jpg}
337
+ \caption{Position IS relationship: Comparison of relationship representation across database paradigms. ARMS uses spatial position as the fundamental relationship primitive, eliminating the need for explicit declarations.}
338
+ \label{fig:position}
339
+ \end{figure}
340
+
341
+ \subsection{Traditional Approaches}
342
+
343
+ \begin{table}[H]
344
+ \centering
345
+ \caption{Relationship representation in different paradigms.}
346
+ \label{tab:paradigms}
347
+ \begin{tabular}{lll}
348
+ \toprule
349
+ \textbf{Paradigm} & \textbf{Relationship} & \textbf{Limitation} \\
350
+ \midrule
351
+ Relational DB & Foreign keys & Must be declared explicitly \\
352
+ Document DB & Nesting & Limited to containment \\
353
+ Graph DB & Edges & Must be declared explicitly \\
354
+ Vector DB & Learned topology & Requires training/building \\
355
+ \textbf{ARMS} & \textbf{Spatial position} & \textbf{Inherent in representation} \\
356
+ \bottomrule
357
+ \end{tabular}
358
+ \end{table}
359
+
360
+ \subsection{Implications}
361
+
362
+ When position is relationship:
363
+
364
+ \begin{enumerate}
365
+ \item \textbf{Schema-free}: No need to declare relationship types
366
+ \item \textbf{Continuous}: Relationships have degrees, not just existence
367
+ \item \textbf{Emergent}: New relationships discovered through proximity
368
+ \item \textbf{Composable}: Merged points represent group relationships
369
+ \end{enumerate}
370
+
371
+ \subsection{The Hippocampus Analogy}
372
+
373
+ ARMS mirrors the function of the biological hippocampus:
374
+
375
+ \begin{figure}[H]
376
+ \centering
377
+ \includegraphics[width=0.85\textwidth]{fig05_hippocampus.jpg}
378
+ \caption{The hippocampus analogy: ARMS functions as an artificial hippocampus, enabling AI systems to form, consolidate, and retrieve episodic memories through spatial organization.}
379
+ \label{fig:hippocampus}
380
+ \end{figure}
381
+
382
+ \begin{table}[H]
383
+ \centering
384
+ \caption{Hippocampus vs ARMS.}
385
+ \label{tab:hippocampus}
386
+ \begin{tabular}{ll}
387
+ \toprule
388
+ \textbf{Hippocampus} & \textbf{ARMS} \\
389
+ \midrule
390
+ Encodes episodic memories & Stores attention states \\
391
+ Spatial navigation & High-dimensional proximity \\
392
+ Pattern completion & Near queries \\
393
+ Memory consolidation & Merge operations \\
394
+ Place cells & Points at coordinates \\
395
+ \bottomrule
396
+ \end{tabular}
397
+ \end{table}
398
+
399
+ % ----------------------------------------------------------------------------
400
+ \section{Implementation}
401
+ \label{sec:implementation}
402
+
403
+ ARMS is implemented in Rust for performance and safety, with Python bindings planned.
404
+
405
+ \subsection{Usage Example}
406
+
407
+ \begin{lstlisting}[language=Rust,caption={Complete ARMS usage example.}]
408
+ use arms_core::{Arms, ArmsConfig, Point, Blob};
409
+
410
+ // Create ARMS with 768 dimensions
411
+ let mut arms = Arms::new(ArmsConfig::new(768));
412
+
413
+ // Store embeddings
414
+ let embedding = Point::new(vec![0.1; 768]);
415
+ let id = arms.place(embedding, Blob::from_str("hello")).unwrap();
416
+
417
+ // Query by proximity
418
+ let query = Point::new(vec![0.1; 768]);
419
+ let neighbors = arms.near(&query, 10).unwrap();
420
+
421
+ // Get with data
422
+ let results = arms.near_with_data(&query, 5).unwrap();
423
+ for (point, score) in results {
424
+ println!("{}: {}", point.blob.as_str().unwrap(), score);
425
+ }
426
+ \end{lstlisting}
427
+
428
+ \subsection{Performance}
429
+
430
+ With the flat index (exact search):
431
+
432
+ \begin{table}[H]
433
+ \centering
434
+ \caption{Flat index performance.}
435
+ \label{tab:performance}
436
+ \begin{tabular}{rrr}
437
+ \toprule
438
+ \textbf{Points} & \textbf{Dimensions} & \textbf{Query Time} \\
439
+ \midrule
440
+ 1,000 & 768 & 0.3ms \\
441
+ 10,000 & 768 & 3ms \\
442
+ 100,000 & 768 & 30ms \\
443
+ \bottomrule
444
+ \end{tabular}
445
+ \end{table}
446
+
447
+ For large-scale deployments, the HAT index adapter provides $O(\log n)$ queries with 100\% recall on hierarchical data.
448
+
449
+ % ----------------------------------------------------------------------------
450
+ \section{Related Work}
451
+ \label{sec:related}
452
+
453
+ \textbf{Vector Databases}: Pinecone, Weaviate, Milvus, and Qdrant provide vector storage and retrieval. ARMS differs by providing a minimal primitive set and hexagonal architecture rather than a monolithic solution.
454
+
455
+ \textbf{Memory-Augmented Networks}: Neural Turing Machines and Differentiable Neural Computers use learned memory access. ARMS provides explicit, interpretable memory operations.
456
+
457
+ \textbf{RAG Systems}: Retrieval-Augmented Generation retrieves text for reprocessing. ARMS can store pre-computed attention states, avoiding recomputation.
458
+
459
+ \textbf{Embedding Stores}: LangChain, LlamaIndex provide embedding storage. ARMS provides lower-level primitives for building such systems.
460
+
461
+ % ----------------------------------------------------------------------------
462
+ \section{Future Work}
463
+ \label{sec:future}
464
+
465
+ \subsection{Planned Adapters}
466
+
467
+ \begin{itemize}
468
+ \item \textbf{NVMe Storage}: Memory-mapped files for persistence
469
+ \item \textbf{Distributed Storage}: Sharded across machines
470
+ \item \textbf{GPU Index}: CUDA-accelerated similarity search
471
+ \end{itemize}
472
+
473
+ \subsection{Applications}
474
+
475
+ \begin{itemize}
476
+ \item \textbf{LLM Memory}: Long-term episodic memory for chatbots
477
+ \item \textbf{Agent State}: Persistent state for AI agents
478
+ \item \textbf{Attention Caching}: Store and retrieve KV cache states
479
+ \item \textbf{Multimodal Memory}: Unified space for text, image, audio embeddings
480
+ \end{itemize}
481
+
482
+ % ----------------------------------------------------------------------------
483
+ \section{Conclusion}
484
+ \label{sec:conclusion}
485
+
486
+ ARMS provides a minimal, principled foundation for AI memory systems. By reducing memory operations to five primitives and adopting a hexagonal architecture, ARMS enables:
487
+
488
+ \begin{enumerate}
489
+ \item \textbf{Simplicity}: Five operations cover all memory needs
490
+ \item \textbf{Flexibility}: Swap storage, index, and API independently
491
+ \item \textbf{Performance}: Domain-specific adapters like HAT
492
+ \item \textbf{Philosophy}: Position IS relationship
493
+ \end{enumerate}
494
+
495
+ ARMS functions as an artificial hippocampus for AI systems, enabling them to form, consolidate, and retrieve memories through spatial organization rather than explicit indexing.
496
+
497
+ % ----------------------------------------------------------------------------
498
+ \section*{Acknowledgments}
499
+
500
+ I thank the open-source Rust community for excellent tooling and the researchers whose work on memory-augmented networks inspired this architecture.
501
+
502
+ % ----------------------------------------------------------------------------
503
+ \bibliographystyle{plainnat}
504
+ \bibliography{refs}
505
+
506
+ % ----------------------------------------------------------------------------
507
+ \appendix
508
+
509
+ \section{Code Availability}
510
+ \label{app:code}
511
+
512
+ ARMS is available as open-source software:
513
+
514
+ \begin{itemize}
515
+ \item \textbf{Rust crate}: \texttt{arms-core} on crates.io
516
+ \item \textbf{HAT adapter}: \texttt{arms-hat} on crates.io
517
+ \item \textbf{Repository}: \url{https://github.com/automate-capture/arms}
518
+ \end{itemize}
519
+
520
+ \end{document}
paper/figures/fig01_architecture.jpg ADDED

Git LFS Details

  • SHA256: f74f7f79383fbd37fcac60d485bdab4eacc3d69a4bab3f806223a570d061f84d
  • Pointer size: 132 Bytes
  • Size of remote file: 6.38 MB
paper/figures/fig02_hexagonal.jpg ADDED

Git LFS Details

  • SHA256: 3dc3213e6e9ff1465a9eb7cbc46e9ea13f32811a0f9cb558a9a22db8df01e28b
  • Pointer size: 132 Bytes
  • Size of remote file: 6.09 MB
paper/figures/fig03_primitives.jpg ADDED

Git LFS Details

  • SHA256: 9e7a02d0ea863b0e5e8a34553d75fdb54a88e8ff521b1f17a41e6818b21178d9
  • Pointer size: 132 Bytes
  • Size of remote file: 5.77 MB
paper/figures/fig04_position_relationship.jpg ADDED

Git LFS Details

  • SHA256: 95ee3c9b3bce75166cb0082cb19a58e07dc23d2460e6fb20d13deb7baaecdeb9
  • Pointer size: 132 Bytes
  • Size of remote file: 6.69 MB
paper/figures/fig05_hippocampus.jpg ADDED

Git LFS Details

  • SHA256: efc1401769547c42823d789f9435bf15773435ed9ba131b9ca19bd99ff387378
  • Pointer size: 132 Bytes
  • Size of remote file: 7.55 MB
paper/figures/fig06_traditional_vs_arms.jpg ADDED

Git LFS Details

  • SHA256: 5e42fcf9e199a4afb157ac6527cc895451080907fbca81547b964c41a569f858
  • Pointer size: 132 Bytes
  • Size of remote file: 6.88 MB
paper/figures/fig07_ecosystem.jpg ADDED

Git LFS Details

  • SHA256: d7df08d340c1c643a42d1da62ec89c99a69dd28eedc9263e44efef91453c18a1
  • Pointer size: 132 Bytes
  • Size of remote file: 6.67 MB
paper/refs.bib ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ % ARMS Paper Bibliography
2
+
3
+ @article{graves2014neural,
4
+ author = {Graves, Alex and Wayne, Greg and Danihelka, Ivo},
5
+ title = {Neural Turing Machines},
6
+ journal = {arXiv preprint arXiv:1410.5401},
7
+ year = {2014},
8
+ }
9
+
10
+ @article{graves2016dnc,
11
+ author = {Graves, Alex and Wayne, Greg and Reynolds, Malcolm and Harley, Tim and Danihelka, Ivo and others},
12
+ title = {Hybrid Computing Using a Neural Network with Dynamic External Memory},
13
+ journal = {Nature},
14
+ volume = {538},
15
+ number = {7626},
16
+ pages = {471--476},
17
+ year = {2016},
18
+ }
19
+
20
+ @inproceedings{weston2015memory,
21
+ author = {Weston, Jason and Chopra, Sumit and Bordes, Antoine},
22
+ title = {Memory Networks},
23
+ booktitle = {International Conference on Learning Representations},
24
+ year = {2015},
25
+ }
26
+
27
+ @inproceedings{lewis2020rag,
28
+ author = {Lewis, Patrick and others},
29
+ title = {Retrieval-Augmented Generation for Knowledge-Intensive {NLP} Tasks},
30
+ booktitle = {Advances in Neural Information Processing Systems},
31
+ volume = {33},
32
+ pages = {9459--9474},
33
+ year = {2020},
34
+ }
35
+
36
+ @article{malkov2018hnsw,
37
+ author = {Malkov, Yu A. and Yashunin, D. A.},
38
+ title = {Efficient and Robust Approximate Nearest Neighbor Search Using Hierarchical Navigable Small World Graphs},
39
+ journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
40
+ volume = {42},
41
+ number = {4},
42
+ pages = {824--836},
43
+ year = {2018},
44
+ }
45
+
46
+ @article{johnson2019faiss,
47
+ author = {Johnson, Jeff and Douze, Matthijs and J{\'e}gou, Herv{\'e}},
48
+ title = {Billion-scale Similarity Search with {GPU}s},
49
+ journal = {IEEE Transactions on Big Data},
50
+ volume = {7},
51
+ number = {3},
52
+ pages = {535--547},
53
+ year = {2019},
54
+ }
55
+
56
+ @article{vaswani2017attention,
57
+ author = {Vaswani, Ashish and others},
58
+ title = {Attention is All You Need},
59
+ journal = {Advances in Neural Information Processing Systems},
60
+ volume = {30},
61
+ year = {2017},
62
+ }
63
+
64
+ @misc{cockburn2005hexagonal,
+ author = {Cockburn, Alistair},
+ title = {Hexagonal Architecture},
+ year = {2005},
+ howpublished = {Online article},
+ note = {Also known as Ports and Adapters pattern},
+ }
70
+
71
+ @article{moser2008place,
72
+ author = {Moser, Edvard I. and Kropff, Emilio and Moser, May-Britt},
73
+ title = {Place Cells, Grid Cells, and the Brain's Spatial Representation System},
74
+ journal = {Annual Review of Neuroscience},
75
+ volume = {31},
76
+ pages = {69--89},
77
+ year = {2008},
78
+ }
src/adapters/index/flat.rs ADDED
@@ -0,0 +1,278 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //! # Flat Index Adapter
2
+ //!
3
+ //! Brute force nearest neighbor search.
4
+ //! Compares query against ALL points - O(n) per query.
5
+ //!
6
+ //! Good for:
7
+ //! - Testing
8
+ //! - Small datasets (< 10,000 points)
9
+ //! - When exact results are required
10
+ //!
11
+ //! Not good for:
12
+ //! - Large datasets (use HNSW instead)
13
+
14
+ use std::collections::HashMap;
15
+ use std::sync::Arc;
16
+
17
+ use crate::core::{Id, Point};
18
+ use crate::core::proximity::Proximity;
19
+ use crate::ports::{Near, NearError, NearResult, SearchResult};
20
+
21
+ /// Brute force index - searches all points
22
+ pub struct FlatIndex {
23
+ /// Stored points (ID -> Point)
24
+ points: HashMap<Id, Point>,
25
+
26
+ /// Expected dimensionality
27
+ dimensionality: usize,
28
+
29
+ /// Proximity function to use
30
+ proximity: Arc<dyn Proximity>,
31
+
32
+ /// Whether higher proximity = more similar
33
+ /// true for cosine/dot product, false for euclidean
34
+ higher_is_better: bool,
35
+ }
36
+
37
+ impl FlatIndex {
38
+ /// Create a new flat index
39
+ ///
40
+ /// `higher_is_better` indicates whether higher proximity scores mean more similar.
41
+ /// - `true` for Cosine, DotProduct
42
+ /// - `false` for Euclidean, Manhattan
43
+ pub fn new(
44
+ dimensionality: usize,
45
+ proximity: Arc<dyn Proximity>,
46
+ higher_is_better: bool,
47
+ ) -> Self {
48
+ Self {
49
+ points: HashMap::new(),
50
+ dimensionality,
51
+ proximity,
52
+ higher_is_better,
53
+ }
54
+ }
55
+
56
+ /// Create with cosine similarity (higher = better)
57
+ pub fn cosine(dimensionality: usize) -> Self {
58
+ use crate::core::proximity::Cosine;
59
+ Self::new(dimensionality, Arc::new(Cosine), true)
60
+ }
61
+
62
+ /// Create with euclidean distance (lower = better)
63
+ pub fn euclidean(dimensionality: usize) -> Self {
64
+ use crate::core::proximity::Euclidean;
65
+ Self::new(dimensionality, Arc::new(Euclidean), false)
66
+ }
67
+
68
+ /// Sort results by relevance
69
+ fn sort_results(&self, results: &mut Vec<SearchResult>) {
70
+ if self.higher_is_better {
71
+ // Higher score = more relevant, sort descending
72
+ results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
73
+ } else {
74
+ // Lower score = more relevant, sort ascending
75
+ results.sort_by(|a, b| a.score.partial_cmp(&b.score).unwrap());
76
+ }
77
+ }
78
+ }
79
+
80
+ impl Near for FlatIndex {
81
+ fn near(&self, query: &Point, k: usize) -> NearResult<Vec<SearchResult>> {
82
+ // Check dimensionality
83
+ if query.dimensionality() != self.dimensionality {
84
+ return Err(NearError::DimensionalityMismatch {
85
+ expected: self.dimensionality,
86
+ got: query.dimensionality(),
87
+ });
88
+ }
89
+
90
+ // Compute proximity to all points
91
+ let mut results: Vec<SearchResult> = self
92
+ .points
93
+ .iter()
94
+ .map(|(id, point)| {
95
+ let score = self.proximity.proximity(query, point);
96
+ SearchResult::new(*id, score)
97
+ })
98
+ .collect();
99
+
100
+ // Sort by relevance
101
+ self.sort_results(&mut results);
102
+
103
+ // Take top k
104
+ results.truncate(k);
105
+
106
+ Ok(results)
107
+ }
108
+
109
+ fn within(&self, query: &Point, threshold: f32) -> NearResult<Vec<SearchResult>> {
110
+ // Check dimensionality
111
+ if query.dimensionality() != self.dimensionality {
112
+ return Err(NearError::DimensionalityMismatch {
113
+ expected: self.dimensionality,
114
+ got: query.dimensionality(),
115
+ });
116
+ }
117
+
118
+ // Find all points within threshold
119
+ let mut results: Vec<SearchResult> = self
120
+ .points
121
+ .iter()
122
+ .filter_map(|(id, point)| {
123
+ let score = self.proximity.proximity(query, point);
124
+ let within = if self.higher_is_better {
125
+ score >= threshold
126
+ } else {
127
+ score <= threshold
128
+ };
129
+ if within {
130
+ Some(SearchResult::new(*id, score))
131
+ } else {
132
+ None
133
+ }
134
+ })
135
+ .collect();
136
+
137
+ // Sort by relevance
138
+ self.sort_results(&mut results);
139
+
140
+ Ok(results)
141
+ }
142
+
143
+ fn add(&mut self, id: Id, point: &Point) -> NearResult<()> {
144
+ if point.dimensionality() != self.dimensionality {
145
+ return Err(NearError::DimensionalityMismatch {
146
+ expected: self.dimensionality,
147
+ got: point.dimensionality(),
148
+ });
149
+ }
150
+
151
+ self.points.insert(id, point.clone());
152
+ Ok(())
153
+ }
154
+
155
+ fn remove(&mut self, id: Id) -> NearResult<()> {
156
+ self.points.remove(&id);
157
+ Ok(())
158
+ }
159
+
160
+ fn rebuild(&mut self) -> NearResult<()> {
161
+ // Flat index doesn't need rebuilding
162
+ Ok(())
163
+ }
164
+
165
+ fn is_ready(&self) -> bool {
166
+ true // Always ready
167
+ }
168
+
169
+ fn len(&self) -> usize {
170
+ self.points.len()
171
+ }
172
+ }
173
+
174
+ #[cfg(test)]
175
+ mod tests {
176
+ use super::*;
177
+
178
+ fn setup_index() -> FlatIndex {
179
+ let mut index = FlatIndex::cosine(3);
180
+
181
+ // Add some test points
182
+ let points = vec![
183
+ (Id::from_bytes([1; 16]), Point::new(vec![1.0, 0.0, 0.0])),
184
+ (Id::from_bytes([2; 16]), Point::new(vec![0.0, 1.0, 0.0])),
185
+ (Id::from_bytes([3; 16]), Point::new(vec![0.0, 0.0, 1.0])),
186
+ (Id::from_bytes([4; 16]), Point::new(vec![0.7, 0.7, 0.0]).normalize()),
187
+ ];
188
+
189
+ for (id, point) in points {
190
+ index.add(id, &point).unwrap();
191
+ }
192
+
193
+ index
194
+ }
195
+
196
+ #[test]
197
+ fn test_flat_index_near() {
198
+ let index = setup_index();
199
+
200
+ // Query for points near [1, 0, 0]
201
+ let query = Point::new(vec![1.0, 0.0, 0.0]);
202
+ let results = index.near(&query, 2).unwrap();
203
+
204
+ assert_eq!(results.len(), 2);
205
+
206
+ // First result should be [1, 0, 0] with cosine = 1.0
207
+ assert_eq!(results[0].id, Id::from_bytes([1; 16]));
208
+ assert!((results[0].score - 1.0).abs() < 0.0001);
209
+ }
210
+
211
+ #[test]
212
+ fn test_flat_index_within_cosine() {
213
+ let index = setup_index();
214
+
215
+ // Find all points with cosine > 0.5 to [1, 0, 0]
216
+ let query = Point::new(vec![1.0, 0.0, 0.0]);
217
+ let results = index.within(&query, 0.5).unwrap();
218
+
219
+ // Should find [1,0,0] (cosine=1.0) and [0.7,0.7,0] (cosine≈0.707)
220
+ assert_eq!(results.len(), 2);
221
+ }
222
+
223
+ #[test]
224
+ fn test_flat_index_euclidean() {
225
+ let mut index = FlatIndex::euclidean(2);
226
+
227
+ index.add(Id::from_bytes([1; 16]), &Point::new(vec![0.0, 0.0])).unwrap();
228
+ index.add(Id::from_bytes([2; 16]), &Point::new(vec![1.0, 0.0])).unwrap();
229
+ index.add(Id::from_bytes([3; 16]), &Point::new(vec![5.0, 0.0])).unwrap();
230
+
231
+ let query = Point::new(vec![0.0, 0.0]);
232
+ let results = index.near(&query, 2).unwrap();
233
+
234
+ // Nearest should be [0,0] with distance 0
235
+ assert_eq!(results[0].id, Id::from_bytes([1; 16]));
236
+ assert!((results[0].score - 0.0).abs() < 0.0001);
237
+
238
+ // Second nearest should be [1,0] with distance 1
239
+ assert_eq!(results[1].id, Id::from_bytes([2; 16]));
240
+ assert!((results[1].score - 1.0).abs() < 0.0001);
241
+ }
242
+
243
+ #[test]
244
+ fn test_flat_index_add_remove() {
245
+ let mut index = FlatIndex::cosine(3);
246
+
247
+ let id = Id::from_bytes([1; 16]);
248
+ let point = Point::new(vec![1.0, 0.0, 0.0]);
249
+
250
+ index.add(id, &point).unwrap();
251
+ assert_eq!(index.len(), 1);
252
+
253
+ index.remove(id).unwrap();
254
+ assert_eq!(index.len(), 0);
255
+ }
256
+
257
+ #[test]
258
+ fn test_flat_index_dimensionality_check() {
259
+ let mut index = FlatIndex::cosine(3);
260
+
261
+ let wrong_dims = Point::new(vec![1.0, 0.0]); // 2 dims
262
+ let result = index.add(Id::now(), &wrong_dims);
263
+
264
+ match result {
265
+ Err(NearError::DimensionalityMismatch { expected, got }) => {
266
+ assert_eq!(expected, 3);
267
+ assert_eq!(got, 2);
268
+ }
269
+ _ => panic!("Expected DimensionalityMismatch error"),
270
+ }
271
+ }
272
+
273
+ #[test]
274
+ fn test_flat_index_ready() {
275
+ let index = FlatIndex::cosine(3);
276
+ assert!(index.is_ready());
277
+ }
278
+ }
src/adapters/index/mod.rs ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
//! # Index Adapters
//!
//! Implementations of the Near port for different index backends.
//!
//! Available adapters:
//! - `FlatIndex` - Brute force search (exact, slow for large N)
//! - `HnswIndex` - Hierarchical Navigable Small World (approximate, fast) [TODO]
//!
//! New adapters should implement the `crate::ports::Near` trait, as
//! `FlatIndex` does.

mod flat;

pub use flat::FlatIndex;

// TODO: Add HNSW adapter
// mod hnsw;
// pub use hnsw::HnswIndex;
src/adapters/mod.rs ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
//! # Adapters
//!
//! Swappable implementations of port traits.
//!
//! This is where the hexagonal architecture meets reality:
//! - Storage adapters: Memory, NVMe
//! - Index adapters: Flat (brute force)
//!
//! Each adapter implements one or more port traits (see `crate::ports`,
//! e.g. `Place` for storage and `Near` for indexes).
//! Adapters can be swapped without changing core logic.
//!
//! For advanced index adapters like HAT (Hierarchical Attention Tree),
//! see the `arms-hat` crate.

pub mod storage;
pub mod index;
src/adapters/storage/memory.rs ADDED
@@ -0,0 +1,253 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //! # Memory Storage Adapter
2
+ //!
3
+ //! In-memory storage using HashMap.
4
+ //! Fast, but volatile (data lost on shutdown).
5
+ //!
6
+ //! Good for:
7
+ //! - Testing
8
+ //! - Hot tier storage
9
+ //! - Small datasets
10
+
11
+ use std::collections::HashMap;
12
+
13
+ use crate::core::{Blob, Id, PlacedPoint, Point};
14
+ use crate::ports::{Place, PlaceError, PlaceResult};
15
+
16
+ /// In-memory storage adapter
17
+ pub struct MemoryStorage {
18
+ /// The stored points
19
+ points: HashMap<Id, PlacedPoint>,
20
+
21
+ /// Expected dimensionality
22
+ dimensionality: usize,
23
+
24
+ /// Maximum capacity in bytes (0 = unlimited)
25
+ capacity: usize,
26
+
27
+ /// Current size in bytes
28
+ current_size: usize,
29
+ }
30
+
31
+ impl MemoryStorage {
32
+ /// Create a new memory storage with specified dimensionality
33
+ pub fn new(dimensionality: usize) -> Self {
34
+ Self {
35
+ points: HashMap::new(),
36
+ dimensionality,
37
+ capacity: 0,
38
+ current_size: 0,
39
+ }
40
+ }
41
+
42
+ /// Create with a capacity limit
43
+ pub fn with_capacity(dimensionality: usize, capacity: usize) -> Self {
44
+ Self {
45
+ points: HashMap::new(),
46
+ dimensionality,
47
+ capacity,
48
+ current_size: 0,
49
+ }
50
+ }
51
+
52
+ /// Calculate size of a placed point in bytes
53
+ fn point_size(point: &PlacedPoint) -> usize {
54
+ // Id: 16 bytes
55
+ // Point: dims.len() * 4 bytes (f32)
56
+ // Blob: data.len() bytes
57
+ // Overhead: ~48 bytes for struct padding and HashMap entry
58
+ 16 + (point.point.dimensionality() * 4) + point.blob.size() + 48
59
+ }
60
+ }
61
+
62
+ impl Place for MemoryStorage {
63
+ fn place(&mut self, point: Point, blob: Blob) -> PlaceResult<Id> {
64
+ // Check dimensionality
65
+ if point.dimensionality() != self.dimensionality {
66
+ return Err(PlaceError::DimensionalityMismatch {
67
+ expected: self.dimensionality,
68
+ got: point.dimensionality(),
69
+ });
70
+ }
71
+
72
+ let id = Id::now();
73
+ let placed = PlacedPoint::new(id, point, blob);
74
+
75
+ // Check capacity
76
+ let size = Self::point_size(&placed);
77
+ if self.capacity > 0 && self.current_size + size > self.capacity {
78
+ return Err(PlaceError::CapacityExceeded);
79
+ }
80
+
81
+ self.current_size += size;
82
+ self.points.insert(id, placed);
83
+
84
+ Ok(id)
85
+ }
86
+
87
+ fn place_with_id(&mut self, id: Id, point: Point, blob: Blob) -> PlaceResult<()> {
88
+ // Check dimensionality
89
+ if point.dimensionality() != self.dimensionality {
90
+ return Err(PlaceError::DimensionalityMismatch {
91
+ expected: self.dimensionality,
92
+ got: point.dimensionality(),
93
+ });
94
+ }
95
+
96
+ // Check for duplicates
97
+ if self.points.contains_key(&id) {
98
+ return Err(PlaceError::DuplicateId(id));
99
+ }
100
+
101
+ let placed = PlacedPoint::new(id, point, blob);
102
+
103
+ // Check capacity
104
+ let size = Self::point_size(&placed);
105
+ if self.capacity > 0 && self.current_size + size > self.capacity {
106
+ return Err(PlaceError::CapacityExceeded);
107
+ }
108
+
109
+ self.current_size += size;
110
+ self.points.insert(id, placed);
111
+
112
+ Ok(())
113
+ }
114
+
115
+ fn remove(&mut self, id: Id) -> Option<PlacedPoint> {
116
+ if let Some(placed) = self.points.remove(&id) {
117
+ self.current_size -= Self::point_size(&placed);
118
+ Some(placed)
119
+ } else {
120
+ None
121
+ }
122
+ }
123
+
124
+ fn get(&self, id: Id) -> Option<&PlacedPoint> {
125
+ self.points.get(&id)
126
+ }
127
+
128
+ fn len(&self) -> usize {
129
+ self.points.len()
130
+ }
131
+
132
+ fn iter(&self) -> Box<dyn Iterator<Item = &PlacedPoint> + '_> {
133
+ Box::new(self.points.values())
134
+ }
135
+
136
+ fn size_bytes(&self) -> usize {
137
+ self.current_size
138
+ }
139
+
140
+ fn clear(&mut self) {
141
+ self.points.clear();
142
+ self.current_size = 0;
143
+ }
144
+ }
145
+
146
+ #[cfg(test)]
147
+ mod tests {
148
+ use super::*;
149
+
150
+ #[test]
151
+ fn test_memory_storage_place() {
152
+ let mut storage = MemoryStorage::new(3);
153
+
154
+ let point = Point::new(vec![1.0, 2.0, 3.0]);
155
+ let blob = Blob::from_str("test");
156
+
157
+ let id = storage.place(point, blob).unwrap();
158
+
159
+ assert_eq!(storage.len(), 1);
160
+ assert!(storage.contains(id));
161
+ }
162
+
163
+ #[test]
164
+ fn test_memory_storage_get() {
165
+ let mut storage = MemoryStorage::new(3);
166
+
167
+ let point = Point::new(vec![1.0, 2.0, 3.0]);
168
+ let blob = Blob::from_str("hello");
169
+
170
+ let id = storage.place(point, blob).unwrap();
171
+
172
+ let retrieved = storage.get(id).unwrap();
173
+ assert_eq!(retrieved.blob.as_str(), Some("hello"));
174
+ }
175
+
176
+ #[test]
177
+ fn test_memory_storage_remove() {
178
+ let mut storage = MemoryStorage::new(3);
179
+
180
+ let point = Point::new(vec![1.0, 2.0, 3.0]);
181
+ let id = storage.place(point, Blob::empty()).unwrap();
182
+
183
+ assert_eq!(storage.len(), 1);
184
+
185
+ let removed = storage.remove(id);
186
+ assert!(removed.is_some());
187
+ assert_eq!(storage.len(), 0);
188
+ assert!(!storage.contains(id));
189
+ }
190
+
191
+ #[test]
192
+ fn test_memory_storage_dimensionality_check() {
193
+ let mut storage = MemoryStorage::new(3);
194
+
195
+ let wrong_dims = Point::new(vec![1.0, 2.0]); // 2 dims, expected 3
196
+
197
+ let result = storage.place(wrong_dims, Blob::empty());
198
+
199
+ match result {
200
+ Err(PlaceError::DimensionalityMismatch { expected, got }) => {
201
+ assert_eq!(expected, 3);
202
+ assert_eq!(got, 2);
203
+ }
204
+ _ => panic!("Expected DimensionalityMismatch error"),
205
+ }
206
+ }
207
+
208
+ #[test]
209
+ fn test_memory_storage_capacity() {
210
+ // Small capacity - enough for one point but not two
211
+ // Point size: 16 (id) + 12 (3 f32s) + 10 (blob) + 48 (overhead) = 86 bytes
212
+ let mut storage = MemoryStorage::with_capacity(3, 150);
213
+
214
+ let point = Point::new(vec![1.0, 2.0, 3.0]);
215
+ let blob = Blob::new(vec![0u8; 10]); // Small blob
216
+
217
+ // First one should succeed
218
+ storage.place(point.clone(), blob.clone()).unwrap();
219
+
220
+ // Second should fail due to capacity
221
+ let result = storage.place(point, blob);
222
+ assert!(matches!(result, Err(PlaceError::CapacityExceeded)));
223
+ }
224
+
225
+ #[test]
226
+ fn test_memory_storage_clear() {
227
+ let mut storage = MemoryStorage::new(3);
228
+
229
+ for i in 0..10 {
230
+ let point = Point::new(vec![i as f32, 0.0, 0.0]);
231
+ storage.place(point, Blob::empty()).unwrap();
232
+ }
233
+
234
+ assert_eq!(storage.len(), 10);
235
+ assert!(storage.size_bytes() > 0);
236
+
237
+ storage.clear();
238
+
239
+ assert_eq!(storage.len(), 0);
240
+ assert_eq!(storage.size_bytes(), 0);
241
+ }
242
+
243
+ #[test]
244
+ fn test_memory_storage_iter() {
245
+ let mut storage = MemoryStorage::new(2);
246
+
247
+ storage.place(Point::new(vec![1.0, 0.0]), Blob::empty()).unwrap();
248
+ storage.place(Point::new(vec![0.0, 1.0]), Blob::empty()).unwrap();
249
+
250
+ let points: Vec<_> = storage.iter().collect();
251
+ assert_eq!(points.len(), 2);
252
+ }
253
+ }
src/adapters/storage/mod.rs ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
//! # Storage Adapters
//!
//! Implementations of the Place port for different storage backends.
//!
//! Available adapters:
//! - `MemoryStorage` - In-memory HashMap (fast, volatile)
//! - `NvmeStorage` - Memory-mapped NVMe (persistent, large) [TODO]
//!
//! New adapters should implement the `crate::ports::Place` trait, as
//! `MemoryStorage` does.

mod memory;

pub use memory::MemoryStorage;

// TODO: Add NVMe adapter
// mod nvme;
// pub use nvme::NvmeStorage;
src/core/blob.rs ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
//! # Blob
//!
//! Opaque payload bytes attached to a point.
//!
//! ARMS never interprets this data - it belongs to the caller.
//! It could be: tensor bytes, text, compressed state, anything.
//!
//! Separation of concerns:
//! - Point = WHERE (position in space)
//! - Blob = WHAT (the actual data)

/// Raw data attached to a point
///
/// ARMS stores this opaquely. You define what it means.
#[derive(Clone, Debug, PartialEq)]
pub struct Blob {
    data: Vec<u8>,
}

impl Blob {
    /// Create a new blob from bytes
    ///
    /// # Example
    /// ```
    /// use arms::Blob;
    /// let blob = Blob::new(vec![1, 2, 3, 4]);
    /// assert_eq!(blob.size(), 4);
    /// ```
    pub fn new(data: Vec<u8>) -> Self {
        Blob { data }
    }

    /// Create an empty blob
    ///
    /// Useful when only the position matters, not the payload.
    pub fn empty() -> Self {
        Blob { data: Vec::new() }
    }

    /// Create a blob from a string (UTF-8 bytes)
    ///
    /// # Example
    /// ```
    /// use arms::Blob;
    /// let blob = Blob::from_str("hello");
    /// assert_eq!(blob.as_str(), Some("hello"));
    /// ```
    pub fn from_str(s: &str) -> Self {
        Blob {
            data: Vec::from(s.as_bytes()),
        }
    }

    /// Borrow the raw bytes
    pub fn data(&self) -> &[u8] {
        self.data.as_slice()
    }

    /// Size of the payload in bytes
    pub fn size(&self) -> usize {
        self.data.len()
    }

    /// True when the payload holds no bytes
    pub fn is_empty(&self) -> bool {
        self.size() == 0
    }

    /// View the payload as UTF-8 text, if it is valid UTF-8
    pub fn as_str(&self) -> Option<&str> {
        match std::str::from_utf8(&self.data) {
            Ok(text) => Some(text),
            Err(_) => None,
        }
    }

    /// Consume the blob, yielding the owned byte vector
    pub fn into_inner(self) -> Vec<u8> {
        self.data
    }
}

impl From<Vec<u8>> for Blob {
    fn from(data: Vec<u8>) -> Self {
        Blob { data }
    }
}

impl From<&[u8]> for Blob {
    fn from(bytes: &[u8]) -> Self {
        Blob { data: bytes.to_vec() }
    }
}

impl From<&str> for Blob {
    fn from(text: &str) -> Self {
        Blob::from_str(text)
    }
}

impl From<String> for Blob {
    fn from(text: String) -> Self {
        Blob { data: text.into_bytes() }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_blob_new() {
        let payload = Blob::new(vec![1, 2, 3]);
        assert_eq!(payload.data(), &[1, 2, 3]);
        assert_eq!(payload.size(), 3);
    }

    #[test]
    fn test_blob_empty() {
        let payload = Blob::empty();
        assert!(payload.is_empty());
        assert_eq!(payload.size(), 0);
    }

    #[test]
    fn test_blob_from_str() {
        let payload = Blob::from_str("hello world");
        assert_eq!(payload.as_str(), Some("hello world"));
    }

    #[test]
    fn test_blob_as_str_invalid_utf8() {
        // 0xff / 0xfe are never valid UTF-8 bytes
        let payload = Blob::new(vec![0xff, 0xfe]);
        assert_eq!(payload.as_str(), None);
    }

    #[test]
    fn test_blob_from_conversions() {
        let from_vec: Blob = vec![1, 2, 3].into();
        assert_eq!(from_vec.size(), 3);

        let from_str: Blob = "test".into();
        assert_eq!(from_str.as_str(), Some("test"));

        let from_string: Blob = String::from("test").into();
        assert_eq!(from_string.as_str(), Some("test"));
    }

    #[test]
    fn test_blob_into_inner() {
        let payload = Blob::new(vec![1, 2, 3]);
        assert_eq!(payload.into_inner(), vec![1, 2, 3]);
    }
}
src/core/config.rs ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
//! # Configuration
//!
//! ARMS configuration - define your space.
//!
//! Everything is configurable, not hardcoded:
//! - Dimensionality
//! - Proximity function
//! - Merge function
//! - Tier settings
//!
//! "If we say it's a rock now, in 2 years it can never be carved into a wheel."

use super::proximity::{Cosine, Proximity};
use super::merge::{Mean, Merge};
use std::sync::Arc;

/// Main ARMS configuration
///
/// Defines the dimensional space and default operations.
/// Cheap to clone: the proximity and merge functions are shared via `Arc`.
#[derive(Clone)]
pub struct ArmsConfig {
    /// Dimensionality of the space
    ///
    /// Set this to match your model's hidden size.
    /// Examples: 768 (BERT), 1024 (GPT-2 medium), 4096 (large models)
    pub dimensionality: usize,

    /// Proximity function for similarity calculations
    pub proximity: Arc<dyn Proximity>,

    /// Merge function for hierarchical composition
    pub merge: Arc<dyn Merge>,

    /// Whether to normalize points on insertion
    pub normalize_on_insert: bool,

    /// Tier configuration
    pub tiers: TierConfig,
}

impl ArmsConfig {
    /// Create a new configuration with specified dimensionality
    ///
    /// Uses default proximity (Cosine) and merge (Mean) functions,
    /// with normalization on insert enabled.
    pub fn new(dimensionality: usize) -> Self {
        Self {
            dimensionality,
            proximity: Arc::new(Cosine),
            merge: Arc::new(Mean),
            normalize_on_insert: true,
            tiers: TierConfig::default(),
        }
    }

    /// Set a custom proximity function (builder-style, consumes self)
    pub fn with_proximity<P: Proximity + 'static>(mut self, proximity: P) -> Self {
        self.proximity = Arc::new(proximity);
        self
    }

    /// Set a custom merge function (builder-style, consumes self)
    pub fn with_merge<M: Merge + 'static>(mut self, merge: M) -> Self {
        self.merge = Arc::new(merge);
        self
    }

    /// Set normalization behavior
    pub fn with_normalize(mut self, normalize: bool) -> Self {
        self.normalize_on_insert = normalize;
        self
    }

    /// Set tier configuration
    pub fn with_tiers(mut self, tiers: TierConfig) -> Self {
        self.tiers = tiers;
        self
    }
}

impl Default for ArmsConfig {
    /// Default configuration: 768 dimensions, cosine proximity, mean merge
    fn default() -> Self {
        Self::new(768)
    }
}

/// Tier configuration for storage management
#[derive(Clone, Debug)]
pub struct TierConfig {
    /// Hot tier (RAM) capacity in bytes
    pub hot_capacity: usize,

    /// Warm tier (NVMe) capacity in bytes
    pub warm_capacity: usize,

    /// Number of accesses before promoting to hotter tier
    pub promote_after_accesses: u32,

    /// Milliseconds since last access before evicting to colder tier
    pub evict_after_ms: u64,
}

impl TierConfig {
    /// Create a new tier configuration
    ///
    /// Promotion/eviction thresholds use the same values as `Default`
    /// (3 accesses to promote, 1 hour to evict); only capacities differ.
    pub fn new(hot_capacity: usize, warm_capacity: usize) -> Self {
        Self {
            hot_capacity,
            warm_capacity,
            promote_after_accesses: 3,
            evict_after_ms: 3600 * 1000, // 1 hour
        }
    }

    /// Tiny config for testing (small capacities, aggressive thresholds)
    pub fn tiny() -> Self {
        Self {
            hot_capacity: 1024 * 1024, // 1 MB
            warm_capacity: 10 * 1024 * 1024, // 10 MB
            promote_after_accesses: 2,
            evict_after_ms: 60 * 1000, // 1 minute
        }
    }
}

impl Default for TierConfig {
    fn default() -> Self {
        Self {
            hot_capacity: 1024 * 1024 * 1024, // 1 GB
            warm_capacity: 100 * 1024 * 1024 * 1024, // 100 GB
            promote_after_accesses: 3,
            evict_after_ms: 3600 * 1000, // 1 hour
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::proximity::Euclidean;
    use crate::core::merge::MaxPool;

    #[test]
    fn test_default_config() {
        let config = ArmsConfig::default();
        assert_eq!(config.dimensionality, 768);
        assert!(config.normalize_on_insert);
        assert_eq!(config.proximity.name(), "cosine");
        assert_eq!(config.merge.name(), "mean");
    }

    #[test]
    fn test_custom_config() {
        let config = ArmsConfig::new(4096)
            .with_proximity(Euclidean)
            .with_merge(MaxPool)
            .with_normalize(false);

        assert_eq!(config.dimensionality, 4096);
        assert!(!config.normalize_on_insert);
        assert_eq!(config.proximity.name(), "euclidean");
        assert_eq!(config.merge.name(), "max_pool");
    }

    #[test]
    fn test_tier_config() {
        let tiers = TierConfig::new(1024, 2048);
        assert_eq!(tiers.hot_capacity, 1024);
        assert_eq!(tiers.warm_capacity, 2048);
    }

    #[test]
    fn test_tier_tiny() {
        let tiers = TierConfig::tiny();
        assert_eq!(tiers.hot_capacity, 1024 * 1024);
        assert_eq!(tiers.evict_after_ms, 60 * 1000);
    }
}
src/core/id.rs ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
//! # Id
//!
//! Unique identifier for placed points.
//!
//! Format: 128 bits = [timestamp_ms:48][counter:16][random:64]
//! - Timestamp provides natural temporal ordering
//! - Counter prevents collisions within same millisecond
//! - Random portion adds uniqueness
//! - Sortable by time when compared
//! - No external dependencies (not UUID, just bytes)

use std::sync::atomic::{AtomicU64, Ordering};
use std::time::{SystemTime, UNIX_EPOCH};

/// Global counter for uniqueness within same millisecond
static COUNTER: AtomicU64 = AtomicU64::new(0);

/// Unique identifier for a placed point
///
/// 128 bits, timestamp-prefixed for natural time ordering
/// (derived `Ord` compares bytes lexicographically, and the
/// big-endian timestamp prefix makes that a time order).
#[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
pub struct Id([u8; 16]);

impl Id {
    /// Generate a new Id for the current moment
    ///
    /// Uses current timestamp + counter + pseudo-random bytes for uniqueness.
    pub fn now() -> Self {
        let timestamp = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap()
            .as_millis() as u64;

        // Atomically increment counter for uniqueness
        let counter = COUNTER.fetch_add(1, Ordering::Relaxed);

        let mut bytes = [0u8; 16];

        // Bytes 0..6: low 48 bits of the timestamp, big-endian
        bytes[..6].copy_from_slice(&timestamp.to_be_bytes()[2..]);

        // Bytes 6..8: low 16 bits of the counter - uniqueness within a millisecond
        bytes[6..8].copy_from_slice(&(counter as u16).to_be_bytes());

        // Bytes 8..16: pseudo-random tail derived from timestamp and counter
        // (LCG-style multiply-and-add mix)
        let random_seed = timestamp
            .wrapping_mul(6364136223846793005)
            .wrapping_add(counter);
        bytes[8..].copy_from_slice(&random_seed.to_be_bytes());

        Self(bytes)
    }

    /// Create an Id from raw bytes
    pub fn from_bytes(bytes: [u8; 16]) -> Self {
        Self(bytes)
    }

    /// Get the raw bytes
    pub fn as_bytes(&self) -> &[u8; 16] {
        &self.0
    }

    /// Extract the timestamp component (milliseconds since epoch)
    pub fn timestamp_ms(&self) -> u64 {
        // Reconstruct the 48-bit big-endian prefix into a u64
        let mut buf = [0u8; 8];
        buf[2..].copy_from_slice(&self.0[..6]);
        u64::from_be_bytes(buf)
    }

    /// Create a nil/zero Id (useful for testing)
    pub fn nil() -> Self {
        Self([0u8; 16])
    }

    /// Check if this is a nil Id
    pub fn is_nil(&self) -> bool {
        self.0 == [0u8; 16]
    }
}

impl std::fmt::Display for Id {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Display as a 32-character lowercase hex string
        self.0.iter().try_for_each(|byte| write!(f, "{:02x}", byte))
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::thread;
    use std::time::Duration;

    /// Current time in milliseconds since epoch, for bracketing tests
    fn now_ms() -> u64 {
        SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap()
            .as_millis() as u64
    }

    #[test]
    fn test_id_creation() {
        assert!(!Id::now().is_nil());
    }

    #[test]
    fn test_id_timestamp() {
        let before = now_ms();
        let id = Id::now();
        let after = now_ms();

        let ts = id.timestamp_ms();
        assert!(ts >= before && ts <= after);
    }

    #[test]
    fn test_id_ordering() {
        let earlier = Id::now();
        thread::sleep(Duration::from_millis(2));
        let later = Id::now();

        // Later timestamp must compare greater
        assert!(later > earlier);
    }

    #[test]
    fn test_id_from_bytes() {
        let raw = [1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        assert_eq!(Id::from_bytes(raw).as_bytes(), &raw);
    }

    #[test]
    fn test_id_nil() {
        let nil = Id::nil();
        assert!(nil.is_nil());
        assert_eq!(nil.timestamp_ms(), 0);
    }

    #[test]
    fn test_id_display() {
        let id = Id::from_bytes([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
        assert_eq!(id.to_string(), "000102030405060708090a0b0c0d0e0f");
    }
}
src/core/merge.rs ADDED
@@ -0,0 +1,335 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //! # Merge
2
+ //!
3
+ //! Trait and implementations for composing multiple points into one.
4
+ //!
5
+ //! This is one of the five primitives of ARMS:
6
+ //! `Merge: fn(points) -> point` - Compose together
7
+ //!
8
+ //! Merge is used for hierarchical composition:
9
+ //! - Chunks → Document
10
+ //! - Documents → Session
11
+ //! - Sessions → Domain
12
+ //!
13
+ //! Merge functions are pluggable - use whichever fits your use case.
14
+
15
+ use super::Point;
16
+
17
/// Trait for merging multiple points into one
///
/// Used for hierarchical composition and aggregation.
/// Implementations must be `Send + Sync` so a merge strategy can be shared
/// across threads (e.g. stored in a config behind an `Arc`).
pub trait Merge: Send + Sync {
    /// Merge multiple points into a single point
    ///
    /// All points must have the same dimensionality.
    /// The slice must not be empty.
    ///
    /// # Panics
    /// Implementations in this module panic on an empty slice or on
    /// mismatched dimensionalities.
    fn merge(&self, points: &[Point]) -> Point;

    /// Name of this merge function (for debugging/config)
    fn name(&self) -> &'static str;
}
30
+
31
+ // ============================================================================
32
+ // IMPLEMENTATIONS
33
+ // ============================================================================
34
+
35
+ /// Mean (average) of all points
36
+ ///
37
+ /// The centroid of the input points.
38
+ /// Good default for most hierarchical composition.
39
+ #[derive(Clone, Copy, Debug, Default)]
40
+ pub struct Mean;
41
+
42
+ impl Merge for Mean {
43
+ fn merge(&self, points: &[Point]) -> Point {
44
+ assert!(!points.is_empty(), "Cannot merge empty slice");
45
+
46
+ let dims = points[0].dimensionality();
47
+ let n = points.len() as f32;
48
+
49
+ let mut result = vec![0.0; dims];
50
+ for p in points {
51
+ assert_eq!(
52
+ p.dimensionality(),
53
+ dims,
54
+ "All points must have same dimensionality"
55
+ );
56
+ for (r, d) in result.iter_mut().zip(p.dims()) {
57
+ *r += d / n;
58
+ }
59
+ }
60
+
61
+ Point::new(result)
62
+ }
63
+
64
+ fn name(&self) -> &'static str {
65
+ "mean"
66
+ }
67
+ }
68
+
69
+ /// Weighted mean of points
70
+ ///
71
+ /// Each point contributes proportionally to its weight.
72
+ /// Useful for recency weighting, importance weighting, etc.
73
+ #[derive(Clone, Debug)]
74
+ pub struct WeightedMean {
75
+ weights: Vec<f32>,
76
+ }
77
+
78
+ impl WeightedMean {
79
+ /// Create a new weighted mean with given weights
80
+ ///
81
+ /// Weights will be normalized (divided by sum) during merge.
82
+ pub fn new(weights: Vec<f32>) -> Self {
83
+ Self { weights }
84
+ }
85
+
86
+ /// Create with uniform weights (equivalent to Mean)
87
+ pub fn uniform(n: usize) -> Self {
88
+ Self {
89
+ weights: vec![1.0; n],
90
+ }
91
+ }
92
+
93
+ /// Create with recency weighting (more recent = higher weight)
94
+ ///
95
+ /// `decay` should be in (0, 1). Smaller = faster decay.
96
+ /// First point is oldest, last is most recent.
97
+ pub fn recency(n: usize, decay: f32) -> Self {
98
+ let weights: Vec<f32> = (0..n).map(|i| decay.powi((n - 1 - i) as i32)).collect();
99
+ Self { weights }
100
+ }
101
+ }
102
+
103
+ impl Merge for WeightedMean {
104
+ fn merge(&self, points: &[Point]) -> Point {
105
+ assert!(!points.is_empty(), "Cannot merge empty slice");
106
+ assert_eq!(
107
+ points.len(),
108
+ self.weights.len(),
109
+ "Number of points must match number of weights"
110
+ );
111
+
112
+ let dims = points[0].dimensionality();
113
+ let total_weight: f32 = self.weights.iter().sum();
114
+
115
+ let mut result = vec![0.0; dims];
116
+ for (p, &w) in points.iter().zip(&self.weights) {
117
+ assert_eq!(
118
+ p.dimensionality(),
119
+ dims,
120
+ "All points must have same dimensionality"
121
+ );
122
+ let normalized_w = w / total_weight;
123
+ for (r, d) in result.iter_mut().zip(p.dims()) {
124
+ *r += d * normalized_w;
125
+ }
126
+ }
127
+
128
+ Point::new(result)
129
+ }
130
+
131
+ fn name(&self) -> &'static str {
132
+ "weighted_mean"
133
+ }
134
+ }
135
+
136
+ /// Max pooling across points
137
+ ///
138
+ /// Takes the maximum value of each dimension across all points.
139
+ /// Preserves the strongest activations.
140
+ #[derive(Clone, Copy, Debug, Default)]
141
+ pub struct MaxPool;
142
+
143
+ impl Merge for MaxPool {
144
+ fn merge(&self, points: &[Point]) -> Point {
145
+ assert!(!points.is_empty(), "Cannot merge empty slice");
146
+
147
+ let dims = points[0].dimensionality();
148
+ let mut result = points[0].dims().to_vec();
149
+
150
+ for p in &points[1..] {
151
+ assert_eq!(
152
+ p.dimensionality(),
153
+ dims,
154
+ "All points must have same dimensionality"
155
+ );
156
+ for (r, d) in result.iter_mut().zip(p.dims()) {
157
+ *r = r.max(*d);
158
+ }
159
+ }
160
+
161
+ Point::new(result)
162
+ }
163
+
164
+ fn name(&self) -> &'static str {
165
+ "max_pool"
166
+ }
167
+ }
168
+
169
+ /// Min pooling across points
170
+ ///
171
+ /// Takes the minimum value of each dimension across all points.
172
+ #[derive(Clone, Copy, Debug, Default)]
173
+ pub struct MinPool;
174
+
175
+ impl Merge for MinPool {
176
+ fn merge(&self, points: &[Point]) -> Point {
177
+ assert!(!points.is_empty(), "Cannot merge empty slice");
178
+
179
+ let dims = points[0].dimensionality();
180
+ let mut result = points[0].dims().to_vec();
181
+
182
+ for p in &points[1..] {
183
+ assert_eq!(
184
+ p.dimensionality(),
185
+ dims,
186
+ "All points must have same dimensionality"
187
+ );
188
+ for (r, d) in result.iter_mut().zip(p.dims()) {
189
+ *r = r.min(*d);
190
+ }
191
+ }
192
+
193
+ Point::new(result)
194
+ }
195
+
196
+ fn name(&self) -> &'static str {
197
+ "min_pool"
198
+ }
199
+ }
200
+
201
+ /// Sum of all points (no averaging)
202
+ ///
203
+ /// Simple additive composition.
204
+ #[derive(Clone, Copy, Debug, Default)]
205
+ pub struct Sum;
206
+
207
+ impl Merge for Sum {
208
+ fn merge(&self, points: &[Point]) -> Point {
209
+ assert!(!points.is_empty(), "Cannot merge empty slice");
210
+
211
+ let dims = points[0].dimensionality();
212
+ let mut result = vec![0.0; dims];
213
+
214
+ for p in points {
215
+ assert_eq!(
216
+ p.dimensionality(),
217
+ dims,
218
+ "All points must have same dimensionality"
219
+ );
220
+ for (r, d) in result.iter_mut().zip(p.dims()) {
221
+ *r += d;
222
+ }
223
+ }
224
+
225
+ Point::new(result)
226
+ }
227
+
228
+ fn name(&self) -> &'static str {
229
+ "sum"
230
+ }
231
+ }
232
+
233
#[cfg(test)]
mod tests {
    use super::*;

    // Mean of a single point is the point itself.
    #[test]
    fn test_mean_single() {
        let points = vec![Point::new(vec![1.0, 2.0, 3.0])];
        let merged = Mean.merge(&points);
        assert_eq!(merged.dims(), &[1.0, 2.0, 3.0]);
    }

    #[test]
    fn test_mean_multiple() {
        let points = vec![
            Point::new(vec![1.0, 2.0]),
            Point::new(vec![3.0, 4.0]),
        ];
        let merged = Mean.merge(&points);
        assert_eq!(merged.dims(), &[2.0, 3.0]);
    }

    #[test]
    fn test_weighted_mean() {
        let points = vec![
            Point::new(vec![0.0, 0.0]),
            Point::new(vec![10.0, 10.0]),
        ];
        // Weight second point 3x more than first
        let merger = WeightedMean::new(vec![1.0, 3.0]);
        let merged = merger.merge(&points);
        // (0*0.25 + 10*0.75, 0*0.25 + 10*0.75) = (7.5, 7.5)
        assert!((merged.dims()[0] - 7.5).abs() < 0.0001);
        assert!((merged.dims()[1] - 7.5).abs() < 0.0001);
    }

    // Recency weights decay geometrically toward older (earlier) points.
    #[test]
    fn test_weighted_mean_recency() {
        let merger = WeightedMean::recency(3, 0.5);
        // decay = 0.5, n = 3
        // weights: [0.5^2, 0.5^1, 0.5^0] = [0.25, 0.5, 1.0]
        assert_eq!(merger.weights.len(), 3);
        assert!((merger.weights[0] - 0.25).abs() < 0.0001);
        assert!((merger.weights[1] - 0.5).abs() < 0.0001);
        assert!((merger.weights[2] - 1.0).abs() < 0.0001);
    }

    #[test]
    fn test_max_pool() {
        let points = vec![
            Point::new(vec![1.0, 5.0, 2.0]),
            Point::new(vec![3.0, 2.0, 4.0]),
            Point::new(vec![2.0, 3.0, 1.0]),
        ];
        let merged = MaxPool.merge(&points);
        assert_eq!(merged.dims(), &[3.0, 5.0, 4.0]);
    }

    #[test]
    fn test_min_pool() {
        let points = vec![
            Point::new(vec![1.0, 5.0, 2.0]),
            Point::new(vec![3.0, 2.0, 4.0]),
            Point::new(vec![2.0, 3.0, 1.0]),
        ];
        let merged = MinPool.merge(&points);
        assert_eq!(merged.dims(), &[1.0, 2.0, 1.0]);
    }

    #[test]
    fn test_sum() {
        let points = vec![
            Point::new(vec![1.0, 2.0]),
            Point::new(vec![3.0, 4.0]),
        ];
        let merged = Sum.merge(&points);
        assert_eq!(merged.dims(), &[4.0, 6.0]);
    }

    // `name()` strings are part of the config/debug surface; pin them.
    #[test]
    fn test_merge_names() {
        assert_eq!(Mean.name(), "mean");
        assert_eq!(MaxPool.name(), "max_pool");
        assert_eq!(MinPool.name(), "min_pool");
        assert_eq!(Sum.name(), "sum");
    }

    #[test]
    #[should_panic(expected = "Cannot merge empty")]
    fn test_merge_empty_panics() {
        let points: Vec<Point> = vec![];
        Mean.merge(&points);
    }

    #[test]
    #[should_panic(expected = "same dimensionality")]
    fn test_merge_dimension_mismatch_panics() {
        let points = vec![
            Point::new(vec![1.0, 2.0]),
            Point::new(vec![1.0, 2.0, 3.0]),
        ];
        Mean.merge(&points);
    }
}
src/core/mod.rs ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //! # Core Domain
2
+ //!
3
+ //! Pure math, no I/O. The foundation of ARMS.
4
+ //!
5
+ //! This module contains the fundamental types and operations:
6
+ //! - `Point` - A position in dimensional space
7
+ //! - `Id` - Unique identifier for placed points
8
+ //! - `Blob` - Raw payload data
9
+ //! - `Proximity` - Trait for measuring relatedness
10
+ //! - `Merge` - Trait for composing points
11
+ //!
12
+ //! ## Design Principles
13
+ //!
14
+ //! - All functions are pure (deterministic, no side effects)
15
+ //! - No I/O operations
16
+ //! - No external dependencies beyond std
17
+ //! - Fully testable in isolation
18
+
19
+ mod point;
20
+ mod id;
21
+ mod blob;
22
+ pub mod proximity;
23
+ pub mod merge;
24
+ pub mod config;
25
+
26
+ // Re-exports
27
+ pub use point::Point;
28
+ pub use id::Id;
29
+ pub use blob::Blob;
30
+
31
/// A point that has been placed in the space
///
/// Bundles the three pieces assigned at insertion time: identity,
/// position, and payload. `Clone` is derived so placed points can be
/// copied out of storage; note the clone copies the blob's bytes.
#[derive(Clone)]
pub struct PlacedPoint {
    /// Unique identifier
    pub id: Id,
    /// Position in dimensional space
    pub point: Point,
    /// Attached payload
    pub blob: Blob,
}
41
+
42
+ impl PlacedPoint {
43
+ /// Create a new placed point
44
+ pub fn new(id: Id, point: Point, blob: Blob) -> Self {
45
+ Self { id, point, blob }
46
+ }
47
+ }
48
+
49
#[cfg(test)]
mod tests {
    use super::*;

    // Smoke test: a PlacedPoint preserves the dimensionality of its point
    // and the size of its blob.
    #[test]
    fn test_placed_point_creation() {
        let id = Id::now();
        let point = Point::new(vec![1.0, 2.0, 3.0]);
        let blob = Blob::new(vec![1, 2, 3]);

        // NOTE(review): `point.clone()` is redundant here — `point` is not
        // used again afterward; `point` could be moved directly.
        let placed = PlacedPoint::new(id, point.clone(), blob);

        assert_eq!(placed.point.dimensionality(), 3);
        assert_eq!(placed.blob.size(), 3);
    }
}
src/core/point.rs ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //! # Point
2
+ //!
3
+ //! A position in dimensional space. The fundamental primitive.
4
+ //!
5
+ //! Dimensionality is NOT fixed - configure it for your model.
6
+ //! 768-dim, 1024-dim, 4096-dim, or any size you need.
7
+ //!
8
+ //! The point IS the thought's position.
9
+ //! The position IS its relationship to all other thoughts.
10
+
11
/// A point in dimensional space
///
/// Thin wrapper over a `Vec<f32>`; the dimensionality is simply the
/// vector's length and is fixed per instance, not per type.
#[derive(Clone, Debug, PartialEq)]
pub struct Point {
    // Coordinates, one f32 per dimension. Private: mutate via `dims_mut`.
    dims: Vec<f32>,
}
16
+
17
impl Point {
    // NOTE(review): the doc examples below import `arms::Point`, but the
    // package is named `arms-core` (default lib target `arms_core`). Confirm
    // a `[lib] name = "arms"` override exists in Cargo.toml, otherwise these
    // doctests will fail to resolve.

    /// Create a new point from a vector of dimensions
    ///
    /// # Example
    /// ```
    /// use arms::Point;
    /// let p = Point::new(vec![1.0, 2.0, 3.0]);
    /// assert_eq!(p.dimensionality(), 3);
    /// ```
    pub fn new(dims: Vec<f32>) -> Self {
        Self { dims }
    }

    /// Create an origin point (all zeros) of given dimensionality
    ///
    /// # Example
    /// ```
    /// use arms::Point;
    /// let origin = Point::origin(768);
    /// assert_eq!(origin.dimensionality(), 768);
    /// assert!(origin.dims().iter().all(|&x| x == 0.0));
    /// ```
    pub fn origin(dims: usize) -> Self {
        Self {
            dims: vec![0.0; dims],
        }
    }

    /// Get the dimensionality of this point
    pub fn dimensionality(&self) -> usize {
        self.dims.len()
    }

    /// Access the dimensions as a slice
    pub fn dims(&self) -> &[f32] {
        &self.dims
    }

    /// Mutable access to dimensions
    ///
    /// Length (and thus dimensionality) cannot change through this slice.
    pub fn dims_mut(&mut self) -> &mut [f32] {
        &mut self.dims
    }

    /// Calculate the magnitude (L2 norm) of this point
    ///
    /// # Example
    /// ```
    /// use arms::Point;
    /// let p = Point::new(vec![3.0, 4.0]);
    /// assert!((p.magnitude() - 5.0).abs() < 0.0001);
    /// ```
    pub fn magnitude(&self) -> f32 {
        self.dims.iter().map(|x| x * x).sum::<f32>().sqrt()
    }

    /// Check if this point is normalized (magnitude ≈ 1.0)
    ///
    /// Uses a fixed absolute tolerance of 1e-3 on the magnitude.
    pub fn is_normalized(&self) -> bool {
        let mag = self.magnitude();
        (mag - 1.0).abs() < 0.001
    }

    /// Return a normalized copy of this point
    ///
    /// If magnitude is zero, returns a clone of self.
    ///
    /// # Example
    /// ```
    /// use arms::Point;
    /// let p = Point::new(vec![3.0, 4.0]);
    /// let normalized = p.normalize();
    /// assert!(normalized.is_normalized());
    /// ```
    pub fn normalize(&self) -> Self {
        let mag = self.magnitude();
        // Zero vector has no direction; normalizing it is a no-op by design.
        if mag == 0.0 {
            return self.clone();
        }
        Self {
            dims: self.dims.iter().map(|x| x / mag).collect(),
        }
    }

    /// Add another point to this one (element-wise)
    ///
    /// # Panics
    /// Panics if the two points have different dimensionalities.
    pub fn add(&self, other: &Point) -> Self {
        assert_eq!(
            self.dimensionality(),
            other.dimensionality(),
            "Points must have same dimensionality"
        );
        Self {
            dims: self
                .dims
                .iter()
                .zip(other.dims.iter())
                .map(|(a, b)| a + b)
                .collect(),
        }
    }

    /// Scale this point by a scalar
    pub fn scale(&self, scalar: f32) -> Self {
        Self {
            dims: self.dims.iter().map(|x| x * scalar).collect(),
        }
    }
}
123
+
124
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_new_point() {
        let p = Point::new(vec![1.0, 2.0, 3.0]);
        assert_eq!(p.dimensionality(), 3);
        assert_eq!(p.dims(), &[1.0, 2.0, 3.0]);
    }

    #[test]
    fn test_origin() {
        let origin = Point::origin(768);
        assert_eq!(origin.dimensionality(), 768);
        assert!(origin.dims().iter().all(|&x| x == 0.0));
    }

    // Classic 3-4-5 triangle as an exact-ish L2 norm check.
    #[test]
    fn test_magnitude() {
        let p = Point::new(vec![3.0, 4.0]);
        assert!((p.magnitude() - 5.0).abs() < 0.0001);
    }

    #[test]
    fn test_normalize() {
        let p = Point::new(vec![3.0, 4.0]);
        let normalized = p.normalize();
        assert!(normalized.is_normalized());
        assert!((normalized.dims()[0] - 0.6).abs() < 0.0001);
        assert!((normalized.dims()[1] - 0.8).abs() < 0.0001);
    }

    // The documented zero-vector special case: normalize is a no-op.
    #[test]
    fn test_normalize_zero() {
        let p = Point::origin(3);
        let normalized = p.normalize();
        assert_eq!(normalized.dims(), &[0.0, 0.0, 0.0]);
    }

    #[test]
    fn test_add() {
        let a = Point::new(vec![1.0, 2.0]);
        let b = Point::new(vec![3.0, 4.0]);
        let c = a.add(&b);
        assert_eq!(c.dims(), &[4.0, 6.0]);
    }

    #[test]
    fn test_scale() {
        let p = Point::new(vec![1.0, 2.0]);
        let scaled = p.scale(2.0);
        assert_eq!(scaled.dims(), &[2.0, 4.0]);
    }

    #[test]
    #[should_panic(expected = "same dimensionality")]
    fn test_add_different_dims_panics() {
        let a = Point::new(vec![1.0, 2.0]);
        let b = Point::new(vec![1.0, 2.0, 3.0]);
        let _ = a.add(&b);
    }
}
src/core/proximity.rs ADDED
@@ -0,0 +1,261 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //! # Proximity
2
+ //!
3
+ //! Trait and implementations for measuring how related two points are.
4
+ //!
5
+ //! This is one of the five primitives of ARMS:
6
+ //! `Proximity: fn(a, b) -> f32` - How related?
7
+ //!
8
+ //! Proximity functions are pluggable - use whichever fits your use case.
9
+
10
+ use super::Point;
11
+
12
/// Trait for measuring proximity between points
///
/// Higher values typically mean more similar/related.
/// The exact semantics depend on the implementation: similarity measures
/// (`Cosine`, `DotProduct`) are higher-is-closer, while distance measures
/// (`Euclidean`, `Manhattan`, ...) are lower-is-closer.
pub trait Proximity: Send + Sync {
    /// Compute proximity between two points
    ///
    /// Both points must have the same dimensionality.
    ///
    /// # Panics
    /// Implementations in this module panic on mismatched dimensionalities.
    fn proximity(&self, a: &Point, b: &Point) -> f32;

    /// Name of this proximity function (for debugging/config)
    fn name(&self) -> &'static str;
}
25
+
26
+ // ============================================================================
27
+ // IMPLEMENTATIONS
28
+ // ============================================================================
29
+
30
+ /// Cosine similarity
31
+ ///
32
+ /// Measures the cosine of the angle between two vectors.
33
+ /// Returns a value in [-1, 1] where 1 means identical direction.
34
+ ///
35
+ /// Best for: Normalized vectors, semantic similarity.
36
+ #[derive(Clone, Copy, Debug, Default)]
37
+ pub struct Cosine;
38
+
39
+ impl Proximity for Cosine {
40
+ fn proximity(&self, a: &Point, b: &Point) -> f32 {
41
+ assert_eq!(
42
+ a.dimensionality(),
43
+ b.dimensionality(),
44
+ "Points must have same dimensionality"
45
+ );
46
+
47
+ let dot: f32 = a
48
+ .dims()
49
+ .iter()
50
+ .zip(b.dims().iter())
51
+ .map(|(x, y)| x * y)
52
+ .sum();
53
+
54
+ let mag_a = a.magnitude();
55
+ let mag_b = b.magnitude();
56
+
57
+ if mag_a == 0.0 || mag_b == 0.0 {
58
+ return 0.0;
59
+ }
60
+
61
+ dot / (mag_a * mag_b)
62
+ }
63
+
64
+ fn name(&self) -> &'static str {
65
+ "cosine"
66
+ }
67
+ }
68
+
69
+ /// Euclidean distance
70
+ ///
71
+ /// The straight-line distance between two points.
72
+ /// Returns a value in [0, ∞) where 0 means identical.
73
+ ///
74
+ /// Note: This returns DISTANCE, not similarity.
75
+ /// Lower values = more similar.
76
+ #[derive(Clone, Copy, Debug, Default)]
77
+ pub struct Euclidean;
78
+
79
+ impl Proximity for Euclidean {
80
+ fn proximity(&self, a: &Point, b: &Point) -> f32 {
81
+ assert_eq!(
82
+ a.dimensionality(),
83
+ b.dimensionality(),
84
+ "Points must have same dimensionality"
85
+ );
86
+
87
+ let dist_sq: f32 = a
88
+ .dims()
89
+ .iter()
90
+ .zip(b.dims().iter())
91
+ .map(|(x, y)| (x - y).powi(2))
92
+ .sum();
93
+
94
+ dist_sq.sqrt()
95
+ }
96
+
97
+ fn name(&self) -> &'static str {
98
+ "euclidean"
99
+ }
100
+ }
101
+
102
+ /// Squared Euclidean distance
103
+ ///
104
+ /// Same ordering as Euclidean but faster (no sqrt).
105
+ /// Use when you only need to compare distances, not absolute values.
106
+ #[derive(Clone, Copy, Debug, Default)]
107
+ pub struct EuclideanSquared;
108
+
109
+ impl Proximity for EuclideanSquared {
110
+ fn proximity(&self, a: &Point, b: &Point) -> f32 {
111
+ assert_eq!(
112
+ a.dimensionality(),
113
+ b.dimensionality(),
114
+ "Points must have same dimensionality"
115
+ );
116
+
117
+ a.dims()
118
+ .iter()
119
+ .zip(b.dims().iter())
120
+ .map(|(x, y)| (x - y).powi(2))
121
+ .sum()
122
+ }
123
+
124
+ fn name(&self) -> &'static str {
125
+ "euclidean_squared"
126
+ }
127
+ }
128
+
129
+ /// Dot product
130
+ ///
131
+ /// The raw dot product without normalization.
132
+ /// Returns a value that depends on magnitudes.
133
+ ///
134
+ /// Best for: When magnitude matters, not just direction.
135
+ #[derive(Clone, Copy, Debug, Default)]
136
+ pub struct DotProduct;
137
+
138
+ impl Proximity for DotProduct {
139
+ fn proximity(&self, a: &Point, b: &Point) -> f32 {
140
+ assert_eq!(
141
+ a.dimensionality(),
142
+ b.dimensionality(),
143
+ "Points must have same dimensionality"
144
+ );
145
+
146
+ a.dims()
147
+ .iter()
148
+ .zip(b.dims().iter())
149
+ .map(|(x, y)| x * y)
150
+ .sum()
151
+ }
152
+
153
+ fn name(&self) -> &'static str {
154
+ "dot_product"
155
+ }
156
+ }
157
+
158
+ /// Manhattan (L1) distance
159
+ ///
160
+ /// Sum of absolute differences along each dimension.
161
+ /// Returns a value in [0, ∞) where 0 means identical.
162
+ #[derive(Clone, Copy, Debug, Default)]
163
+ pub struct Manhattan;
164
+
165
+ impl Proximity for Manhattan {
166
+ fn proximity(&self, a: &Point, b: &Point) -> f32 {
167
+ assert_eq!(
168
+ a.dimensionality(),
169
+ b.dimensionality(),
170
+ "Points must have same dimensionality"
171
+ );
172
+
173
+ a.dims()
174
+ .iter()
175
+ .zip(b.dims().iter())
176
+ .map(|(x, y)| (x - y).abs())
177
+ .sum()
178
+ }
179
+
180
+ fn name(&self) -> &'static str {
181
+ "manhattan"
182
+ }
183
+ }
184
+
185
#[cfg(test)]
mod tests {
    use super::*;

    // Parallel unit vectors → cosine of exactly 1.
    #[test]
    fn test_cosine_identical() {
        let a = Point::new(vec![1.0, 0.0, 0.0]);
        let b = Point::new(vec![1.0, 0.0, 0.0]);
        let cos = Cosine.proximity(&a, &b);
        assert!((cos - 1.0).abs() < 0.0001);
    }

    // Anti-parallel vectors → cosine of -1.
    #[test]
    fn test_cosine_opposite() {
        let a = Point::new(vec![1.0, 0.0, 0.0]);
        let b = Point::new(vec![-1.0, 0.0, 0.0]);
        let cos = Cosine.proximity(&a, &b);
        assert!((cos - (-1.0)).abs() < 0.0001);
    }

    // Orthogonal vectors → cosine of 0.
    #[test]
    fn test_cosine_orthogonal() {
        let a = Point::new(vec![1.0, 0.0, 0.0]);
        let b = Point::new(vec![0.0, 1.0, 0.0]);
        let cos = Cosine.proximity(&a, &b);
        assert!(cos.abs() < 0.0001);
    }

    // 3-4-5 triangle again: L2 distance from origin.
    #[test]
    fn test_euclidean() {
        let a = Point::new(vec![0.0, 0.0]);
        let b = Point::new(vec![3.0, 4.0]);
        let dist = Euclidean.proximity(&a, &b);
        assert!((dist - 5.0).abs() < 0.0001);
    }

    #[test]
    fn test_euclidean_squared() {
        let a = Point::new(vec![0.0, 0.0]);
        let b = Point::new(vec![3.0, 4.0]);
        let dist_sq = EuclideanSquared.proximity(&a, &b);
        assert!((dist_sq - 25.0).abs() < 0.0001);
    }

    #[test]
    fn test_dot_product() {
        let a = Point::new(vec![1.0, 2.0, 3.0]);
        let b = Point::new(vec![4.0, 5.0, 6.0]);
        let dot = DotProduct.proximity(&a, &b);
        // 1*4 + 2*5 + 3*6 = 4 + 10 + 18 = 32
        assert!((dot - 32.0).abs() < 0.0001);
    }

    // L1: |3-0| + |4-0| = 7.
    #[test]
    fn test_manhattan() {
        let a = Point::new(vec![0.0, 0.0]);
        let b = Point::new(vec![3.0, 4.0]);
        let dist = Manhattan.proximity(&a, &b);
        assert!((dist - 7.0).abs() < 0.0001);
    }

    // `name()` strings are part of the config/debug surface; pin them.
    #[test]
    fn test_proximity_names() {
        assert_eq!(Cosine.name(), "cosine");
        assert_eq!(Euclidean.name(), "euclidean");
        assert_eq!(DotProduct.name(), "dot_product");
        assert_eq!(Manhattan.name(), "manhattan");
    }

    #[test]
    #[should_panic(expected = "same dimensionality")]
    fn test_dimension_mismatch_panics() {
        let a = Point::new(vec![1.0, 2.0]);
        let b = Point::new(vec![1.0, 2.0, 3.0]);
        Cosine.proximity(&a, &b);
    }
}
src/engine/arms.rs ADDED
@@ -0,0 +1,335 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //! # Arms Engine
2
+ //!
3
+ //! The main ARMS orchestrator.
4
+ //!
5
+ //! This struct wires together:
6
+ //! - Storage (Place port)
7
+ //! - Index (Near port)
8
+ //! - Configuration
9
+ //!
10
+ //! And exposes a unified API for storing and retrieving points.
11
+
12
+ use crate::core::{Blob, Id, PlacedPoint, Point};
13
+ use crate::core::config::ArmsConfig;
14
+ use crate::ports::{Near, NearResult, Place, PlaceResult, SearchResult};
15
+ use crate::adapters::storage::MemoryStorage;
16
+ use crate::adapters::index::FlatIndex;
17
+
18
/// The main ARMS engine
///
/// Orchestrates storage and indexing with a unified API.
/// Owns its adapters behind trait objects so backends can be swapped
/// at construction time (hexagonal architecture).
pub struct Arms {
    /// Configuration
    config: ArmsConfig,

    /// Storage backend (Place port)
    storage: Box<dyn Place>,

    /// Index backend (Near port)
    index: Box<dyn Near>,
}
31
+
32
+ impl Arms {
33
+ /// Create a new ARMS instance with default adapters
34
+ ///
35
+ /// Uses MemoryStorage and FlatIndex.
36
+ /// For production, use `Arms::with_adapters` with appropriate backends.
37
+ pub fn new(config: ArmsConfig) -> Self {
38
+ let storage = Box::new(MemoryStorage::new(config.dimensionality));
39
+ let index = Box::new(FlatIndex::new(
40
+ config.dimensionality,
41
+ config.proximity.clone(),
42
+ true, // Assuming cosine-like similarity by default
43
+ ));
44
+
45
+ Self {
46
+ config,
47
+ storage,
48
+ index,
49
+ }
50
+ }
51
+
52
+ /// Create with custom adapters
53
+ pub fn with_adapters(
54
+ config: ArmsConfig,
55
+ storage: Box<dyn Place>,
56
+ index: Box<dyn Near>,
57
+ ) -> Self {
58
+ Self {
59
+ config,
60
+ storage,
61
+ index,
62
+ }
63
+ }
64
+
65
+ /// Get the configuration
66
+ pub fn config(&self) -> &ArmsConfig {
67
+ &self.config
68
+ }
69
+
70
+ /// Get the dimensionality of this space
71
+ pub fn dimensionality(&self) -> usize {
72
+ self.config.dimensionality
73
+ }
74
+
75
+ // ========================================================================
76
+ // PLACE OPERATIONS
77
+ // ========================================================================
78
+
79
+ /// Place a point in the space
80
+ ///
81
+ /// The point will be normalized if configured to do so.
82
+ /// Returns the assigned ID.
83
+ pub fn place(&mut self, point: Point, blob: Blob) -> PlaceResult<Id> {
84
+ // Normalize if configured
85
+ let point = if self.config.normalize_on_insert {
86
+ point.normalize()
87
+ } else {
88
+ point
89
+ };
90
+
91
+ // Store in storage
92
+ let id = self.storage.place(point.clone(), blob)?;
93
+
94
+ // Add to index
95
+ if let Err(e) = self.index.add(id, &point) {
96
+ // Rollback storage if index fails
97
+ self.storage.remove(id);
98
+ return Err(crate::ports::PlaceError::StorageError(format!(
99
+ "Index error: {:?}",
100
+ e
101
+ )));
102
+ }
103
+
104
+ Ok(id)
105
+ }
106
+
107
+ /// Place multiple points at once
108
+ pub fn place_batch(&mut self, items: Vec<(Point, Blob)>) -> Vec<PlaceResult<Id>> {
109
+ items
110
+ .into_iter()
111
+ .map(|(point, blob)| self.place(point, blob))
112
+ .collect()
113
+ }
114
+
115
+ /// Remove a point from the space
116
+ pub fn remove(&mut self, id: Id) -> Option<PlacedPoint> {
117
+ // Remove from index first
118
+ let _ = self.index.remove(id);
119
+
120
+ // Then from storage
121
+ self.storage.remove(id)
122
+ }
123
+
124
+ /// Get a point by ID
125
+ pub fn get(&self, id: Id) -> Option<&PlacedPoint> {
126
+ self.storage.get(id)
127
+ }
128
+
129
+ /// Check if a point exists
130
+ pub fn contains(&self, id: Id) -> bool {
131
+ self.storage.contains(id)
132
+ }
133
+
134
+ /// Get the number of stored points
135
+ pub fn len(&self) -> usize {
136
+ self.storage.len()
137
+ }
138
+
139
+ /// Check if the space is empty
140
+ pub fn is_empty(&self) -> bool {
141
+ self.storage.is_empty()
142
+ }
143
+
144
+ /// Clear all points
145
+ pub fn clear(&mut self) {
146
+ self.storage.clear();
147
+ let _ = self.index.rebuild(); // Reset index
148
+ }
149
+
150
+ // ========================================================================
151
+ // NEAR OPERATIONS
152
+ // ========================================================================
153
+
154
+ /// Find k nearest points to query
155
+ pub fn near(&self, query: &Point, k: usize) -> NearResult<Vec<SearchResult>> {
156
+ // Normalize query if configured
157
+ let query = if self.config.normalize_on_insert {
158
+ query.normalize()
159
+ } else {
160
+ query.clone()
161
+ };
162
+
163
+ self.index.near(&query, k)
164
+ }
165
+
166
+ /// Find all points within threshold
167
+ pub fn within(&self, query: &Point, threshold: f32) -> NearResult<Vec<SearchResult>> {
168
+ let query = if self.config.normalize_on_insert {
169
+ query.normalize()
170
+ } else {
171
+ query.clone()
172
+ };
173
+
174
+ self.index.within(&query, threshold)
175
+ }
176
+
177
+ /// Find and retrieve k nearest points (with full data)
178
+ pub fn near_with_data(&self, query: &Point, k: usize) -> NearResult<Vec<(&PlacedPoint, f32)>> {
179
+ let results = self.near(query, k)?;
180
+
181
+ Ok(results
182
+ .into_iter()
183
+ .filter_map(|r| self.storage.get(r.id).map(|p| (p, r.score)))
184
+ .collect())
185
+ }
186
+
187
+ // ========================================================================
188
+ // MERGE OPERATIONS
189
+ // ========================================================================
190
+
191
+ /// Merge multiple points into one using the configured merge function
192
+ pub fn merge(&self, points: &[Point]) -> Point {
193
+ self.config.merge.merge(points)
194
+ }
195
+
196
+ /// Compute proximity between two points
197
+ pub fn proximity(&self, a: &Point, b: &Point) -> f32 {
198
+ self.config.proximity.proximity(a, b)
199
+ }
200
+
201
+ // ========================================================================
202
+ // STATS
203
+ // ========================================================================
204
+
205
+ /// Get storage size in bytes
206
+ pub fn size_bytes(&self) -> usize {
207
+ self.storage.size_bytes()
208
+ }
209
+
210
+ /// Get index stats
211
+ pub fn index_len(&self) -> usize {
212
+ self.index.len()
213
+ }
214
+
215
+ /// Check if index is ready
216
+ pub fn is_ready(&self) -> bool {
217
+ self.index.is_ready()
218
+ }
219
+ }
220
+
221
#[cfg(test)]
mod tests {
    use super::*;

    // All tests use a tiny 3-dimensional space with default adapters.
    fn create_test_arms() -> Arms {
        Arms::new(ArmsConfig::new(3))
    }

    #[test]
    fn test_arms_place_and_get() {
        let mut arms = create_test_arms();

        let point = Point::new(vec![1.0, 0.0, 0.0]);
        let blob = Blob::from_str("test data");

        let id = arms.place(point, blob).unwrap();

        let retrieved = arms.get(id).unwrap();
        assert_eq!(retrieved.blob.as_str(), Some("test data"));
    }

    #[test]
    fn test_arms_near() {
        let mut arms = create_test_arms();

        // Add some points
        arms.place(Point::new(vec![1.0, 0.0, 0.0]), Blob::from_str("x")).unwrap();
        arms.place(Point::new(vec![0.0, 1.0, 0.0]), Blob::from_str("y")).unwrap();
        arms.place(Point::new(vec![0.0, 0.0, 1.0]), Blob::from_str("z")).unwrap();

        // Query
        let query = Point::new(vec![1.0, 0.0, 0.0]);
        let results = arms.near(&query, 2).unwrap();

        assert_eq!(results.len(), 2);
        // First result should have highest similarity
        assert!(results[0].score > results[1].score);
    }

    #[test]
    fn test_arms_near_with_data() {
        let mut arms = create_test_arms();

        arms.place(Point::new(vec![1.0, 0.0, 0.0]), Blob::from_str("x")).unwrap();
        arms.place(Point::new(vec![0.0, 1.0, 0.0]), Blob::from_str("y")).unwrap();

        let query = Point::new(vec![1.0, 0.0, 0.0]);
        let results = arms.near_with_data(&query, 1).unwrap();

        assert_eq!(results.len(), 1);
        assert_eq!(results[0].0.blob.as_str(), Some("x"));
    }

    // Removal must be reflected by both `contains` and `len`.
    #[test]
    fn test_arms_remove() {
        let mut arms = create_test_arms();

        let id = arms.place(Point::new(vec![1.0, 0.0, 0.0]), Blob::empty()).unwrap();

        assert!(arms.contains(id));
        assert_eq!(arms.len(), 1);

        arms.remove(id);

        assert!(!arms.contains(id));
        assert_eq!(arms.len(), 0);
    }

    // Default config uses Mean as the merge function.
    #[test]
    fn test_arms_merge() {
        let arms = create_test_arms();

        let points = vec![
            Point::new(vec![1.0, 0.0, 0.0]),
            Point::new(vec![0.0, 1.0, 0.0]),
        ];

        let merged = arms.merge(&points);

        // Mean of [1,0,0] and [0,1,0] = [0.5, 0.5, 0]
        assert!((merged.dims()[0] - 0.5).abs() < 0.0001);
        assert!((merged.dims()[1] - 0.5).abs() < 0.0001);
        assert!((merged.dims()[2] - 0.0).abs() < 0.0001);
    }

    #[test]
    fn test_arms_clear() {
        let mut arms = create_test_arms();

        for i in 0..10 {
            arms.place(Point::new(vec![i as f32, 0.0, 0.0]), Blob::empty()).unwrap();
        }

        assert_eq!(arms.len(), 10);

        arms.clear();

        assert_eq!(arms.len(), 0);
        assert!(arms.is_empty());
    }

    // Confirms the default config has `normalize_on_insert` enabled.
    #[test]
    fn test_arms_normalizes_on_insert() {
        let mut arms = create_test_arms();

        // Insert a non-normalized point
        let point = Point::new(vec![3.0, 4.0, 0.0]); // magnitude = 5
        let id = arms.place(point, Blob::empty()).unwrap();

        let retrieved = arms.get(id).unwrap();

        // Should be normalized
        assert!(retrieved.point.is_normalized());
    }
}
src/engine/mod.rs ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
//! # Engine
//!
//! The orchestration layer that wires everything together.
//!
//! This is where:
//! - Configuration is applied
//! - Adapters are connected to ports
//! - The unified ARMS interface is exposed

// Implementation lives in a private module; `Arms` is the only public item.
mod arms;

pub use arms::Arms;
src/lib.rs ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
//! # ARMS - Attention Reasoning Memory Store
//!
//! > "The hippocampus of artificial minds"
//!
//! ARMS is a spatial memory fabric for AI models. It stores computed attention
//! states at their native dimensional coordinates, enabling instant retrieval
//! by proximity rather than traditional indexing.
//!
//! ## Philosophy
//!
//! - **Position IS relationship** - No foreign keys, proximity defines connection
//! - **Configurable, not hardcoded** - Dimensionality, proximity functions, all flexible
//! - **Generators over assets** - Algorithms, not rigid structures
//! - **Pure core, swappable adapters** - Hexagonal architecture
//!
//! ## Architecture
//!
//! ```text
//! ┌─────────────────────────────────────────────────────────────┐
//! │                            ARMS                             │
//! ├─────────────────────────────────────────────────────────────┤
//! │                                                             │
//! │   CORE (pure math, no I/O)                                  │
//! │     Point, Id, Blob, Proximity, Merge                       │
//! │                                                             │
//! │   PORTS (trait contracts)                                   │
//! │     Place, Near, Latency                                    │
//! │                                                             │
//! │   ADAPTERS (swappable implementations)                      │
//! │     Storage: Memory, NVMe                                   │
//! │     Index: Flat, HNSW                                       │
//! │     API: Python bindings                                    │
//! │                                                             │
//! │   ENGINE (orchestration)                                    │
//! │     Arms - the main entry point                             │
//! │                                                             │
//! └─────────────────────────────────────────────────────────────┘
//! ```
//!
//! ## Quick Start
//!
//! ```rust,ignore
//! use arms::{Arms, ArmsConfig, Blob, Point};
//!
//! // Create ARMS with default config (768 dimensions)
//! let mut arms = Arms::new(ArmsConfig::default());
//!
//! // Place a point in the space (`place` returns a Result with the new Id)
//! let point = Point::new(vec![0.1; 768]);
//! let id = arms.place(point, Blob::from_str("my data")).unwrap();
//!
//! // Find the 5 nearest points
//! let query = Point::new(vec![0.1; 768]);
//! let neighbors = arms.near(&query, 5);
//! ```

// ============================================================================
// MODULES
// ============================================================================

/// Core domain - pure math, no I/O
/// Contains: Point, Id, Blob, Proximity trait, Merge trait
pub mod core;

/// Port definitions - trait contracts for adapters
/// Contains: Place trait, Near trait, Latency trait
pub mod ports;

/// Adapter implementations - swappable components
/// Contains: storage, index, python submodules
pub mod adapters;

/// Engine - orchestration layer
/// Contains: Arms main struct
pub mod engine;

// ============================================================================
// RE-EXPORTS (public API)
// ============================================================================

// Core types
pub use crate::core::{Point, Id, Blob, PlacedPoint};
pub use crate::core::proximity::{Proximity, Cosine, Euclidean, DotProduct};
pub use crate::core::merge::{Merge, Mean, WeightedMean, MaxPool};
pub use crate::core::config::ArmsConfig;

// Port traits
pub use crate::ports::{Place, Near, Latency};

// Engine
pub use crate::engine::Arms;

// ============================================================================
// CRATE-LEVEL DOCUMENTATION
// ============================================================================

/// The five primitives of ARMS:
///
/// 1. **Point**: `Vec<f32>` - Any dimensionality
/// 2. **Proximity**: `fn(a, b) -> f32` - How related?
/// 3. **Merge**: `fn(points) -> point` - Compose together
/// 4. **Place**: `fn(point, data) -> id` - Exist in space
/// 5. **Near**: `fn(point, k) -> ids` - What's related?
///
/// Everything else is configuration or adapters.
// Anchor constant so the list above has a rustdoc home; hidden from docs.
#[doc(hidden)]
pub const _PRIMITIVES: () = ();
src/ports/latency.rs ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //! # Latency Port
2
+ //!
3
+ //! Trait for runtime latency measurement and adaptation.
4
+ //!
5
+ //! This enables the model to know its actual retrieval constraints:
6
+ //! - How fast is the hot tier right now?
7
+ //! - How much budget do I have for retrieval?
8
+ //! - Should I use fewer, faster retrievals or more, slower ones?
9
+
10
+ use std::time::Duration;
11
+
12
/// Storage tier levels
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Tier {
    /// RAM storage - fastest
    Hot,
    /// NVMe storage - fast
    Warm,
    /// Archive storage - slow
    Cold,
}

impl Tier {
    /// Expected `(lower, upper)` latency bounds for one operation in this tier.
    pub fn expected_latency(&self) -> (Duration, Duration) {
        // Bounds expressed in microseconds, then lifted into `Duration`s:
        //   Hot  ~ 1 µs  .. 1 ms
        //   Warm ~ 1 ms  .. 10 ms
        //   Cold ~ 10 ms .. 100 ms
        let (lo_us, hi_us): (u64, u64) = match self {
            Self::Hot => (1, 1_000),
            Self::Warm => (1_000, 10_000),
            Self::Cold => (10_000, 100_000),
        };
        (Duration::from_micros(lo_us), Duration::from_micros(hi_us))
    }
}
33
+
34
/// Latency measurement result
///
/// Snapshot produced by [`Latency::probe`] for a single tier.
#[derive(Debug, Clone)]
pub struct LatencyMeasurement {
    /// The tier that was measured
    pub tier: Tier,

    /// Measured latency for a single operation
    pub latency: Duration,

    /// Throughput (operations per second), if it was measured
    pub throughput_ops: Option<f64>,

    /// Timestamp of measurement (monotonic `Instant`, not wall-clock time)
    pub measured_at: std::time::Instant,
}
49
+
50
/// Budget allocation for retrieval operations
#[derive(Debug, Clone)]
pub struct LatencyBudget {
    /// Total time budget for this retrieval batch
    pub total: Duration,

    /// Maximum time per individual retrieval
    pub per_operation: Duration,

    /// Maximum number of operations in this budget
    pub max_operations: usize,
}

impl Default for LatencyBudget {
    /// Conservative default: 50 ms total, 5 ms per operation, at most 10 ops.
    fn default() -> Self {
        let per_operation = Duration::from_millis(5);
        let max_operations = 10;
        LatencyBudget {
            // 10 ops x 5 ms each = 50 ms overall budget.
            total: per_operation * max_operations as u32,
            per_operation,
            max_operations,
        }
    }
}
72
+
73
/// Tier statistics
///
/// Point-count / byte-size snapshot for a single storage tier.
#[derive(Debug, Clone)]
pub struct TierStats {
    /// The tier
    pub tier: Tier,

    /// Number of points in this tier
    pub count: usize,

    /// Total size in bytes
    pub size_bytes: usize,

    /// Capacity in bytes
    pub capacity_bytes: usize,

    /// Usage ratio (0.0 to 1.0)
    // NOTE(review): presumably size_bytes / capacity_bytes — confirm in the
    // adapter that populates this struct.
    pub usage_ratio: f32,
}
91
+
92
/// Trait for latency measurement and adaptation
///
/// System adapters implement this trait. It lets the engine learn its
/// actual retrieval constraints at runtime rather than assuming them.
pub trait Latency: Send + Sync {
    /// Probe a tier to measure current latency
    ///
    /// Performs a small test operation to measure actual latency.
    fn probe(&mut self, tier: Tier) -> LatencyMeasurement;

    /// Get the current latency budget
    fn budget(&self) -> LatencyBudget;

    /// Set a new latency budget
    fn set_budget(&mut self, budget: LatencyBudget);

    /// Get available capacity in a tier
    // NOTE(review): unit is unstated here — presumably bytes, matching
    // `TierStats::capacity_bytes`; confirm against the implementing adapter.
    fn available_capacity(&self, tier: Tier) -> usize;

    /// Recommend which tier to use for an access pattern
    ///
    /// `expected_accesses` is the expected number of accesses for this data.
    fn recommend_tier(&self, expected_accesses: u32) -> Tier;

    /// Get statistics for a tier
    fn tier_stats(&self, tier: Tier) -> TierStats;

    /// Get statistics for all tiers
    ///
    /// Default implementation queries Hot, Warm, and Cold in that order.
    fn all_stats(&self) -> Vec<TierStats> {
        vec![
            self.tier_stats(Tier::Hot),
            self.tier_stats(Tier::Warm),
            self.tier_stats(Tier::Cold),
        ]
    }
}
src/ports/mod.rs ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
//! # Ports
//!
//! Trait definitions for adapters. Contracts only, no implementations.
//!
//! This is the hexagonal architecture boundary:
//! - Ports define WHAT operations are needed
//! - Adapters define HOW they're implemented
//!
//! The CORE doesn't know about adapters.
//! Adapters implement these port traits.

mod place;
mod near;
mod latency;

// Re-export traits
pub use place::Place;
pub use near::Near;
pub use latency::Latency;

// Re-export types from place
pub use place::{PlaceError, PlaceResult};

// Re-export types from near
pub use near::{NearError, NearResult, SearchResult};

// Re-export types from latency
pub use latency::{Tier, LatencyBudget, LatencyMeasurement, TierStats};
src/ports/near.rs ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //! # Near Port
2
+ //!
3
+ //! Trait for finding related points.
4
+ //!
5
+ //! This is one of the five primitives of ARMS:
6
+ //! `Near: fn(point, k) -> ids` - What's related?
7
+ //!
8
+ //! Implemented by index adapters (Flat, HNSW, etc.)
9
+
10
+ use crate::core::{Id, Point};
11
+
12
+ /// Result type for near operations
13
+ pub type NearResult<T> = Result<T, NearError>;
14
+
15
+ /// A search result with ID and distance/similarity score
16
+ #[derive(Debug, Clone, PartialEq)]
17
+ pub struct SearchResult {
18
+ /// The ID of the found point
19
+ pub id: Id,
20
+
21
+ /// Distance or similarity score
22
+ /// Interpretation depends on the proximity function used.
23
+ pub score: f32,
24
+ }
25
+
26
+ impl SearchResult {
27
+ pub fn new(id: Id, score: f32) -> Self {
28
+ Self { id, score }
29
+ }
30
+ }
31
+
32
/// Errors that can occur during near operations
#[derive(Debug, Clone, PartialEq)]
pub enum NearError {
    /// The query point has wrong dimensionality
    DimensionalityMismatch { expected: usize, got: usize },

    /// Index is not built/ready
    IndexNotReady,

    /// Index backend error
    IndexError(String),
}

impl std::fmt::Display for NearError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::DimensionalityMismatch { expected, got } => write!(
                f,
                "Dimensionality mismatch: expected {}, got {}",
                expected, got
            ),
            Self::IndexNotReady => f.write_str("Index not ready"),
            Self::IndexError(msg) => write!(f, "Index error: {}", msg),
        }
    }
}

impl std::error::Error for NearError {}
58
+
59
/// Trait for finding related points
///
/// Index adapters (Flat, HNSW, ...) implement this trait.
pub trait Near: Send + Sync {
    /// Find k nearest points to query
    ///
    /// Returns results sorted by relevance (most relevant first).
    fn near(&self, query: &Point, k: usize) -> NearResult<Vec<SearchResult>>;

    /// Find all points within a distance/similarity threshold
    ///
    /// For distance metrics (Euclidean), finds points with distance < threshold.
    /// For similarity metrics (Cosine), finds points with similarity > threshold.
    fn within(&self, query: &Point, threshold: f32) -> NearResult<Vec<SearchResult>>;

    /// Add a point to the index
    ///
    /// Call this after placing a point in storage so index and storage stay in sync.
    fn add(&mut self, id: Id, point: &Point) -> NearResult<()>;

    /// Remove a point from the index
    fn remove(&mut self, id: Id) -> NearResult<()>;

    /// Rebuild the index (if needed for performance)
    fn rebuild(&mut self) -> NearResult<()>;

    /// Check if the index is ready for queries
    fn is_ready(&self) -> bool;

    /// Get the number of indexed points
    fn len(&self) -> usize;

    /// Check if the index is empty
    ///
    /// Default implementation delegates to `len()`.
    fn is_empty(&self) -> bool {
        self.len() == 0
    }
}
src/ports/place.rs ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //! # Place Port
2
+ //!
3
+ //! Trait for placing points in the space.
4
+ //!
5
+ //! This is one of the five primitives of ARMS:
6
+ //! `Place: fn(point, data) -> id` - Exist in space
7
+ //!
8
+ //! Implemented by storage adapters (Memory, NVMe, etc.)
9
+
10
+ use crate::core::{Blob, Id, PlacedPoint, Point};
11
+
12
+ /// Result type for place operations
13
+ pub type PlaceResult<T> = Result<T, PlaceError>;
14
+
15
+ /// Errors that can occur during place operations
16
+ #[derive(Debug, Clone, PartialEq)]
17
+ pub enum PlaceError {
18
+ /// The point has wrong dimensionality for this space
19
+ DimensionalityMismatch { expected: usize, got: usize },
20
+
21
+ /// Storage capacity exceeded
22
+ CapacityExceeded,
23
+
24
+ /// Point with this ID already exists
25
+ DuplicateId(Id),
26
+
27
+ /// Storage backend error
28
+ StorageError(String),
29
+ }
30
+
31
+ impl std::fmt::Display for PlaceError {
32
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
33
+ match self {
34
+ PlaceError::DimensionalityMismatch { expected, got } => {
35
+ write!(f, "Dimensionality mismatch: expected {}, got {}", expected, got)
36
+ }
37
+ PlaceError::CapacityExceeded => write!(f, "Storage capacity exceeded"),
38
+ PlaceError::DuplicateId(id) => write!(f, "Duplicate ID: {}", id),
39
+ PlaceError::StorageError(msg) => write!(f, "Storage error: {}", msg),
40
+ }
41
+ }
42
+ }
43
+
44
+ impl std::error::Error for PlaceError {}
45
+
46
+ /// Trait for placing points in the space
47
+ ///
48
+ /// Storage adapters implement this trait.
49
+ pub trait Place: Send + Sync {
50
+ /// Place a point with its payload in the space
51
+ ///
52
+ /// Returns the ID assigned to the placed point.
53
+ fn place(&mut self, point: Point, blob: Blob) -> PlaceResult<Id>;
54
+
55
+ /// Place a point with a specific ID
56
+ ///
57
+ /// Use when you need deterministic IDs (e.g., replication, testing).
58
+ fn place_with_id(&mut self, id: Id, point: Point, blob: Blob) -> PlaceResult<()>;
59
+
60
+ /// Remove a point from the space
61
+ ///
62
+ /// Returns the removed point if it existed.
63
+ fn remove(&mut self, id: Id) -> Option<PlacedPoint>;
64
+
65
+ /// Get a placed point by ID
66
+ ///
67
+ /// Returns None if not found.
68
+ fn get(&self, id: Id) -> Option<&PlacedPoint>;
69
+
70
+ /// Check if a point exists
71
+ fn contains(&self, id: Id) -> bool {
72
+ self.get(id).is_some()
73
+ }
74
+
75
+ /// Get the number of placed points
76
+ fn len(&self) -> usize;
77
+
78
+ /// Check if the space is empty
79
+ fn is_empty(&self) -> bool {
80
+ self.len() == 0
81
+ }
82
+
83
+ /// Iterate over all placed points
84
+ fn iter(&self) -> Box<dyn Iterator<Item = &PlacedPoint> + '_>;
85
+
86
+ /// Get current storage size in bytes
87
+ fn size_bytes(&self) -> usize;
88
+
89
+ /// Clear all points
90
+ fn clear(&mut self);
91
+ }