{
  "version": "2.5",
  "release_name": "Performance Optimized Edition",
  "release_date": "2026-01-21T10:46:53.928251",
  "optimizations": {
    "hnsw_index": {
      "description": "Hierarchical Navigable Small World graphs",
      "improvement": "10x faster search at 10k entries"
    },
    "lru_cache": {
      "description": "O(1) LRU cache using Rust lru crate",
      "lookup_time_ns": 23.5
    },
    "zero_copy": {
      "description": "Arc<str> string interning",
      "improvement": "100-1000x cache improvement"
    },
    "batch_simd": {
      "description": "AVX2/NEON vectorization",
      "improvement": "4x throughput"
    },
    "memory_pools": {
      "description": "Arena allocation",
      "improvement": "50% fewer allocations"
    }
  },
  "benchmarks": {
    "query_decomposition_ns": 340,
    "cache_lookup_ns": 23.5,
    "memory_search_10k_ms": 0.4,
    "pattern_retrieval_us": 25,
    "routing_accuracy_hybrid": 1.0,
    "routing_accuracy_embedding_only": 0.45
  },
  "models": {
    "claude_code_0.5b": {
      "file": "ruvltra-claude-code-0.5b-q4_k_m.gguf",
      "size_mb": 398,
      "purpose": "Agent routing",
      "context_length": 32768
    },
    "small_0.5b": {
      "file": "ruvltra-small-0.5b-q4_k_m.gguf",
      "size_mb": 400,
      "purpose": "General embeddings",
      "context_length": 32768
    },
    "medium_3b": {
      "file": "ruvltra-medium-3b-q4_k_m.gguf",
      "size_mb": 2048,
      "purpose": "Full LLM inference",
      "context_length": 262144
    }
  },
  "performance_targets": {
    "flash_attention_speedup": "2.49x-7.47x",
    "hnsw_search_speedup": "150x-12500x",
    "memory_reduction": "50-75%",
    "mcp_response_ms": 100,
    "sona_adaptation_ms": 0.05
  },
  "training_data": {
    "labeled_examples": 381,
    "contrastive_pairs": 793,
    "agent_types": 60
  }
}