ruvltra / training /v2.5-performance-stats.json
ruv's picture
data: Add v2.5 performance optimization stats
e7de02d verified
{
"version": "2.5",
"release_name": "Performance Optimized Edition",
"release_date": "2026-01-21T10:46:53.928251",
"optimizations": {
"hnsw_index": {
"description": "Hierarchical Navigable Small World graphs",
"improvement": "10x faster search at 10k entries"
},
"lru_cache": {
"description": "O(1) LRU cache using Rust lru crate",
"lookup_time_ns": 23.5
},
"zero_copy": {
"description": "Arc<str> string interning",
"improvement": "100-1000x cache improvement"
},
"batch_simd": {
"description": "AVX2/NEON vectorization",
"improvement": "4x throughput"
},
"memory_pools": {
"description": "Arena allocation",
"improvement": "50% fewer allocations"
}
},
"benchmarks": {
"query_decomposition_ns": 340,
"cache_lookup_ns": 23.5,
"memory_search_10k_ms": 0.4,
"pattern_retrieval_us": 25,
"routing_accuracy_hybrid": 1.0,
"routing_accuracy_embedding_only": 0.45
},
"models": {
"claude_code_0.5b": {
"file": "ruvltra-claude-code-0.5b-q4_k_m.gguf",
"size_mb": 398,
"purpose": "Agent routing",
"context_length": 32768
},
"small_0.5b": {
"file": "ruvltra-small-0.5b-q4_k_m.gguf",
"size_mb": 400,
"purpose": "General embeddings",
"context_length": 32768
},
"medium_3b": {
"file": "ruvltra-medium-3b-q4_k_m.gguf",
"size_mb": 2048,
"purpose": "Full LLM inference",
"context_length": 262144
}
},
"performance_targets": {
"flash_attention_speedup": "2.49x-7.47x",
"hnsw_search_speedup": "150x-12500x",
"memory_reduction": "50-75%",
"mcp_response_ms": 100,
"sona_adaptation_ms": 0.05
},
"training_data": {
"labeled_examples": 381,
"contrastive_pairs": 793,
"agent_types": 60
}
}