training/v2.5-performance-stats.json

ruvltra / training /v2.5-performance-stats.json

data: Add v2.5 performance optimization stats

e7de02d verified 3 months ago

1.84 kB

	{
	"version": "2.5",
	"release_name": "Performance Optimized Edition",
	"release_date": "2026-01-21T10:46:53.928251",
	"optimizations": {
	"hnsw_index": {
	"description": "Hierarchical Navigable Small World graphs",
	"improvement": "10x faster search at 10k entries"
	},
	"lru_cache": {
	"description": "O(1) LRU cache using Rust lru crate",
	"lookup_time_ns": 23.5
	},
	"zero_copy": {
	"description": "Arc<str> string interning",
	"improvement": "100-1000x cache improvement"
	},
	"batch_simd": {
	"description": "AVX2/NEON vectorization",
	"improvement": "4x throughput"
	},
	"memory_pools": {
	"description": "Arena allocation",
	"improvement": "50% fewer allocations"
	}
	},
	"benchmarks": {
	"query_decomposition_ns": 340,
	"cache_lookup_ns": 23.5,
	"memory_search_10k_ms": 0.4,
	"pattern_retrieval_us": 25,
	"routing_accuracy_hybrid": 1.0,
	"routing_accuracy_embedding_only": 0.45
	},
	"models": {
	"claude_code_0.5b": {
	"file": "ruvltra-claude-code-0.5b-q4_k_m.gguf",
	"size_mb": 398,
	"purpose": "Agent routing",
	"context_length": 32768
	},
	"small_0.5b": {
	"file": "ruvltra-small-0.5b-q4_k_m.gguf",
	"size_mb": 400,
	"purpose": "General embeddings",
	"context_length": 32768
	},
	"medium_3b": {
	"file": "ruvltra-medium-3b-q4_k_m.gguf",
	"size_mb": 2048,
	"purpose": "Full LLM inference",
	"context_length": 262144
	}
	},
	"performance_targets": {
	"flash_attention_speedup": "2.49x-7.47x",
	"hnsw_search_speedup": "150x-12500x",
	"memory_reduction": "50-75%",
	"mcp_response_ms": 100,
	"sona_adaptation_ms": 0.05
	},
	"training_data": {
	"labeled_examples": 381,
	"contrastive_pairs": 793,
	"agent_types": 60
	}
	}