use std::path::PathBuf; #[derive(Debug, Clone, serde::Deserialize, serde::Serialize)] pub struct Config { /// Path to the Qwen3 base model GGUF file pub base_model_path: PathBuf, /// Path to the SmolLM2 router model GGUF file pub router_model_path: PathBuf, /// Directory containing LoRA adapter files (.safetensors or .gguf) pub adapters_dir: PathBuf, /// Directory for downloaded tokenizer cache pub tokenizer_cache_dir: PathBuf, /// Path to knowledge base JSON for semantic cache pub kb_path: Option, /// Available experts/adapters with their descriptions pub experts: Vec, /// Maximum sequence length pub max_seq_len: usize, /// GPU device ID (0 for first GPU, -1 for CPU) pub gpu_device: i32, /// Temperature for generation pub temperature: f64, /// Top-p for nucleus sampling pub top_p: f64, /// Max tokens to generate per response pub max_gen_tokens: usize, /// KV-cache key quantization type ("q4_0", "q8_0", "f16") pub kv_cache_type_k: String, /// KV-cache value quantization type pub kv_cache_type_v: String, /// Offload K,Q,V tensors to GPU pub kv_offload_kqv: bool, /// KV-cache defrag threshold (-1.0 = disabled) pub kv_defrag_thold: f32, } #[derive(Debug, Clone, serde::Deserialize, serde::Serialize)] pub struct ExpertConfig { pub name: String, pub description: String, pub adapter_file: Option, pub system_prompt: Option, } impl Default for Config { fn default() -> Self { Self { base_model_path: PathBuf::from("Qwen3.5-0.8B-Q4_K_M.gguf"), router_model_path: PathBuf::from("smollm2-1.7b-instruct-q5_k_m-imat.gguf"), adapters_dir: PathBuf::from("adapters"), tokenizer_cache_dir: PathBuf::from("tokenizers"), kb_path: Some(PathBuf::from("knowledge_base.json")), experts: vec![ ExpertConfig { name: "general".to_string(), description: "General conversation, default mode".to_string(), adapter_file: None, system_prompt: Some( "You are a helpful, friendly AI assistant.\n\ IMPORTANT: Think step by step before answering. \ First understand the need, then reason through it, \ then give your final response." .to_string(), ), }, // ── Sential 2.0 Orchestra Layers ── ExpertConfig { name: "structural".to_string(), description: "Structural coding: struct, impl, trait, enum, generics".to_string(), adapter_file: Some("structural.gguf".to_string()), system_prompt: Some( "You are an expert Rust architect. Focus on clean data structures,\ idiomatic traits, and well-designed generics. \ Write minimal, composable code with clear type definitions.\n\ Think: 1) What data shape? 2) What behavior? 3) How to compose?\n\ Prefer: structs with derive macros, trait bounds, and impl blocks." .to_string(), ), }, ExpertConfig { name: "flow_error".to_string(), description: "Flow & Error handling: match, Result, Option, concurrency".to_string(), adapter_file: Some("flow_error.gguf".to_string()), system_prompt: Some( "You are an expert in Rust error handling and control flow. \ Master match expressions, Result/Option chains, and concurrency patterns.\n\ Think: 1) What can fail? 2) How to handle it gracefully? 3) Thread safety?\n\ Prefer: match on Result, ? operator, proper error types, Arc>." .to_string(), ), }, ExpertConfig { name: "system_io".to_string(), description: "System & IO: file operations, collections, iterators".to_string(), adapter_file: Some("system_io.gguf".to_string()), system_prompt: Some( "You are an expert in Rust I/O and data processing. \ Master file operations, collections (HashMap, Vec), and iterator chains.\n\ Think: 1) Where does data come from? 2) How to transform it? 3) Where to output?\n\ Prefer: BufReader, iterator combinators, collect(), and efficient data structures." .to_string(), ), }, // ── Legacy adapters (backward compat) ── ExpertConfig { name: "rust_coding".to_string(), description: "Rust programming and systems development".to_string(), adapter_file: Some("rust_coding.gguf".to_string()), system_prompt: Some( "You are an expert Rust developer. Write idiomatic, safe, efficient Rust code. \ Prefer zero-cost abstractions and leverage the type system. \ Always consider error handling, concurrency, and memory safety.\n\ Think step by step before writing code:\n\ 1. ANALYZE the problem and constraints\n\ 2. PLAN the algorithm and data structures\n\ 3. IMPLEMENT clean idiomatic Rust code\n\ 4. EXPLAIN key design choices briefly" .to_string(), ), }, ExpertConfig { name: "friendly_chat".to_string(), description: "Дружеское общение, естественный разговорный стиль".to_string(), adapter_file: Some("friendly_chat.gguf".to_string()), system_prompt: Some( "Ты — дружелюбный, тёплый собеседник. Общайся непринуждённо, \ используй простой и естественный язык. Будь вежливым, \ отзывчивым и поддерживай позитивный тон разговора.\n\ ВАЖНО: Прежде чем ответить, подумай — что человек хочет сказать, \ какое у него настроение, и как лучше поддержать разговор." .to_string(), ), }, ExpertConfig { name: "teaching".to_string(), description: "Обучение и объяснение сложных концепций".to_string(), adapter_file: Some("teaching.gguf".to_string()), system_prompt: Some( "Ты — терпеливый и методичный учитель. Объясняй сложные концепции \ простыми словами, используй аналогии и примеры. Разбивай материал \ на логические шаги. Проверяй понимание и адаптируй объяснения \ под уровень ученика. Используй метод «расскажи-покажи-сделай».\n\ Думай по шагам:\n\ 1. ОЦЕНИ уровень ученика\n\ 2. СТРУКТУРИРУЙ тему на 3-5 шагов\n\ 3. ОБЪЯСНИ (просто + аналогия + пример) и ПРОВЕРЬ понимание" .to_string(), ), }, ], max_seq_len: 4096, // 4K context — room for deep history + CoT + generation gpu_device: 0, temperature: 0.7, top_p: 0.9, max_gen_tokens: 1024, // generous generation budget within 4K window kv_cache_type_k: "q4_0".to_string(), // 4-bit KV cache: saves ~75% VRAM kv_cache_type_v: "q4_0".to_string(), kv_offload_kqv: true, // keep KQV on GPU for fast attention kv_defrag_thold: -1.0, // disabled: llama.cpp handles cache management } } } impl Config { pub fn load(path: Option<&str>) -> anyhow::Result { match path { Some(p) => { let content = std::fs::read_to_string(p)?; Ok(serde_json::from_str(&content)?) } None => Ok(Self::default()), } } pub fn get_expert(&self, name: &str) -> Option<&ExpertConfig> { self.experts.iter().find(|e| e.name == name) } }