SelentialCore / src /config.rs
S4ntyC1t's picture
Upload 23 files
18e0633 verified
Raw
History Blame Contribute Delete
9.48 kB
use std::path::PathBuf;
/// Top-level runtime settings: model and adapter locations, the expert roster,
/// sampling parameters, and KV-cache tuning knobs.
///
/// The type round-trips through serde; `Config::load` reads it from JSON.
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
pub struct Config {
    /// Location of the Qwen3 base model (GGUF file).
    pub base_model_path: PathBuf,

    /// Location of the SmolLM2 router model (GGUF file).
    pub router_model_path: PathBuf,

    /// Folder holding the LoRA adapter files (`.safetensors` or `.gguf`).
    pub adapters_dir: PathBuf,

    /// Folder used as a cache for downloaded tokenizers.
    pub tokenizer_cache_dir: PathBuf,

    /// Optional knowledge-base JSON file backing the semantic cache.
    pub kb_path: Option<PathBuf>,

    /// The experts/adapters that can be selected, each with its description.
    pub experts: Vec<ExpertConfig>,

    /// Upper bound on sequence length.
    pub max_seq_len: usize,

    /// GPU device index: `0` is the first GPU, `-1` selects the CPU.
    pub gpu_device: i32,

    /// Sampling temperature used during generation.
    pub temperature: f64,

    /// Nucleus-sampling (top-p) threshold.
    pub top_p: f64,

    /// Cap on the number of tokens generated for a single response.
    pub max_gen_tokens: usize,

    /// Quantization type for KV-cache keys (`"q4_0"`, `"q8_0"`, `"f16"`).
    pub kv_cache_type_k: String,

    /// Quantization type for KV-cache values.
    pub kv_cache_type_v: String,

    /// Whether the K, Q, V tensors are offloaded to the GPU.
    pub kv_offload_kqv: bool,

    /// KV-cache defragmentation threshold; `-1.0` turns defragmentation off.
    pub kv_defrag_thold: f32,
}
/// One entry of the expert roster stored in `Config::experts`.
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
pub struct ExpertConfig {
    /// Identifier of the expert; `Config::get_expert` matches on this value.
    pub name: String,

    /// Free-text description of what the expert is for.
    pub description: String,

    /// File name of the adapter for this expert, if it has one.
    pub adapter_file: Option<String>,

    /// System prompt associated with this expert, if any.
    pub system_prompt: Option<String>,
}
impl Default for Config {
    /// Builds the built-in configuration: relative model/adapter paths, the
    /// seven stock experts, and the default sampling and KV-cache settings.
    ///
    /// Multi-line prompts use `\` line continuations, which drop the newline
    /// *and* the next line's leading whitespace. Any space between two joined
    /// fragments must therefore sit before the backslash.
    fn default() -> Self {
        Self {
            base_model_path: PathBuf::from("Qwen3.5-0.8B-Q4_K_M.gguf"),
            router_model_path: PathBuf::from("smollm2-1.7b-instruct-q5_k_m-imat.gguf"),
            adapters_dir: PathBuf::from("adapters"),
            tokenizer_cache_dir: PathBuf::from("tokenizers"),
            kb_path: Some(PathBuf::from("knowledge_base.json")),
            experts: vec![
                // Base expert: no adapter file, only a system prompt.
                ExpertConfig {
                    name: "general".to_string(),
                    description: "General conversation, default mode".to_string(),
                    adapter_file: None,
                    system_prompt: Some(
                        "You are a helpful, friendly AI assistant.\n\
                         IMPORTANT: Think step by step before answering. \
                         First understand the need, then reason through it, \
                         then give your final response."
                            .to_string(),
                    ),
                },
                // ── Sential 2.0 Orchestra Layers ──
                ExpertConfig {
                    name: "structural".to_string(),
                    description: "Structural coding: struct, impl, trait, enum, generics"
                        .to_string(),
                    adapter_file: Some("structural.gguf".to_string()),
                    system_prompt: Some(
                        // The trailing space before `\` on the first line is
                        // required; without it the prompt reads
                        // "structures,idiomatic".
                        "You are an expert Rust architect. Focus on clean data structures, \
                         idiomatic traits, and well-designed generics. \
                         Write minimal, composable code with clear type definitions.\n\
                         Think: 1) What data shape? 2) What behavior? 3) How to compose?\n\
                         Prefer: structs with derive macros, trait bounds, and impl blocks."
                            .to_string(),
                    ),
                },
                ExpertConfig {
                    name: "flow_error".to_string(),
                    description: "Flow & Error handling: match, Result, Option, concurrency"
                        .to_string(),
                    adapter_file: Some("flow_error.gguf".to_string()),
                    system_prompt: Some(
                        "You are an expert in Rust error handling and control flow. \
                         Master match expressions, Result/Option chains, and concurrency patterns.\n\
                         Think: 1) What can fail? 2) How to handle it gracefully? 3) Thread safety?\n\
                         Prefer: match on Result, ? operator, proper error types, Arc<Mutex<T>>."
                            .to_string(),
                    ),
                },
                ExpertConfig {
                    name: "system_io".to_string(),
                    description: "System & IO: file operations, collections, iterators"
                        .to_string(),
                    adapter_file: Some("system_io.gguf".to_string()),
                    system_prompt: Some(
                        "You are an expert in Rust I/O and data processing. \
                         Master file operations, collections (HashMap, Vec), and iterator chains.\n\
                         Think: 1) Where does data come from? 2) How to transform it? 3) Where to output?\n\
                         Prefer: BufReader, iterator combinators, collect(), and efficient data structures."
                            .to_string(),
                    ),
                },
                // ── Legacy adapters (backward compat) ──
                ExpertConfig {
                    name: "rust_coding".to_string(),
                    description: "Rust programming and systems development".to_string(),
                    adapter_file: Some("rust_coding.gguf".to_string()),
                    system_prompt: Some(
                        "You are an expert Rust developer. Write idiomatic, safe, efficient Rust code. \
                         Prefer zero-cost abstractions and leverage the type system. \
                         Always consider error handling, concurrency, and memory safety.\n\
                         Think step by step before writing code:\n\
                         1. ANALYZE the problem and constraints\n\
                         2. PLAN the algorithm and data structures\n\
                         3. IMPLEMENT clean idiomatic Rust code\n\
                         4. EXPLAIN key design choices briefly"
                            .to_string(),
                    ),
                },
                ExpertConfig {
                    name: "friendly_chat".to_string(),
                    description: "Дружеское общение, естественный разговорный стиль".to_string(),
                    adapter_file: Some("friendly_chat.gguf".to_string()),
                    system_prompt: Some(
                        "Ты — дружелюбный, тёплый собеседник. Общайся непринуждённо, \
                         используй простой и естественный язык. Будь вежливым, \
                         отзывчивым и поддерживай позитивный тон разговора.\n\
                         ВАЖНО: Прежде чем ответить, подумай — что человек хочет сказать, \
                         какое у него настроение, и как лучше поддержать разговор."
                            .to_string(),
                    ),
                },
                ExpertConfig {
                    name: "teaching".to_string(),
                    description: "Обучение и объяснение сложных концепций".to_string(),
                    adapter_file: Some("teaching.gguf".to_string()),
                    system_prompt: Some(
                        "Ты — терпеливый и методичный учитель. Объясняй сложные концепции \
                         простыми словами, используй аналогии и примеры. Разбивай материал \
                         на логические шаги. Проверяй понимание и адаптируй объяснения \
                         под уровень ученика. Используй метод «расскажи-покажи-сделай».\n\
                         Думай по шагам:\n\
                         1. ОЦЕНИ уровень ученика\n\
                         2. СТРУКТУРИРУЙ тему на 3-5 шагов\n\
                         3. ОБЪЯСНИ (просто + аналогия + пример) и ПРОВЕРЬ понимание"
                            .to_string(),
                    ),
                },
            ],
            max_seq_len: 4096, // 4K context — room for deep history + CoT + generation
            gpu_device: 0,
            temperature: 0.7,
            top_p: 0.9,
            max_gen_tokens: 1024, // generous generation budget within 4K window
            kv_cache_type_k: "q4_0".to_string(), // 4-bit KV cache: saves ~75% VRAM
            kv_cache_type_v: "q4_0".to_string(),
            kv_offload_kqv: true, // keep KQV on GPU for fast attention
            kv_defrag_thold: -1.0, // disabled: llama.cpp handles cache management
        }
    }
}
impl Config {
pub fn load(path: Option<&str>) -> anyhow::Result<Self> {
match path {
Some(p) => {
let content = std::fs::read_to_string(p)?;
Ok(serde_json::from_str(&content)?)
}
None => Ok(Self::default()),
}
}
pub fn get_expert(&self, name: &str) -> Option<&ExpertConfig> {
self.experts.iter().find(|e| e.name == name)
}
}