use std::path::PathBuf;

/// Top-level runtime configuration: model locations, the expert/adapter
/// roster, sampling parameters and KV-cache settings.
///
/// Instances come either from `Config::load`, which parses a JSON file, or
/// from the `Default` impl, which supplies the built-in values.
///
/// NOTE(review): no `#[serde(default)]` is applied, so a JSON config is
/// expected to spell out every non-`Option` field — confirm this is intended.
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
pub struct Config {
    /// Path to the Qwen3 base model GGUF file.
    pub base_model_path: PathBuf,
    /// Path to the SmolLM2 router model GGUF file.
    pub router_model_path: PathBuf,
    /// Directory containing LoRA adapter files (`.safetensors` or `.gguf`).
    pub adapters_dir: PathBuf,
    /// Directory used as the cache for downloaded tokenizers.
    pub tokenizer_cache_dir: PathBuf,
    /// Path to the knowledge-base JSON used by the semantic cache;
    /// `None` when no knowledge base is configured.
    pub kb_path: Option<PathBuf>,
    /// Available experts/adapters together with their descriptions.
    /// `Config::get_expert` searches this list by exact name.
    pub experts: Vec<ExpertConfig>,
    /// Maximum sequence length (presumably counted in tokens — confirm).
    pub max_seq_len: usize,
    /// GPU device ID (`0` for the first GPU, `-1` for CPU).
    pub gpu_device: i32,
    /// Sampling temperature used during generation.
    pub temperature: f64,
    /// Top-p threshold for nucleus sampling.
    pub top_p: f64,
    /// Maximum number of tokens to generate per response.
    pub max_gen_tokens: usize,
    /// KV-cache key quantization type (`"q4_0"`, `"q8_0"`, `"f16"`).
    pub kv_cache_type_k: String,
    /// KV-cache value quantization type (presumably the same set of names
    /// as `kv_cache_type_k` — confirm against the consumer).
    pub kv_cache_type_v: String,
    /// Whether to offload the K, Q and V tensors to the GPU.
    pub kv_offload_kqv: bool,
    /// KV-cache defragmentation threshold (`-1.0` = disabled).
    pub kv_defrag_thold: f32,
}

/// One entry of `Config::experts`: a named expert with an optional adapter
/// file and an optional system prompt.
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
pub struct ExpertConfig {
    /// Identifier of the expert; `Config::get_expert` matches on this
    /// string exactly.
    pub name: String,
    /// Human-readable description of what the expert covers.
    pub description: String,
    /// File name of the adapter for this expert, or `None` when the expert
    /// has no adapter (as for the built-in "general" expert).
    /// NOTE(review): presumably resolved relative to `Config::adapters_dir`
    /// — confirm against the loader.
    pub adapter_file: Option<String>,
    /// System prompt text associated with this expert, if any.
    pub system_prompt: Option<String>,
}

impl Default for Config {
    fn default() -> Self {
        Self {
            base_model_path: PathBuf::from("Qwen3.5-0.8B-Q4_K_M.gguf"),
            router_model_path: PathBuf::from("smollm2-1.7b-instruct-q5_k_m-imat.gguf"),
            adapters_dir: PathBuf::from("adapters"),
            tokenizer_cache_dir: PathBuf::from("tokenizers"),
            kb_path: Some(PathBuf::from("knowledge_base.json")),
            experts: vec![
                ExpertConfig {
                    name: "general".to_string(),
                    description: "General conversation, default mode".to_string(),
                    adapter_file: None,
                    system_prompt: Some(
                        "You are a helpful, friendly AI assistant.\n\
                         IMPORTANT: Think step by step before answering. \
                         First understand the need, then reason through it, \
                         then give your final response."
                            .to_string(),
                    ),
                },
                // ── Sential 2.0 Orchestra Layers ──
                ExpertConfig {
                    name: "structural".to_string(),
                    description: "Structural coding: struct, impl, trait, enum, generics".to_string(),
                    adapter_file: Some("structural.gguf".to_string()),
                    system_prompt: Some(
                        "You are an expert Rust architect. Focus on clean data structures,\
                         idiomatic traits, and well-designed generics. \
                         Write minimal, composable code with clear type definitions.\n\
                         Think: 1) What data shape? 2) What behavior? 3) How to compose?\n\
                         Prefer: structs with derive macros, trait bounds, and impl blocks."
                            .to_string(),
                    ),
                },
                ExpertConfig {
                    name: "flow_error".to_string(),
                    description: "Flow & Error handling: match, Result, Option, concurrency".to_string(),
                    adapter_file: Some("flow_error.gguf".to_string()),
                    system_prompt: Some(
                        "You are an expert in Rust error handling and control flow. \
                         Master match expressions, Result/Option chains, and concurrency patterns.\n\
                         Think: 1) What can fail? 2) How to handle it gracefully? 3) Thread safety?\n\
                         Prefer: match on Result, ? operator, proper error types, Arc<Mutex<T>>."
                            .to_string(),
                    ),
                },
                ExpertConfig {
                    name: "system_io".to_string(),
                    description: "System & IO: file operations, collections, iterators".to_string(),
                    adapter_file: Some("system_io.gguf".to_string()),
                    system_prompt: Some(
                        "You are an expert in Rust I/O and data processing. \
                         Master file operations, collections (HashMap, Vec), and iterator chains.\n\
                         Think: 1) Where does data come from? 2) How to transform it? 3) Where to output?\n\
                         Prefer: BufReader, iterator combinators, collect(), and efficient data structures."
                            .to_string(),
                    ),
                },
                // ── Legacy adapters (backward compat) ──
                ExpertConfig {
                    name: "rust_coding".to_string(),
                    description: "Rust programming and systems development".to_string(),
                    adapter_file: Some("rust_coding.gguf".to_string()),
                    system_prompt: Some(
                        "You are an expert Rust developer. Write idiomatic, safe, efficient Rust code. \
                         Prefer zero-cost abstractions and leverage the type system. \
                         Always consider error handling, concurrency, and memory safety.\n\
                         Think step by step before writing code:\n\
                         1. ANALYZE the problem and constraints\n\
                         2. PLAN the algorithm and data structures\n\
                         3. IMPLEMENT clean idiomatic Rust code\n\
                         4. EXPLAIN key design choices briefly"
                            .to_string(),
                    ),
                },
                ExpertConfig {
                    name: "friendly_chat".to_string(),
                    description: "Дружеское общение, естественный разговорный стиль".to_string(),
                    adapter_file: Some("friendly_chat.gguf".to_string()),
                    system_prompt: Some(
                        "Ты — дружелюбный, тёплый собеседник. Общайся непринуждённо, \
                         используй простой и естественный язык. Будь вежливым, \
                         отзывчивым и поддерживай позитивный тон разговора.\n\
                         ВАЖНО: Прежде чем ответить, подумай — что человек хочет сказать, \
                         какое у него настроение, и как лучше поддержать разговор."
                            .to_string(),
                    ),
                },
                ExpertConfig {
                    name: "teaching".to_string(),
                    description: "Обучение и объяснение сложных концепций".to_string(),
                    adapter_file: Some("teaching.gguf".to_string()),
                    system_prompt: Some(
                        "Ты — терпеливый и методичный учитель. Объясняй сложные концепции \
                         простыми словами, используй аналогии и примеры. Разбивай материал \
                         на логические шаги. Проверяй понимание и адаптируй объяснения \
                         под уровень ученика. Используй метод «расскажи-покажи-сделай».\n\
                         Думай по шагам:\n\
                         1. ОЦЕНИ уровень ученика\n\
                         2. СТРУКТУРИРУЙ тему на 3-5 шагов\n\
                         3. ОБЪЯСНИ (просто + аналогия + пример) и ПРОВЕРЬ понимание"
                            .to_string(),
                    ),
                },
            ],
            max_seq_len: 4096,  // 4K context — room for deep history + CoT + generation
            gpu_device: 0,
            temperature: 0.7,
            top_p: 0.9,
            max_gen_tokens: 1024,  // generous generation budget within 4K window
            kv_cache_type_k: "q4_0".to_string(),  // 4-bit KV cache: saves ~75% VRAM
            kv_cache_type_v: "q4_0".to_string(),
            kv_offload_kqv: true,   // keep KQV on GPU for fast attention
            kv_defrag_thold: -1.0,  // disabled: llama.cpp handles cache management
        }
    }
}

impl Config {
    pub fn load(path: Option<&str>) -> anyhow::Result<Self> {
        match path {
            Some(p) => {
                let content = std::fs::read_to_string(p)?;
                Ok(serde_json::from_str(&content)?)
            }
            None => Ok(Self::default()),
        }
    }

    pub fn get_expert(&self, name: &str) -> Option<&ExpertConfig> {
        self.experts.iter().find(|e| e.name == name)
    }
}