//! Selential Core — MoLoRA Inference Engine //! //! A Candle-based MoE-like architecture with a quantized Qwen3 base model, //! hot-swappable LoRA adapters, and a SmolLM2 query router. //! //! Usage: //! cargo run --release -- interactive //! cargo run --release -- prompt "Write a Rust function" mod config; mod engine; mod hashtags; mod inference; mod kb; mod pipeline; mod router; mod translator; #[allow(unused)] use clap::{Parser, Subcommand}; use std::path::PathBuf; #[derive(Parser)] #[command(name = "selential", version, about = "MoLoRA Inference Engine")] struct Cli { #[command(subcommand)] command: Commands, /// Path to configuration file #[arg(short, long, global = true)] config: Option, /// Path to base Qwen3 model GGUF file #[arg(short = 'm', long, global = true)] model: Option, /// GPU device ID (-1 for CPU) #[arg(short = 'd', long, default_value = "0", global = true)] device: i32, } #[derive(Subcommand)] enum Commands { /// Interactive chat mode Interactive, /// Single prompt inference Prompt { /// User query #[arg(required = true)] prompt: Vec, /// Expert domain #[arg(short, long, default_value = "general")] expert: String, }, /// List available experts and routing info Info, /// Reset conversation Reset, } fn main() -> anyhow::Result<()> { // Initialize tracing with env-filter tracing_subscriber::fmt() .with_env_filter( tracing_subscriber::EnvFilter::try_from_default_env().unwrap_or_else(|_| { tracing_subscriber::EnvFilter::new("selential=info,candle=warn") }), ) .with_target(false) .init(); let cli = Cli::parse(); // Load configuration let mut config = config::Config::load(cli.config.as_deref())?; // Override with CLI arguments if let Some(model_path) = cli.model { config.base_model_path = model_path; } if cli.device >= 0 { config.gpu_device = cli.device; } match cli.command { Commands::Interactive => run_interactive(config)?, Commands::Prompt { prompt, expert } => run_prompt(config, &prompt.join(" "), &expert)?, Commands::Info => { let expert_names: Vec = config.experts.iter().map(|e| e.name.clone()).collect(); let router = router::Router::new(&expert_names); println!("{}", router.routing_info()); println!("\nConfiguration:"); println!(" Base model: {:?}", config.base_model_path); println!(" Router model: {:?}", config.router_model_path); println!(" Adapters dir: {:?}", config.adapters_dir); println!(" Knowledge base: {:?}", config.kb_path); println!(" GPU device: {}", config.gpu_device); println!(" Max gen tokens: {}", config.max_gen_tokens); println!(" Max seq len: {}", config.max_seq_len); } Commands::Reset => { println!("To reset, restart selential or use /reset in interactive mode."); } } Ok(()) } fn run_prompt(config: config::Config, prompt: &str, expert: &str) -> anyhow::Result<()> { let mut engine = inference::InferenceEngine::new(config)?; let response = if expert != "general" && !expert.is_empty() { engine.process_query_with_expert(prompt, Some(expert)) } else { engine.process_query(prompt) }?; println!("{}", response); Ok(()) } fn run_interactive(config: config::Config) -> anyhow::Result<()> { let mut engine = inference::InferenceEngine::new(config)?; println!("\n╔══════════════════════════════════════════════════╗"); println!("║ Selential Core — MoLoRA Engine v2.0 ║"); println!("╠══════════════════════════════════════════════════╣"); println!("║ Orchestra routing: hashtags → expert layers ║"); println!("║ 🏗️ structural — struct, impl, trait, enum ║"); println!("║ 🔀 flow_error — match, result, concurrency ║"); println!("║ 📁 system_io — file I/O, collections ║"); println!("╠══════════════════════════════════════════════════╣"); println!("║ /help /reset /stats /orchestra /exit ║"); println!("║ /hashtags /tags ║"); println!("╚══════════════════════════════════════════════════╝\n"); loop { let mut input = String::new(); print!("> "); use std::io::Write; std::io::stdout().flush()?; std::io::stdin().read_line(&mut input)?; let input = input.trim(); if input.is_empty() { continue; } match input { "/exit" | "/quit" => { println!("Goodbye!"); break; } "/reset" => { engine.reset(); println!("Conversation reset."); continue; } "/stats" => { println!("{}", serde_json::to_string_pretty(&engine.stats())?); continue; } "/orchestra" => { println!("{}", "═".repeat(50)); println!("🎵 Selential 2.0 — Оркестр экспертов"); println!("{}", "═".repeat(50)); println!(""); println!(" 🌐 Layer 1: Generalist Core (#70)"); println!(" Всегда активен — связность, логика, синтаксис"); println!(" GGUF: generalist_core.gguf (~24 MB)"); println!(""); println!(" 🎯 Layer 2: Coding Specialists (по хештегам):"); println!(""); println!(" 🏗️ structural — #struct #impl #trait #enum"); println!(" + #164 (architect) + #92 (impl)"); println!(" GGUF: structural.gguf (~24 MB)"); println!(""); println!(" 🔀 flow_error — #match #result #option #error #concurrency"); println!(" + #116 (match) + #115 (result)"); println!(" GGUF: flow_error.gguf (~24 MB)"); println!(""); println!(" 📁 system_io — #io #file #collections"); println!(" + #172 (file I/O) + #116 (match/IO)"); println!(" GGUF: system_io.gguf (~24 MB)"); println!(""); println!(" 🦀 rust_coding — legacy (backward compat)"); println!(" GGUF: rust_coding.gguf (~4 MB)"); println!(""); println!(" 📊 VRAM: ~47 MB на эксперта | Всего: ~24 MB на оркестр"); println!(" ⚡ t/s: ~8.7 (с LoRA) vs ~9.7 (baseline) — лишь -10%"); println!(""); println!(" Active: {}", engine.active_expert()); continue; } "/tags" => { println!("Available hashtags for routing:"); println!(" Code: #struct #impl #trait #enum #match #result #option #error"); println!(" IO: #io #file #collections #regex"); println!(" Async:#concurrency #async #thread"); println!(" Lang: #rust #python #javascript #zig #cpp #golang"); println!(" Tone: #casual #teaching #formal"); continue; } "/help" => { println!("Commands:"); println!(" /help - Show this help"); println!(" /reset - Reset conversation"); println!(" /stats - Show session statistics"); println!(" /orchestra - Show orchestra routing info"); println!(" /tags - List available hashtags"); println!(" /hashtags - Extract hashtags from a query"); println!(" /pipeline - Show pipeline info (KB, cache)"); println!(" /exit - Exit the program"); println!( "\nActive: {} | Any other input = query", engine.active_expert() ); continue; } "/pipeline" => { println!("{}", engine.pipeline_info()); continue; } s if s.starts_with("/hashtags ") => { let query = &s["/hashtags ".len()..]; let tags = hashtags::extract_hashtags(query); println!("Query: {}", query); println!("Hashtags: {}", tags.join(" ")); let is_ru = hashtags::is_russian(query); println!( "Language: {}", if is_ru { "Russian" } else { "English/mixed" } ); // Show which orchestra would be selected let _tag_names: Vec = tags .iter() .map(|t| t.trim_start_matches('#').to_string()) .collect(); println!("Would route to: (tag-based orchestra detection)"); continue; } _ => {} } // Show extracted hashtags before processing let tags = hashtags::extract_hashtags(input); if !tags.is_empty() { println!(" 🏷️ {}", tags.join(" ")); } match engine.process_query(input) { Ok(response) => { let expert = engine.active_expert(); let icon = match expert { "structural" => "🏗️", "flow_error" => "🔀", "system_io" => "📁", "rust_coding" => "🦀", "friendly_chat" => "💬", "teaching" => "📚", _ => "🤖", }; println!("\n[{icon} {expert}]"); println!("{}\n", response); } Err(e) => { eprintln!("Error: {:#}", e); } } } Ok(()) }