SelentialCore / src /main.rs
S4ntyC1t's picture
Upload 23 files
18e0633 verified
Raw
History Blame Contribute Delete
10.7 kB
//! Selential Core — MoLoRA Inference Engine
//!
//! A Candle-based MoE-like architecture with a quantized Qwen3 base model,
//! hot-swappable LoRA adapters, and a SmolLM2 query router.
//!
//! Usage:
//! cargo run --release -- interactive
//! cargo run --release -- prompt "Write a Rust function"
mod config;
mod engine;
mod hashtags;
mod inference;
mod kb;
mod pipeline;
mod router;
mod translator;
#[allow(unused)]
use clap::{Parser, Subcommand};
use std::path::PathBuf;
// Top-level command-line interface for the `selential` binary.
//
// Every option is declared `global = true`, so it may be given before or
// after the subcommand. The `///` comments on the fields are consumed by
// clap's derive as `--help` text, which is why maintainer notes in this
// struct use plain `//` comments instead.
#[derive(Parser)]
#[command(name = "selential", version, about = "MoLoRA Inference Engine")]
struct Cli {
    #[command(subcommand)]
    command: Commands,
    /// Path to configuration file
    #[arg(short, long, global = true)]
    config: Option<String>,
    /// Path to base Qwen3 model GGUF file
    #[arg(short = 'm', long, global = true)]
    model: Option<PathBuf>,
    /// GPU device ID (-1 for CPU)
    // Without `allow_negative_numbers`, clap interprets a separate `-1` token
    // as an unknown short flag and rejects `-d -1` / `--device -1`, even
    // though the help text above advertises that value.
    #[arg(
        short = 'd',
        long,
        default_value = "0",
        global = true,
        allow_negative_numbers = true
    )]
    device: i32,
}
// Subcommands accepted on the command line; `main` dispatches on these.
// The `///` lines below are left verbatim because clap's derive uses them as
// the per-subcommand / per-argument help text.
#[derive(Subcommand)]
enum Commands {
/// Interactive chat mode
Interactive,
/// Single prompt inference
Prompt {
/// User query
// Collected as separate words; `main` joins them with single spaces before
// handing the text to `run_prompt`.
#[arg(required = true)]
prompt: Vec<String>,
/// Expert domain
// `run_prompt` treats "general" (the default) and the empty string as
// "no explicit expert requested".
#[arg(short, long, default_value = "general")]
expert: String,
},
/// List available experts and routing info
Info,
/// Reset conversation
// In `main` this only prints a hint pointing at `/reset` in interactive mode.
Reset,
}
fn main() -> anyhow::Result<()> {
// Initialize tracing with env-filter
tracing_subscriber::fmt()
.with_env_filter(
tracing_subscriber::EnvFilter::try_from_default_env().unwrap_or_else(|_| {
tracing_subscriber::EnvFilter::new("selential=info,candle=warn")
}),
)
.with_target(false)
.init();
let cli = Cli::parse();
// Load configuration
let mut config = config::Config::load(cli.config.as_deref())?;
// Override with CLI arguments
if let Some(model_path) = cli.model {
config.base_model_path = model_path;
}
if cli.device >= 0 {
config.gpu_device = cli.device;
}
match cli.command {
Commands::Interactive => run_interactive(config)?,
Commands::Prompt { prompt, expert } => run_prompt(config, &prompt.join(" "), &expert)?,
Commands::Info => {
let expert_names: Vec<String> = config.experts.iter().map(|e| e.name.clone()).collect();
let router = router::Router::new(&expert_names);
println!("{}", router.routing_info());
println!("\nConfiguration:");
println!(" Base model: {:?}", config.base_model_path);
println!(" Router model: {:?}", config.router_model_path);
println!(" Adapters dir: {:?}", config.adapters_dir);
println!(" Knowledge base: {:?}", config.kb_path);
println!(" GPU device: {}", config.gpu_device);
println!(" Max gen tokens: {}", config.max_gen_tokens);
println!(" Max seq len: {}", config.max_seq_len);
}
Commands::Reset => {
println!("To reset, restart selential or use /reset in interactive mode.");
}
}
Ok(())
}
/// Runs a single query through a freshly constructed inference engine and
/// prints the response to stdout.
///
/// When `expert` is empty or equal to `"general"` the query goes through
/// `process_query`; any other value is passed to
/// `process_query_with_expert` as the requested expert.
///
/// # Errors
///
/// Returns an error if the engine cannot be created or the query fails.
fn run_prompt(config: config::Config, prompt: &str, expert: &str) -> anyhow::Result<()> {
    let mut engine = inference::InferenceEngine::new(config)?;

    let no_explicit_expert = expert.is_empty() || expert == "general";
    let response = if no_explicit_expert {
        engine.process_query(prompt)?
    } else {
        engine.process_query_with_expert(prompt, Some(expert))?
    };

    println!("{response}");
    Ok(())
}
/// Runs the interactive read-eval-print loop.
///
/// Builds an inference engine from `config`, prints the banner and then reads
/// one line at a time from stdin. Lines that match a slash command
/// (`/help`, `/reset`, `/stats`, `/orchestra`, `/tags`, `/pipeline`,
/// `/hashtags <query>`, `/exit`, `/quit`) are handled locally; any other
/// non-empty line is sent to the engine as a query. The loop ends on
/// `/exit`, `/quit`, or end of input.
///
/// # Errors
///
/// Returns an error if the engine cannot be created, if stdin/stdout I/O
/// fails, or if the session statistics cannot be serialized. Errors from an
/// individual query are printed to stderr and do not end the session.
fn run_interactive(config: config::Config) -> anyhow::Result<()> {
    use std::io::Write;

    let mut engine = inference::InferenceEngine::new(config)?;
    print_banner();

    let stdin = std::io::stdin();
    let mut line = String::new();
    loop {
        print!("> ");
        std::io::stdout().flush()?;

        line.clear();
        // `read_line` returns Ok(0) at end of input (Ctrl-D or exhausted
        // pipe). Without this check the loop would spin forever on EOF.
        if stdin.read_line(&mut line)? == 0 {
            println!();
            break;
        }

        let input = line.trim();
        if input.is_empty() {
            continue;
        }

        match input {
            "/exit" | "/quit" => {
                println!("Goodbye!");
                break;
            }
            "/reset" => {
                engine.reset();
                println!("Conversation reset.");
            }
            "/stats" => println!("{}", serde_json::to_string_pretty(&engine.stats())?),
            "/orchestra" => print_orchestra(engine.active_expert()),
            "/tags" => print_tags(),
            "/help" => print_help(engine.active_expert()),
            "/pipeline" => println!("{}", engine.pipeline_info()),
            // Input is trimmed, so a bare "/hashtags" has no query; show the
            // usage instead of sending the command text to the model.
            "/hashtags" => println!("Usage: /hashtags <query>"),
            _ => {
                if let Some(query) = input.strip_prefix("/hashtags ") {
                    print_hashtag_analysis(query);
                    continue;
                }

                // Show extracted hashtags before processing.
                let tags = hashtags::extract_hashtags(input);
                if !tags.is_empty() {
                    println!(" 🏷️ {}", tags.join(" "));
                }

                match engine.process_query(input) {
                    Ok(response) => {
                        let expert = engine.active_expert();
                        let icon = expert_icon(expert);
                        println!("\n[{icon} {expert}]");
                        println!("{}\n", response);
                    }
                    // A failed query is reported but keeps the session alive.
                    Err(e) => eprintln!("Error: {:#}", e),
                }
            }
        }
    }
    Ok(())
}

/// Prints the start-up banner shown when interactive mode begins.
fn print_banner() {
    println!("\n╔══════════════════════════════════════════════════╗");
    println!("║ Selential Core — MoLoRA Engine v2.0 ║");
    println!("╠══════════════════════════════════════════════════╣");
    println!("║ Orchestra routing: hashtags → expert layers ║");
    println!("║ 🏗️ structural — struct, impl, trait, enum ║");
    println!("║ 🔀 flow_error — match, result, concurrency ║");
    println!("║ 📁 system_io — file I/O, collections ║");
    println!("╠══════════════════════════════════════════════════╣");
    println!("║ /help /reset /stats /orchestra /exit ║");
    println!("║ /hashtags <query> /tags ║");
    println!("╚══════════════════════════════════════════════════╝\n");
}

/// Prints the `/orchestra` overview followed by the currently active expert.
fn print_orchestra(active_expert: &str) {
    println!("{}", "═".repeat(50));
    println!("🎵 Selential 2.0 — Оркестр экспертов");
    println!("{}", "═".repeat(50));
    println!();
    println!(" 🌐 Layer 1: Generalist Core (#70)");
    println!(" Всегда активен — связность, логика, синтаксис");
    println!(" GGUF: generalist_core.gguf (~24 MB)");
    println!();
    println!(" 🎯 Layer 2: Coding Specialists (по хештегам):");
    println!();
    println!(" 🏗️ structural — #struct #impl #trait #enum");
    println!(" + #164 (architect) + #92 (impl)");
    println!(" GGUF: structural.gguf (~24 MB)");
    println!();
    println!(" 🔀 flow_error — #match #result #option #error #concurrency");
    println!(" + #116 (match) + #115 (result)");
    println!(" GGUF: flow_error.gguf (~24 MB)");
    println!();
    println!(" 📁 system_io — #io #file #collections");
    println!(" + #172 (file I/O) + #116 (match/IO)");
    println!(" GGUF: system_io.gguf (~24 MB)");
    println!();
    println!(" 🦀 rust_coding — legacy (backward compat)");
    println!(" GGUF: rust_coding.gguf (~4 MB)");
    println!();
    println!(" 📊 VRAM: ~47 MB на эксперта | Всего: ~24 MB на оркестр");
    println!(" ⚡ t/s: ~8.7 (с LoRA) vs ~9.7 (baseline) — лишь -10%");
    println!();
    println!(" Active: {}", active_expert);
}

/// Prints the `/tags` listing of hashtags recognised for routing.
fn print_tags() {
    println!("Available hashtags for routing:");
    println!(" Code: #struct #impl #trait #enum #match #result #option #error");
    println!(" IO: #io #file #collections #regex");
    println!(" Async:#concurrency #async #thread");
    println!(" Lang: #rust #python #javascript #zig #cpp #golang");
    println!(" Tone: #casual #teaching #formal");
}

/// Prints the `/help` command summary followed by the active expert.
fn print_help(active_expert: &str) {
    println!("Commands:");
    println!(" /help - Show this help");
    println!(" /reset - Reset conversation");
    println!(" /stats - Show session statistics");
    println!(" /orchestra - Show orchestra routing info");
    println!(" /tags - List available hashtags");
    println!(" /hashtags - Extract hashtags from a query");
    println!(" /pipeline - Show pipeline info (KB, cache)");
    println!(" /exit - Exit the program");
    println!(
        "\nActive: {} | Any other input = query",
        active_expert
    );
}

/// Prints the hashtags and detected language for a `/hashtags <query>` request.
fn print_hashtag_analysis(query: &str) {
    let tags = hashtags::extract_hashtags(query);
    println!("Query: {}", query);
    println!("Hashtags: {}", tags.join(" "));
    let language = if hashtags::is_russian(query) {
        "Russian"
    } else {
        "English/mixed"
    };
    println!("Language: {}", language);
    // Routing preview is still a placeholder; no expert is resolved here.
    println!("Would route to: (tag-based orchestra detection)");
}

/// Maps an expert name to the icon shown in front of its responses.
fn expert_icon(expert: &str) -> &'static str {
    match expert {
        "structural" => "🏗️",
        "flow_error" => "🔀",
        "system_io" => "📁",
        "rust_coding" => "🦀",
        "friendly_chat" => "💬",
        "teaching" => "📚",
        _ => "🤖",
    }
}