Upload 23 files

18e0633 verified about 1 month ago

10.7 kB

	//! Selential Core — MoLoRA Inference Engine
	//!
	//! A Candle-based MoE-like architecture with a quantized Qwen3 base model,
	//! hot-swappable LoRA adapters, and a SmolLM2 query router.
	//!
	//! Usage:
	//!
	//! ```text
	//! cargo run --release -- interactive
	//! cargo run --release -- prompt "Write a Rust function"
	//! ```

	mod config;
	mod engine;
	mod hashtags;
	mod inference;
	mod kb;
	mod pipeline;
	mod router;
	mod translator;



	#[allow(unused)]
	use clap::{Parser, Subcommand};
	use std::path::PathBuf;

	// Top-level command-line arguments for the `selential` binary.
	//
	// Maintainer notes in this struct use plain `//` comments on purpose: clap
	// derives the `--help` text from `///` doc comments, so those are left
	// untouched. Every option is `global`, so it may be given before or after
	// the subcommand.
	#[derive(Parser)]
	#[command(name = "selential", version, about = "MoLoRA Inference Engine")]
	struct Cli {
	    #[command(subcommand)]
	    command: Commands,

	    /// Path to configuration file
	    #[arg(short, long, global = true)]
	    config: Option<String>,

	    /// Path to base Qwen3 model GGUF file
	    #[arg(short = 'm', long, global = true)]
	    model: Option<PathBuf>,

	    /// GPU device ID (-1 for CPU)
	    // `allow_negative_numbers` is required for the documented `-d -1` /
	    // `--device -1` spelling: by default clap parses a leading `-1` as a
	    // (non-existent) short flag and rejects the command line.
	    #[arg(
	        short = 'd',
	        long,
	        default_value = "0",
	        global = true,
	        allow_negative_numbers = true
	    )]
	    device: i32,
	}

	// Subcommands of the `selential` binary; `main` matches on this enum to pick
	// what to run. Notes here use `//` rather than `///` so that the existing doc
	// comments (which clap shows as help text) stay exactly as they are.
	#[derive(Subcommand)]
	enum Commands {
	// Dispatched to `run_interactive`.
	/// Interactive chat mode
	Interactive,
	// Dispatched to `run_prompt`.
	/// Single prompt inference
	Prompt {
	// Collected as separate words; `main` joins them with a single space
	// before passing the text on, so the prompt does not need to be quoted.
	/// User query
	#[arg(required = true)]
	prompt: Vec<String>,
	// `run_prompt` treats "general" (the default) and the empty string as
	// "no explicit expert"; any other value is passed to the engine as the
	// requested expert.
	/// Expert domain
	#[arg(short, long, default_value = "general")]
	expert: String,
	},
	// Prints the router's routing info followed by the loaded configuration.
	/// List available experts and routing info
	Info,
	// Only prints a hint: there is no state to reset from a one-shot CLI call.
	/// Reset conversation
	Reset,
	}

	/// Program entry point.
	///
	/// Installs the tracing subscriber, parses the command line, loads the
	/// configuration, applies command-line overrides and then dispatches to the
	/// selected subcommand.
	///
	/// # Errors
	///
	/// Returns an error if the configuration cannot be loaded or if the selected
	/// subcommand (`run_interactive` / `run_prompt`) fails.
	fn main() -> anyhow::Result<()> {
	    // Prefer a filter supplied through the environment; if none can be built
	    // from it, fall back to the project default.
	    let env_filter = tracing_subscriber::EnvFilter::try_from_default_env()
	        .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("selential=info,candle=warn"));
	    tracing_subscriber::fmt()
	        .with_env_filter(env_filter)
	        .with_target(false)
	        .init();

	    let cli = Cli::parse();

	    // Load configuration
	    let mut config = config::Config::load(cli.config.as_deref())?;

	    // Override with CLI arguments
	    if let Some(model_path) = cli.model {
	        config.base_model_path = model_path;
	    }
	    // The device is applied unconditionally. The previous `>= 0` guard dropped
	    // negative values, so the documented `--device=-1` (CPU) request was
	    // silently ignored and the configured GPU was used instead.
	    // NOTE(review): assumes the engine treats a negative `gpu_device` as
	    // "run on CPU", as the `--device` help text states — confirm.
	    // NOTE(review): `--device` defaults to 0, so the value from the
	    // configuration file is always replaced, even when the flag is omitted.
	    config.gpu_device = cli.device;

	    match cli.command {
	        Commands::Interactive => run_interactive(config)?,
	        Commands::Prompt { prompt, expert } => run_prompt(config, &prompt.join(" "), &expert)?,
	        Commands::Info => {
	            let expert_names: Vec<String> =
	                config.experts.iter().map(|e| e.name.clone()).collect();
	            let router = router::Router::new(&expert_names);
	            println!("{}", router.routing_info());
	            println!("\nConfiguration:");
	            println!(" Base model: {:?}", config.base_model_path);
	            println!(" Router model: {:?}", config.router_model_path);
	            println!(" Adapters dir: {:?}", config.adapters_dir);
	            println!(" Knowledge base: {:?}", config.kb_path);
	            println!(" GPU device: {}", config.gpu_device);
	            println!(" Max gen tokens: {}", config.max_gen_tokens);
	            println!(" Max seq len: {}", config.max_seq_len);
	        }
	        Commands::Reset => {
	            // A one-shot invocation holds no conversation state, so only a hint is shown.
	            println!("To reset, restart selential or use /reset in interactive mode.");
	        }
	    }

	    Ok(())
	}

	/// Answers a single prompt and prints the response to stdout.
	///
	/// When `expert` is empty or `"general"` the engine picks the expert itself
	/// via `process_query`; any other value is forwarded as the explicitly
	/// requested expert via `process_query_with_expert`.
	///
	/// # Errors
	///
	/// Returns an error if the inference engine cannot be created or if
	/// processing the query fails.
	fn run_prompt(config: config::Config, prompt: &str, expert: &str) -> anyhow::Result<()> {
	    let mut engine = inference::InferenceEngine::new(config)?;

	    let no_explicit_expert = expert.is_empty() || expert == "general";
	    let response = if no_explicit_expert {
	        engine.process_query(prompt)?
	    } else {
	        engine.process_query_with_expert(prompt, Some(expert))?
	    };

	    println!("{response}");
	    Ok(())
	}

	/// Runs the interactive chat loop on stdin/stdout.
	///
	/// Prints the banner and then reads one line at a time. Known slash commands
	/// are handled locally; every other non-empty line is sent to the engine as a
	/// query and the answer is printed together with the active expert. The loop
	/// ends on `/exit`, `/quit`, or when stdin reaches end of input.
	///
	/// # Errors
	///
	/// Returns an error if the inference engine cannot be created, if reading
	/// stdin or flushing stdout fails, or if the session statistics cannot be
	/// serialized. Errors from individual queries are printed to stderr and do
	/// not end the session.
	fn run_interactive(config: config::Config) -> anyhow::Result<()> {
	    use std::io::Write;

	    let mut engine = inference::InferenceEngine::new(config)?;

	    print_banner();

	    let stdin = std::io::stdin();
	    // One buffer reused for every line instead of a fresh allocation per prompt.
	    let mut line = String::new();

	    loop {
	        print!("> ");
	        std::io::stdout().flush()?;

	        line.clear();
	        if stdin.read_line(&mut line)? == 0 {
	            // Zero bytes read means end of input (Ctrl-D or an exhausted pipe).
	            // Without this check the empty line would hit `continue` below and
	            // the loop would spin forever re-printing the prompt.
	            println!();
	            break;
	        }
	        let input = line.trim();

	        if input.is_empty() {
	            continue;
	        }

	        match input {
	            "/exit" | "/quit" => {
	                println!("Goodbye!");
	                break;
	            }
	            "/reset" => {
	                engine.reset();
	                println!("Conversation reset.");
	                continue;
	            }
	            "/stats" => {
	                println!("{}", serde_json::to_string_pretty(&engine.stats())?);
	                continue;
	            }
	            "/orchestra" => {
	                print_orchestra(engine.active_expert());
	                continue;
	            }
	            "/tags" => {
	                print_tags();
	                continue;
	            }
	            "/help" => {
	                print_help(engine.active_expert());
	                continue;
	            }
	            "/pipeline" => {
	                println!("{}", engine.pipeline_info());
	                continue;
	            }
	            "/hashtags" => {
	                // The input is trimmed, so a bare command never matches the
	                // "/hashtags " prefix below; show usage instead of sending
	                // the command text to the model as a query.
	                println!("Usage: /hashtags <query>");
	                continue;
	            }
	            other => {
	                if let Some(query) = other.strip_prefix("/hashtags ") {
	                    print_hashtag_report(query);
	                    continue;
	                }
	                // Anything else falls through and is treated as a query.
	            }
	        }

	        // Show extracted hashtags before processing
	        let tags = hashtags::extract_hashtags(input);
	        if !tags.is_empty() {
	            println!(" 🏷️ {}", tags.join(" "));
	        }

	        match engine.process_query(input) {
	            Ok(response) => {
	                let expert = engine.active_expert();
	                let icon = expert_icon(expert);
	                println!("\n[{icon} {expert}]");
	                println!("{}\n", response);
	            }
	            Err(e) => {
	                eprintln!("Error: {:#}", e);
	            }
	        }
	    }

	    Ok(())
	}

	/// Prints the start-up banner listing the experts and the slash commands.
	fn print_banner() {
	    println!("\n╔══════════════════════════════════════════════════╗");
	    println!("║ Selential Core — MoLoRA Engine v2.0 ║");
	    println!("╠══════════════════════════════════════════════════╣");
	    println!("║ Orchestra routing: hashtags → expert layers ║");
	    println!("║ 🏗️ structural — struct, impl, trait, enum ║");
	    println!("║ 🔀 flow_error — match, result, concurrency ║");
	    println!("║ 📁 system_io — file I/O, collections ║");
	    println!("╠══════════════════════════════════════════════════╣");
	    println!("║ /help /reset /stats /orchestra /exit ║");
	    println!("║ /hashtags <query> /tags ║");
	    println!("╚══════════════════════════════════════════════════╝\n");
	}

	/// Prints the `/orchestra` overview followed by the currently active expert.
	fn print_orchestra(active: &str) {
	    println!("{}", "═".repeat(50));
	    println!("🎵 Selential 2.0 — Оркестр экспертов");
	    println!("{}", "═".repeat(50));
	    println!();
	    println!(" 🌐 Layer 1: Generalist Core (#70)");
	    println!(" Всегда активен — связность, логика, синтаксис");
	    println!(" GGUF: generalist_core.gguf (~24 MB)");
	    println!();
	    println!(" 🎯 Layer 2: Coding Specialists (по хештегам):");
	    println!();
	    println!(" 🏗️ structural — #struct #impl #trait #enum");
	    println!(" + #164 (architect) + #92 (impl)");
	    println!(" GGUF: structural.gguf (~24 MB)");
	    println!();
	    println!(" 🔀 flow_error — #match #result #option #error #concurrency");
	    println!(" + #116 (match) + #115 (result)");
	    println!(" GGUF: flow_error.gguf (~24 MB)");
	    println!();
	    println!(" 📁 system_io — #io #file #collections");
	    println!(" + #172 (file I/O) + #116 (match/IO)");
	    println!(" GGUF: system_io.gguf (~24 MB)");
	    println!();
	    println!(" 🦀 rust_coding — legacy (backward compat)");
	    println!(" GGUF: rust_coding.gguf (~4 MB)");
	    println!();
	    println!(" 📊 VRAM: ~47 MB на эксперта | Всего: ~24 MB на оркестр");
	    println!(" ⚡ t/s: ~8.7 (с LoRA) vs ~9.7 (baseline) — лишь -10%");
	    println!();
	    println!(" Active: {}", active);
	}

	/// Prints the `/tags` list of hashtags recognised for routing.
	fn print_tags() {
	    println!("Available hashtags for routing:");
	    println!(" Code: #struct #impl #trait #enum #match #result #option #error");
	    println!(" IO: #io #file #collections #regex");
	    println!(" Async:#concurrency #async #thread");
	    println!(" Lang: #rust #python #javascript #zig #cpp #golang");
	    println!(" Tone: #casual #teaching #formal");
	}

	/// Prints the `/help` command summary followed by the currently active expert.
	fn print_help(active: &str) {
	    println!("Commands:");
	    println!(" /help - Show this help");
	    println!(" /reset - Reset conversation");
	    println!(" /stats - Show session statistics");
	    println!(" /orchestra - Show orchestra routing info");
	    println!(" /tags - List available hashtags");
	    println!(" /hashtags - Extract hashtags from a query");
	    println!(" /pipeline - Show pipeline info (KB, cache)");
	    println!(" /exit - Exit the program");
	    println!("\nActive: {} | Any other input = query", active);
	}

	/// Prints the hashtags and detected language for `query` without running inference.
	fn print_hashtag_report(query: &str) {
	    let tags = hashtags::extract_hashtags(query);
	    println!("Query: {}", query);
	    println!("Hashtags: {}", tags.join(" "));
	    let language = if hashtags::is_russian(query) {
	        "Russian"
	    } else {
	        "English/mixed"
	    };
	    println!("Language: {}", language);
	    // Placeholder line: the actual orchestra selection is not computed here.
	    println!("Would route to: (tag-based orchestra detection)");
	}

	/// Returns the emoji shown next to an expert name; unknown names get the robot icon.
	fn expert_icon(expert: &str) -> &'static str {
	    match expert {
	        "structural" => "🏗️",
	        "flow_error" => "🔀",
	        "system_io" => "📁",
	        "rust_coding" => "🦀",
	        "friendly_chat" => "💬",
	        "teaching" => "📚",
	        _ => "🤖",
	    }
	}