//! Simple Russian → English translator for query preprocessing. //! //! Strategy: //! 1. Common phrase mapping (fast, no model needed) — covers ~70% of coding queries //! 2. For complex queries, returns the original with a `[lang:ru]` tag //! so the pipeline can optionally use the model for translation. //! //! The goal is to produce English text that Qwen understands better //! while preserving the original language metadata. /// Translation result #[derive(Debug, Clone)] pub struct Translation { /// Translated (or original) text pub text: String, /// Original language code (e.g., "ru", "en") pub original_lang: String, /// Whether the text was actually translated pub was_translated: bool, } /// Common Russian → English phrase mapping for coding domains const RU_TO_EN: &[(&str, &str)] = &[ // --- Greetings --- ("привет", "hello"), ("здравствуй", "hello"), ("как дела", "how are you"), ("что нового", "what's new"), ("расскажи", "tell me"), ("покажи", "show me"), // --- Coding actions --- ("напиши", "write"), ("сделай", "make"), ("создай", "create"), ("реализуй", "implement"), ("исправь", "fix"), ("почини", "fix"), ("объясни", "explain"), ("опиши", "describe"), ("сравни", "compare"), ("переведи", "translate"), ("оптимизируй", "optimize"), ("ускорь", "speed up"), ("протестируй", "test"), ("научи", "teach"), ("разбери", "analyze"), ("найди", "find"), ("проверь", "check"), ("добавь", "add"), ("удали", "remove"), ("измени", "change"), ("перепиши", "rewrite"), ("запусти", "run"), ("скомпилируй", "compile"), ("установи", "install"), // --- Nouns --- ("калькулятор", "calculator"), ("функция", "function"), ("функцию", "function"), ("переменная", "variable"), ("переменную", "variable"), ("программа", "program"), ("программу", "program"), ("алгоритм", "algorithm"), ("структура данных", "data structure"), ("база данных", "database"), ("сервер", "server"), ("клиент", "client"), ("файл", "file"), ("строка", "string"), ("строку", "string"), ("число", "number"), ("массив", "array"), ("список", "list"), ("словарь", "dictionary"), ("ошибка", "error"), ("ошибку", "error"), ("баг", "bug"), ("поток", "thread"), ("сокет", "socket"), ("интерфейс", "interface"), ("библиотека", "library"), ("библиотеку", "library"), ("пакет", "package"), ("модуль", "module"), ("класс", "class"), ("объект", "object"), ("тип", "type"), ("цикл", "loop"), ("условие", "condition"), ("рекурсия", "recursion"), ("рекурсию", "recursion"), ("граф", "graph"), ("дерево", "tree"), ("хеш", "hash"), ("пароль", "password"), ("ключ", "key"), ("значение", "value"), ("память", "memory"), ("указатель", "pointer"), ("ссылку", "reference"), ("замыкание", "closure"), ("итератор", "iterator"), ("генератор", "generator"), // --- Qualifiers --- ("потокобезопасный", "thread-safe"), ("многопоточный", "multithreaded"), ("асинхронный", "asynchronous"), ("быстрый", "fast"), ("простой", "simple"), ("простое", "simple"), ("сложный", "complex"), ("безопасный", "safe"), ("эффективный", "efficient"), ("красивый", "beautiful"), ("разноцветный", "colorful"), ("шрифт", "font"), ("шрифтом", "font"), // --- Utility --- ("что такое", "what is"), ("как работает", "how does"), ("зачем нужен", "why is"), ("зачем нужна", "why is"), ("почему", "why"), ("когда", "when"), ("где", "where"), ("какой", "which"), ("сколько", "how many"), ("можно ли", "is it possible to"), ("нужно ли", "do I need to"), ("должен ли", "should I"), ("пример", "example"), ("например", "for example"), ("используя", "using"), ("помощью", "using"), ("на языке", "in"), ("языке", "language"), ]; /// Translate Russian text to English using common phrase substitution. /// /// Handles mixed Russian/English text (common in coding queries). /// Only translates if the query is predominantly Russian. pub fn translate_ru_to_en(query: &str, force: bool) -> Translation { let is_ru = crate::hashtags::is_russian(query); if !is_ru && !force { return Translation { text: query.to_string(), original_lang: "en".to_string(), was_translated: false, }; } let mut result = query.to_lowercase(); let mut translated = false; // Sort by length descending to match longer phrases first let mut sorted_pairs: Vec<_> = RU_TO_EN.iter().collect(); sorted_pairs.sort_by_key(|(ru, _)| -(ru.len() as i32)); for (ru, en) in &sorted_pairs { if result.contains(*ru) { result = result.replace(*ru, en); translated = true; } } // NOTE: we don't capitalize — translated text goes into model prompts, // not displayed to users. Lowercase is fine for Qwen/LLM consumption. Translation { text: result, original_lang: if is_ru { "ru".to_string() } else { "en".to_string() }, was_translated: translated && is_ru, } } /// Get a language tag for the ChatML system prompt pub fn language_tag(translation: &Translation) -> &str { if translation.original_lang == "ru" && translation.was_translated { "[lang:ru→en]" } else if translation.original_lang == "ru" { "[lang:ru]" } else { "" } } #[cfg(test)] mod tests { use super::*; #[test] fn test_simple_translation() { let t = translate_ru_to_en("напиши калькулятор на rust", false); assert!(t.was_translated); assert!(t.text.contains("write")); assert!(t.text.contains("calculator")); assert_eq!(t.original_lang, "ru"); } #[test] fn test_english_passthrough() { let t = translate_ru_to_en("Write a Rust function", false); assert!(!t.was_translated); assert_eq!(t.original_lang, "en"); } #[test] fn test_mixed_query() { let t = translate_ru_to_en("как работает async в rust", false); assert!(t.text.contains("how does")); assert!(t.text.contains("async")); assert!(t.text.contains("rust")); } }