Spaces:
Sleeping
Sleeping
| # translator.py - AI-Powered Translation System with Arabic Support | |
| import os | |
| from dotenv import load_dotenv | |
| import json | |
| import traceback | |
| from typing import Dict, List, Optional, Tuple | |
| import google.generativeai as genai | |
| import streamlit as st | |
class AITranslator:
    """
    AI-powered translator using Gemini API with specialized Arabic support.

    Public methods never raise for runtime failures: they return an
    ``(result, error_message)`` tuple (or fall back to the original text)
    so that callers can surface the message in the UI.
    """

    def __init__(self, model_name: str = 'gemini-1.5-flash'):
        """
        Set up the language table and try to create the Gemini model.

        Args:
            model_name: Name passed to ``genai.GenerativeModel``. Defaults to
                the value that was previously hard-coded.
        """
        self.model = None
        self.init_error = None
        self.model_name = model_name
        # Language code -> human-readable name used inside prompts.
        self.supported_languages = {
            'ar': 'Arabic (العربية)',
            'en': 'English',
            'fr': 'French (Français)',
            'es': 'Spanish (Español)',
            'de': 'German (Deutsch)',
            'zh': 'Chinese (中文)',
            'hi': 'Hindi (हिन्दी)',
            'ur': 'Urdu (اردو)',
        }
        self._initialize_gemini()

    def _initialize_gemini(self):
        """
        Initialize the Gemini client from the GEMINI_API_KEY environment variable.

        On failure ``self.model`` stays ``None`` and ``self.init_error`` holds a
        message; nothing is raised.
        """
        try:
            load_dotenv()
            api_key = os.getenv("GEMINI_API_KEY")
            if not api_key:
                # This error is handled by the main app now, but we keep a fallback.
                self.init_error = "GEMINI_API_KEY not found."
                return
            genai.configure(api_key=api_key)
            self.model = genai.GenerativeModel(self.model_name)
        except Exception as e:
            # Broad on purpose: any client/library failure must become a
            # reportable message instead of crashing the app at startup.
            self.init_error = f"FATAL ERROR during Gemini Init: {str(e)}"
            self.model = None

    def translate_text(self, text: str, target_language: str = 'ar', source_language: str = 'auto') -> Tuple[Optional[str], Optional[str]]:
        """
        Translate text to target language using Gemini AI.

        Args:
            text: Text to translate
            target_language: Target language code ('ar' for Arabic)
            source_language: Source language code ('auto' for auto-detection).
                Any other value is named in the prompt as the source language.

        Returns:
            Tuple of (translated_text, error_message); exactly one is None.
        """
        if self.init_error:
            return None, self.init_error
        if not self.model:
            return None, "ERROR: Gemini model is not available."
        if not text or not text.strip():
            return None, "ERROR: Empty text provided for translation."

        try:
            # Unknown codes fall back to the raw code so the prompt still names something.
            target_lang_name = self.supported_languages.get(target_language, target_language)
            source_lang_name = None
            if source_language and source_language != 'auto':
                source_lang_name = self.supported_languages.get(source_language, source_language)

            prompt = self._create_translation_prompt(
                text, target_lang_name, target_language, source_lang_name
            )
            response = self.model.generate_content(prompt)

            if response and hasattr(response, 'text') and response.text:
                translated_text = self._clean_translation_output(response.text.strip())
                # A reply made only of formatting characters cleans down to
                # nothing; treat that like an empty response, not a success.
                if translated_text:
                    return translated_text, None
            return None, "WARNING: Gemini returned empty translation response."
        except Exception:
            # Broad on purpose: the caller gets the failure as a message.
            return None, f"FATAL ERROR during translation: {traceback.format_exc()}"

    def _create_translation_prompt(self, text: str, target_lang_name: str, target_lang_code: str,
                                   source_lang_name: Optional[str] = None) -> str:
        """
        Build the translation prompt.

        Args:
            text: Text to embed at the end of the prompt.
            target_lang_name: Human-readable target language name.
            target_lang_code: Target language code; 'ar' selects the
                specialized Arabic instructions.
            source_lang_name: Optional human-readable source language name;
                when given, a line stating it is added to the prompt.

        Returns:
            The prompt as a newline-separated string ending with ``text``.
        """
        if target_lang_code == 'ar':
            # Specialized prompt for Arabic translation
            lines = [
                "Translate the following text to Arabic (العربية) with these requirements:",
                "1. Maintain the original meaning accurately",
                "2. Use Modern Standard Arabic (MSA) appropriate for academic contexts",
                "3. Preserve technical terms when appropriate",
                "4. Make it natural and fluent for Arabic speakers",
                "5. For educational content, use clear and accessible language",
                "6. Return ONLY the translated text without any explanations or formatting",
            ]
        else:
            # General prompt for other languages
            lines = [
                f"Translate the following text to {target_lang_name} accurately while:",
                "1. Maintaining the original meaning",
                "2. Using appropriate formal/academic tone if the content appears educational",
                "3. Preserving any technical terms appropriately",
                "4. Making it natural and fluent for native speakers",
                "5. Return ONLY the translated text without explanations",
            ]

        if source_lang_name:
            lines.append(f"The source text is written in {source_lang_name}.")
        lines.append("Text to translate:")
        lines.append(text)
        return "\n".join(lines)

    def _clean_translation_output(self, text: str) -> str:
        """
        Clean up translation output from any unwanted formatting.

        Removes asterisks and backticks, strips one or more *matching* pairs of
        surrounding quotes, collapses runs of spaces/tabs inside each line, and
        keeps line breaks (at most one blank line in a row) so multi-paragraph
        translations keep their structure.
        """
        # Remove common markdown artifacts
        for marker in ('**', '*', '```', '`'):
            text = text.replace(marker, '')
        text = text.strip()

        # Only strip quotes that wrap the whole text. Requiring the quote
        # character to be absent from the interior avoids damaging text such as
        # '"a" and "b"' or a sentence that merely ends with a quotation.
        while (
            len(text) >= 2
            and text[0] == text[-1]
            and text[0] in ('"', "'")
            and text[0] not in text[1:-1]
        ):
            text = text[1:-1].strip()

        # Normalise whitespace per line while preserving paragraph breaks.
        kept_lines = []
        for raw_line in text.splitlines():
            line = ' '.join(raw_line.split())
            if line or (kept_lines and kept_lines[-1]):
                kept_lines.append(line)
        while kept_lines and not kept_lines[-1]:
            kept_lines.pop()
        return '\n'.join(kept_lines)

    def translate_ui_elements(self, ui_dict: Dict[str, str], target_language: str = 'ar') -> Dict[str, str]:
        """
        Translate UI elements dictionary.

        Args:
            ui_dict: Dictionary of UI elements {key: english_text}
            target_language: Target language code

        Returns:
            Dictionary with the same keys; a value is the translation, or the
            original text when that translation failed.
        """
        translated_dict = {}
        for key, english_text in ui_dict.items():
            translated_text, error = self.translate_text(english_text, target_language)
            if translated_text:
                translated_dict[key] = translated_text
                continue
            # Fallback to original text if translation fails
            translated_dict[key] = english_text
            print(f"Translation failed for '{key}': {error}")
        return translated_dict

    def batch_translate(self, texts: List[str], target_language: str = 'ar') -> List[Dict[str, object]]:
        """
        Translate multiple texts, one request per text.

        Args:
            texts: List of texts to translate
            target_language: Target language code

        Returns:
            One dictionary per input with keys 'index', 'original',
            'translated' (the original text when translation failed),
            'success' and 'error'.
        """
        results = []
        for index, text in enumerate(texts):
            translated_text, error = self.translate_text(text, target_language)
            results.append({
                'index': index,
                'original': text,
                'translated': translated_text if translated_text else text,
                'success': translated_text is not None,
                'error': error,
            })
        return results

    def get_supported_languages(self) -> Dict[str, str]:
        """Return a copy of the {code: name} language table."""
        return self.supported_languages.copy()

    def detect_language(self, text: str) -> Tuple[Optional[str], Optional[str]]:
        """
        Detect the language of given text.

        Only the first 200 characters are sent to the model.

        Args:
            text: Text to analyze

        Returns:
            Tuple of (language_code, error_message). On success the code is one
            of the supported language codes or 'unknown'.
        """
        if not self.model:
            return None, "ERROR: Gemini model not available"
        if not text or not text.strip():
            return None, "ERROR: Empty text provided for language detection."

        try:
            sample = text[:200]
            if len(text) > 200:
                # Mark truncation only when the text was actually cut.
                sample += "..."

            lines = ["Detect the language of the following text and return ONLY the language code:"]
            # Generated from the language table so prompt and table cannot drift apart.
            lines.extend(
                f"- Return '{code}' for {name}"
                for code, name in self.supported_languages.items()
            )
            lines.append("- Return 'unknown' if unsure")
            lines.append(f"Text: {sample}")
            prompt = "\n".join(lines)

            response = self.model.generate_content(prompt)
            if response and hasattr(response, 'text') and response.text:
                # The prompt shows codes in quotes, so the reply may come back
                # quoted or with trailing punctuation; normalise before checking.
                detected_lang = response.text.strip().strip('\'"`. ').lower()
                if detected_lang in self.supported_languages:
                    return detected_lang, None
                return 'unknown', None
            return None, "Failed to detect language"
        except Exception as e:
            return None, f"Error detecting language: {str(e)}"
# Static UI strings, one table per language. Both tables share the same keys.
# English strings (also the fallback language for lookups).
_UI_STRINGS_EN = {
    'start_recording': 'Start Recording',
    'stop_recording': 'Stop Recording',
    'pause_recording': 'Pause Recording',
    'resume_recording': 'Resume Recording',
    'mark_important': 'Mark Important',
    'extract_text': 'Extract Text',
    'rerecord': 'Re-record',
    'processing': 'Processing...',
    'ready_to_record': 'Ready to Record',
    'recording': 'Recording...',
    'paused': 'Paused',
    'review_recording': 'Review your recording',
    'processing_complete': 'Processing Complete!',
    'upload_file': 'Upload a File',
    'record_audio': 'Record Audio',
    'choose_audio_file': 'Choose an audio file',
    'supported_formats': 'Supported formats: MP3, WAV, M4A',
    'microphone_permission': 'Microphone permission denied.',
    'browser_not_supported': 'Your browser does not support audio recording.',
    'quality': 'Quality',
    'language': 'Language',
    'settings': 'Settings',
    'help': 'Help',
    'about': 'About',
    'simple_mode': 'Simple Mode',
    'advanced_mode': 'Advanced Mode',
    'lecture_mode': 'Lecture Mode',
    'transcription': 'Transcription',
    'translation': 'Translation',
    'markers': 'Important Markers',
    'duration': 'Duration',
    'file_size': 'File Size',
    'audio_level': 'Audio Level',
    'error_occurred': 'An error occurred',
    'try_again': 'Try Again',
    'success': 'Success',
    'failed': 'Failed',
    'loading': 'Loading...',
    'save': 'Save',
    'cancel': 'Cancel',
    'close': 'Close',
    'download': 'Download',
    'share': 'Share',
    'copy': 'Copy',
    'paste': 'Paste',
    'clear': 'Clear',
    'reset': 'Reset',
}

# Arabic strings.
_UI_STRINGS_AR = {
    'start_recording': 'بدء التسجيل',
    'stop_recording': 'إيقاف التسجيل',
    'pause_recording': 'إيقاف مؤقت',
    'resume_recording': 'استئناف التسجيل',
    'mark_important': 'تعليم مهم',
    'extract_text': 'استخراج النص',
    'rerecord': 'إعادة تسجيل',
    'processing': 'جاري المعالجة...',
    'ready_to_record': 'جاهز للتسجيل',
    'recording': 'جاري التسجيل...',
    'paused': 'متوقف مؤقتاً',
    'review_recording': 'مراجعة التسجيل',
    'processing_complete': 'اكتملت المعالجة!',
    'upload_file': 'رفع ملف',
    'record_audio': 'تسجيل صوتي',
    'choose_audio_file': 'اختر ملف صوتي',
    'supported_formats': 'التنسيقات المدعومة: MP3, WAV, M4A',
    'microphone_permission': 'تم رفض إذن الميكروفون.',
    'browser_not_supported': 'متصفحك لا يدعم التسجيل الصوتي.',
    'quality': 'الجودة',
    'language': 'اللغة',
    'settings': 'الإعدادات',
    'help': 'المساعدة',
    'about': 'حول',
    'simple_mode': 'الوضع البسيط',
    'advanced_mode': 'الوضع المتقدم',
    'lecture_mode': 'وضع المحاضرة',
    'transcription': 'النسخ النصي',
    'translation': 'الترجمة',
    'markers': 'العلامات المهمة',
    'duration': 'المدة',
    'file_size': 'حجم الملف',
    'audio_level': 'مستوى الصوت',
    'error_occurred': 'حدث خطأ',
    'try_again': 'حاول مرة أخرى',
    'success': 'نجح',
    'failed': 'فشل',
    'loading': 'جاري التحميل...',
    'save': 'حفظ',
    'cancel': 'إلغاء',
    'close': 'إغلاق',
    'download': 'تحميل',
    'share': 'مشاركة',
    'copy': 'نسخ',
    'paste': 'لصق',
    'clear': 'مسح',
    'reset': 'إعادة تعيين',
}

# Language code -> {ui_key: display text}.
UI_TRANSLATIONS = {
    'en': _UI_STRINGS_EN,
    'ar': _UI_STRINGS_AR,
}
# Lookup helper for the static UI strings
def get_translation(key: str, language: str = 'en') -> str:
    """
    Return the UI string for ``key`` in ``language``.

    Falls back to the English string when the language or key is missing,
    and to ``key`` itself when English has no entry either.
    """
    localized_strings = UI_TRANSLATIONS.get(language, {})
    english_or_key = UI_TRANSLATIONS['en'].get(key, key)
    return localized_strings.get(key, english_or_key)
@st.cache_resource
def get_translator():
    """
    Get a shared translator instance using Streamlit's resource caching.

    The ``st.cache_resource`` decorator makes Streamlit build the
    ``AITranslator`` on the first call and hand back the same object on
    later calls, so the model is not re-initialized on every script rerun.

    NOTE(review): the cached object also keeps any ``init_error`` from the
    first construction (e.g. a missing API key) until the cache is cleared
    or the app restarts - confirm this is acceptable for the main app.
    """
    return AITranslator()