Spaces:
Running
Running
| #!/usr/bin/env python3 | |
| # -*- coding: utf-8 -*- | |
| import os | |
| import sys | |
# Force UTF-8 console output on Windows, where the default code page cannot
# encode the emoji and Vietnamese text this app prints.
if sys.platform == 'win32':
    import codecs  # kept in scope in case later parts of the file reference it

    for _stream in (sys.stdout, sys.stderr):
        # reconfigure() switches the encoding in place. Unlike detach() plus a
        # codecs writer, it keeps the stream a regular text stream and does not
        # fail when the stream is missing (e.g. pythonw) or already replaced.
        if _stream is not None and hasattr(_stream, "reconfigure"):
            _stream.reconfigure(encoding='utf-8')
| # Load environment variables from .env file (optimized for HF Spaces) | |
| try: | |
| # Only load .env in local development, skip in production | |
| if not os.environ.get("SPACE_ID") and not os.environ.get("HF_SPACE_ID"): | |
| from dotenv import load_dotenv | |
| load_dotenv() | |
| print("✅ Environment variables loaded from .env file") | |
| else: | |
| print("🏭 Production environment - using system environment variables") | |
| except ImportError: | |
| print("⚠️ python-dotenv not installed. Using system environment variables only.") | |
| except Exception as e: | |
| print(f"⚠️ Error loading .env file: {e}") | |
| # Essential imports for HF Spaces | |
| import numpy as np | |
| import gradio as gr | |
| # Try to import google-generativeai with fallback | |
| try: | |
| import google.generativeai as genai | |
| GENAI_AVAILABLE = True | |
| except ImportError as e: | |
| print(f"⚠️ google-generativeai not available: {e}") | |
| GENAI_AVAILABLE = False | |
| genai = None | |
| try: | |
| from gtts import gTTS, lang | |
| GTTS_AVAILABLE = True | |
| except ImportError as e: | |
| print(f"⚠️ gtts not available: {e}") | |
| GTTS_AVAILABLE = False | |
| import tempfile | |
| # import soundfile as sf # Import locally to avoid startup overhead | |
| # Kokoro not used - removed for performance | |
| import time | |
| import base64 | |
| # Try to import optional dependencies | |
| try: | |
| import edge_tts | |
| EDGE_TTS_AVAILABLE = True | |
| except ImportError as e: | |
| print(f"⚠️ edge-tts not available: {e}") | |
| EDGE_TTS_AVAILABLE = False | |
| import asyncio | |
| import io | |
| try: | |
| import PyPDF2 | |
| PDF_AVAILABLE = True | |
| except ImportError: | |
| PDF_AVAILABLE = False | |
| try: | |
| import docx | |
| DOCX_AVAILABLE = True | |
| except ImportError: | |
| DOCX_AVAILABLE = False | |
| import shutil | |
| import atexit | |
| import glob | |
| import datetime | |
| # Librosa not used - removed for performance | |
| # === RECORD DATA MANAGEMENT === | |
RECORD_DATA_DIR = "record_data"


def create_record_data_directory():
    """Ensure the recordings directory exists and return its path.

    Returns:
        The value of ``RECORD_DATA_DIR``.

    Raises:
        OSError: if the directory cannot be created (for example because a
            regular file with that name already exists).
    """
    already_present = os.path.isdir(RECORD_DATA_DIR)
    # exist_ok avoids the check-then-create race when two requests arrive
    # at the same time and both find the directory missing.
    os.makedirs(RECORD_DATA_DIR, exist_ok=True)
    if not already_present:
        print(f"✅ Created directory: {RECORD_DATA_DIR}")
    return RECORD_DATA_DIR
def cleanup_record_data():
    """Remove the recordings directory, except on Hugging Face Spaces.

    When either ``SPACE_ID`` or ``HF_SPACE_ID`` is set in the environment the
    directory is kept. Any error is reported on stdout and swallowed.
    """
    try:
        on_spaces = os.environ.get("SPACE_ID") or os.environ.get("HF_SPACE_ID")
        if on_spaces:
            # Production: keep recordings in place.
            print(f"🏭 Production environment detected - keeping {RECORD_DATA_DIR} directory")
        elif os.path.exists(RECORD_DATA_DIR):
            # Local development: wipe the whole directory tree.
            shutil.rmtree(RECORD_DATA_DIR)
            print(f"🧹 Cleaned up {RECORD_DATA_DIR} directory")
    except Exception as e:
        print(f"⚠️ Error cleaning up {RECORD_DATA_DIR}: {e}")
def save_recorded_audio(audio_data, original_filename=None):
    """Persist an audio input as a ``.wav`` file inside ``RECORD_DATA_DIR``.

    Args:
        audio_data: One of
            * a path (``str`` or ``os.PathLike``) to an existing file, which is copied;
            * a ``(sample_rate, audio_array)`` tuple, written with ``soundfile``;
            * a bytes-like object, written verbatim.
        original_filename: Optional name whose stem is embedded in the
            generated file name.

    Returns:
        The path of the saved file, or ``None`` if saving failed. Failures are
        printed together with a traceback; no exception propagates.
    """
    filepath = None
    created_here = False
    try:
        if audio_data is None:
            print("❌ Error saving recorded audio: no audio data provided")
            return None

        create_record_data_directory()

        # File name: recorded_[<stem>_]<timestamp>.wav
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        if original_filename:
            name_part = os.path.splitext(os.path.basename(original_filename))[0]
            filename = f"recorded_{name_part}_{timestamp}.wav"
        else:
            filename = f"recorded_{timestamp}.wav"
        filepath = os.path.join(RECORD_DATA_DIR, filename)
        # Remember whether this call creates the file, so a failure below
        # never deletes a recording saved by an earlier call.
        created_here = not os.path.exists(filepath)

        if isinstance(audio_data, (str, os.PathLike)):
            # A path that does not exist used to fall through to the raw-bytes
            # branch and leave an empty file behind; fail explicitly instead.
            if not os.path.isfile(audio_data):
                raise FileNotFoundError(f"Audio file not found: {audio_data}")
            shutil.copy2(audio_data, filepath)
        elif isinstance(audio_data, tuple) and len(audio_data) == 2:
            # Numpy array format (sample_rate, audio_array)
            sample_rate, audio_array = audio_data
            import soundfile as sf  # imported lazily to avoid startup overhead
            sf.write(filepath, audio_array, sample_rate)
            print(f"📊 Saved numpy audio: sr={sample_rate}, shape={audio_array.shape}")
        elif isinstance(audio_data, (bytes, bytearray, memoryview)):
            with open(filepath, 'wb') as f:
                f.write(audio_data)
        else:
            raise TypeError(f"Unsupported audio data type: {type(audio_data).__name__}")

        print(f"✅ Saved recorded audio: {filepath}")
        return filepath
    except Exception as e:
        print(f"❌ Error saving recorded audio: {e}")
        import traceback
        traceback.print_exc()
        # Do not leave a truncated or empty file that would later show up in
        # the list of recordings.
        if created_here and filepath and os.path.exists(filepath):
            try:
                os.remove(filepath)
            except OSError:
                pass
        return None
def get_recorded_files():
    """List the ``.wav`` files in ``RECORD_DATA_DIR``, newest first.

    Returns:
        Base file names (no directory part) ordered by modification time,
        most recent first. An empty list is returned when the directory does
        not exist or when any error occurs.
    """
    try:
        if not os.path.exists(RECORD_DATA_DIR):
            print(f"📁 Record directory does not exist: {RECORD_DATA_DIR}")
            return []

        wav_paths = glob.glob(os.path.join(RECORD_DATA_DIR, "*.wav"))
        print(f"🔍 Found {len(wav_paths)} files in {RECORD_DATA_DIR}")

        # Most recently modified file first.
        newest_first = sorted(wav_paths, key=os.path.getmtime, reverse=True)

        # Only the file names are returned for display.
        filenames = list(map(os.path.basename, newest_first))
        print(f"📂 Returning filenames: {filenames}")
        return filenames
    except Exception as e:
        print(f"❌ Error getting recorded files: {e}")
        return []
def get_recorded_file_path(filename):
    """Return the path of a recording inside ``RECORD_DATA_DIR``.

    Only the final path component of ``filename`` is used, so values such as
    ``"../secret.wav"`` or an absolute path cannot resolve to a location
    outside the recordings directory.
    """
    return os.path.join(RECORD_DATA_DIR, os.path.basename(filename))
def delete_recorded_file(filename):
    """Delete one recording from ``RECORD_DATA_DIR``.

    Args:
        filename: Name of the recording as shown to the user.

    Returns:
        A user-facing status message (Vietnamese) describing the outcome.
        Errors are reported in the message; no exception propagates.
    """
    try:
        if not filename or not filename.strip():
            return "❌ Không có file nào được chọn để xóa"

        # Strip directory components: the name comes from the client, and a
        # value like "../../app.py" must never reach os.remove().
        safe_name = os.path.basename(filename)
        file_path = get_recorded_file_path(safe_name)
        print(f"🗑️ Attempting to delete: {file_path}")

        # isfile (not exists) so a directory is never passed to os.remove().
        if os.path.isfile(file_path):
            os.remove(file_path)
            print(f"✅ Successfully deleted: {safe_name}")
            return f"✅ Đã xóa file: {safe_name}"

        print(f"❌ File not found: {file_path}")
        return f"❌ Không tìm thấy file: {safe_name}"
    except Exception as e:
        print(f"❌ Error deleting file: {e}")
        return f"❌ Lỗi khi xóa file: {str(e)}"
| # Register cleanup function to run when app exits (disabled for stability) | |
| # atexit.register(cleanup_record_data) # Disabled to prevent data loss on deployment | |
| # DOCX support already checked above | |
| # Configure Gemini API - Delayed configuration for faster startup | |
GEMINI_API_KEY = None


def configure_gemini_api():
    """Configure the Gemini client on first use and return the API key.

    The key is read from ``GEMINI_API_KEY`` or, failing that,
    ``GOOGLE_API_KEY``, and is cached in the module-level ``GEMINI_API_KEY``
    once found.

    Returns:
        The API key, or ``None`` when google-generativeai is not installed or
        no key is present in the environment.
    """
    global GEMINI_API_KEY

    if not GENAI_AVAILABLE:
        print("❌ google-generativeai not available")
        return None

    # Already configured by an earlier call.
    if GEMINI_API_KEY is not None:
        return GEMINI_API_KEY

    GEMINI_API_KEY = os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY")
    if not GEMINI_API_KEY:
        print("⚠️ GEMINI_API_KEY or GOOGLE_API_KEY not found in environment variables")
    else:
        genai.configure(api_key=GEMINI_API_KEY)
        print("✅ Gemini API configured successfully")
    return GEMINI_API_KEY
| # Language configurations for Audio Translation (simplified) | |
# gTTS language table (code -> display name); a two-entry fallback is used
# when gTTS could not be imported.
if not GTTS_AVAILABLE:
    GTTS_LANGUAGES = {'en': 'English', 'vi': 'Vietnamese'}
else:
    GTTS_LANGUAGES = lang.tts_langs()
    GTTS_LANGUAGES['ja'] = 'Japanese'

# Alphabetical list of the display names.
SUPPORTED_LANGUAGES = sorted(GTTS_LANGUAGES.values())
# Voice mapping for Edge TTS - defined once for performance.
# Keys are the voice labels received as `voice_selection` (presumably the UI
# dropdown choices - confirm against the interface code); values are the voice
# names handed to generate_speech() and from there to edge_tts.Communicate.
# Lookups elsewhere in this file fall back to "vi-VN-HoaiMyNeural".
VOICE_MAP = {
    "🇻🇳 HoaiMy - Nữ Việt Chuẩn": "vi-VN-HoaiMyNeural",
    "🇻🇳 NamMinh - Nam Việt Chuẩn": "vi-VN-NamMinhNeural",
    "🇺🇸 Aria - Nữ Mỹ": "en-US-AriaNeural",
    "🇺🇸 Guy - Nam Mỹ": "en-US-GuyNeural",
    "🇬🇧 Sonia - Nữ Anh": "en-GB-SoniaNeural",
    "🇬🇧 Ryan - Nam Anh": "en-GB-RyanNeural",
    "🇩🇪 Katja - Deutsche Frau": "de-DE-KatjaNeural",
    "🇩🇪 Conrad - Deutscher Mann": "de-DE-ConradNeural",
    "🇫🇷 Denise - Française": "fr-FR-DeniseNeural",
    "🇫🇷 Henri - Français": "fr-FR-HenriNeural",
    "🇪🇸 Elvira - Española": "es-ES-ElviraNeural",
    "🇪🇸 Alvaro - Español": "es-ES-AlvaroNeural",
    "🇮🇹 Elsa - Italiana": "it-IT-ElsaNeural",
    "🇮🇹 Diego - Italiano": "it-IT-DiegoNeural",
    "🇯🇵 Nanami - 日本女性": "ja-JP-NanamiNeural",
    "🇯🇵 Keita - 日本男性": "ja-JP-KeitaNeural",
    "🇰🇷 SunHi - 한국 여성": "ko-KR-SunHiNeural",
    "🇰🇷 BongJin - 한국 남성": "ko-KR-BongJinNeural",
    "🇨🇳 Xiaoxiao - 中文女声": "zh-CN-XiaoxiaoNeural",
    "🇨🇳 Yunxi - 中文男声": "zh-CN-YunxiNeural",
    "🇷🇺 Svetlana - Русская": "ru-RU-SvetlanaNeural",
    "🇷🇺 Dmitry - Русский": "ru-RU-DmitryNeural",
    # NOTE(review): these two labels carry the Portuguese flag but map to
    # pt-BR voices - confirm this is intended.
    "🇵🇹 Francisca - Portuguesa": "pt-BR-FranciscaNeural",
    "🇵🇹 Antonio - Português": "pt-BR-AntonioNeural",
    "🇸🇦 Zariyah - عربية": "ar-SA-ZariyahNeural",
    "🇸🇦 Hamed - عربي": "ar-SA-HamedNeural"
}
| # Voice RAG Functions (Tích hợp từ hf_Voice_Audio_Translation) | |
def read_pdf(file_path):
    """Extract the text of every page of a PDF file.

    Args:
        file_path: Path to the PDF file.

    Returns:
        The concatenated page text, or a message starting with
        ``"Error reading PDF:"`` when PyPDF2 is unavailable or reading fails.
    """
    # Same availability guard style as read_docx(); the "Error" prefix is what
    # voice_rag_pipeline() looks for to detect a failed extraction.
    if not PDF_AVAILABLE:
        return "Error reading PDF: PyPDF2 not available"
    try:
        with open(file_path, 'rb') as file:
            reader = PyPDF2.PdfReader(file)
            # extract_text() may yield None for pages without a text layer;
            # treat those as empty instead of failing the whole document.
            return "".join(page.extract_text() or "" for page in reader.pages)
    except Exception as e:
        return f"Error reading PDF: {str(e)}"
def read_docx(file_path):
    """Extract the paragraph text of a Word document.

    Returns:
        Every paragraph followed by a newline, a "❌" message when
        python-docx is not installed, or an ``"Error reading DOCX:"`` message
        when reading fails.
    """
    try:
        if not DOCX_AVAILABLE:
            return "❌ python-docx not available"
        document = docx.Document(file_path)
        # One line per paragraph, each terminated by a newline.
        return "".join(para.text + "\n" for para in document.paragraphs)
    except Exception as e:
        return f"Error reading DOCX: {str(e)}"
def read_txt(file_path):
    """Read a UTF-8 text file and return its content.

    Args:
        file_path: Path to the text file.

    Returns:
        The file content, or a message starting with ``"Error reading TXT:"``
        when the file cannot be opened or decoded.
    """
    try:
        # utf-8-sig reads plain UTF-8 unchanged and strips a leading BOM
        # (common in files saved by Windows editors), which would otherwise
        # end up as a stray U+FEFF at the start of the text.
        with open(file_path, 'r', encoding='utf-8-sig') as file:
            return file.read()
    except Exception as e:
        return f"Error reading TXT: {str(e)}"
def extract_text_from_file(file_path):
    """Dispatch to the reader matching the file extension.

    Supported extensions (case-insensitive): ``.pdf``, ``.docx``, ``.txt``.

    Returns:
        The reader's result, ``"No file uploaded"`` when ``file_path`` is
        ``None``, or an ``"Unsupported file format: <ext>"`` message.
    """
    if file_path is None:
        return "No file uploaded"

    _, suffix = os.path.splitext(file_path)
    suffix = suffix.lower()

    if suffix == '.pdf':
        return read_pdf(file_path)
    if suffix == '.docx':
        return read_docx(file_path)
    if suffix == '.txt':
        return read_txt(file_path)
    return f"Unsupported file format: {suffix}"
def detect_language_from_text(text):
    """Guess the language of a document from its characters and common words.

    Detection order:
      1. Letters that only occur in Vietnamese -> "Vietnamese".
      2. Script ranges: kana -> "Japanese", Hangul -> "Korean",
         CJK ideographs -> "Chinese".
      3. Whole-word stop-word counts plus language-specific letters for
         French, German, Spanish and English; the highest score wins and ties
         resolve in that order.
      4. Accented vowels shared by Vietnamese and Romance languages with no
         other evidence -> "Vietnamese".
      5. Otherwise "English".

    Args:
        text: Document text; ``None`` or empty yields "English".

    Returns:
        One of "Vietnamese", "Chinese", "Japanese", "Korean", "French",
        "German", "Spanish", "English".
    """
    import re
    import unicodedata

    if not text:
        return "English"  # Default fallback

    # NFC so letters stored as base + combining marks still match.
    lowered = unicodedata.normalize("NFC", text).lower()
    charset = set(lowered)

    # Letters used by Vietnamese but not by French/German/Spanish. The plain
    # accented vowels (é, à, ô, ...) are deliberately excluded here: testing
    # them first made every accented French or Spanish text "Vietnamese".
    vietnamese_only = set('ạảầấậẩẫăằắặẳẵẹẻẽềếệểễịỉĩọỏồốộổỗơờớợởỡụủũưừứựửữỳỵỷỹđ')
    if charset & vietnamese_only:
        return "Vietnamese"

    # Script detection by Unicode block rather than a handful of sample
    # characters. Kana is tested before ideographs because Japanese text
    # normally contains both.
    if re.search(r'[\u3040-\u30ff]', lowered):
        return "Japanese"
    if re.search(r'[\u1100-\u11ff\uac00-\ud7af]', lowered):
        return "Korean"
    if re.search(r'[\u4e00-\u9fff]', lowered):
        return "Chinese"

    # Whole-word matching: substring tests such as "le" in "table" or
    # "a" in any text classified nearly all English input as French/Spanish.
    tokens = re.findall(r"[^\W\d_]+", lowered)
    stopwords = {
        "French": {'le', 'la', 'les', 'de', 'et', 'à', 'un', 'une', 'ce', 'qui', 'que'},
        "German": {'der', 'die', 'das', 'und', 'ist', 'ich', 'bin', 'haben', 'sein', 'werden'},
        "Spanish": {'el', 'la', 'de', 'que', 'y', 'a', 'en', 'un', 'es', 'se', 'no', 'te', 'lo'},
        "English": {'the', 'and', 'is', 'are', 'have', 'has', 'will', 'would', 'can', 'could',
                    'a', 'of', 'to', 'in', 'that', 'this', 'with', 'for', 'it', 'we', 'you'},
    }
    # Letters and punctuation that point to one language only.
    hint_chars = {
        "French": set('çëïîûÿœ'),
        "German": set('ßäö'),
        "Spanish": set('ñ¿¡'),
        "English": set(),
    }
    scores = {
        name: sum(1 for tok in tokens if tok in words) + len(charset & hint_chars[name])
        for name, words in stopwords.items()
    }
    best = max(scores, key=scores.get)  # ties resolve in dict order
    if scores[best] > 0:
        return best

    # Short Vietnamese phrases may contain only the shared accented vowels.
    if charset & set('àáãâèéêìíòóõôùúý'):
        return "Vietnamese"

    return "English"  # Default fallback
def process_with_gemini(text, question, answer_language="Vietnamese"):
    """Ask Gemini a question about a document and return the answer text.

    Args:
        text: Document content placed in the prompt.
        question: The user's question.
        answer_language: Language the model is asked to answer in.

    Returns:
        The model's answer, a "❌" message when no API key is configured, or
        an ``"Error processing with Gemini:"`` message on failure.
    """
    try:
        if not configure_gemini_api():
            return "❌ Lỗi: Chưa cấu hình GEMINI_API_KEY hoặc GOOGLE_API_KEY trong environment variables"

        gemini = genai.GenerativeModel("gemini-2.0-flash")

        # The detected document language is passed to the model as a hint.
        doc_language = detect_language_from_text(text)

        prompt = f"""
Based on the following document content, please answer the question in {answer_language}:
Document Content (detected language: {doc_language}):
{text}
Question: {question}
Please provide a comprehensive and accurate answer in {answer_language}.
If the document is in a different language than the question, please still answer in {answer_language}.
Maintain the factual accuracy while adapting cultural context appropriately.
"""
        return gemini.generate_content(prompt).text
    except Exception as e:
        return f"Error processing with Gemini: {str(e)}"
def text_to_speech_rag(text, voice_selection):
    """Synthesize a RAG answer with Edge TTS and return the audio file path.

    Args:
        text: Answer text; only the first 2000 characters are spoken.
        voice_selection: Key into ``VOICE_MAP``; unknown values fall back to
            "vi-VN-HoaiMyNeural".

    Returns:
        Path of a temporary audio file, or ``None`` when ``text`` is empty, is
        an error message, or synthesis fails.
    """
    try:
        # Error messages in this file start with either "Error" or "❌"
        # (e.g. the missing-API-key message); neither should be read aloud.
        if not text or text.startswith(("Error", "❌")):
            return None

        # Use global VOICE_MAP for performance
        voice_name = VOICE_MAP.get(voice_selection, "vi-VN-HoaiMyNeural")
        text_limited = text[:2000]

        # Generate speech using Edge TTS
        audio_data = asyncio.run(generate_speech(text_limited, voice_name, 0.0))
        if not audio_data:
            print("TTS Error: no audio data returned")
            return None

        # The same generate_speech() output is served as audio/mpeg with a
        # .mp3 name in create_audio_voice_studio(), so use the matching suffix
        # here instead of ".wav".
        fd, temp_output_path = tempfile.mkstemp(suffix=".mp3", prefix="voice_rag_audio_")
        with os.fdopen(fd, 'wb') as f:
            f.write(audio_data)
        return temp_output_path
    except Exception as e:
        print(f"TTS Error: {str(e)}")
        return None
def voice_rag_pipeline(uploaded_file, question, answer_language="Vietnamese", voice_selection="🇻🇳 HoaiMy - Nữ Việt Chuẩn", text_format="txt"):
    """Run the Voice RAG flow: extract text, ask Gemini, synthesize, export.

    Args:
        uploaded_file: Path of the uploaded document, or ``None``.
        question: The user's question about the document.
        answer_language: Language the answer should be written in.
        voice_selection: Voice label used for speech synthesis.
        text_format: Download format; "md" produces the formatted Markdown
            report, anything else is passed to ``create_text_file`` as is.

    Returns:
        A 4-tuple ``(answer_or_message, detected_language, audio_path,
        text_file_path)``. On validation or extraction failure the last two
        items are ``None``.
    """
    if uploaded_file is None:
        return "Please upload a document first.", "N/A", None, None
    # `not question` also covers None, which would crash on .strip().
    if not question or not question.strip():
        return "Please enter a question.", "N/A", None, None

    # Extract text from uploaded file
    extracted_text = extract_text_from_file(uploaded_file)

    # The extraction helpers report failures with several different prefixes;
    # checking only "Error" let messages such as "Unsupported file format"
    # or "❌ python-docx not available" be sent to Gemini as document text.
    failure_prefixes = ("Error", "❌", "Unsupported file format", "No file uploaded")
    if extracted_text.startswith(failure_prefixes):
        return extracted_text, "Error", None, None
    if not extracted_text.strip():
        # e.g. a scanned PDF without a text layer
        return "Error: no text could be extracted from the document.", "Error", None, None

    # Detect document language
    detected_doc_language = detect_language_from_text(extracted_text)

    # Process with Gemini using selected answer language
    answer = process_with_gemini(extracted_text, question, answer_language)

    # Generate speech using selected voice
    audio_file = text_to_speech_rag(answer, voice_selection)

    # Create formatted content for download, but never export an error
    # message as if it were an answer.
    if not answer or answer.startswith(("Error", "❌")):
        text_file_path = None
    elif (text_format or "txt").lower() == "md":
        formatted_content = format_voice_rag_response(
            question, answer, detected_doc_language, voice_selection
        )
        text_file_path = create_text_file(formatted_content, "md", "voice_rag_response")
    else:
        text_file_path = create_text_file(answer, text_format or "txt", "voice_rag_answer")

    return answer, detected_doc_language, audio_file, text_file_path
def detect_language(text):
    """Score-based language detection for short input or transcribed text.

    Each candidate language gets points for stop words that occur as whole
    words and for characteristic letters; the highest score wins.

    Args:
        text: Text to classify.

    Returns:
        "Vietnamese", "English", "German", "French" or "Spanish";
        "unknown" for empty/blank input; "English" when nothing scores.
    """
    import re
    import unicodedata

    if not text or not text.strip():
        return "unknown"

    # NFC so decomposed diacritics still match; lower-cased once so that
    # upper-case accented letters are counted too.
    text_lower = unicodedata.normalize("NFC", text).lower()
    # Whole words only: substring tests ("he"/"el"/"lo" inside "hello",
    # "a" inside any word) made plain English score as Spanish.
    tokens = set(re.findall(r"[^\W\d_]+", text_lower))

    def word_hits(words):
        return sum(1 for word in words if word in tokens)

    def char_hits(chars):
        return sum(1 for char in text_lower if char in chars)

    # Vietnamese: letters unique to Vietnamese count fully; accented vowels
    # shared with French/Spanish count half so they cannot outvote those
    # languages on their own.
    vietnamese_only = 'ạảầấậẩẫăằắặẳẵẹẻẽềếệểễịỉĩọỏồốộổỗơờớợởỡụủũưừứựửữỳỵỷỹđ'
    vietnamese_shared = 'àáãâèéêìíòóõôùúý'
    vietnamese_words = ['và', 'của', 'là', 'có', 'này', 'được', 'cho', 'từ', 'một', 'những', 'tôi', 'bạn']
    vietnamese_score = (
        char_hits(vietnamese_only)
        + 0.5 * char_hits(vietnamese_shared)
        + 2 * word_hits(vietnamese_words)
    )

    # English
    english_words = ['the', 'and', 'is', 'are', 'have', 'has', 'will', 'would', 'can', 'could', 'that', 'this', 'with', 'for', 'you', 'he', 'she', 'it', 'they', 'we']
    english_score = word_hits(english_words)

    # German
    german_words = ['der', 'die', 'das', 'und', 'ist', 'ich', 'bin', 'haben', 'sein', 'werden', 'mit', 'auf', 'für', 'von']
    german_score = word_hits(german_words) + char_hits('äöüß')

    # French
    french_words = ['le', 'la', 'les', 'de', 'et', 'à', 'un', 'une', 'ce', 'qui', 'que', 'avec', 'pour', 'dans']
    french_score = word_hits(french_words) + 0.5 * char_hits('àâäéèêëïîôöùûüÿç')

    # Spanish
    spanish_words = ['el', 'la', 'de', 'que', 'y', 'a', 'en', 'un', 'es', 'se', 'no', 'te', 'lo', 'con', 'para']
    spanish_score = word_hits(spanish_words) + 0.5 * char_hits('ñáéíóúü')

    scores = {
        'Vietnamese': vietnamese_score,
        'English': english_score,
        'German': german_score,
        'French': french_score,
        'Spanish': spanish_score
    }

    # Find the language with highest score (ties resolve in dict order)
    max_score = max(scores.values())
    if max_score > 0:
        detected = max(scores, key=scores.get)
        print(f"🔍 Language detection scores: {scores}")
        print(f"🎯 Detected language: {detected} (score: {max_score})")
        return detected

    # Default fallback
    print(f"⚠️ Could not detect language, defaulting to English")
    return "English"
async def generate_speech(text, voice_name, rate):
    """Stream speech from Edge TTS and return the audio as bytes.

    Args:
        text: Text to synthesize.
        voice_name: Edge TTS voice name.
        rate: Relative speed change as a fraction; formatted as a signed
            percentage (0.0 -> "+0%", 0.5 -> "+50%").

    Returns:
        The concatenated payload of all "audio" chunks in the stream.
    """
    rate_spec = f"{rate:+.0%}"
    communicate = edge_tts.Communicate(text, voice_name, rate=rate_spec)

    # Keep only the audio chunks and join them in arrival order.
    audio_parts = [
        chunk["data"]
        async for chunk in communicate.stream()
        if chunk["type"] == "audio"
    ]
    return b"".join(audio_parts)
def create_text_file(content, file_format="txt", filename_prefix="translated_text"):
    """Write ``content`` to a temporary TXT, DOCX or MD file for download.

    Args:
        content: Text to write. Empty content and messages starting with
            "Lỗi:" or "❌" are treated as errors and produce no file.
        file_format: "txt", "docx" or "md" (case-insensitive). "docx" falls
            back to plain text when python-docx is not installed; unknown
            formats are written as plain text.
        filename_prefix: Prefix of the temporary file name.

    Returns:
        Path of the created file, or ``None`` when nothing was written.
    """
    if not content or content.startswith(("Lỗi:", "❌")):
        return None

    fmt = (file_format or "txt").lower()
    use_docx = fmt == "docx" and DOCX_AVAILABLE
    if use_docx:
        suffix = ".docx"
    elif fmt == "md":
        suffix = ".md"
    else:
        suffix = ".txt"

    temp_file_path = None
    try:
        fd, temp_file_path = tempfile.mkstemp(suffix=suffix, prefix=f"{filename_prefix}_")
        os.close(fd)

        if use_docx:
            from docx import Document
            doc = Document()
            doc.add_heading('Nội dung đã dịch', 0)
            doc.add_paragraph(content)
            doc.save(temp_file_path)
        else:
            with open(temp_file_path, 'w', encoding='utf-8') as f:
                f.write(content)
        return temp_file_path
    except Exception as e:
        # Report the failure instead of swallowing it silently, and do not
        # leave an empty temporary file behind.
        print(f"❌ Error creating text file: {e}")
        if temp_file_path and os.path.exists(temp_file_path):
            try:
                os.remove(temp_file_path)
            except OSError:
                pass
        return None
def format_voice_rag_response(question, answer, detected_language, voice_selection, timestamp=None):
    """Render a Voice RAG question/answer pair as a Markdown report.

    Args:
        question: The user's question.
        answer: The model's answer; surrounding whitespace is stripped.
        detected_language: Language detected for the document.
        voice_selection: Voice label used for synthesis.
        timestamp: Text shown as generation time; defaults to the current
            local time as ``YYYY-MM-DD HH:MM:SS``.

    Returns:
        The Markdown document as a string ending with a newline.
    """
    if timestamp is None:
        timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())

    body = answer.strip()

    # The report is assembled line by line; every entry is one output line.
    report_lines = [
        "# 📚 Voice RAG - Intelligent Document Q&A",
        "---",
        "## 📄 **Session Information**",
        "| **Field** | **Details** |",
        "|-----------|-------------|",
        f"| 🕒 **Timestamp** | {timestamp} |",
        f"| 🌍 **Document Language** | {detected_language} |",
        f"| 🎭 **Voice Selection** | {voice_selection} |",
        "| 🤖 **AI Model** | Google Gemini 2.0 Flash |",
        "---",
        "## ❓ **Question**",
        f"> {question}",
        "---",
        "## 💬 **AI Response**",
        body,
        "---",
        "---",
        "## 📱 **Generated by**",
        "**🎙️ Voice AI Platform** - Digitized Brains",
        "*Powered by Claude Code & Google Gemini 2.0 Flash*",
        "> 🌐 **Voice RAG Technology** - Combining document intelligence with premium voice synthesis",
        "---",
        f"*Generated on {timestamp} | Voice: {voice_selection} | Language: {detected_language}*",
    ]
    return "\n".join(report_lines) + "\n"
def format_voice_studio_response(text, voice_selection, speed, detected_language="Auto-detected", timestamp=None):
    """Render a Voice Studio result as a small Markdown document.

    Args:
        text: Input text; surrounding whitespace is stripped.
        voice_selection: Voice label used for synthesis.
        speed: Playback speed, shown with one decimal (e.g. ``1.5x``).
        detected_language: Language label shown next to the heading.
        timestamp: Text shown as generation time; defaults to the current
            local time as ``YYYY-MM-DD HH:MM:SS``.

    Returns:
        The Markdown document as a string ending with a newline.
    """
    if timestamp is None:
        timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())

    body = text.strip()

    document_lines = [
        "# Voice Studio Result",
        f"## Input Text ({detected_language})",
        body,
        "---",
        f"*Generated on {timestamp} | Voice: {voice_selection} | Speed: {speed:.1f}x*",
    ]
    return "\n".join(document_lines) + "\n"
def format_audio_translation_response(original_text, translated_text, source_language, target_language, voice_selection, timestamp=None):
    """Render an audio translation result as a small Markdown document.

    Args:
        original_text: Transcribed source text (whitespace-stripped).
        translated_text: Translated text (whitespace-stripped).
        source_language: Language label of the original text.
        target_language: Language label of the translation.
        voice_selection: Voice label used for synthesis.
        timestamp: Text shown as generation time; defaults to the current
            local time as ``YYYY-MM-DD HH:MM:SS``.

    Returns:
        The Markdown document as a string ending with a newline.
    """
    if timestamp is None:
        timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())

    source_body = original_text.strip()
    target_body = translated_text.strip()

    document_lines = [
        "# Audio Translation Result",
        f"## Original Text ({source_language})",
        source_body,
        f"## Translated Text ({target_language})",
        target_body,
        "---",
        f"*Generated on {timestamp} | {source_language} → {target_language} | Voice: {voice_selection}*",
    ]
    return "\n".join(document_lines) + "\n"
def create_audio_voice_studio(text, voice_selection, speed, text_format="txt"):
    """Synthesize text with Edge TTS and build an embeddable HTML player.

    Args:
        text: Text to speak; only the first 1000 characters are used.
        voice_selection: Key into ``VOICE_MAP``; unknown values fall back to
            "vi-VN-HoaiMyNeural".
        speed: Speed multiplier (1.0 = normal); ``speed - 1.0`` is passed to
            ``generate_speech`` as the relative rate.
        text_format: "md" or "docx" export the formatted Markdown summary,
            "txt" exports the raw text, anything else creates no file.

    Returns:
        ``(html_player, text_file_path)`` on success, or
        ``(error_message, None)`` when the input is blank or synthesis fails.
    """
    # `not text` also covers None, which would otherwise raise outside the
    # try block below.
    if not text or not text.strip():
        return "❌ Vui lòng nhập văn bản / Please enter text / Bitte Text eingeben", None
    try:
        from html import escape

        # Use global VOICE_MAP for performance (avoiding recreation on each call)
        voice_name = VOICE_MAP.get(voice_selection, "vi-VN-HoaiMyNeural")
        text_limited = text[:1000]

        # Convert speed (0.5-2.0) to a relative rate (-0.5 to +1.0)
        rate_percent = (speed - 1.0)

        # Generate speech using Edge TTS
        audio_data = asyncio.run(generate_speech(text_limited, voice_name, rate_percent))

        # The audio is embedded in the page as a base64 data URI.
        audio_base64 = base64.b64encode(audio_data).decode('utf-8')
        timestamp = int(time.time())
        filename = f"voice_{voice_name}_{speed}x_{timestamp}.mp3"

        # Detect the language once and reuse it for the player and the export.
        detected_lang = detect_language(text_limited)

        # voice_selection is echoed into HTML, so escape it; labels from
        # VOICE_MAP contain no special characters and render unchanged.
        safe_voice = escape(str(voice_selection))

        # Mobile-optimized HTML player
        html_player = f'''
<div style="
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
border-radius: 20px;
padding: 20px;
margin: 10px 0;
box-shadow: 0 8px 32px rgba(0,0,0,0.2);
color: white;
text-align: center;
">
<div style="margin-bottom: 20px;">
<h3 style="color: #fff; margin: 0 0 15px 0; font-size: 1.3em; text-shadow: 1px 1px 2px rgba(0,0,0,0.3);">
🎵 Âm thanh hoàn thành!
</h3>
<div style="
background: rgba(255,255,255,0.2);
border-radius: 12px;
padding: 12px;
font-size: 0.9em;
line-height: 1.5;
backdrop-filter: blur(10px);
">
<div><strong>🎭 Giọng:</strong> {safe_voice}</div>
<div><strong>⚡ Tốc độ:</strong> {speed:.1f}x | <strong>🌍 Ngôn ngữ:</strong> {detected_lang.title()}</div>
<div><strong>📝 Độ dài:</strong> {len(text_limited)} ký tự</div>
</div>
</div>
<audio controls style="
width: 100%;
max-width: 100%;
height: 50px;
margin: 20px 0;
border-radius: 25px;
background: rgba(255,255,255,0.95);
box-shadow: 0 4px 15px rgba(0,0,0,0.2);
">
<source src="data:audio/mpeg;base64,{audio_base64}" type="audio/mpeg">
Trình duyệt không hỗ trợ audio.
</audio>
<div style="
display: flex;
justify-content: center;
margin-top: 20px;
">
<a href="data:audio/mpeg;base64,{audio_base64}" download="{filename}"
style="
background: linear-gradient(45deg, #28a745, #20c997);
color: white;
padding: 15px 30px;
text-decoration: none;
border-radius: 25px;
font-weight: 700;
font-size: 1.1em;
display: flex;
align-items: center;
justify-content: center;
box-shadow: 0 4px 15px rgba(40,167,69,0.3);
transition: all 0.3s ease;
min-height: 48px;
min-width: 200px;
"
ontouchstart=""
onmouseover="this.style.transform='translateY(-2px)'; this.style.boxShadow='0 6px 20px rgba(40,167,69,0.4)'"
onmouseout="this.style.transform='translateY(0)'; this.style.boxShadow='0 4px 15px rgba(40,167,69,0.3)'">
📥 TẢI XUỐNG MP3
</a>
</div>
</div>
'''

        # Create text file based on format
        text_file_path = None
        if text_format in ("md", "docx"):
            # Both formats export the same Markdown summary.
            markdown_content = format_voice_studio_response(text_limited, voice_selection, speed, detected_lang)
            text_file_path = create_text_file(markdown_content, text_format, "voice_studio")
        elif text_format == "txt":
            # Plain export of the spoken text
            text_file_path = create_text_file(text_limited, "txt", "voice_studio")

        return html_player, text_file_path
    except Exception as e:
        return f"❌ Error: {str(e)}", None
# Language mapping for voices - defined once for performance.
# Keys are the same voice labels used in VOICE_MAP; values are the language
# names used as translation targets (see get_target_language_from_voice).
VOICE_TO_LANGUAGE = {
    # Vietnamese
    "🇻🇳 HoaiMy - Nữ Việt Chuẩn": "Vietnamese",
    "🇻🇳 NamMinh - Nam Việt Chuẩn": "Vietnamese",
    # English
    "🇺🇸 Aria - Nữ Mỹ": "English",
    "🇺🇸 Guy - Nam Mỹ": "English",
    "🇬🇧 Sonia - Nữ Anh": "English",
    "🇬🇧 Ryan - Nam Anh": "English",
    # German
    "🇩🇪 Katja - Deutsche Frau": "German",
    "🇩🇪 Conrad - Deutscher Mann": "German",
    # French
    "🇫🇷 Denise - Française": "French",
    "🇫🇷 Henri - Français": "French",
    # Spanish
    "🇪🇸 Elvira - Española": "Spanish",
    "🇪🇸 Alvaro - Español": "Spanish",
    # Italian
    "🇮🇹 Elsa - Italiana": "Italian",
    "🇮🇹 Diego - Italiano": "Italian",
    # Japanese
    "🇯🇵 Nanami - 日本女性": "Japanese",
    "🇯🇵 Keita - 日本男性": "Japanese",
    # Korean
    "🇰🇷 SunHi - 한국 여성": "Korean",
    "🇰🇷 BongJin - 한국 남성": "Korean",
    # Chinese
    "🇨🇳 Xiaoxiao - 中文女声": "Chinese",
    "🇨🇳 Yunxi - 中文男声": "Chinese",
    # Russian
    "🇷🇺 Svetlana - Русская": "Russian",
    "🇷🇺 Dmitry - Русский": "Russian",
    # Portuguese
    "🇵🇹 Francisca - Portuguesa": "Portuguese",
    "🇵🇹 Antonio - Português": "Portuguese",
    # Arabic
    "🇸🇦 Zariyah - عربية": "Arabic",
    "🇸🇦 Hamed - عربي": "Arabic"
}
def get_target_language_from_voice(voice_selection):
    """Return the translation target language for a voice label.

    Labels missing from ``VOICE_TO_LANGUAGE`` resolve to "Vietnamese".
    """
    language = VOICE_TO_LANGUAGE.get(voice_selection)
    if language is None:
        return "Vietnamese"
    return language
def translate_text_with_gemini(text, target_language):
    """Translate ``text`` into ``target_language`` with Gemini.

    Returns:
        The translated text with a leading "translation:" label or a leading
        "here is ..." line removed; an empty string for blank input; a "❌"
        message when no API key is configured; or a "Lỗi dịch thuật:" message
        on failure.
    """
    try:
        if not configure_gemini_api():
            return "❌ Lỗi: Chưa cấu hình GEMINI_API_KEY hoặc GOOGLE_API_KEY trong environment variables"
        if not text.strip():
            return ""

        gemini = genai.GenerativeModel("gemini-2.0-flash")
        prompt = f"""Translate the following text to {target_language}. Return ONLY the translated text, nothing else:
{text}"""
        result = gemini.generate_content(prompt).text.strip()

        # Drop boilerplate the model sometimes adds around the translation.
        label = "translation:"
        if result.lower().startswith(label):
            result = result[len(label):].strip()
        if result.lower().startswith("here is"):
            # Discard the introductory first line when more text follows it.
            _, newline, remainder = result.partition('\n')
            if newline:
                result = remainder.strip()
        return result
    except Exception as e:
        return f"Lỗi dịch thuật: {str(e)}"
| def translate_audio(audio_file, target_country, voice_selection, text_format="txt"): | |
| """ | |
| Transcribe, translate and synthesize audio to target language with Voice Studio integration | |
| """ | |
| try: | |
| api_key = configure_gemini_api() | |
| if not api_key: | |
| return "❌ Lỗi: Chưa cấu hình GEMINI_API_KEY hoặc GOOGLE_API_KEY trong environment variables", "Không xác định", "", target_country, None, None, "", "", None | |
| if audio_file is None: | |
| return "Lỗi: Vui lòng tải lên file audio", "Không xác định", "", target_country, None, None, "", "", None | |
| # Save recorded audio to record_data directory | |
| print(f"🔍 Processing audio file type: {type(audio_file)}") | |
| saved_audio_path = save_recorded_audio(audio_file) | |
| if saved_audio_path: | |
| print(f"🎤 Audio saved to record_data: {os.path.basename(saved_audio_path)}") | |
| # Debug: check if file really exists | |
| if os.path.exists(saved_audio_path): | |
| file_size = os.path.getsize(saved_audio_path) | |
| print(f"✅ File confirmed: {saved_audio_path} ({file_size} bytes)") | |
| else: | |
| print(f"❌ File not found after save: {saved_audio_path}") | |
| return "❌ Lỗi: Không thể lưu file audio", "Không xác định", "", target_country, None, None, "", "", None | |
| else: | |
| print("❌ Failed to save audio file") | |
| return "❌ Lỗi: Không thể lưu file audio", "Không xác định", "", target_country, None, None, "", "", None | |
| # Get target language from voice selection | |
| target_language = get_target_language_from_voice(voice_selection) | |
| # Transcribe audio using Gemini | |
| model = genai.GenerativeModel("gemini-2.0-flash") | |
| # Read audio file using saved path | |
| with open(saved_audio_path, 'rb') as f: | |
| audio_data = f.read() | |
| # Create audio blob | |
| audio_blob = { | |
| 'mime_type': 'audio/wav', | |
| 'data': audio_data | |
| } | |
| # Step 1: Transcribe audio only first | |
| transcribe_prompt = """Transcribe this audio accurately in its original language. Return only the transcribed text, nothing else.""" | |
| response = model.generate_content([transcribe_prompt, audio_blob]) | |
| transcription = response.text.strip() | |
| # Step 2: Detect language of transcription | |
| detected_lang = detect_language(transcription) | |
| # Step 3: Translate if needed (only if source is different from target) | |
| if detected_lang.lower() != target_language.lower(): | |
| print(f"🔄 Translating from {detected_lang} to {target_language}") | |
| translated_text = translate_text_with_gemini(transcription, target_language) | |
| # Check if translation was successful | |
| if translated_text.startswith("❌") or translated_text.startswith("Lỗi"): | |
| print(f"❌ Translation failed: {translated_text}") | |
| # Use original transcription if translation fails | |
| translated_text = transcription | |
| else: | |
| print(f"✅ Translation successful") | |
| else: | |
| print(f"ℹ️ No translation needed - same language ({detected_lang})") | |
| translated_text = transcription | |
| # Generate audio using Edge TTS (use global VOICE_MAP for performance) | |
| edge_voice = VOICE_MAP.get(voice_selection, "vi-VN-HoaiMyNeural") | |
| print(f"🎙️ Generating audio with voice: {edge_voice}") | |
| audio_data = asyncio.run(generate_speech(translated_text, edge_voice, 0.0)) | |
| print(f"🎵 Generated audio data: {len(audio_data)} bytes") | |
| # Save audio file | |
| fd, temp_output_path = tempfile.mkstemp(suffix=".wav", prefix="translated_audio_") | |
| os.close(fd) | |
| print(f"📁 Created temp audio file: {temp_output_path}") | |
| # Write raw audio data to temporary file | |
| with open(temp_output_path, 'wb') as f: | |
| f.write(audio_data) | |
| # Verify file was created | |
| if os.path.exists(temp_output_path): | |
| file_size = os.path.getsize(temp_output_path) | |
| print(f"✅ Audio file created successfully: {file_size} bytes") | |
| else: | |
| print(f"❌ Failed to create audio file: {temp_output_path}") | |
| # Create text file for download with proper formatting | |
| text_file_path = None | |
| if text_format == "md": | |
| # Use Markdown formatting function for Audio Translation | |
| markdown_content = format_audio_translation_response( | |
| transcription, translated_text, detected_lang, target_language, voice_selection | |
| ) | |
| text_file_path = create_text_file(markdown_content, "md", "audio_translation") | |
| elif text_format == "docx": | |
| # Create Word document with Audio Translation formatting | |
| markdown_content = format_audio_translation_response( | |
| transcription, translated_text, detected_lang, target_language, voice_selection | |
| ) | |
| text_file_path = create_text_file(markdown_content, "docx", "audio_translation") | |
| else: | |
| # Create simple text file | |
| text_file_path = create_text_file(translated_text, "txt", "audio_translation") | |
| return transcription, detected_lang, translated_text, target_language, temp_output_path, temp_output_path, transcription, translated_text, text_file_path | |
| except Exception as e: | |
| # Get target language for error response | |
| target_language = get_target_language_from_voice(voice_selection) if 'voice_selection' in locals() else "Vietnamese" | |
| return f"Lỗi: {str(e)}", "Lỗi", "", target_language, None, None, "", "", None | |
# Voice labels grouped by country label - ONLY OFFICIAL VOICES.
# Each country maps to a list of voice labels; the first entry of a list is the
# one update_voices() preselects when that country is chosen.
voice_choices_by_country = {
    "🇻🇳 Việt Nam": ["🇻🇳 HoaiMy - Nữ Việt Chuẩn", "🇻🇳 NamMinh - Nam Việt Chuẩn"],
    "🇺🇸 Hoa Kỳ": ["🇺🇸 Aria - Nữ Mỹ", "🇺🇸 Guy - Nam Mỹ"],
    "🇬🇧 Anh": ["🇬🇧 Sonia - Nữ Anh", "🇬🇧 Ryan - Nam Anh"],
    "🇩🇪 Đức": ["🇩🇪 Katja - Deutsche Frau", "🇩🇪 Conrad - Deutscher Mann"],
    "🇫🇷 Pháp": ["🇫🇷 Denise - Française", "🇫🇷 Henri - Français"],
    "🇪🇸 Tây Ban Nha": ["🇪🇸 Elvira - Española", "🇪🇸 Alvaro - Español"],
    "🇮🇹 Ý": ["🇮🇹 Elsa - Italiana", "🇮🇹 Diego - Italiano"],
    "🇯🇵 Nhật Bản": ["🇯🇵 Nanami - 日本女性", "🇯🇵 Keita - 日本男性"],
    "🇰🇷 Hàn Quốc": ["🇰🇷 SunHi - 한국 여성", "🇰🇷 BongJin - 한국 남성"],
    "🇨🇳 Trung Quốc": ["🇨🇳 Xiaoxiao - 中文女声", "🇨🇳 Yunxi - 中文男声"],
    "🇷🇺 Nga": ["🇷🇺 Svetlana - Русская", "🇷🇺 Dmitry - Русский"],
    "🇵🇹 Bồ Đào Nha": ["🇵🇹 Francisca - Portuguesa", "🇵🇹 Antonio - Português"],
    "🇸🇦 Ả Rập": ["🇸🇦 Zariyah - عربية", "🇸🇦 Hamed - عربي"],
}
def update_voices(country):
    """Return a voice dropdown populated for the selected country.

    Looks ``country`` up in ``voice_choices_by_country``. When the country is
    not a key there, the Vietnamese voice list is used instead. The first
    voice of the chosen list becomes the dropdown value.
    """
    options = voice_choices_by_country.get(
        country, voice_choices_by_country["🇻🇳 Việt Nam"]
    )
    return gr.Dropdown(choices=options, value=options[0])
# Lightweight CSS - optimized for performance.
# Passed to gr.Blocks(css=css, ...) below. Rule order matters: later rules with
# the same selector (e.g. the second .status-success / .status-processing)
# layer on top of the earlier ones, so keep the order when editing.
css = """
* {
    font-family: system-ui, -apple-system, 'Segoe UI', Arial, sans-serif;
}
.gradio-container {
    max-width: 1200px;
    margin: 0 auto;
    position: relative;
}
/* Critical fix for dropdown interaction */
.gradio-container * {
    pointer-events: auto;
}
/* Hide Gradio footer */
.footer {
    display: none !important;
}
/* Pulsing animation for processing status */
@keyframes pulse-processing {
    0% {
        opacity: 1;
        transform: scale(1);
        box-shadow: 0 4px 15px rgba(255, 193, 7, 0.3);
    }
    50% {
        opacity: 0.8;
        transform: scale(1.02);
        box-shadow: 0 6px 25px rgba(255, 193, 7, 0.6);
    }
    100% {
        opacity: 1;
        transform: scale(1);
        box-shadow: 0 4px 15px rgba(255, 193, 7, 0.3);
    }
}
.status-processing {
    animation: pulse-processing 1.5s ease-in-out infinite;
    background: linear-gradient(135deg, #FFC107 0%, #FF9800 100%) !important;
}
/* Success status animation */
@keyframes pulse-success {
    0% {
        opacity: 1;
        transform: scale(1);
    }
    50% {
        opacity: 0.9;
        transform: scale(1.01);
    }
    100% {
        opacity: 1;
        transform: scale(1);
    }
}
.status-success {
    animation: pulse-success 2s ease-in-out 3;
    background: linear-gradient(135deg, #4CAF50 0%, #2E7D32 100%) !important;
}
/* Custom footer to cover Gradio attribution */
.custom-footer {
    position: fixed;
    bottom: 0;
    left: 0;
    right: 0;
    background: linear-gradient(135deg, #4A90E2 0%, #2E86AB 70%, #FF8A65 85%, #FF6B9D 100%);
    color: white;
    padding: 15px;
    text-align: center;
    font-weight: bold;
    z-index: 1000;
    box-shadow: 0 -2px 10px rgba(0,0,0,0.1);
}
/* Add padding to body to account for fixed footer */
body {
    padding-bottom: 60px;
}
/* Mobile-first responsive design */
.input-card {
    background: rgba(255,255,255,0.95);
    border-radius: 16px;
    padding: 16px;
    margin: 10px 0;
    box-shadow: 0 4px 20px rgba(0,0,0,0.1);
    backdrop-filter: blur(10px);
}
.output-area {
    background: rgba(255,255,255,0.95);
    border-radius: 16px;
    padding: 16px;
    margin: 15px 0;
    min-height: 200px;
    box-shadow: 0 4px 20px rgba(0,0,0,0.1);
}
.examples-section {
    background: rgba(255,255,255,0.9);
    border-radius: 16px;
    padding: 16px;
    margin: 20px 0;
}
.main-header {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    color: white;
    padding: 20px;
    border-radius: 10px;
    margin-bottom: 20px;
    text-align: center;
}
.feature-box {
    background: #f8f9fa;
    padding: 15px;
    border-radius: 8px;
    margin: 10px 0;
    border-left: 4px solid #667eea;
}
.status-indicator {
    display: inline-block;
    padding: 5px 10px;
    border-radius: 15px;
    font-size: 12px;
    font-weight: bold;
    margin: 5px;
}
.status-success {
    background-color: #d4edda;
    color: #155724;
}
.status-processing {
    background-color: #fff3cd;
    color: #856404;
}
.comparison-section {
    border: 1px solid #e0e0e0;
    border-radius: 8px;
    padding: 15px;
    margin: 10px 0;
    background: #fafafa;
}
.language-label {
    font-weight: bold;
    color: #667eea;
    padding: 5px 10px;
    background: #f0f2ff;
    border-radius: 15px;
    display: inline-block;
    margin-bottom: 10px;
    font-size: 14px;
}
.content-compare {
    background: white;
    border: 1px solid #ddd;
    border-radius: 6px;
    padding: 12px;
    min-height: 120px;
    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
    line-height: 1.5;
}
/* Remove any potential blocking overlays */
.gradio-container::before,
.gradio-container::after {
    display: none;
}
/* Ensure all interactive elements work */
button, select, input, textarea, .gr-dropdown {
    pointer-events: auto !important;
    position: relative !important;
}
/* Simple dropdown fix without complex selectors */
[class*="dropdown"] {
    position: relative !important;
    z-index: 999 !important;
}
[class*="dropdown"] * {
    pointer-events: auto !important;
}
/* Make sure no overlay blocks clicks */
.gradio-container .gr-form {
    position: relative;
    z-index: 1;
}
.gradio-container .gr-block {
    position: relative;
    z-index: 1;
}
.mobile-button {
    width: 100% !important;
    padding: 15px !important;
    font-size: 1.1em !important;
    margin: 20px 0 !important;
    border-radius: 12px !important;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
    border: none !important;
    color: white !important;
    font-weight: bold !important;
    box-shadow: 0 4px 15px rgba(102, 126, 234, 0.3) !important;
    transition: all 0.3s ease !important;
    cursor: pointer !important;
    position: relative !important;
    overflow: hidden !important;
}
.mobile-button:hover {
    transform: translateY(-2px) !important;
    box-shadow: 0 8px 25px rgba(102, 126, 234, 0.4) !important;
    background: linear-gradient(135deg, #5a6fd8 0%, #6b4190 100%) !important;
}
.mobile-button:active {
    transform: translateY(0px) !important;
    box-shadow: 0 2px 10px rgba(102, 126, 234, 0.3) !important;
}
/* Ripple effect for button */
.mobile-button::before {
    content: '';
    position: absolute;
    top: 50%;
    left: 50%;
    width: 0;
    height: 0;
    border-radius: 50%;
    background: rgba(255, 255, 255, 0.3);
    transform: translate(-50%, -50%);
    transition: width 0.6s, height 0.6s;
}
.mobile-button:active::before {
    width: 300px;
    height: 300px;
}
/* Loading spinner animation */
@keyframes spin {
    0% { transform: rotate(0deg); }
    100% { transform: rotate(360deg); }
}
.loading-spinner {
    display: inline-block;
    width: 20px;
    height: 20px;
    border: 3px solid rgba(255,255,255,0.3);
    border-radius: 50%;
    border-top-color: white;
    animation: spin 1s ease-in-out infinite;
    margin-right: 10px;
}
/* Button pulse effect when processing */
@keyframes pulse {
    0% {
        box-shadow: 0 4px 15px rgba(102, 126, 234, 0.3);
    }
    50% {
        box-shadow: 0 8px 25px rgba(102, 126, 234, 0.6);
    }
    100% {
        box-shadow: 0 4px 15px rgba(102, 126, 234, 0.3);
    }
}
.button-processing {
    animation: pulse 2s ease-in-out infinite;
    background: linear-gradient(135deg, #FF8E53 0%, #FF6B6B 100%) !important;
}
.mobile-textbox textarea {
    border-radius: 10px !important;
    border: 2px solid #e0e0e0 !important;
    padding: 12px !important;
    font-size: 1em !important;
    line-height: 1.5 !important;
}
.mobile-compare textarea {
    border-radius: 8px !important;
    border: 1px solid #ddd !important;
    padding: 10px !important;
    background: #fafafa !important;
    font-size: 0.95em !important;
}
.mobile-audio {
    margin: 10px 0 !important;
    border-radius: 10px !important;
}
.mobile-file {
    margin: 10px 0 !important;
    border-radius: 10px !important;
}
/* Beautiful Markdown styling for Voice RAG responses */
.markdown-response {
    background: linear-gradient(135deg, #ffffff 0%, #f8fffe 100%);
    border-radius: 12px;
    padding: 20px;
    margin: 15px 0;
    box-shadow: 0 4px 20px rgba(0,0,0,0.1);
    border-left: 4px solid #4CAF50;
}
.markdown-response h1 {
    color: #2e7d32;
    border-bottom: 2px solid #4CAF50;
    padding-bottom: 10px;
    margin-bottom: 20px;
    font-size: 1.8em;
}
.markdown-response h2 {
    color: #388E3C;
    margin-top: 25px;
    margin-bottom: 15px;
    font-size: 1.4em;
    border-left: 3px solid #4CAF50;
    padding-left: 15px;
}
.markdown-response h3 {
    color: #43A047;
    margin-top: 20px;
    margin-bottom: 12px;
    font-size: 1.2em;
}
.markdown-response p {
    line-height: 1.6;
    margin-bottom: 12px;
    color: #333;
}
.markdown-response blockquote {
    background: linear-gradient(135deg, #e8f5e8 0%, #c8e6c9 100%);
    border-left: 4px solid #4CAF50;
    padding: 15px 20px;
    margin: 15px 0;
    border-radius: 8px;
    font-style: italic;
    color: #2e7d32;
}
.markdown-response table {
    width: 100%;
    border-collapse: collapse;
    margin: 15px 0;
    box-shadow: 0 2px 10px rgba(0,0,0,0.1);
    border-radius: 8px;
    overflow: hidden;
}
.markdown-response table th {
    background: linear-gradient(135deg, #4CAF50 0%, #45a049 100%);
    color: white;
    padding: 12px 15px;
    text-align: left;
    font-weight: bold;
}
.markdown-response table td {
    padding: 12px 15px;
    border-bottom: 1px solid #e0e0e0;
    background: white;
}
.markdown-response table tr:nth-child(even) td {
    background: #f8fffe;
}
.markdown-response table tr:hover td {
    background: #e8f5e8;
    transition: background 0.3s ease;
}
.markdown-response ul, .markdown-response ol {
    margin: 15px 0;
    padding-left: 25px;
}
.markdown-response li {
    margin-bottom: 8px;
    line-height: 1.5;
}
.markdown-response code {
    background: #f5f5f5;
    border: 1px solid #e0e0e0;
    border-radius: 4px;
    padding: 2px 6px;
    font-family: 'Courier New', monospace;
    color: #d32f2f;
}
.markdown-response pre {
    background: #f5f5f5;
    border: 1px solid #e0e0e0;
    border-radius: 8px;
    padding: 15px;
    overflow-x: auto;
    margin: 15px 0;
}
.markdown-response pre code {
    background: none;
    border: none;
    padding: 0;
    color: #333;
}
.markdown-response hr {
    border: none;
    height: 2px;
    background: linear-gradient(90deg, transparent, #4CAF50, transparent);
    margin: 25px 0;
}
.markdown-response strong {
    color: #2e7d32;
    font-weight: bold;
}
.markdown-response em {
    color: #388E3C;
    font-style: italic;
}
/* Responsive design for markdown */
@media (max-width: 768px) {
    .markdown-response {
        padding: 15px;
        margin: 10px 0;
    }
    .markdown-response table {
        font-size: 0.9em;
    }
    .markdown-response h1 {
        font-size: 1.6em;
    }
    .markdown-response h2 {
        font-size: 1.3em;
    }
}
/* Mobile responsive breakpoints */
@media (max-width: 768px) {
    .gradio-container {
        padding: 10px !important;
    }
    .input-card {
        padding: 12px !important;
        margin: 8px 0 !important;
    }
    .output-area {
        padding: 12px !important;
        margin: 10px 0 !important;
    }
    .examples-section {
        padding: 12px !important;
    }
    .main-header h2 {
        font-size: 1.5em !important;
    }
    .main-header p {
        font-size: 1em !important;
    }
    /* Mobile layout adjustments - less aggressive */
    .gr-row {
        flex-direction: column;
    }
    .gr-column {
        width: 100%;
        margin-bottom: 15px;
    }
}
@media (max-width: 480px) {
    .gradio-container {
        padding: 5px !important;
    }
    .input-card {
        padding: 10px !important;
        margin: 5px 0 !important;
    }
    .main-header {
        padding: 15px !important;
    }
    .main-header h2 {
        font-size: 1.3em !important;
    }
    .mobile-button {
        padding: 12px !important;
        font-size: 1em !important;
    }
}
"""
# JavaScript for button effects: click feedback, hover lift and a temporary
# "processing" state on every element with the .mobile-button class.
# addButtonEffects() is re-run every 2 seconds so buttons that appear later
# are picked up; all listeners are named functions so that repeated
# registration on the same button is a no-op instead of stacking handlers.
# NOTE(review): the completion check uses the selector [style*="Hoàn thành"],
# which tests the style attribute rather than visible text, so the button
# appears to reset only through the ~15 s timeout - confirm the intended
# completion signal before changing it.
js_code = """
<script>
function handleHoverIn() {
    if (!this.disabled) {
        this.style.transform = 'translateY(-2px) scale(1.02)';
    }
}
function handleHoverOut() {
    if (!this.disabled) {
        this.style.transform = 'translateY(0) scale(1)';
    }
}
function addButtonEffects() {
    // Find button by class since Gradio might change IDs
    const buttons = document.querySelectorAll('.mobile-button');
    buttons.forEach(button => {
        // Registering the same function reference again is ignored by the
        // browser, so periodic re-initialization never duplicates handlers.
        button.addEventListener('click', handleClick);
        button.addEventListener('mouseenter', handleHoverIn);
        button.addEventListener('mouseleave', handleHoverOut);
    });
}
function handleClick(e) {
    // currentTarget is always the button the listener is attached to, even
    // when the click lands on a child node inside it.
    const button = e.currentTarget;
    // Immediate visual feedback
    button.style.transform = 'scale(0.98)';
    button.style.transition = 'all 0.1s ease';
    setTimeout(() => {
        button.style.transform = 'scale(1)';
        button.style.transition = 'all 0.3s ease';
    }, 100);
    // Add processing state
    const originalText = button.innerHTML;
    button.innerHTML = '<span class="loading-spinner"></span>⏳ ĐANG XỬ LÝ...';
    button.classList.add('button-processing');
    button.disabled = true;
    // Monitor for completion and reset
    let checkCount = 0;
    const checkInterval = setInterval(() => {
        checkCount++;
        // Reset after 15 seconds max or if status changes
        const statusElements = document.querySelectorAll('[style*="Hoàn thành"]');
        if (statusElements.length > 0 || checkCount > 50) {
            clearInterval(checkInterval);
            button.innerHTML = originalText;
            button.classList.remove('button-processing');
            button.disabled = false;
            button.style.transform = 'scale(1)';
        }
    }, 300);
}
// Initialize when DOM is ready
if (document.readyState === 'loading') {
    document.addEventListener('DOMContentLoaded', addButtonEffects);
} else {
    addButtonEffects();
}
// Re-initialize periodically for Gradio updates
setInterval(addButtonEffects, 2000);
</script>
"""
| # Create interface with tabs | |
| with gr.Blocks(css=css, title="🎙️ Voice AI Platform - Voice RAG & Audio Translation") as demo: | |
| # Simplified header for faster loading on HF Spaces | |
| if not (os.environ.get("SPACE_ID") or os.environ.get("HF_SPACE_ID")): | |
| # Only load complex microphone permissions in local development | |
| gr.HTML(""" | |
| <meta charset="UTF-8"> | |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> | |
| <meta http-equiv="Permissions-Policy" content="microphone=*, camera=*, display-capture=*, autoplay=*"> | |
| <meta http-equiv="Feature-Policy" content="microphone 'self' *; camera 'self' *; autoplay 'self' *"> | |
| <meta name="theme-color" content="#4A90E2"> | |
| <script> | |
| // Global microphone management | |
| window.microphoneStatus = { | |
| granted: false, | |
| requested: false, | |
| supported: false | |
| }; | |
| // Enhanced microphone permission request for iframe and main window | |
| function initializeMicrophoneSupport() { | |
| console.log('🎤 Initializing microphone support...'); | |
| // Check browser support | |
| if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) { | |
| window.microphoneStatus.supported = true; | |
| console.log('✅ Browser supports microphone'); | |
| // Check current permission status | |
| if (navigator.permissions) { | |
| navigator.permissions.query({name: 'microphone'}).then(function(result) { | |
| console.log('🔐 Current microphone permission:', result.state); | |
| window.microphoneStatus.granted = (result.state === 'granted'); | |
| // Update UI based on permission status | |
| updateMicrophoneUI(result.state); | |
| // Listen for permission changes | |
| result.onchange = function() { | |
| console.log('🔄 Microphone permission changed to:', this.state); | |
| window.microphoneStatus.granted = (this.state === 'granted'); | |
| updateMicrophoneUI(this.state); | |
| }; | |
| }).catch(function(err) { | |
| console.log('⚠️ Permission query failed:', err); | |
| }); | |
| } | |
| // Auto-request permissions if we're in iframe (with user gesture simulation) | |
| if (window.location !== window.parent.location && !window.microphoneStatus.requested) { | |
| console.log('🖼️ Running in iframe - preparing microphone access'); | |
| window.microphoneStatus.requested = true; | |
| // Add a global click listener to request permissions on first interaction | |
| document.addEventListener('click', function requestOnFirstClick() { | |
| if (!window.microphoneStatus.granted) { | |
| console.log('👆 First click detected - requesting microphone access'); | |
| requestMicrophonePermission(); | |
| document.removeEventListener('click', requestOnFirstClick); | |
| } | |
| }, { once: true }); | |
| } | |
| } else { | |
| console.log('❌ Browser does not support microphone'); | |
| window.microphoneStatus.supported = false; | |
| updateMicrophoneUI('unsupported'); | |
| } | |
| } | |
| function requestMicrophonePermission() { | |
| console.log('🎤 Requesting microphone permission...'); | |
| if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) { | |
| navigator.mediaDevices.getUserMedia({ | |
| audio: { | |
| echoCancellation: true, | |
| noiseSuppression: true, | |
| autoGainControl: true, | |
| sampleRate: 44100 | |
| } | |
| }) | |
| .then(function(stream) { | |
| console.log('✅ Microphone access granted'); | |
| window.microphoneStatus.granted = true; | |
| // Stop the stream immediately (we just wanted permission) | |
| stream.getTracks().forEach(track => track.stop()); | |
| updateMicrophoneUI('granted'); | |
| // Notify other parts of the app | |
| window.dispatchEvent(new CustomEvent('microphoneGranted')); | |
| }) | |
| .catch(function(err) { | |
| console.log('❌ Microphone access denied:', err); | |
| window.microphoneStatus.granted = false; | |
| updateMicrophoneUI('denied', err.message); | |
| }); | |
| } | |
| } | |
| function updateMicrophoneUI(status, errorMessage = '') { | |
| // This will be called by the specific UI components | |
| console.log('🎛️ Updating microphone UI for status:', status); | |
| window.dispatchEvent(new CustomEvent('microphoneStatusChanged', { | |
| detail: { status, errorMessage } | |
| })); | |
| } | |
| // Initialize when DOM is ready | |
| if (document.readyState === 'loading') { | |
| document.addEventListener('DOMContentLoaded', initializeMicrophoneSupport); | |
| } else { | |
| initializeMicrophoneSupport(); | |
| } | |
| // Also initialize on any dynamic content changes (for Gradio updates) | |
| if (window.MutationObserver) { | |
| const observer = new MutationObserver(function(mutations) { | |
| mutations.forEach(function(mutation) { | |
| if (mutation.type === 'childList' && mutation.addedNodes.length > 0) { | |
| // Check if audio components were added | |
| const hasAudioComponent = Array.from(mutation.addedNodes).some(node => | |
| node.nodeType === 1 && ( | |
| node.querySelector && ( | |
| node.querySelector('audio') || | |
| node.querySelector('[data-testid*="audio"]') || | |
| node.classList.contains('audio') | |
| ) | |
| ) | |
| ); | |
| if (hasAudioComponent) { | |
| console.log('🔄 Audio component detected, re-initializing microphone'); | |
| setTimeout(initializeMicrophoneSupport, 500); | |
| } | |
| } | |
| }); | |
| }); | |
| observer.observe(document.body, { | |
| childList: true, | |
| subtree: true | |
| }); | |
| } | |
| </script> | |
| <div style="text-align: center; background: linear-gradient(135deg, #4A90E2 0%, #FF6B9D 100%); color: white; padding: 20px; border-radius: 10px; margin-bottom: 20px;"> | |
| <h1>🎙️ Voice AI Platform</h1> | |
| <p>Voice RAG, Audio Translation và Voice Studio - Nền tảng AI giọng nói toàn diện</p> | |
| <div style="margin-top: 10px; font-size: 14px; opacity: 0.9;"> | |
| ✨ Tính năng mới: Voice RAG với 24 giọng nói đa ngôn ngữ | |
| </div> | |
| <div style="margin-top: 8px;">🧠 <strong>Digitized Brains</strong></div> | |
| </div> | |
| """) | |
| else: | |
| # Production mode - minimal header | |
| gr.HTML('<div style="text-align:center;"><h1>🎙️ Voice AI Platform</h1></div>') | |
| with gr.Tabs(): | |
| # Tab 1: Voice RAG | |
| with gr.TabItem("📚 Voice RAG"): | |
| # Header section with hf_voice style | |
| gr.HTML(""" | |
| <div style="display: flex; justify-content: center; gap: 15px; margin: 20px 0; flex-wrap: wrap;"> | |
| <div style="background: linear-gradient(135deg, #FF6B6B 0%, #FF8E53 100%); padding: 15px; border-radius: 10px; color: white; text-align: center; min-width: 140px;"> | |
| <h4>📚 Voice RAG</h4> | |
| <p style="margin: 0; font-size: 12px;">Hỏi đáp tài liệu thông minh</p> | |
| </div> | |
| <div style="background: linear-gradient(135deg, #4ECDC4 0%, #44A08D 100%); padding: 15px; border-radius: 10px; color: white; text-align: center; min-width: 140px;"> | |
| <h4>🌍 Multi-Language</h4> | |
| <p style="margin: 0; font-size: 12px;">13 ngôn ngữ trả lời</p> | |
| </div> | |
| <div style="background: linear-gradient(135deg, #45B7D1 0%, #96C93D 100%); padding: 15px; border-radius: 10px; color: white; text-align: center; min-width: 140px;"> | |
| <h4>🎤 Voice Output</h4> | |
| <p style="margin: 0; font-size: 12px;">24 giọng nói đa dạng</p> | |
| </div> | |
| <div style="background: linear-gradient(135deg, #A8E6CF 0%, #88D8A3 100%); padding: 15px; border-radius: 10px; color: white; text-align: center; min-width: 140px;"> | |
| <h4>🔄 AI Gemini</h4> | |
| <p style="margin: 0; font-size: 12px;">Gemini 2.0 Flash</p> | |
| </div> | |
| </div> | |
| """) | |
| gr.Markdown("### 📝 Upload tài liệu và đặt câu hỏi") | |
| # Input section - Mobile optimized | |
| with gr.Column(): | |
| # Document upload | |
| with gr.Row(): | |
| file_upload_rag = gr.File( | |
| label="📎 Tải lên tài liệu (PDF, DOCX, TXT)", | |
| file_types=[".pdf", ".docx", ".txt"] | |
| ) | |
| # Question input | |
| with gr.Row(): | |
| question_input_rag = gr.Textbox( | |
| label="❓ Câu hỏi của bạn", | |
| placeholder="Hãy đặt câu hỏi về nội dung tài liệu...", | |
| lines=3 | |
| ) | |
| # Language selection for answer | |
| with gr.Row(): | |
| answer_language_dropdown_rag = gr.Dropdown( | |
| choices=SUPPORTED_LANGUAGES, | |
| value="Vietnamese", | |
| label="🌍 Ngôn ngữ trả lời" | |
| ) | |
| # Voice selection từ Voice Studio | |
| with gr.Row(): | |
| with gr.Column(scale=1): | |
| rag_country_dropdown = gr.Dropdown( | |
| choices=list(voice_choices_by_country.keys()), | |
| value="🇻🇳 Việt Nam", | |
| label="🌍 Chọn quốc gia giọng nói" | |
| ) | |
| with gr.Column(scale=1): | |
| rag_voice_dropdown = gr.Dropdown( | |
| choices=voice_choices_by_country["🇻🇳 Việt Nam"], | |
| value="🇻🇳 HoaiMy - Nữ Việt Chuẩn", | |
| label="🎭 Chọn giọng nói" | |
| ) | |
| # Format selection for download | |
| with gr.Row(): | |
| rag_text_format_dropdown = gr.Dropdown( | |
| choices=["Markdown (.md)", "TXT (.txt)", "Word (.docx)"] if DOCX_AVAILABLE else ["Markdown (.md)", "TXT (.txt)"], | |
| value="Markdown (.md)", | |
| label="📄 Định dạng file trả lời" | |
| ) | |
| # Process button | |
| with gr.Row(): | |
| submit_btn_rag = gr.Button( | |
| "🚀 Xử lý tài liệu và trả lời", | |
| variant="primary", | |
| size="lg" | |
| ) | |
| # Results section - Mobile optimized | |
| with gr.Column(): | |
| # Document info section | |
| with gr.Accordion("📄 Thông tin tài liệu", open=True): | |
| detected_doc_language_rag = gr.Textbox( | |
| label="🌐 Ngôn ngữ tài liệu được phát hiện", | |
| lines=1, | |
| interactive=False, | |
| placeholder="Tự động nhận diện ngôn ngữ tài liệu..." | |
| ) | |
| # Text answer section | |
| with gr.Accordion("💬 Câu trả lời", open=True): | |
| gr.HTML(""" | |
| <div style=" | |
| background: linear-gradient(135deg, #e8f5e8 0%, #c8e6c9 100%); | |
| padding: 15px; | |
| border-radius: 12px; | |
| margin: 15px 0; | |
| border-left: 4px solid #4CAF50; | |
| text-align: center; | |
| "> | |
| <h4 style="margin: 0 0 10px 0; color: #2e7d32;">💬 AI Response with Markdown Formatting</h4> | |
| <p style="color: #388E3C; margin: 0; font-style: italic;"> | |
| Formatted response with tables, headers, and beautiful layout | |
| </p> | |
| </div> | |
| """) | |
| answer_output_rag = gr.Markdown( | |
| value="**Câu trả lời sẽ xuất hiện ở đây sau khi xử lý...**\n\n*Hỗ trợ format Markdown với tables, headers, lists và nhiều style khác*", | |
| label="", | |
| show_label=False, | |
| elem_classes=["markdown-response"] | |
| ) | |
| # Downloads section - Mobile optimized | |
| with gr.Accordion("💾 Tải xuống kết quả", open=True): | |
| gr.HTML(""" | |
| <div style="text-align: center; margin-bottom: 15px;"> | |
| <p style="color: #666; font-style: italic;">Tải xuống câu trả lời dưới dạng file và audio</p> | |
| </div> | |
| """) | |
| # Stack vertically on mobile | |
| with gr.Column(): | |
| # Audio download section | |
| with gr.Row(): | |
| audio_output_rag = gr.Audio( | |
| label="🔊 Audio câu trả lời", | |
| type="filepath" | |
| ) | |
| # Text download section | |
| with gr.Row(): | |
| text_output_rag = gr.File( | |
| label="📄 Văn bản câu trả lời", | |
| file_count="single", | |
| file_types=[".md", ".txt", ".docx"] | |
| ) | |
| # Status indicator for RAG | |
| rag_status_text = gr.HTML(""" | |
| <div style="text-align: center; margin: 20px 0;"> | |
| <div style=" | |
| background: linear-gradient(135deg, #4ECDC4 0%, #44A08D 100%); | |
| color: white; | |
| padding: 15px; | |
| border-radius: 12px; | |
| box-shadow: 0 4px 15px rgba(78,205,196,0.3); | |
| "> | |
| <span style="font-weight: bold; font-size: 1.1em;">✅ Sẵn sàng xử lý tài liệu</span> | |
| </div> | |
| </div> | |
| """) | |
| # Helper function for RAG format | |
def get_rag_format_from_dropdown(format_choice):
    """Map the download-format dropdown label to a format keyword.

    Args:
        format_choice: Label selected in the format dropdown, for example
            "Markdown (.md)", "TXT (.txt)" or "Word (.docx)". ``None`` or an
            empty string (a cleared dropdown) is accepted.

    Returns:
        "docx" for Word labels, "md" for Markdown labels, otherwise "txt".
    """
    if not format_choice:
        # No selection: use the same plain-text default as unknown labels
        # instead of raising TypeError on the substring tests below.
        return "txt"
    if "Word" in format_choice or "docx" in format_choice:
        return "docx"
    if "Markdown" in format_choice or "md" in format_choice:
        return "md"
    return "txt"
| # RAG processing function | |
def update_rag_status_processing():
    """Return the HTML status banner shown while the RAG request is running.

    The banner is a centered orange-to-red gradient box containing the
    "processing" message.
    """
    processing_banner = """
<div style="text-align: center; margin: 20px 0;">
<div style="
background: linear-gradient(135deg, #FF8E53 0%, #FF6B6B 100%);
color: white;
padding: 15px;
border-radius: 12px;
box-shadow: 0 4px 15px rgba(255,142,83,0.3);
">
<span style="font-weight: bold; font-size: 1.1em;">⏳ Đang xử lý tài liệu...</span>
</div>
</div>
"""
    return processing_banner
def update_rag_status_complete():
    """Return the HTML status banner shown once the RAG request has finished.

    The banner is a centered teal gradient box containing the "completed"
    message.
    """
    complete_banner = """
<div style="text-align: center; margin: 20px 0;">
<div style="
background: linear-gradient(135deg, #4ECDC4 0%, #44A08D 100%);
color: white;
padding: 15px;
border-radius: 12px;
box-shadow: 0 4px 15px rgba(78,205,196,0.3);
">
<span style="font-weight: bold; font-size: 1.1em;">✅ Xử lý hoàn thành!</span>
</div>
</div>
"""
    return complete_banner
| # Event handlers for Voice RAG | |
# Changing the country re-runs update_voices to refresh the RAG voice dropdown.
rag_country_dropdown.change(
    fn=update_voices,
    inputs=[rag_country_dropdown],
    outputs=[rag_voice_dropdown],
)

# Components read by, and written by, the RAG pipeline step.
rag_pipeline_inputs = [
    file_upload_rag,
    question_input_rag,
    answer_language_dropdown_rag,
    rag_voice_dropdown,
    rag_text_format_dropdown,
]
rag_pipeline_outputs = [
    answer_output_rag,
    detected_doc_language_rag,
    audio_output_rag,
    text_output_rag,
]

# Submit is a three-step chain: show the "processing" banner, run the
# pipeline (the dropdown label is converted to a format key first), then
# show the "complete" banner.
rag_status_step = submit_btn_rag.click(
    fn=lambda: update_rag_status_processing(),
    outputs=[rag_status_text],
)
rag_pipeline_step = rag_status_step.then(
    fn=lambda file, question, lang, voice, fmt: voice_rag_pipeline(file, question, lang, voice, get_rag_format_from_dropdown(fmt)),
    inputs=rag_pipeline_inputs,
    outputs=rag_pipeline_outputs,
)
rag_pipeline_step.then(
    fn=lambda: update_rag_status_complete(),
    outputs=[rag_status_text],
)
| # Voice Studio Tab | |
| with gr.TabItem("🎤 Voice Studio"): | |
| gr.HTML(""" | |
| <div style="display: flex; justify-content: center; gap: 15px; margin: 20px 0; flex-wrap: wrap;"> | |
| <div style="background: linear-gradient(135deg, #FF6B6B 0%, #FF8E53 100%); padding: 15px; border-radius: 10px; color: white; text-align: center; min-width: 150px;"> | |
| <h4>🇻🇳 Tiếng Việt</h4> | |
| <p style="margin: 0; font-size: 12px;">2 giọng chuẩn</p> | |
| <p style="margin: 0; font-size: 10px;">HoaiMy • NamMinh</p> | |
| </div> | |
| <div style="background: linear-gradient(135deg, #4ECDC4 0%, #44A08D 100%); padding: 15px; border-radius: 10px; color: white; text-align: center; min-width: 150px;"> | |
| <h4>🇺🇸🇬🇧 English</h4> | |
| <p style="margin: 0; font-size: 12px;">4 giọng chuẩn</p> | |
| <p style="margin: 0; font-size: 10px;">US • UK</p> | |
| </div> | |
| <div style="background: linear-gradient(135deg, #45B7D1 0%, #96C93D 100%); padding: 15px; border-radius: 10px; color: white; text-align: center; min-width: 150px;"> | |
| <h4>🌍 Đa ngôn ngữ</h4> | |
| <p style="margin: 0; font-size: 12px;">20 giọng chuẩn</p> | |
| <p style="margin: 0; font-size: 10px;">10 ngôn ngữ</p> | |
| </div> | |
| </div> | |
| """) | |
| gr.Markdown("### 📝 Nhập nội dung và chọn giọng nói") | |
| with gr.Row(): | |
| text_input = gr.Textbox( | |
| placeholder="Nhập văn bản cần chuyển thành giọng nói...", | |
| lines=4, | |
| label="Văn bản", | |
| scale=2 | |
| ) | |
| with gr.Row(): | |
| with gr.Column(scale=1): | |
| country_dropdown = gr.Dropdown( | |
| choices=list(voice_choices_by_country.keys()), | |
| value="🇻🇳 Việt Nam", | |
| label="🌍 Chọn quốc gia" | |
| ) | |
| with gr.Column(scale=1): | |
| voice_dropdown = gr.Dropdown( | |
| choices=voice_choices_by_country["🇻🇳 Việt Nam"], | |
| value="🇻🇳 HoaiMy - Nữ Việt Chuẩn", | |
| label="🎭 Chọn giọng nói" | |
| ) | |
| with gr.Row(): | |
| with gr.Column(scale=2): | |
| speed_slider = gr.Slider( | |
| minimum=0.5, | |
| maximum=2.0, | |
| value=1.0, | |
| step=0.1, | |
| label="⚡ Tốc độ phát" | |
| ) | |
| with gr.Column(scale=1): | |
| voice_studio_format_dropdown = gr.Dropdown( | |
| choices=["Markdown (.md)", "TXT (.txt)", "Word (.docx)"] if DOCX_AVAILABLE else ["Markdown (.md)", "TXT (.txt)"], | |
| value="Markdown (.md)", | |
| label="📄 Định dạng file tải xuống" | |
| ) | |
| # Translation feature | |
| with gr.Row(): | |
| with gr.Column(scale=1): | |
| translate_checkbox = gr.Checkbox( | |
| label="🌍 Dịch văn bản trước khi tạo giọng nói", | |
| value=False | |
| ) | |
| with gr.Column(scale=2): | |
| translate_btn = gr.Button("🔄 DỊCH VĂN BẢN", variant="secondary", size="lg", visible=False) | |
| # Show translated text when translation is enabled | |
| translated_text_output = gr.Textbox( | |
| label="📝 Văn bản đã dịch", | |
| lines=3, | |
| interactive=True, | |
| visible=False, | |
| placeholder="Văn bản sau khi dịch sẽ hiển thị ở đây..." | |
| ) | |
| generate_btn = gr.Button("🎵 TẠO GIỌNG NÓI", variant="primary", size="lg") | |
| # Status indicator for Voice Studio | |
| studio_status_text = gr.HTML(""" | |
| <div style="text-align: center; margin: 20px 0;"> | |
| <div style=" | |
| background: linear-gradient(135deg, #4ECDC4 0%, #44A08D 100%); | |
| color: white; | |
| padding: 15px; | |
| border-radius: 12px; | |
| box-shadow: 0 4px 15px rgba(78,205,196,0.3); | |
| "> | |
| <span style="font-weight: bold; font-size: 1.1em;">⚡ Sẵn sàng tạo giọng nói</span> | |
| </div> | |
| </div> | |
| """) | |
| gr.Markdown("### 🎧 Kết quả âm thanh") | |
| audio_output_vs = gr.HTML( | |
| value="<p style='text-align: center; color: #666; padding: 40px;'>Nhấn 'TẠO GIỌNG NÓI' để bắt đầu 🎤</p>" | |
| ) | |
| # Download section for Voice Studio | |
| with gr.Accordion("💾 Tải xuống kết quả", open=False): | |
| gr.HTML(""" | |
| <div style=" | |
| background: linear-gradient(135deg, #e3f2fd 0%, #bbdefb 100%); | |
| padding: 15px; | |
| border-radius: 12px; | |
| margin: 15px 0; | |
| border-left: 4px solid #2196F3; | |
| text-align: center; | |
| "> | |
| <h4 style="margin: 0 0 10px 0; color: #1976D2;">📄 Tải xuống văn bản với Markdown formatting</h4> | |
| <p style="color: #1565C0; margin: 0; font-style: italic;"> | |
| File chứa thông tin session, cấu hình giọng nói và technical details | |
| </p> | |
| </div> | |
| """) | |
| voice_studio_text_output = gr.File( | |
| label="📄 Văn bản với thông tin chi tiết", | |
| file_count="single", | |
| file_types=[".md", ".txt", ".docx"] | |
| ) | |
| # Examples section | |
| gr.Markdown("### 📚 Ví dụ nhanh") | |
| with gr.Row(): | |
| example_vn = gr.Button("🇻🇳 Tiếng Việt", size="sm") | |
| example_en = gr.Button("🇺🇸 English", size="sm") | |
| example_de = gr.Button("🇩🇪 Deutsch", size="sm") | |
| example_translate = gr.Button("🌍 Dịch thuật", size="sm") | |
| # Example button functions | |
def load_vn_example():
    """Return the Vietnamese sample sentence and the matching country label."""
    sample_text = "Xin chào! Chào mừng bạn đến với studio giọng nói."
    country_label = "🇻🇳 Việt Nam"
    return sample_text, country_label
def load_en_example():
    """Return the English sample sentence and the matching country label."""
    sample_text = "Hello! Welcome to our voice studio."
    country_label = "🇺🇸 Hoa Kỳ"
    return sample_text, country_label
def load_de_example():
    """Return the German sample sentence and the matching country label."""
    sample_text = "Hallo! Willkommen in unserem Sprachstudio."
    country_label = "🇩🇪 Đức"
    return sample_text, country_label
def load_translate_example():
    """Return the translation demo: sample text, country label, and checkbox state."""
    sample_text = "Hello! This is an example text for translation."
    country_label = "🇺🇸 Hoa Kỳ"
    # The third value is written to the translate checkbox by the click handler.
    translation_enabled = True
    return sample_text, country_label, translation_enabled
| # Translation functions | |
def toggle_translation_ui(translate_enabled):
    """Build visibility updates for the translate button and the translated-text box.

    Args:
        translate_enabled: Checkbox state; both components are shown when truthy.

    Returns:
        A 2-tuple of ``gr.update`` objects, in the order
        (translate_btn, translated_text_output).
    """
    button_update = gr.update(visible=translate_enabled)
    textbox_update = gr.update(visible=translate_enabled)
    return button_update, textbox_update
def translate_text_interface(text, voice_selection):
    """Translate the Voice Studio input text into the selected voice's language.

    Args:
        text: Raw text from the input box; may be empty, whitespace-only,
            or ``None``.
        voice_selection: Voice label, passed to
            ``get_target_language_from_voice`` to pick the target language.

    Returns:
        Whatever ``translate_text_with_gemini`` returns, or a Vietnamese
        "please enter text" prompt when there is nothing to translate.
    """
    # A missing value is treated like an empty box, so the user sees the
    # prompt instead of an AttributeError from calling .strip() on None.
    if not text or not text.strip():
        return "Vui lòng nhập văn bản trước khi dịch"
    target_language = get_target_language_from_voice(voice_selection)
    return translate_text_with_gemini(text, target_language)
def create_voice_with_translation(original_text, translated_text, translate_enabled, voice_selection, speed, text_format="txt"):
    """Synthesize speech from the translated text when usable, else from the original.

    The translated text is used only when translation is enabled, the text is
    non-blank, and it does not start with "Lỗi" (the prefix this function
    treats as a failed translation).

    Args:
        original_text: Text as entered by the user.
        translated_text: Contents of the translated-text box; may be empty
            or ``None``.
        translate_enabled: State of the translate checkbox.
        voice_selection: Voice label forwarded to ``create_audio_voice_studio``.
        speed: Playback speed forwarded to ``create_audio_voice_studio``.
        text_format: Download format key forwarded to
            ``create_audio_voice_studio``.

    Returns:
        Whatever ``create_audio_voice_studio`` returns for the chosen text.
    """
    # Normalise None to "" so the checks below cannot raise AttributeError
    # when the translated-text box has no value.
    candidate = translated_text or ""
    use_translation = bool(
        translate_enabled
        and candidate.strip()
        and not candidate.startswith("Lỗi")
    )
    text_to_speak = candidate if use_translation else original_text
    return create_audio_voice_studio(text_to_speak, voice_selection, speed, text_format)
| # Event handlers for Voice Studio | |
# Changing the country re-runs update_voices to refresh the voice dropdown.
country_dropdown.change(
    fn=update_voices,
    inputs=[country_dropdown],
    outputs=[voice_dropdown],
)

# The three plain example buttons fill the text box and the country dropdown.
for example_button, example_loader in (
    (example_vn, load_vn_example),
    (example_en, load_en_example),
    (example_de, load_de_example),
):
    example_button.click(
        fn=example_loader,
        outputs=[text_input, country_dropdown],
    )

# The translation example additionally sets the translate checkbox.
example_translate.click(
    fn=load_translate_example,
    outputs=[text_input, country_dropdown, translate_checkbox],
)

# Translation UI toggle: the checkbox shows/hides the button and the result box.
translate_checkbox.change(
    fn=toggle_translation_ui,
    inputs=[translate_checkbox],
    outputs=[translate_btn, translated_text_output],
)

# Translation button: translate the input text for the selected voice.
translate_btn.click(
    fn=translate_text_interface,
    inputs=[text_input, voice_dropdown],
    outputs=[translated_text_output],
)
| # Helper function to extract format and process Voice Studio | |
def process_voice_studio(original_text, translated_text, translate_enabled, voice_selection, speed, format_choice):
    """Convert the format dropdown label to a format key and generate the voice.

    Args:
        original_text: Text as entered by the user.
        translated_text: Contents of the translated-text box.
        translate_enabled: State of the translate checkbox.
        voice_selection: Selected voice label.
        speed: Playback speed from the slider.
        format_choice: Download-format dropdown label.

    Returns:
        Whatever ``create_voice_with_translation`` returns.
    """
    # Keywords are tested in priority order; the first match wins and
    # anything unmatched falls back to plain text.
    text_format = "txt"
    for keyword, format_key in (("Markdown", "md"), ("Word", "docx")):
        if keyword in format_choice:
            text_format = format_key
            break
    return create_voice_with_translation(original_text, translated_text, translate_enabled, voice_selection, speed, text_format)
| # Generate voice with translation support | |
# Inputs are listed in the positional order expected by process_voice_studio.
voice_studio_inputs = [
    text_input,
    translated_text_output,
    translate_checkbox,
    voice_dropdown,
    speed_slider,
    voice_studio_format_dropdown,
]
voice_studio_outputs = [audio_output_vs, voice_studio_text_output]
generate_btn.click(
    fn=process_voice_studio,
    inputs=voice_studio_inputs,
    outputs=voice_studio_outputs,
)
| # Audio Translation Tab | |
| with gr.TabItem("🎙️ Audio Translation"): | |
| # Colorful feature cards like Voice Studio | |
| gr.HTML(""" | |
| <div style="display: flex; justify-content: center; gap: 15px; margin: 20px 0; flex-wrap: wrap;"> | |
| <div style="background: linear-gradient(135deg, #FF6B6B 0%, #FF8E53 100%); padding: 15px; border-radius: 10px; color: white; text-align: center; min-width: 150px;"> | |
| <h4>🎤 Ghi âm</h4> | |
| <p style="margin: 0; font-size: 12px;">Microphone</p> | |
| <p style="margin: 0; font-size: 10px;">Real-time</p> | |
| </div> | |
| <div style="background: linear-gradient(135deg, #4ECDC4 0%, #44A08D 100%); padding: 15px; border-radius: 10px; color: white; text-align: center; min-width: 150px;"> | |
| <h4>📁 Upload</h4> | |
| <p style="margin: 0; font-size: 12px;">Audio Files</p> | |
| <p style="margin: 0; font-size: 10px;">WAV • MP3</p> | |
| </div> | |
| <div style="background: linear-gradient(135deg, #45B7D1 0%, #96C93D 100%); padding: 15px; border-radius: 10px; color: white; text-align: center; min-width: 150px;"> | |
| <h4>🔄 AI Dịch</h4> | |
| <p style="margin: 0; font-size: 12px;">13 ngôn ngữ</p> | |
| <p style="margin: 0; font-size: 10px;">Gemini 2.0</p> | |
| </div> | |
| <div style="background: linear-gradient(135deg, #A855F7 0%, #EC4899 100%); padding: 15px; border-radius: 10px; color: white; text-align: center; min-width: 150px;"> | |
| <h4>🎵 Tổng hợp</h4> | |
| <p style="margin: 0; font-size: 12px;">Neural TTS</p> | |
| <p style="margin: 0; font-size: 10px;">26 giọng</p> | |
| </div> | |
| </div> | |
| """) | |
| # Input section with colorful design | |
| gr.HTML(""" | |
| <div style=" | |
| background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); | |
| color: white; | |
| padding: 20px; | |
| border-radius: 15px; | |
| margin: 20px 0; | |
| text-align: center; | |
| box-shadow: 0 8px 32px rgba(0,0,0,0.2); | |
| "> | |
| <h3 style="margin: 0 0 10px 0;">🎤 Tải lên file audio hoặc ghi âm trực tiếp</h3> | |
| <p style="margin: 0; opacity: 0.9; font-size: 0.95em;"> | |
| Hỗ trợ file WAV, MP3 hoặc ghi âm real-time qua microphone | |
| </p> | |
| </div> | |
| """) | |
| # Enhanced microphone permission notice and controls | |
| if not (os.environ.get("SPACE_ID") or os.environ.get("HF_SPACE_ID")): | |
| gr.HTML(""" | |
| <div id="microphone-section" style="margin: 15px 0;"> | |
| <!-- Microphone Status Indicator --> | |
| <div id="mic-status" style=" | |
| background: linear-gradient(135deg, #e8f5e8 0%, #c8e6c9 100%); | |
| color: #2e7d32; | |
| padding: 12px; | |
| border-radius: 8px; | |
| margin-bottom: 10px; | |
| text-align: center; | |
| border: 1px solid #4caf50; | |
| display: none; | |
| "> | |
| <strong>🎤 Microphone Ready</strong> - Bạn có thể ghi âm trực tiếp | |
| </div> | |
| <!-- Microphone Error/Permission Notice --> | |
| <div id="microphone-notice" style=" | |
| background: linear-gradient(135deg, #fff3cd 0%, #ffeaa7 100%); | |
| color: #856404; | |
| padding: 15px; | |
| border-radius: 10px; | |
| border: 1px solid #ffeaa7; | |
| text-align: center; | |
| display: none; | |
| "> | |
| <strong>🎤 Microphone Access Required</strong><br> | |
| Để sử dụng ghi âm, vui lòng cho phép truy cập microphone.<br> | |
| <button onclick="requestMicrophoneAccess()" style=" | |
| background: #4caf50; | |
| color: white; | |
| padding: 8px 16px; | |
| border: none; | |
| border-radius: 6px; | |
| cursor: pointer; | |
| margin: 8px 4px; | |
| ">🎤 Kích hoạt Microphone</button> | |
| <a href="#" onclick="window.open(window.location.href, '_blank')" style=" | |
| background: #667eea; | |
| color: white; | |
| padding: 8px 16px; | |
| text-decoration: none; | |
| border-radius: 6px; | |
| display: inline-block; | |
| margin: 8px 4px; | |
| ">🔗 Mở cửa sổ mới</a> | |
| </div> | |
| <!-- Iframe Warning --> | |
| <div id="iframe-warning" style=" | |
| background: linear-gradient(135deg, #ffebee 0%, #ffcdd2 100%); | |
| color: #c62828; | |
| padding: 12px; | |
| border-radius: 8px; | |
| border: 1px solid #f44336; | |
| text-align: center; | |
| display: none; | |
| "> | |
| <strong>⚠️ Iframe Restriction</strong><br> | |
| Microphone có thể bị hạn chế trong iframe. | |
| <a href="#" onclick="window.open(window.location.href, '_blank')" style="color: #c62828; text-decoration: underline;"> | |
| Mở trong cửa sổ mới | |
| </a> để sử dụng đầy đủ tính năng. | |
| </div> | |
| </div> | |
| <script> | |
| // Enhanced microphone permission handling | |
| let microphoneAccess = false; | |
| function requestMicrophoneAccess() { | |
| console.log('🎤 Audio Translation: Requesting microphone access...'); | |
| // Use global microphone function if available | |
| if (window.requestMicrophonePermission) { | |
| window.requestMicrophonePermission(); | |
| return; | |
| } | |
| // Fallback to local implementation | |
| if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) { | |
| navigator.mediaDevices.getUserMedia({ | |
| audio: { | |
| echoCancellation: true, | |
| noiseSuppression: true, | |
| autoGainControl: true, | |
| sampleRate: 44100 | |
| } | |
| }) | |
| .then(function(stream) { | |
| console.log('✅ Audio Translation: Microphone access granted'); | |
| microphoneAccess = true; | |
| // Show success status | |
| updateLocalMicrophoneUI('granted'); | |
| // Stop the stream (we just wanted permission) | |
| stream.getTracks().forEach(track => track.stop()); | |
| // Trigger Gradio audio component refresh | |
| setTimeout(() => { | |
| const audioComponents = document.querySelectorAll('[data-testid*="audio"]'); | |
| audioComponents.forEach(comp => { | |
| // Try to trigger a refresh or re-initialization | |
| if (comp.click) comp.click(); | |
| }); | |
| }, 500); | |
| // Update global status if available | |
| if (window.microphoneStatus) { | |
| window.microphoneStatus.granted = true; | |
| } | |
| }) | |
| .catch(function(err) { | |
| console.log('❌ Audio Translation: Microphone access denied:', err); | |
| updateLocalMicrophoneUI('denied', err.message); | |
| }); | |
| } else { | |
| console.log('❌ getUserMedia not supported'); | |
| updateLocalMicrophoneUI('unsupported'); | |
| } | |
| } | |
| function updateLocalMicrophoneUI(status, errorMessage = '') { | |
| const micStatus = document.getElementById('mic-status'); | |
| const micNotice = document.getElementById('microphone-notice'); | |
| switch(status) { | |
| case 'granted': | |
| if (micStatus) micStatus.style.display = 'block'; | |
| if (micNotice) micNotice.style.display = 'none'; | |
| microphoneAccess = true; | |
| break; | |
| case 'denied': | |
| if (micNotice) { | |
| micNotice.style.display = 'block'; | |
| micNotice.innerHTML = ` | |
| <strong>❌ Microphone Access Denied</strong><br> | |
| Lỗi: ${errorMessage}<br> | |
| Vui lòng kiểm tra cài đặt trình duyệt và cho phép microphone. | |
| <br><br> | |
| <button onclick="requestMicrophoneAccess()" style=" | |
| background: #ff9800; | |
| color: white; | |
| padding: 8px 16px; | |
| border: none; | |
| border-radius: 6px; | |
| cursor: pointer; | |
| margin: 4px; | |
| ">🔄 Thử lại</button> | |
| <button onclick="window.open(window.location.href, '_blank')" style=" | |
| background: #2196f3; | |
| color: white; | |
| padding: 8px 16px; | |
| border: none; | |
| border-radius: 6px; | |
| cursor: pointer; | |
| margin: 4px; | |
| ">🔗 Mở cửa sổ mới</button> | |
| `; | |
| } | |
| break; | |
| case 'unsupported': | |
| if (micNotice) { | |
| micNotice.style.display = 'block'; | |
| micNotice.innerHTML = ` | |
| <strong>❌ Microphone Not Supported</strong><br> | |
| Trình duyệt của bạn không hỗ trợ ghi âm.<br> | |
| Vui lòng sử dụng Chrome, Firefox, Safari hoặc Edge phiên bản mới. | |
| <br><br> | |
| <a href="https://caniuse.com/stream" target="_blank" style=" | |
| color: #856404; | |
| text-decoration: underline; | |
| ">Kiểm tra tương thích trình duyệt</a> | |
| `; | |
| } | |
| break; | |
| default: | |
| if (micNotice) { | |
| micNotice.style.display = 'block'; | |
| } | |
| break; | |
| } | |
| } | |
| // Listen for global microphone events | |
| window.addEventListener('microphoneStatusChanged', function(event) { | |
| console.log('🔄 Audio Translation: Received microphone status update:', event.detail); | |
| updateLocalMicrophoneUI(event.detail.status, event.detail.errorMessage); | |
| }); | |
| window.addEventListener('microphoneGranted', function() { | |
| console.log('✅ Audio Translation: Global microphone granted'); | |
| updateLocalMicrophoneUI('granted'); | |
| }); | |
| // Check microphone availability on load | |
| function checkMicrophoneAvailability() { | |
| console.log('🔍 Audio Translation: Checking microphone availability...'); | |
| // Check global status first | |
| if (window.microphoneStatus) { | |
| if (window.microphoneStatus.granted) { | |
| updateLocalMicrophoneUI('granted'); | |
| return; | |
| } else if (!window.microphoneStatus.supported) { | |
| updateLocalMicrophoneUI('unsupported'); | |
| return; | |
| } | |
| } | |
| // Check if we're in an iframe | |
| if (window.location !== window.parent.location) { | |
| console.log('Running in iframe'); | |
| const iframeWarning = document.getElementById('iframe-warning'); | |
| if (iframeWarning) { | |
| setTimeout(() => { | |
| iframeWarning.style.display = 'block'; | |
| }, 1000); | |
| } | |
| } | |
| // Try to get microphone permissions | |
| if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) { | |
| // Check if we already have permission | |
| navigator.permissions.query({name: 'microphone'}).then(function(result) { | |
| console.log('Microphone permission status:', result.state); | |
| if (result.state === 'granted') { | |
| const micStatus = document.getElementById('mic-status'); | |
| if (micStatus) micStatus.style.display = 'block'; | |
| microphoneAccess = true; | |
| } else if (result.state === 'prompt' || result.state === 'denied') { | |
| const micNotice = document.getElementById('microphone-notice'); | |
| if (micNotice) { | |
| setTimeout(() => { | |
| micNotice.style.display = 'block'; | |
| }, 1500); | |
| } | |
| } | |
| // Listen for permission changes | |
| result.onchange = function() { | |
| console.log('Microphone permission changed to:', this.state); | |
| if (this.state === 'granted') { | |
| const micStatus = document.getElementById('mic-status'); | |
| const micNotice = document.getElementById('microphone-notice'); | |
| if (micStatus) micStatus.style.display = 'block'; | |
| if (micNotice) micNotice.style.display = 'none'; | |
| microphoneAccess = true; | |
| } | |
| }; | |
| }).catch(function(err) { | |
| console.log('Permission query failed:', err); | |
| // Fallback to showing the notice | |
| setTimeout(() => { | |
| const micNotice = document.getElementById('microphone-notice'); | |
| if (micNotice) micNotice.style.display = 'block'; | |
| }, 2000); | |
| }); | |
| } else { | |
| // Browser doesn't support getUserMedia | |
| setTimeout(() => { | |
| const micNotice = document.getElementById('microphone-notice'); | |
| if (micNotice) { | |
| micNotice.style.display = 'block'; | |
| micNotice.innerHTML = ` | |
| <strong>❌ Microphone Not Supported</strong><br> | |
| Trình duyệt không hỗ trợ ghi âm. Vui lòng cập nhật trình duyệt. | |
| `; | |
| } | |
| }, 1000); | |
| } | |
| } | |
| // Initialize when DOM is ready | |
| if (document.readyState === 'loading') { | |
| document.addEventListener('DOMContentLoaded', checkMicrophoneAvailability); | |
| } else { | |
| checkMicrophoneAvailability(); | |
| } | |
| // Re-check periodically for dynamic content | |
| setInterval(checkMicrophoneAvailability, 5000); | |
| </script> | |
| """) | |
| else: | |
| # Production mode - simple microphone notice | |
| gr.HTML('<div style="text-align:center;color:#666;padding:10px;">📎 Upload audio file or use microphone</div>') | |
| audio_input = gr.Audio( | |
| label="📎 Tải lên file audio hoặc ghi âm trực tiếp", | |
| type="numpy", # Use numpy to avoid temp file issues | |
| sources=["upload", "microphone"], | |
| show_label=True, | |
| interactive=True, | |
| elem_id="audio-input-translation" | |
| ) | |
| # Audio Recording Control Buttons | |
| with gr.Row(): | |
| save_recording_btn = gr.Button( | |
| "💾 Save Recording", | |
| variant="secondary", | |
| size="sm" | |
| ) | |
| new_recording_btn = gr.Button( | |
| "🎙️ New Record", | |
| variant="primary", | |
| size="sm" | |
| ) | |
| # Button descriptions | |
| gr.HTML(""" | |
| <div style="display: flex; justify-content: space-between; margin: 5px 0 15px 0; font-size: 0.8em; color: #666;"> | |
| <span>💾 Lưu file audio hiện tại vào record_data</span> | |
| <span>🎙️ Xóa audio hiện tại để ghi âm mới</span> | |
| </div> | |
| """) | |
| # Status for recording actions | |
| recording_status = gr.HTML( | |
| value="<p style='text-align: center; color: #666; font-style: italic;'>Sẵn sàng ghi âm hoặc tải lên file</p>" | |
| ) | |
| # === RECORDED FILES FUNCTIONS === | |
def refresh_recorded_files():
    """Return a recorded-files dropdown rebuilt from the current file list, with nothing selected."""
    available_files = get_recorded_files()
    print(f"🔄 Refreshing dropdown - found files: {available_files}")
    refreshed_dropdown = gr.Dropdown(choices=available_files, value=None)
    return refreshed_dropdown
def load_recorded_file(filename):
    """Load a recorded file so the preview player can play it.

    Args:
        filename: Name selected in the recorded-files dropdown; may be
            empty or ``None``.

    Returns:
        ``(sample_rate, audio_data)`` as read by ``soundfile.read``, or
        ``None`` when no name is given, the file is missing, or reading fails.
    """
    print(f"🎵 Loading recorded file: {filename}")

    # Guard: nothing selected.
    if not (filename and filename.strip()):
        print("❌ No filename provided")
        return None

    file_path = get_recorded_file_path(filename)
    print(f"📁 Full path: {file_path}")

    # Guard: file missing; log the parent directory listing to aid debugging.
    if not os.path.exists(file_path):
        print(f"❌ File not found: {file_path}")
        parent_dir = os.path.dirname(file_path)
        print(f"📁 Directory contents: {os.listdir(parent_dir) if os.path.exists(parent_dir) else 'Directory not found'}")
        return None

    file_size = os.path.getsize(file_path)
    print(f"✅ File exists, size: {file_size} bytes")
    try:
        # Imported lazily, as in the rest of the file, to avoid startup overhead.
        import soundfile as sf
        audio_data, sample_rate = sf.read(file_path)
        print(f"🎵 Loaded audio: shape={audio_data.shape}, sr={sample_rate}")
        # Tuple order (sample_rate, audio_data) is what the numpy-typed Audio component takes.
        return (sample_rate, audio_data)
    except Exception as e:
        print(f"❌ Error loading audio: {e}")
        return None
def use_recorded_for_translation(filename, country, voice, fmt):
    """Run the audio translation on a previously recorded file.

    Args:
        filename: Name selected in the recorded-files dropdown; may be
            empty or ``None``.
        country: Target country label forwarded to ``translate_audio``.
        voice: Target voice label forwarded to ``translate_audio``.
        fmt: Text-format dropdown label, converted with
            ``get_format_from_dropdown`` before being forwarded.

    Returns:
        Whatever ``translate_audio`` returns when the file exists; otherwise
        an eight-element tuple of empty placeholders
        ``("", "", "", "", None, "", "", None)``.
    """
    empty_result = ("", "", "", "", None, "", "", None)
    print(f"🔄 Using recorded file for translation: {filename}")

    if not (filename and filename.strip()):
        print("❌ No file selected for translation")
        return empty_result

    file_path = get_recorded_file_path(filename)
    print(f"📁 Translation file path: {file_path}")

    if not os.path.exists(file_path):
        # Return here so a missing file is not also logged as "no file
        # selected", which would misreport the cause.
        print(f"❌ File not found for translation: {file_path}")
        return empty_result

    print(f"✅ Starting translation for: {filename}")
    # Use the same translation function as the live audio input.
    return translate_audio(file_path, country, voice, get_format_from_dropdown(fmt))
def prepare_recorded_file_download(filename):
    """Resolve a recorded file name to a path that can be offered for download.

    Args:
        filename: Name selected in the recorded-files dropdown; may be
            empty or ``None``.

    Returns:
        The path from ``get_recorded_file_path`` when that file exists,
        otherwise ``None``.
    """
    print(f"📥 Preparing download for: {filename}")

    if not (filename and filename.strip()):
        print("❌ No file selected for download")
        return None

    file_path = get_recorded_file_path(filename)
    print(f"📁 Download file path: {file_path}")

    if not os.path.exists(file_path):
        # Return here so a missing file is not also logged as "no file
        # selected", which would misreport the cause.
        print(f"❌ Download file not found: {file_path}")
        return None

    print(f"✅ File ready for download: {filename}")
    return file_path
def save_current_recording(audio_file):
    """Save the current audio input via ``save_recorded_audio`` and refresh the file dropdown.

    Args:
        audio_file: Current value of the audio input; ``None`` when there
            is nothing to save.

    Returns:
        A 2-tuple ``(status_html, dropdown)``. On success the dropdown
        selects the newly saved file; on any failure it lists the current
        files with nothing selected.
    """
    # Local import keeps this block self-contained.
    import html

    def _status(color, message):
        # Centered one-line status paragraph in the given colour.
        return f"<p style='color: {color}; text-align: center;'>{message}</p>"

    def _failure(message):
        # Red status plus a dropdown refreshed from disk with no selection.
        return (
            _status("#e74c3c", message),
            gr.Dropdown(choices=get_recorded_files(), value=None),
        )

    if audio_file is None:
        return _failure("❌ Không có file audio để lưu")

    try:
        saved_path = save_recorded_audio(audio_file)
        if not saved_path:
            return _failure("❌ Lỗi khi lưu file")

        saved_filename = os.path.basename(saved_path)
        # Get the updated file list after saving.
        updated_files = get_recorded_files()
        print(f"🔄 After save - updated files: {updated_files}")
        return (
            # Escaped because the name is interpolated into HTML markup.
            _status("#27ae60", f"✅ Đã lưu: {html.escape(saved_filename)}"),
            gr.Dropdown(choices=updated_files, value=saved_filename),
        )
    except Exception as e:
        # Exception text can contain characters such as "<" or "&";
        # escape it so it is shown literally instead of breaking the markup.
        return _failure(f"❌ Lỗi: {html.escape(str(e))}")
def clear_audio_for_new_recording():
    """Return values that clear the audio input and show a ready-to-record status.

    Returns:
        ``(None, status_html)``; ``None`` clears the audio input.
    """
    cleared_audio = None
    ready_status = "<p style='color: #3498db; text-align: center;'>🎙️ Sẵn sàng ghi âm mới</p>"
    return cleared_audio, ready_status
def delete_selected_file(filename):
    """Delete the selected recording and rebuild the file dropdown.

    Args:
        filename: Name selected in the recorded-files dropdown; may be
            empty or ``None``.

    Returns:
        A 3-tuple ``(status_html, dropdown, None)``; the trailing ``None``
        clears the audio player.
    """
    nothing_selected = not (filename and filename.strip())
    if nothing_selected:
        prompt_html = "<p style='color: #e74c3c; text-align: center;'>❌ Vui lòng chọn file để xóa</p>"
        return prompt_html, gr.Dropdown(choices=get_recorded_files(), value=None), None

    # Delete first, then re-read the directory so the dropdown reflects the result.
    outcome_message = delete_recorded_file(filename)
    remaining_files = get_recorded_files()

    # The helper's message carries a check mark on success; use it to pick the colour.
    status_color = "#27ae60" if "✅" in outcome_message else "#e74c3c"
    status_html = f"<p style='color: {status_color}; text-align: center;'>{outcome_message}</p>"
    return status_html, gr.Dropdown(choices=remaining_files, value=None), None
| # Recorded Files Management Section | |
| with gr.Accordion("🎤 File đã ghi âm", open=False): | |
| gr.HTML(""" | |
| <div style=" | |
| background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); | |
| color: white; | |
| padding: 15px; | |
| border-radius: 12px; | |
| margin: 15px 0; | |
| text-align: center; | |
| "> | |
| <h4 style="margin: 0 0 8px 0;">📁 Quản lý file đã ghi</h4> | |
| <p style="margin: 0; opacity: 0.9; font-size: 0.9em;"> | |
| Chọn file từ danh sách để phát lại hoặc dịch thuật | |
| </p> | |
| </div> | |
| """) | |
| # Refresh button for recorded files | |
| refresh_files_btn = gr.Button( | |
| "🔄 Làm mới danh sách", | |
| variant="secondary", | |
| size="sm" | |
| ) | |
| # Status display for file operations | |
| file_operation_status = gr.HTML( | |
| value="<p style='text-align: center; color: #666; font-style: italic;'>Chọn file để thực hiện thao tác</p>" | |
| ) | |
| # Dropdown for recorded files | |
| initial_files = get_recorded_files() | |
| print(f"🔍 Initial recorded files: {initial_files}") | |
| recorded_files_dropdown = gr.Dropdown( | |
| choices=initial_files, | |
| label="📂 Chọn file đã ghi", | |
| info="Các file audio đã được ghi âm trước đó" | |
| ) | |
| # Preview and controls for selected file | |
| with gr.Row(): | |
| with gr.Column(): | |
| # Audio player for selected file | |
| recorded_audio_player = gr.Audio( | |
| label="🎵 Phát lại file đã chọn", | |
| interactive=False, | |
| show_label=True, | |
| type="numpy" # Use numpy for better compatibility | |
| ) | |
| with gr.Column(): | |
| # Action buttons | |
| use_for_translation_btn = gr.Button( | |
| "🔄 Sử dụng để dịch thuật", | |
| variant="primary", | |
| size="sm" | |
| ) | |
| with gr.Row(): | |
| download_recorded_btn = gr.Button( | |
| "📥 Tải xuống", | |
| variant="secondary", | |
| size="sm" | |
| ) | |
| delete_recorded_btn = gr.Button( | |
| "🗑️ Xóa file", | |
| variant="stop", | |
| size="sm" | |
| ) | |
| # Download link for recorded file | |
| download_recorded_file = gr.File( | |
| label="📥 File tải xuống", | |
| visible=True, | |
| file_count="single" | |
| ) | |
| # Settings section with gradient header | |
| gr.HTML(""" | |
| <div style=" | |
| background: linear-gradient(135deg, #FF6B6B 0%, #FF8E53 100%); | |
| color: white; | |
| padding: 18px; | |
| border-radius: 12px; | |
| margin: 25px 0 20px 0; | |
| text-align: center; | |
| box-shadow: 0 6px 24px rgba(255,107,107,0.3); | |
| "> | |
| <h3 style="margin: 0 0 8px 0;">🌍 Cài đặt dịch thuật</h3> | |
| <p style="margin: 0; opacity: 0.9; font-size: 0.9em;"> | |
| Chọn ngôn ngữ đích và giọng nói cho kết quả dịch thuật | |
| </p> | |
| </div> | |
| """) | |
| # Separate dropdowns without complex wrappers to avoid CSS conflicts | |
| target_country_dropdown = gr.Dropdown( | |
| choices=list(voice_choices_by_country.keys()), | |
| value="🇻🇳 Việt Nam", | |
| label="🌍 Chọn quốc gia đích" | |
| ) | |
| target_voice_dropdown = gr.Dropdown( | |
| choices=voice_choices_by_country["🇻🇳 Việt Nam"], | |
| value="🇻🇳 HoaiMy - Nữ Việt Chuẩn", | |
| label="🎭 Chọn giọng nói đích" | |
| ) | |
| text_format_dropdown = gr.Dropdown( | |
| choices=["Markdown (.md)", "TXT (.txt)", "Word (.docx)"] if DOCX_AVAILABLE else ["Markdown (.md)", "TXT (.txt)"], | |
| value="Markdown (.md)", | |
| label="📄 Định dạng file văn bản" | |
| ) | |
| # Colorful action button | |
| gr.HTML(""" | |
| """) | |
| # Auto-translate on audio upload - no manual button needed | |
| # Results section with colorful headers | |
| gr.HTML(""" | |
| <div style=" | |
| background: linear-gradient(135deg, #45B7D1 0%, #96C93D 100%); | |
| color: white; | |
| padding: 18px; | |
| border-radius: 12px; | |
| margin: 30px 0 20px 0; | |
| text-align: center; | |
| box-shadow: 0 6px 24px rgba(69,183,209,0.3); | |
| "> | |
| <h3 style="margin: 0 0 8px 0;">📊 Kết quả xử lý</h3> | |
| <p style="margin: 0; opacity: 0.9; font-size: 0.9em;"> | |
| Phiên âm, dịch thuật và tổng hợp giọng nói | |
| </p> | |
| </div> | |
| """) | |
| # Dynamic status indicator | |
| status_text = gr.HTML("") | |
| # Card-based layout for mobile | |
| with gr.Column(elem_classes=["output-area"]): | |
| # Original content card | |
| gr.HTML(""" | |
| <div style=" | |
| background: linear-gradient(135deg, #e3f2fd 0%, #bbdefb 100%); | |
| padding: 15px; | |
| border-radius: 12px; | |
| margin: 15px 0; | |
| border-left: 4px solid #2196F3; | |
| "> | |
| <h4 style="margin: 0 0 10px 0; color: #1976D2;">📝 Nội dung gốc từ audio</h4> | |
| </div> | |
| """) | |
| transcription_output = gr.Textbox( | |
| label="🎯 Phiên âm từ audio", | |
| lines=4, | |
| interactive=False, | |
| placeholder="Nội dung phiên âm từ file audio sẽ hiển thị ở đây...", | |
| elem_classes=["mobile-textbox"] | |
| ) | |
| detected_language = gr.Textbox( | |
| label="🌐 Ngôn ngữ được phát hiện", | |
| lines=1, | |
| interactive=False, | |
| placeholder="Tự động nhận diện...", | |
| elem_classes=["mobile-textbox"] | |
| ) | |
| # Translation result card | |
| gr.HTML(""" | |
| <div style=" | |
| background: linear-gradient(135deg, #e8f5e8 0%, #c8e6c9 100%); | |
| padding: 15px; | |
| border-radius: 12px; | |
| margin: 15px 0; | |
| border-left: 4px solid #4CAF50; | |
| "> | |
| <h4 style="margin: 0 0 10px 0; color: #388E3C;">✨ Kết quả dịch thuật</h4> | |
| </div> | |
| """) | |
| translation_output = gr.Textbox( | |
| label="🔄 Nội dung đã dịch", | |
| lines=4, | |
| interactive=False, | |
| placeholder="Bản dịch sẽ hiển thị ở đây...", | |
| elem_classes=["mobile-textbox"] | |
| ) | |
| target_language_display = gr.Textbox( | |
| label="🎯 Ngôn ngữ đích", | |
| lines=1, | |
| interactive=False, | |
| placeholder="Chưa chọn...", | |
| elem_classes=["mobile-textbox"] | |
| ) | |
| # Mobile-friendly comparison section | |
| with gr.Accordion("🔍 So sánh nội dung", open=False): | |
| gr.HTML(""" | |
| <div style=" | |
| text-align: center; | |
| margin-bottom: 15px; | |
| padding: 10px; | |
| background: #f5f5f5; | |
| border-radius: 8px; | |
| "> | |
| <p style="color: #666; font-style: italic; margin: 0;"> | |
| Xem nội dung gốc và bản dịch để so sánh | |
| </p> | |
| </div> | |
| """) | |
| # Stack vertically on mobile for better readability | |
| with gr.Column(): | |
| gr.HTML(""" | |
| <div style=" | |
| background: #e3f2fd; | |
| padding: 10px; | |
| border-radius: 8px; | |
| margin: 10px 0; | |
| text-align: center; | |
| font-weight: bold; | |
| color: #1976D2; | |
| ">📝 Ngôn ngữ gốc</div> | |
| """) | |
| original_compare = gr.Textbox( | |
| label="", | |
| lines=4, | |
| interactive=False, | |
| show_label=False, | |
| placeholder="Nội dung phiên âm từ audio sẽ hiển thị ở đây...", | |
| elem_classes=["mobile-compare"] | |
| ) | |
| gr.HTML(""" | |
| <div style=" | |
| background: #e8f5e8; | |
| padding: 10px; | |
| border-radius: 8px; | |
| margin: 15px 0 5px 0; | |
| text-align: center; | |
| font-weight: bold; | |
| color: #388E3C; | |
| ">✨ Sau khi dịch</div> | |
| """) | |
| translated_compare = gr.Textbox( | |
| label="", | |
| lines=4, | |
| interactive=False, | |
| show_label=False, | |
| placeholder="Nội dung sau khi dịch sẽ hiển thị ở đây...", | |
| elem_classes=["mobile-compare"] | |
| ) | |
| # Mobile-optimized download section | |
| with gr.Accordion("💾 Tải xuống kết quả", open=True): | |
| gr.HTML(""" | |
| <div style=" | |
| background: linear-gradient(135deg, #fff3e0 0%, #ffcc80 100%); | |
| padding: 15px; | |
| border-radius: 12px; | |
| margin: 15px 0; | |
| border-left: 4px solid #FF9800; | |
| text-align: center; | |
| "> | |
| <h4 style="margin: 0 0 10px 0; color: #E65100;">💾 Tải xuống kết quả</h4> | |
| <p style="color: #BF360C; margin: 0; font-style: italic;"> | |
| File audio và văn bản đã dịch | |
| </p> | |
| </div> | |
| """) | |
| # Stack downloads vertically for mobile | |
| with gr.Column(): | |
| gr.HTML(""" | |
| <div style=" | |
| background: #e3f2fd; | |
| padding: 12px; | |
| border-radius: 8px; | |
| margin: 15px 0 10px 0; | |
| text-align: center; | |
| font-weight: bold; | |
| color: #1976D2; | |
| ">🔊 Audio đã dịch</div> | |
| """) | |
| audio_output_at = gr.Audio( | |
| label="🎵 Audio đã dịch", | |
| type="filepath", | |
| show_label=True, | |
| elem_classes=["mobile-audio"], | |
| format="wav" # Specify format explicitly | |
| ) | |
| # Explicit download component for translated audio | |
| audio_download_at = gr.File( | |
| label="📥 Tải xuống audio đã dịch", | |
| file_count="single", | |
| file_types=[".wav"], | |
| visible=True | |
| ) | |
| gr.HTML(""" | |
| <div style=" | |
| background: #e8f5e8; | |
| padding: 12px; | |
| border-radius: 8px; | |
| margin: 25px 0 10px 0; | |
| text-align: center; | |
| font-weight: bold; | |
| color: #388E3C; | |
| ">📄 Văn bản đã dịch</div> | |
| """) | |
| text_output = gr.File( | |
| label="", | |
| file_count="single", | |
| file_types=[".txt", ".docx"], | |
| show_label=False, | |
| elem_classes=["mobile-file"] | |
| ) | |
| # Event handlers for Audio Translation with colorful status | |
def update_status_processing() -> str:
    """Return the HTML banner displayed while an audio translation is running.

    Takes no arguments and builds nothing dynamically: the result is a fixed
    ``<div class="status-processing">`` snippet carrying a Vietnamese
    "auto-translating..." message. The event chains in this section send it
    to the ``status_text`` component before the translation step runs.

    Returns:
        The HTML markup as a string.
    """
    return """
<div class="status-processing" style="
text-align: center;
margin: 20px 0;
padding: 15px;
border-radius: 12px;
color: white;
transition: all 0.3s ease;
">
<span style="font-weight: bold; font-size: 1.1em;">
⚡ Đang tự động dịch thuật...
</span>
</div>
"""
def update_status_complete() -> str:
    """Return the HTML banner displayed once an audio translation has finished.

    Takes no arguments and builds nothing dynamically: the result is a fixed
    ``<div class="status-success">`` snippet carrying a Vietnamese
    "translation complete" message. The event chains in this section send it
    to the ``status_text`` component after the translation step.

    Returns:
        The HTML markup as a string.
    """
    return """
<div class="status-success" style="
text-align: center;
margin: 20px 0;
padding: 15px;
border-radius: 12px;
color: white;
transition: all 0.3s ease;
">
<span style="font-weight: bold; font-size: 1.1em;">
✅ Dịch thuật hoàn thành!
</span>
</div>
"""
# Picking a different target country repopulates the voice dropdown
# through update_voices.
target_country_dropdown.change(
    fn=update_voices,
    inputs=target_country_dropdown,
    outputs=target_voice_dropdown,
)

# Mirror the currently selected voice into the read-only
# "target language" textbox.
target_voice_dropdown.change(
    fn=lambda selected_voice: selected_voice,
    inputs=target_voice_dropdown,
    outputs=target_language_display,
)
| # Helper function to extract format | |
def get_format_from_dropdown(format_choice):
    """Map a text-format dropdown label to a short format key.

    Args:
        format_choice: Label picked in the text-format dropdown, e.g.
            "Markdown (.md)", "TXT (.txt)" or "Word (.docx)". May be
            ``None`` or empty when nothing is selected.

    Returns:
        "md" if the label mentions Markdown, "docx" if it mentions Word,
        and "txt" for anything else, including a missing selection.
    """
    # A dropdown with no selection hands the callback None; the substring
    # tests below would raise TypeError on it, so use the default format.
    if not format_choice:
        return "txt"
    if "Markdown" in format_choice:
        return "md"
    if "Word" in format_choice:
        return "docx"
    return "txt"
# Auto-translate when audio is uploaded or changed.
# Chain: show "processing" banner -> translate -> show "complete" banner
# -> refresh the list of recorded files.
audio_input.change(
    fn=lambda: update_status_processing(),
    outputs=[status_text]
).then(
    # With audio present, delegate to translate_audio (defined elsewhere in
    # this file; it is expected to return one value per output below).
    # Without audio, return one placeholder per output. The placeholder
    # tuple must have exactly as many entries as the `outputs` list (9);
    # a shorter tuple makes Gradio reject the callback result.
    fn=lambda audio, country, voice, fmt: (
        translate_audio(audio, country, voice, get_format_from_dropdown(fmt))
        if audio is not None
        else (
            "",  # transcription_output
            "",  # detected_language
            "📎 Vui lòng tải lên file audio hoặc ghi âm",  # translation_output
            country,  # target_language_display
            None,  # audio_output_at
            None,  # audio_download_at
            "",  # original_compare
            "",  # translated_compare
            None,  # text_output
        )
    ),
    inputs=[audio_input, target_country_dropdown, target_voice_dropdown, text_format_dropdown],
    outputs=[
        transcription_output,
        detected_language,
        translation_output,
        target_language_display,
        audio_output_at,
        audio_download_at,
        original_compare,
        translated_compare,
        text_output
    ]
).then(
    fn=lambda: update_status_complete(),
    outputs=[status_text]
).then(
    fn=refresh_recorded_files,
    outputs=[recorded_files_dropdown]
)
# === RECORDED FILES EVENT HANDLERS ===

# Components updated by a translation run started from a saved recording,
# in the order the callback's return values are assigned to them.
_recorded_translation_outputs = [
    transcription_output,
    detected_language,
    translation_output,
    target_language_display,
    audio_output_at,
    audio_download_at,
    original_compare,
    translated_compare,
    text_output,
]

# Save the clip currently held by the audio input.
save_recording_btn.click(
    fn=save_current_recording,
    inputs=audio_input,
    outputs=[recording_status, recorded_files_dropdown],
)

# Start a new recording: resets the audio input and the recording status.
new_recording_btn.click(
    fn=clear_audio_for_new_recording,
    outputs=[audio_input, recording_status],
)

# Manually reload the list of recorded files.
refresh_files_btn.click(
    fn=refresh_recorded_files,
    outputs=recorded_files_dropdown,
)

# Selecting a recorded file loads it into the preview player.
recorded_files_dropdown.change(
    fn=load_recorded_file,
    inputs=recorded_files_dropdown,
    outputs=recorded_audio_player,
)

# Translate the selected recording:
# "processing" banner -> translate -> "complete" banner -> refresh file list.
use_for_translation_btn.click(
    fn=lambda: update_status_processing(),
    outputs=status_text,
).then(
    fn=use_recorded_for_translation,
    inputs=[
        recorded_files_dropdown,
        target_country_dropdown,
        target_voice_dropdown,
        text_format_dropdown,
    ],
    outputs=_recorded_translation_outputs,
).then(
    fn=lambda: update_status_complete(),
    outputs=status_text,
).then(
    fn=refresh_recorded_files,
    outputs=recorded_files_dropdown,
)

# Offer the selected recording as a downloadable file.
download_recorded_btn.click(
    fn=prepare_recorded_file_download,
    inputs=recorded_files_dropdown,
    outputs=download_recorded_file,
)

# Delete the selected recording, then update the status message,
# the file list and the preview player.
delete_recorded_btn.click(
    fn=delete_selected_file,
    inputs=recorded_files_dropdown,
    outputs=[file_operation_status, recorded_files_dropdown, recorded_audio_player],
)
| # Features section cho Voice RAG | |
| gr.Markdown("### 📚 Tính năng chính") | |
| with gr.Row(): | |
| with gr.Column(): | |
| gr.HTML(""" | |
| <div style="background: linear-gradient(135deg, #FF6B6B 0%, #FF8E53 100%); padding: 20px; border-radius: 15px; color: white; text-align: center; margin: 10px;"> | |
| <h3>📚 Voice RAG</h3> | |
| <p>Upload tài liệu và đặt câu hỏi. Nhận trả lời bằng giọng nói đa ngôn ngữ.</p> | |
| <div style="margin-top: 15px;"> | |
| <div style="background: rgba(255,255,255,0.2); padding: 8px; border-radius: 8px; margin: 5px 0;"> | |
| ✓ Hỗ trợ PDF, DOCX, TXT | |
| </div> | |
| <div style="background: rgba(255,255,255,0.2); padding: 8px; border-radius: 8px; margin: 5px 0;"> | |
| ✓ AI Gemini 2.0 Flash | |
| </div> | |
| <div style="background: rgba(255,255,255,0.2); padding: 8px; border-radius: 8px; margin: 5px 0;"> | |
| ✓ 24 giọng nói đa quốc gia | |
| </div> | |
| </div> | |
| </div> | |
| """) | |
| with gr.Column(): | |
| gr.HTML(""" | |
| <div style="background: linear-gradient(135deg, #4ECDC4 0%, #44A08D 100%); padding: 20px; border-radius: 15px; color: white; text-align: center; margin: 10px;"> | |
| <h3>🌍 Audio Translation</h3> | |
| <p>Dịch thuật âm thanh sang nhiều ngôn ngữ với giọng nói tự nhiên.</p> | |
| <div style="margin-top: 15px;"> | |
| <div style="background: rgba(255,255,255,0.2); padding: 8px; border-radius: 8px; margin: 5px 0;"> | |
| ✓ Ghi âm real-time | |
| </div> | |
| <div style="background: rgba(255,255,255,0.2); padding: 8px; border-radius: 8px; margin: 5px 0;"> | |
| ✓ 13 ngôn ngữ chính | |
| </div> | |
| <div style="background: rgba(255,255,255,0.2); padding: 8px; border-radius: 8px; margin: 5px 0;"> | |
| ✓ Edge TTS Neural | |
| </div> | |
| </div> | |
| </div> | |
| """) | |
| with gr.Row(): | |
| with gr.Column(): | |
| gr.HTML(""" | |
| <div style="background: linear-gradient(135deg, #45B7D1 0%, #96C93D 100%); padding: 20px; border-radius: 15px; color: white; text-align: center; margin: 10px;"> | |
| <h3>🎤 Voice Studio</h3> | |
| <p>Chuyển văn bản thành giọng nói với nhiều lựa chọn quốc gia và giọng nói.</p> | |
| <div style="margin-top: 15px;"> | |
| <div style="background: rgba(255,255,255,0.2); padding: 8px; border-radius: 8px; margin: 5px 0;"> | |
| ✓ 13 quốc gia | |
| </div> | |
| <div style="background: rgba(255,255,255,0.2); padding: 8px; border-radius: 8px; margin: 5px 0;"> | |
| ✓ Tích hợp dịch thuật | |
| </div> | |
| <div style="background: rgba(255,255,255,0.2); padding: 8px; border-radius: 8px; margin: 5px 0;"> | |
| ✓ Điều chỉnh tốc độ | |
| </div> | |
| </div> | |
| </div> | |
| """) | |
| # Footer | |
| gr.HTML(""" | |
| <div class="custom-footer"> | |
| <div style="display: flex; justify-content: center; align-items: center; gap: 15px; flex-wrap: wrap;"> | |
| <div style="display: flex; align-items: center; gap: 8px;"> | |
| <div style="background: rgba(255,255,255,0.2); padding: 8px 15px; border-radius: 20px; font-size: 16px;"> | |
| 🧠 DB | |
| </div> | |
| <span style="font-size: 18px; font-weight: bold;">Digitized Brains</span> | |
| </div> | |
| <div style="font-size: 14px; opacity: 0.9;"> | |
| Voice Studio - AI Powered | |
| </div> | |
| </div> | |
| </div> | |
| """) | |
| # Add JavaScript for button effects | |
| gr.HTML(js_code) | |
if __name__ == "__main__":
    # Script entry point: prepare the environment, pick a port and launch
    # the Gradio app. `os` and `sys` are already imported at the top of the
    # file, so they are not re-imported here.

    # True when running inside a Hugging Face Space (either variable set).
    # Evaluated once and reused for both the storage and the port decision.
    on_hf_spaces = bool(
        os.environ.get("SPACE_ID") or os.environ.get("HF_SPACE_ID")
    )

    # Ensure UTF-8 encoding. PYTHONIOENCODING is read at interpreter
    # startup, so setting it here only affects child processes.
    if sys.platform == 'win32':
        os.environ['PYTHONIOENCODING'] = 'utf-8'

    # Optimize startup for HF Spaces
    print(f"===== Application Startup at {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} =====")

    # Only create the record_data directory eagerly in local development;
    # on HF Spaces it is deferred to first use to speed up startup.
    if not on_hf_spaces:
        create_record_data_directory()
        print(f"📁 Record data directory ready: {RECORD_DATA_DIR}")
    else:
        print("🏭 Production mode - record_data will be created on first use")

    # Turn off Gradio's flagging feature by default. This variable controls
    # flagging only; it is not an iframe-embedding setting.
    os.environ['GRADIO_ALLOW_FLAGGING'] = 'never'

    # Disable Gradio temp directory to prevent file serving issues
    # os.environ['GRADIO_TEMP_DIR'] = '/tmp'

    if on_hf_spaces:
        # HF Spaces standard configuration
        port = 7860
        print("🏭 Using HF Spaces standard port 7860")
    else:
        # Local development: GRADIO_SERVER_PORT overrides the default 7880.
        port = int(os.environ.get("GRADIO_SERVER_PORT", 7880))
        print(f"🖥️ Using local development port {port}")

    demo.launch(
        server_name="0.0.0.0",
        server_port=port,
        share=False,
        show_error=True,
        ssr_mode=False,  # Disable SSR to prevent timeout issues on HF Spaces
        enable_monitoring=False  # Disable monitoring for faster startup
    )