ducnguyen1978 committed
Commit f769b62 · verified · 1 Parent(s): a913108

Upload app.py

Files changed (1):
  1. app.py (+2018, -0)

app.py ADDED:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import sys
import io
import os
import locale

# Comprehensive UTF-8 encoding setup for Windows
if sys.platform.startswith('win'):
    try:
        # Set locale to UTF-8
        try:
            locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
        except:
            try:
                locale.setlocale(locale.LC_ALL, 'C.UTF-8')
            except:
                pass

        # Set console to UTF-8 mode
        os.system('chcp 65001 > nul 2>&1')

        # Set environment variables for UTF-8
        os.environ['PYTHONIOENCODING'] = 'utf-8:replace'
        os.environ['PYTHONUTF8'] = '1'

        # Force UTF-8 encoding for stdout/stderr with error handling
        try:
            if hasattr(sys.stdout, 'reconfigure'):
                sys.stdout.reconfigure(encoding='utf-8', errors='replace')
                sys.stderr.reconfigure(encoding='utf-8', errors='replace')
            else:
                # Fallback for older Python versions
                sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='replace')
                sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8', errors='replace')
        except Exception:
            # Final fallback
            sys.stdout = io.TextIOWrapper(sys.stdout.detach(), encoding='utf-8', errors='replace')
            sys.stderr = io.TextIOWrapper(sys.stderr.detach(), encoding='utf-8', errors='replace')

    except Exception as e:
        # Silently continue if encoding setup fails
        pass

# Store original print function before any imports
import builtins
_original_print = builtins.print

def safe_print(*args, **kwargs):
    """Safe print function that handles UTF-8 encoding"""
    try:
        # Convert all arguments to strings first to avoid encoding issues
        safe_args = []
        for arg in args:
            if isinstance(arg, str):
                # Ensure string can be encoded/decoded properly
                try:
                    arg.encode('utf-8')
                    safe_args.append(arg)
                except UnicodeEncodeError:
                    safe_args.append(arg.encode('utf-8', errors='replace').decode('utf-8'))
            else:
                safe_args.append(str(arg))

        _original_print(*safe_args, **kwargs)
    except (UnicodeEncodeError, UnicodeDecodeError) as e:
        # Last resort: convert to ASCII with replacement
        ascii_args = []
        for arg in args:
            if isinstance(arg, str):
                ascii_args.append(arg.encode('ascii', errors='replace').decode('ascii'))
            else:
                ascii_args.append(str(arg).encode('ascii', errors='replace').decode('ascii'))
        _original_print(*ascii_args, **kwargs)
    except Exception:
        # Ultimate fallback
        _original_print("[Encoding Error in Print]")

# Override built-in print
builtins.print = safe_print
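
# Illustrative note (added; not part of the original logic): with the override
# installed, a call like print("Xin chào 🌍") on a console that cannot encode
# those characters degrades to an ASCII rendering such as "Xin ch?o ??"
# instead of raising UnicodeEncodeError.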
"""
Live Translation AI Agent - Two Person Mode
Real-time Cross-Translation between Person A & Person B
"""

import gradio as gr
import numpy as np
import librosa
import soundfile as sf
import tempfile
import os
import time
import logging
import json
from typing import Optional, Tuple, Dict, List
import asyncio
import threading
from pathlib import Path

# Load environment variables from .env file
try:
    from dotenv import load_dotenv
    load_dotenv()
    print("Environment variables loaded from .env file")
except ImportError:
    print("python-dotenv not available, using system environment variables")

# Google Gemini integration
try:
    import google.generativeai as genai
    GEMINI_AVAILABLE = True
    print("Google Gemini library loaded successfully")
except ImportError:
    GEMINI_AVAILABLE = False
    print("Google Gemini library not available")

# Google Speech Recognition integration
try:
    import speech_recognition as sr
    SPEECH_RECOGNITION_AVAILABLE = True
    print("SpeechRecognition library loaded successfully")
except ImportError:
    SPEECH_RECOGNITION_AVAILABLE = False
    print("SpeechRecognition library not available")

# Edge TTS for speech synthesis
try:
    import edge_tts
    EDGE_TTS_AVAILABLE = True
    print("Edge TTS loaded successfully")
except ImportError:
    EDGE_TTS_AVAILABLE = False
    print("Edge TTS not available")

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class TranslationAIAgent:
    """Main AI Agent for translation tasks - Google Gemini Powered"""

    def __init__(self):
        # Enhanced language and voice options with country flags
        self.language_voice_options = {
            'en': {
                'name': 'English',
                'options': [
                    {'code': 'en-us', 'display': 'English (United States)', 'voice': 'en-US-JennyNeural', 'alt_voice': 'en-US-GuyNeural'},
                    {'code': 'en-gb', 'display': 'English (United Kingdom)', 'voice': 'en-GB-LibbyNeural', 'alt_voice': 'en-GB-RyanNeural'},
                ]
            },
            'es': {
                'name': 'Spanish',
                'options': [
                    {'code': 'es-es', 'display': 'Spanish (Spain)', 'voice': 'es-ES-ElviraNeural', 'alt_voice': 'es-ES-AlvaroNeural'},
                    {'code': 'es-mx', 'display': 'Spanish (Mexico)', 'voice': 'es-MX-DaliaNeural', 'alt_voice': 'es-MX-JorgeNeural'},
                ]
            },
            'fr': {
                'name': 'French',
                'options': [
                    {'code': 'fr-fr', 'display': 'French (France)', 'voice': 'fr-FR-DeniseNeural', 'alt_voice': 'fr-FR-HenriNeural'},
                    {'code': 'fr-ca', 'display': 'French (Canada)', 'voice': 'fr-CA-SylvieNeural', 'alt_voice': 'fr-CA-AntoineNeural'},
                ]
            },
            'de': {
                'name': 'German',
                'options': [
                    {'code': 'de-de', 'display': 'German (Germany)', 'voice': 'de-DE-KatjaNeural', 'alt_voice': 'de-DE-ConradNeural'},
                ]
            },
            'vi': {
                'name': 'Vietnamese',
                'options': [
                    {'code': 'vi-vn', 'display': 'Vietnamese (Vietnam)', 'voice': 'vi-VN-HoaiMyNeural', 'alt_voice': 'vi-VN-NamMinhNeural'}
                ]
            },
            'ja': {
                'name': 'Japanese',
                'options': [
                    {'code': 'ja-jp', 'display': 'Japanese (Japan)', 'voice': 'ja-JP-NanamiNeural', 'alt_voice': 'ja-JP-KeitaNeural'}
                ]
            },
            'zh': {
                'name': 'Chinese',
                'options': [
                    {'code': 'zh-cn', 'display': 'Chinese (Simplified)', 'voice': 'zh-CN-XiaoxiaoNeural', 'alt_voice': 'zh-CN-YunxiNeural'},
                ]
            }
        }

        # Create simple supported languages mapping for backward compatibility
        self.supported_languages = {
            lang_code: lang_info['name']
            for lang_code, lang_info in self.language_voice_options.items()
        }

        # Create default voice mapping for backward compatibility
        self.voice_map = {
            lang_code: lang_info['options'][0]['voice']
            for lang_code, lang_info in self.language_voice_options.items()
        }

        self.setup_gemini_client()
        self.setup_speech_recognizer()

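    # Worked example of the derived maps above (values taken from the tables,
    # shown here for illustration):
    #   agent.supported_languages['vi']  -> 'Vietnamese'
    #   agent.voice_map['vi']            -> 'vi-VN-HoaiMyNeural'  (first option wins)
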
    def setup_gemini_client(self):
        """Setup Google Gemini client for translation"""
        try:
            if not GEMINI_AVAILABLE:
                logger.error("Google Gemini library not available - please install: pip install google-generativeai")
                self.gemini_model = None
                self.gemini_configured = False
                return

            # Get Google API key from environment
            api_key = (
                os.environ.get("GOOGLE_API_KEY") or
                os.environ.get("GEMINI_API_KEY") or
                os.getenv("GOOGLE_API_KEY")
            )

            if api_key and api_key.strip() and not api_key.strip().startswith("your-"):
                genai.configure(api_key=api_key.strip())
                self.gemini_model = genai.GenerativeModel('gemini-2.0-flash-exp')
                self.gemini_configured = True
                logger.info("[SUCCESS] Google Gemini client configured successfully - Real translation mode enabled")
            else:
                self.gemini_model = None
                self.gemini_configured = False
                logger.error("❌ Google API key not found or invalid in environment variables")
                logger.error("Please set GOOGLE_API_KEY in your .env file with a valid API key")

        except Exception as e:
            logger.error(f"Gemini setup failed: {e}")
            self.gemini_model = None
            self.gemini_configured = False

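    # Expected .env shape for the lookup above (placeholder value, not a real key):
    #   GOOGLE_API_KEY=AIza...replace-with-your-key
    # GEMINI_API_KEY is accepted as an alternative variable name.
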
    def setup_speech_recognizer(self):
        """Setup speech recognizer for audio input"""
        try:
            if not SPEECH_RECOGNITION_AVAILABLE:
                logger.error("SpeechRecognition library not available - please install: pip install SpeechRecognition")
                self.recognizer = None
                self.speech_configured = False
                return

            self.recognizer = sr.Recognizer()
            # More conservative settings for better recognition
            self.recognizer.energy_threshold = 1000          # Lower threshold for processed audio
            self.recognizer.dynamic_energy_threshold = False  # More consistent
            self.recognizer.pause_threshold = 0.5            # Shorter pauses
            self.recognizer.operation_timeout = 15           # Longer timeout
            self.recognizer.phrase_threshold = 0.3           # More sensitive phrase detection
            self.recognizer.non_speaking_duration = 0.2      # Less aggressive silence detection
            self.speech_configured = True
            logger.info("[SUCCESS] Speech recognizer configured successfully")

        except Exception as e:
            logger.error(f"Speech recognizer setup failed: {e}")
            self.recognizer = None
            self.speech_configured = False

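    # Tuning note (illustrative, not from the original upload): energy_threshold
    # is the minimum signal energy treated as speech and pause_threshold=0.5
    # ends a phrase after half a second of silence; raising energy_threshold
    # makes detection stricter if background noise triggers false starts.
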
    def speech_to_text(self, audio_path: str, language: str = 'auto') -> tuple[str, str]:
        """Convert speech to text using Gemini Flash 2.0 with language detection or specified language"""
        try:
            if not self.gemini_configured:
                raise Exception("Gemini client not configured. Please check your API key.")

            if not os.path.exists(audio_path):
                raise Exception(f"Audio file not found: {audio_path}")

            # Load and preprocess audio with better error handling
            try:
                # Wait a bit to ensure file is fully written
                import time as time_module
                time_module.sleep(0.5)

                # Try to access the file multiple times if needed
                for attempt in range(3):
                    try:
                        y, sr_rate = librosa.load(audio_path, sr=16000, duration=30)
                        break
                    except Exception as e:
                        if attempt < 2:
                            logger.warning(f"Audio loading attempt {attempt + 1} failed: {e}, retrying...")
                            time_module.sleep(0.5)
                        else:
                            raise e

                if len(y) == 0:
                    return "No audio data found", "unknown"

                # Check for audio clipping and quality issues
                max_amplitude = np.max(np.abs(y))
                rms_level = np.sqrt(np.mean(y**2))

                logger.info(f"Audio quality check - Max: {max_amplitude:.4f}, RMS: {rms_level:.4f}, Duration: {len(y)/sr_rate:.2f}s")

                # Handle clipped audio (amplitude = 1.0 means clipping)
                if max_amplitude >= 0.99:
                    logger.warning("⚠️ Audio appears to be clipped - applying de-clipping")
                    # Apply soft clipping recovery
                    y = np.tanh(y * 0.8) * 0.9  # Soft compression to recover from clipping
                    max_amplitude = np.max(np.abs(y))

                # Check if audio is too quiet
                if rms_level < 0.01:
                    logger.warning("⚠️ Audio level very low - boosting signal")
                    # Boost quiet audio
                    y = y * (0.1 / rms_level)
                    y = np.clip(y, -0.95, 0.95)  # Prevent new clipping
                elif rms_level > 0.5:
                    logger.warning("⚠️ Audio level very high - reducing signal")
                    # Reduce loud audio
                    y = y * (0.3 / rms_level)

                # Final normalization to safe level
                if max_amplitude > 0.8:
                    y = y * (0.7 / max_amplitude)
                elif max_amplitude > 0:
                    y = y * (0.7 / max_amplitude)  # Normalize to 70% to avoid clipping

                logger.info(f"After processing - Max: {np.max(np.abs(y)):.4f}, RMS: {np.sqrt(np.mean(y**2)):.4f}")

                # Apply simple noise reduction
                if len(y) > sr_rate:  # Only if audio is longer than 1 second
                    # Calculate RMS energy
                    frame_length = int(0.025 * sr_rate)  # 25ms frames
                    hop_length = int(0.010 * sr_rate)    # 10ms hop
                    rms = librosa.feature.rms(y=y, frame_length=frame_length, hop_length=hop_length)[0]

                    # Simple voice activity detection
                    rms_threshold = np.percentile(rms, 30)  # Bottom 30% is likely silence

                    # Keep frames above threshold
                    frame_indices = np.where(rms > rms_threshold)[0]
                    if len(frame_indices) > 0:
                        start_frame = max(0, frame_indices[0] - 2)
                        end_frame = min(len(rms) - 1, frame_indices[-1] + 2)

                        start_sample = start_frame * hop_length
                        end_sample = min(len(y), end_frame * hop_length + frame_length)

                        y = y[start_sample:end_sample]

                # Save processed audio to temporary file with unique name
                temp_dir = tempfile.gettempdir()
                temp_audio = os.path.join(temp_dir, f"speech_audio_{os.getpid()}_{int(time.time() * 1000)}.wav")

                # Ensure we can write to the temp file
                try:
                    sf.write(temp_audio, y, sr_rate, format='WAV', subtype='PCM_16')

                    # Verify file was written
                    if not os.path.exists(temp_audio) or os.path.getsize(temp_audio) == 0:
                        raise Exception("Failed to write temporary audio file")

                except Exception as e:
                    logger.error(f"Failed to save temporary audio: {e}")
                    return f"Audio processing failed: {str(e)}", "unknown"

                # Give file system time to finish writing
                time_module.sleep(0.1)

                # Use Gemini Flash 2.0 for speech-to-text
                logger.info("🧠 Using Gemini Flash 2.0 for speech recognition...")

                try:
                    # Upload audio file to Gemini
                    import google.generativeai as genai

                    # Upload the audio file
                    audio_file = genai.upload_file(temp_audio, mime_type="audio/wav")
                    logger.info(f"📤 Audio uploaded to Gemini: {audio_file.name}")

                    # Create prompt based on language preference
                    if language == 'auto':
                        prompt = """Please transcribe this audio file.

Instructions:
1. Listen to the audio and transcribe exactly what is spoken
2. Detect the language automatically
3. Provide the transcription in the original language
4. Return ONLY the transcribed text, no explanations
5. If you cannot understand the audio, respond with "RECOGNITION_FAILED"

Transcription:"""
                    else:
                        # Map language codes to language names
                        lang_name_map = {
                            'en-US': 'English', 'vi-VN': 'Vietnamese', 'es-ES': 'Spanish',
                            'fr-FR': 'French', 'de-DE': 'German', 'ja-JP': 'Japanese',
                            'zh-CN': 'Chinese', 'ko-KR': 'Korean', 'it-IT': 'Italian',
                            'pt-PT': 'Portuguese', 'ru-RU': 'Russian', 'ar-SA': 'Arabic',
                            'hi-IN': 'Hindi', 'th-TH': 'Thai', 'tr-TR': 'Turkish'
                        }
                        expected_lang = lang_name_map.get(language, 'English')

                        prompt = f"""Please transcribe this audio file in {expected_lang}.

Instructions:
1. Listen to the audio and transcribe exactly what is spoken
2. The audio should be in {expected_lang}
3. Provide the transcription in {expected_lang}
4. Return ONLY the transcribed text, no explanations
5. If you cannot understand the audio, respond with "RECOGNITION_FAILED"

Transcription:"""

                    # Generate transcription with Gemini (with timeout)
                    try:
                        response = self.gemini_model.generate_content(
                            [prompt, audio_file],
                            generation_config=genai.types.GenerationConfig(
                                candidate_count=1,
                                max_output_tokens=200,
                                temperature=0.1
                            )
                        )
                        transcription = response.text.strip()
                    except Exception as gen_error:
                        logger.error(f"Gemini generation failed: {gen_error}")
                        raise gen_error

                    logger.info(f"🧠 Gemini transcription result: {transcription[:100]}...")

                    # Clean up uploaded file
                    try:
                        genai.delete_file(audio_file.name)
                        logger.info("🗑️ Cleaned up uploaded file from Gemini")
                    except:
                        pass

                    # Check if recognition failed
                    if transcription == "RECOGNITION_FAILED" or "cannot understand" in transcription.lower():
                        logger.warning("❌ Gemini could not understand the audio")
                        return "Could not understand speech - please try speaking more clearly or check your microphone", "unknown"

                    # Detect language of transcription using Gemini
                    detected_language = self.detect_language_with_gemini(transcription)

                    logger.info(f"✅ Gemini transcription successful: {transcription[:50]}...")
                    logger.info(f"✅ Detected language: {detected_language}")

                    return transcription, detected_language

                except Exception as gemini_error:
                    logger.error(f"❌ Gemini transcription failed: {gemini_error}")
                    return f"Gemini transcription failed: {str(gemini_error)}", "unknown"

                finally:
                    # Clean up temp file
                    try:
                        os.remove(temp_audio)
                    except Exception as e:
                        logger.warning(f"Failed to cleanup temp file: {e}")

            except Exception as audio_error:
                logger.error(f"Audio processing error: {audio_error}")
                return f"Audio processing failed: {str(audio_error)}", "unknown"

        except Exception as e:
            error_msg = str(e)
            logger.error(f"Speech recognition error: {error_msg}")
            raise Exception(f"Speech recognition failed: {error_msg}")

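    # Usage sketch (hypothetical file name): the method returns a
    # (text, detected_language) pair, e.g.
    #   text, lang = agent.speech_to_text("clip.wav", language='auto')
    #   # -> ("Hello there", "English") on success
    #   # -> ("Audio processing failed: ...", "unknown") on failure
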
    def detect_language_with_gemini(self, text: str) -> str:
        """Use Gemini to detect language of text"""
        try:
            if not self.gemini_configured or not text.strip():
                return "English"

            prompt = f"""Analyze this text and identify the language. Respond with just the language name in English (e.g., "English", "Vietnamese", "Spanish", etc.):

{text[:200]}"""

            response = self.gemini_model.generate_content(prompt)
            detected_lang = response.text.strip()

            # Validate response
            valid_languages = ['English', 'Vietnamese', 'Spanish', 'French', 'German', 'Japanese', 'Chinese', 'Korean', 'Italian', 'Portuguese', 'Russian', 'Arabic', 'Hindi', 'Thai', 'Turkish']
            if detected_lang in valid_languages:
                return detected_lang
            else:
                return "English"

        except Exception as e:
            logger.warning(f"Gemini language detection failed: {e}")
            return "English"

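    # Behavior sketch (added note): empty input, an unrecognized model response,
    # or any API error falls back to "English", so callers always receive a
    # name from valid_languages.
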
    def get_audio_duration(self, audio_path: str) -> float:
        """Get duration of audio file"""
        try:
            y, sr = librosa.load(audio_path)
            return len(y) / sr
        except:
            return 0.0

    def translate_text(self, text: str, source_lang: str, target_lang: str) -> str:
        """Translate text using Google Gemini Flash 2.0"""
        try:
            if not self.gemini_configured:
                raise Exception("Google Gemini client not configured. Please check your API key.")

            # Create translation prompt
            source_name = self.supported_languages.get(source_lang, source_lang)
            target_name = self.supported_languages.get(target_lang, target_lang)

            prompt = f"""Translate the following {source_name} text to {target_name}. Provide only the translation, no explanations or additional text:

{text}"""

            response = self.gemini_model.generate_content(
                prompt,
                generation_config=genai.types.GenerationConfig(
                    candidate_count=1,
                    max_output_tokens=300,
                    temperature=0.2
                )
            )
            translated_text = response.text.strip()

            if translated_text:
                logger.info(f"Gemini Flash 2.0 translation successful: {translated_text[:100]}...")
                return translated_text
            else:
                raise Exception("Empty translation response from Gemini")

        except Exception as e:
            error_msg = str(e)
            logger.error(f"Translation error: {error_msg}")

            # Check for quota exceeded error
            if "429" in error_msg or "quota" in error_msg.lower() or "insufficient_quota" in error_msg.lower():
                logger.warning("[WARNING] Google Gemini API quota exceeded - using fallback translation")
                target_name = self.supported_languages.get(target_lang, target_lang)
                return f"[API Quota Exceeded] Please add credits to your Google account. Original text: {text}"

            raise Exception(f"Translation failed: {error_msg}")

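    # Illustrative prompt for source_lang='en', target_lang='vi' (built from the
    # supported_languages table):
    #   "Translate the following English text to Vietnamese. Provide only the
    #    translation, no explanations or additional text: ..."
    # Note the quota path above: on a 429/quota error the method returns the
    # original text tagged "[API Quota Exceeded]" instead of raising.
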
    async def generate_speech_with_custom_voice(self, text: str, voice: str) -> str:
        """Generate speech using Edge TTS with custom voice"""
        try:
            if not EDGE_TTS_AVAILABLE:
                logger.warning("Edge TTS not available")
                return None

            # Create temporary output file
            temp_dir = tempfile.gettempdir()
            output_path = os.path.join(temp_dir, f"tts_output_{int(time.time())}.wav")

            # Generate speech with specific voice
            communicate = edge_tts.Communicate(text, voice)
            await communicate.save(output_path)

            if os.path.exists(output_path):
                logger.info(f"Edge TTS generated with {voice}: {output_path}")
                return output_path
            else:
                return None

        except Exception as e:
            logger.error(f"TTS Error: {e}")
            return None

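    # Usage sketch (voice name taken from the tables above): the coroutine is
    # driven with asyncio.run() elsewhere in this file, e.g.
    #   wav_path = asyncio.run(agent.generate_speech_with_custom_voice(
    #       "Xin chào", "vi-VN-HoaiMyNeural"))
    # which returns a temp file path, or None when Edge TTS is unavailable.
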
    def process_audio_translation_with_voice(
        self,
        audio_path: str,
        target_lang: str,
        voice: str,
        input_language: str = 'auto'
    ) -> Tuple[str, str, str, Optional[str]]:
        """Complete audio translation pipeline with custom voice selection and input language option"""

        if not audio_path:
            return "Please upload an audio file", "", "", None

        input_desc = "auto-detection" if input_language == 'auto' else f"specified language ({input_language})"
        logger.info(f"Processing audio translation with {input_desc} -> {target_lang} (voice: {voice})")

        # Step 1: Speech to text with language detection or specified language
        logger.info(f"Step 1: Transcribing audio with {input_desc}...")
        transcribed_text, detected_language = self.speech_to_text(audio_path, input_language)

        if transcribed_text.startswith("Error"):
            return transcribed_text, "", "", None

        logger.info(f"Transcription: {transcribed_text[:100]}...")
        logger.info(f"Language: {detected_language}")

        # Step 2: Translate text with Gemini Flash 2.0 using detected/specified language
        logger.info("Step 2: Translating text...")
        # Map detected language name to code for translation
        lang_code_map = {
            'English': 'en', 'Vietnamese': 'vi', 'Spanish': 'es', 'French': 'fr',
            'German': 'de', 'Italian': 'it', 'Portuguese': 'pt', 'Russian': 'ru',
            'Japanese': 'ja', 'Korean': 'ko', 'Chinese': 'zh', 'Arabic': 'ar',
            'Hindi': 'hi', 'Thai': 'th', 'Turkish': 'tr'
        }

        detected_lang_code = lang_code_map.get(detected_language, 'en')
        translated_text = self.translate_text(transcribed_text, detected_lang_code, target_lang)

        if translated_text.startswith("[Translation Error]"):
            return transcribed_text, detected_language, translated_text, None

        logger.info(f"Translation: {translated_text[:100]}...")

        # Step 3: Generate speech with Edge TTS using custom voice
        logger.info(f"Step 3: Generating speech with voice: {voice}")
        audio_output = asyncio.run(self.generate_speech_with_custom_voice(translated_text, voice))

        if audio_output:
            logger.info("Complete translation pipeline successful!")
        else:
            logger.warning("TTS generation failed, returning text only")

        return transcribed_text, detected_language, translated_text, audio_output

# Initialize AI Agent
agent = TranslationAIAgent()

# Interface Functions
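# The helpers below share a "code | display" dropdown convention, e.g.
# "vi-vn | Vietnamese (Vietnam)"; parsing splits on ' | ' and keeps the
# left-hand code. (Example value shown for illustration.)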
def get_country_options() -> List[str]:
    """Get country options with flags for target language"""
    choices = []
    for lang_code, lang_info in agent.language_voice_options.items():
        for option in lang_info['options']:
            choice = f"{option['code']} | {option['display']}"
            choices.append(choice)
    return sorted(choices)

def get_input_language_options() -> List[str]:
    """Get input language options for speech recognition"""
    choices = ["auto | Auto-detect Language (Recommended)"]

    # Add specific language options
    language_options = [
        ("en-US", "English (United States)"),
        ("vi-VN", "Vietnamese (Vietnam)"),
        ("es-ES", "Spanish (Spain)"),
        ("fr-FR", "French (France)"),
        ("de-DE", "German (Germany)"),
        ("ja-JP", "Japanese (Japan)"),
        ("zh-CN", "Chinese (Simplified)"),
        ("ko-KR", "Korean (South Korea)"),
        ("it-IT", "Italian (Italy)"),
        ("pt-PT", "Portuguese (Portugal)"),
        ("ru-RU", "Russian (Russia)"),
        ("ar-SA", "Arabic (Saudi Arabia)"),
        ("hi-IN", "Hindi (India)"),
        ("th-TH", "Thai (Thailand)"),
        ("tr-TR", "Turkish (Turkey)")
    ]

    for code, display in language_options:
        choice = f"{code} | {display}"
        choices.append(choice)

    return choices

def get_voice_options_for_country(country_selection: str) -> List[str]:
    """Get voice options for selected country"""
    if not country_selection or '|' not in country_selection:
        return ["Jenny (Female)", "Guy (Male)"]

    code = country_selection.split(' | ')[0].strip()

    for lang_info in agent.language_voice_options.values():
        for option in lang_info['options']:
            if option['code'] == code:
                main_voice = option['voice'].replace('Neural', '').split('-')[-1]
                alt_voice = option['alt_voice'].replace('Neural', '').split('-')[-1]

                def get_gender(voice_name):
                    female_names = ['Jenny', 'Libby', 'Natasha', 'Clara', 'Elvira', 'Dalia', 'Denise', 'Sylvie', 'Katja', 'Elsa', 'Raquel', 'Francisca', 'Svetlana', 'Nanami', 'SunHi', 'Xiaoxiao', 'HoaiMy']
                    return "(Female)" if any(name in voice_name for name in female_names) else "(Male)"

                return [
                    f"{main_voice} {get_gender(main_voice)}",
                    f"{alt_voice} {get_gender(alt_voice)}"
                ]

    return ["Jenny (Female)", "Guy (Male)"]

def get_voice_code_from_selections(country_selection: str, voice_selection: str) -> str:
    """Get full voice code from country and voice selections"""
    if not country_selection or '|' not in country_selection:
        return 'en-US-JennyNeural'

    code = country_selection.split(' | ')[0].strip()
    voice_name = voice_selection.split(' (')[0].strip()

    for lang_info in agent.language_voice_options.values():
        for option in lang_info['options']:
            if option['code'] == code:
                main_voice_name = option['voice'].replace('Neural', '').split('-')[-1]
                alt_voice_name = option['alt_voice'].replace('Neural', '').split('-')[-1]

                if voice_name == main_voice_name:
                    return option['voice']
                elif voice_name == alt_voice_name:
                    return option['alt_voice']

    return 'en-US-JennyNeural'

def get_language_code_from_country(country_selection: str) -> str:
    """Extract language code from country selection"""
    if not country_selection or '|' not in country_selection:
        return 'en'

    code = country_selection.split(' | ')[0].strip()
    return code.split('-')[0]

def update_voice_options(country_selection: str) -> gr.Dropdown:
    """Update voice dropdown based on country selection"""
    voice_options = get_voice_options_for_country(country_selection)
    return gr.Dropdown(choices=voice_options, value=voice_options[0] if voice_options else "Jenny (Female)")

def get_input_language_code_from_selection(input_lang_selection: str) -> str:
    """Extract language code from input language selection"""
    if not input_lang_selection or '|' not in input_lang_selection:
        return 'auto'

    code = input_lang_selection.split(' | ')[0].strip()
    if code == 'auto':
        return 'auto'

    return code

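# Worked example (illustrative selection strings): for
# "vi-vn | Vietnamese (Vietnam)" with voice "HoaiMy (Female)":
#   get_language_code_from_country(...)  -> 'vi'
#   get_voice_code_from_selections(...)  -> 'vi-VN-HoaiMyNeural'
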
# Global conversation and audio state
conversation_state = {
    "person_a_messages": [],
    "person_b_messages": [],
    "person_a_translations": [],
    "person_b_translations": [],
    "latest_audio_for_a": None,  # Audio that Person A should hear
    "latest_audio_for_b": None   # Audio that Person B should hear
}

# Recording state management
recording_state = {
    "person_a_recording": False,
    "person_b_recording": False,
    "person_a_audio": None,
    "person_b_audio": None
}

def add_message_to_conversation(person, original, detected_lang, translation, target_person):
    """Add message to global conversation state"""
    if original and translation:
        timestamp = time.strftime("%H:%M")

        if person == "A":
            conversation_state["person_a_messages"].append(f"[{timestamp}] Person A ({detected_lang}): {original}")
            conversation_state["person_b_translations"].append(f"[{timestamp}] -> Person B: {translation}")
        else:  # person == "B"
            conversation_state["person_b_messages"].append(f"[{timestamp}] Person B ({detected_lang}): {original}")
            conversation_state["person_a_translations"].append(f"[{timestamp}] -> Person A: {translation}")

def get_full_conversation():
    """Get complete conversation history for both tabs"""
    all_messages = []
    max_length = max(
        len(conversation_state["person_a_messages"]),
        len(conversation_state["person_b_messages"]),
        len(conversation_state["person_a_translations"]),
        len(conversation_state["person_b_translations"])
    )

    for i in range(max_length):
        if i < len(conversation_state["person_a_messages"]):
            all_messages.append(conversation_state["person_a_messages"][i])
        if i < len(conversation_state["person_b_translations"]):
            all_messages.append(conversation_state["person_b_translations"][i])
        if i < len(conversation_state["person_b_messages"]):
            all_messages.append(conversation_state["person_b_messages"][i])
        if i < len(conversation_state["person_a_translations"]):
            all_messages.append(conversation_state["person_a_translations"][i])

    return "\n".join(all_messages[-10:])  # Show last 10 messages

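# Example of the transcript format this builds (illustrative content):
#   [09:15] Person A (English): Hello!
#   [09:15] -> Person B: Xin chào!
#   [09:16] Person B (Vietnamese): Bạn khỏe không?
#   [09:16] -> Person A: How are you?
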
def translate_person_a_to_b(audio_file, country_b: str, voice_b: str, input_lang_a: str) -> tuple[str, Optional[str]]:
    """Person A speaks -> results appear in Person B's tab"""
    if audio_file is None:
        return "", None

    try:
        print(f"[DEBUG] Person A recording: {audio_file}")
        tgt_code = get_language_code_from_country(country_b)
        selected_voice = get_voice_code_from_selections(country_b, voice_b)
        input_language = get_input_language_code_from_selection(input_lang_a)
        print(f"[DEBUG] Input Language: {input_language}, Target: {tgt_code}, Voice: {selected_voice}")

        original_text, detected_lang, translated_text, audio_output = agent.process_audio_translation_with_voice(
            audio_file, tgt_code, selected_voice, input_language
        )

        print(f"[DEBUG] Results: {original_text[:50]}... -> {translated_text[:50]}...")
        print(f"[DEBUG] Audio output: {audio_output}")

        # Add to conversation
        add_message_to_conversation("A", original_text, detected_lang, translated_text, "B")

        # Return conversation for Person B's tab and audio
        conversation_history = get_full_conversation()
        print(f"[DEBUG] Conversation length: {len(conversation_history)}")

        return conversation_history, audio_output

    except Exception as e:
        print(f"[ERROR] translate_person_a_to_b: {e}")
        return f"Error: {str(e)}", None

def translate_person_b_to_a(audio_file, country_a: str, voice_a: str, input_lang_b: str) -> tuple[str, Optional[str]]:
    """Person B speaks -> results appear in Person A's tab"""
    if audio_file is None:
        return "", None

    try:
        print(f"[DEBUG] Person B recording: {audio_file}")
        tgt_code = get_language_code_from_country(country_a)
        selected_voice = get_voice_code_from_selections(country_a, voice_a)
        input_language = get_input_language_code_from_selection(input_lang_b)
        print(f"[DEBUG] Input Language: {input_language}, Target: {tgt_code}, Voice: {selected_voice}")

        original_text, detected_lang, translated_text, audio_output = agent.process_audio_translation_with_voice(
            audio_file, tgt_code, selected_voice, input_language
        )

        print(f"[DEBUG] Results: {original_text[:50]}... -> {translated_text[:50]}...")
        print(f"[DEBUG] Audio output: {audio_output}")

        # Add to conversation
        add_message_to_conversation("B", original_text, detected_lang, translated_text, "A")

        # Return conversation for Person A's tab and audio
        conversation_history = get_full_conversation()
        print(f"[DEBUG] Conversation length: {len(conversation_history)}")

        return conversation_history, audio_output

    except Exception as e:
        print(f"[ERROR] translate_person_b_to_a: {e}")
        return f"Error: {str(e)}", None

def get_audio_for_person_a() -> Optional[str]:
    """Get latest audio that Person A should hear"""
    return conversation_state.get("latest_audio_for_a")

def get_audio_for_person_b() -> Optional[str]:
    """Get latest audio that Person B should hear"""
    return conversation_state.get("latest_audio_for_b")

def toggle_person_a_recording():
    """Toggle Person A recording state and return button updates"""
    if recording_state["person_a_recording"]:
        # Currently recording, stop it
        recording_state["person_a_recording"] = False
        return (
            gr.Button("🎤 Talk", elem_classes=["talk-button"]),
            "<div style='text-align: center; padding: 10px; color: #666;'>Ready to record</div>"
        )
    else:
        # Not recording, start it
        recording_state["person_a_recording"] = True
        # Clear previous audio
        recording_state["person_a_audio"] = None
        return (
            gr.Button("🛑 Stop", elem_classes=["stop-button"]),
            "<div style='text-align: center; padding: 10px; color: #f44336;'>🔴 Click Stop when done recording</div>"
        )

def toggle_person_b_recording():
    """Toggle Person B recording state and return button updates"""
    if recording_state["person_b_recording"]:
        # Currently recording, stop it
        recording_state["person_b_recording"] = False
        return (
            gr.Button("🎤 Talk", elem_classes=["talk-button"]),
            "<div style='text-align: center; padding: 10px; color: #666;'>Ready to record</div>"
        )
    else:
        # Not recording, start it
        recording_state["person_b_recording"] = True
        # Clear previous audio
        recording_state["person_b_audio"] = None
        return (
            gr.Button("🛑 Stop", elem_classes=["stop-button"]),
            "<div style='text-align: center; padding: 10px; color: #f44336;'>🔴 Click Stop when done recording</div>"
        )

def handle_person_a_audio_update(audio_file):
    """Handle audio update for Person A"""
    if audio_file and recording_state["person_a_recording"]:
        recording_state["person_a_audio"] = audio_file
        # Auto-stop recording when audio is received
        recording_state["person_a_recording"] = False
        return audio_file, "🎤 Talk", "talk-button"
    return None, "🎤 Talk", "talk-button"

def handle_person_b_audio_update(audio_file):
    """Handle audio update for Person B"""
    if audio_file and recording_state["person_b_recording"]:
        recording_state["person_b_audio"] = audio_file
        # Auto-stop recording when audio is received
        recording_state["person_b_recording"] = False
        return audio_file, "🎤 Talk", "talk-button"
    return None, "🎤 Talk", "talk-button"

def process_person_a_translation(country_b: str, voice_b: str, input_lang_a: str):
    """Process translation for Person A using stored audio"""
    audio_file = recording_state.get("person_a_audio")
    if not audio_file:
        return "", None

    try:
        print(f"[DEBUG] Person A processing stored audio: {audio_file}")
        tgt_code = get_language_code_from_country(country_b)
        selected_voice = get_voice_code_from_selections(country_b, voice_b)
        input_language = get_input_language_code_from_selection(input_lang_a)

        original_text, detected_lang, translated_text, audio_output = agent.process_audio_translation_with_voice(
            audio_file, tgt_code, selected_voice, input_language
        )

        # Add to conversation
        add_message_to_conversation("A", original_text, detected_lang, translated_text, "B")

        # Clear processed audio
        recording_state["person_a_audio"] = None

        # Return conversation for Person B's tab and audio
        conversation_history = get_full_conversation()
        return conversation_history, audio_output

    except Exception as e:
        print(f"[ERROR] process_person_a_translation: {e}")
        return f"Error: {str(e)}", None

def process_person_b_translation(country_a: str, voice_a: str, input_lang_b: str):
    """Process translation for Person B using stored audio"""
    audio_file = recording_state.get("person_b_audio")
    if not audio_file:
        return "", None

    try:
        print(f"[DEBUG] Person B processing stored audio: {audio_file}")
        tgt_code = get_language_code_from_country(country_a)
        selected_voice = get_voice_code_from_selections(country_a, voice_a)
        input_language = get_input_language_code_from_selection(input_lang_b)

        original_text, detected_lang, translated_text, audio_output = agent.process_audio_translation_with_voice(
            audio_file, tgt_code, selected_voice, input_language
        )

        # Add to conversation
        add_message_to_conversation("B", original_text, detected_lang, translated_text, "A")

        # Clear processed audio
        recording_state["person_b_audio"] = None

        # Return conversation for Person A's tab and audio
        conversation_history = get_full_conversation()
        return conversation_history, audio_output

    except Exception as e:
        print(f"[ERROR] process_person_b_translation: {e}")
        return f"Error: {str(e)}", None

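# Orientation comment (added): the Blocks UI below records audio per person,
# routes it through speech_to_text -> translate_text -> Edge TTS via the
# translate_person_* / process_person_* handlers, and autoplays the result
# in the other person's tab.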
# Create Two-Person Translation Interface
with gr.Blocks(
    title="🎙️ Two-Person Live Translation",
    theme=gr.themes.Soft(),
    css="""
    .gradio-container { max-width: 1400px !important; margin: 0 auto !important; }
    .header {
        text-align: center;
        background: linear-gradient(135deg, #4A90E2 0%, #FF6B9D 100%);
        color: white;
        padding: 20px;
        border-radius: 10px;
        margin-bottom: 20px;
    }
    .status-box {
        background: rgba(78, 205, 196, 0.1);
        border: 2px solid rgba(78, 205, 196, 0.3);
        border-radius: 10px;
        padding: 15px;
        text-align: center;
        margin: 15px 0;
    }
    .footer {
        text-align: center;
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        color: white;
        padding: 20px;
        border-radius: 10px;
        margin-top: 30px;
        box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
    }
    .guide-box {
        background: rgba(255, 255, 255, 0.05);
        border: 1px solid rgba(255, 255, 255, 0.2);
        border-radius: 10px;
        padding: 20px;
        margin: 15px 0;
    }
    .step-card {
        background: rgba(78, 205, 196, 0.1);
        border-left: 4px solid #4ECDCC;
        padding: 15px;
        margin: 10px 0;
        border-radius: 5px;
    }
    .tips-card {
        background: rgba(255, 193, 7, 0.1);
        border-left: 4px solid #FFC107;
        padding: 15px;
        margin: 10px 0;
        border-radius: 5px;
    }
    .talk-button {
        background: linear-gradient(135deg, #4CAF50 0%, #45a049 100%) !important;
        color: white !important;
        border: none !important;
        padding: 15px 30px !important;
        font-size: 18px !important;
        font-weight: bold !important;
        border-radius: 25px !important;
        cursor: pointer !important;
        transition: all 0.3s ease !important;
        box-shadow: 0 4px 8px rgba(76, 175, 80, 0.3) !important;
        min-height: 60px !important;
    }
    .talk-button:hover {
        transform: translateY(-2px) !important;
        box-shadow: 0 6px 12px rgba(76, 175, 80, 0.4) !important;
    }
    .stop-button {
        background: linear-gradient(135deg, #f44336 0%, #d32f2f 100%) !important;
        color: white !important;
        border: none !important;
        padding: 15px 30px !important;
        font-size: 18px !important;
        font-weight: bold !important;
        border-radius: 25px !important;
        cursor: pointer !important;
        transition: all 0.3s ease !important;
        box-shadow: 0 4px 8px rgba(244, 67, 54, 0.3) !important;
        min-height: 60px !important;
        animation: pulse 2s infinite !important;
    }
    .stop-button:hover {
        transform: translateY(-2px) !important;
        box-shadow: 0 6px 12px rgba(244, 67, 54, 0.4) !important;
    }
    @keyframes pulse {
        0% { box-shadow: 0 4px 8px rgba(244, 67, 54, 0.3); }
        50% { box-shadow: 0 6px 16px rgba(244, 67, 54, 0.6); }
        100% { box-shadow: 0 4px 8px rgba(244, 67, 54, 0.3); }
    }
    .hidden-audio {
        display: none !important;
    }
    """
) as demo:

    # Header
    api_status = "Ready" if agent.gemini_configured else "Missing API Key"
    edge_tts_status = "Ready" if EDGE_TTS_AVAILABLE else "Not Available"

    gr.HTML(f"""
    <div class="header">
        <h1>🎙️ Two-Person Live Translation</h1>
        <p>Real-time Cross-Translation between Person A & Person B</p>
        <div style="margin-top: 15px;">
            <span style="background: rgba(255,255,255,0.2); padding: 6px 12px; border-radius: 15px; margin: 0 5px;">
                <strong>Gemini:</strong> {api_status}
            </span>
            <span style="background: rgba(255,255,255,0.2); padding: 6px 12px; border-radius: 15px; margin: 0 5px;">
                <strong>Edge TTS:</strong> {edge_tts_status}
            </span>
        </div>
        <div style="margin-top: 10px;">🧠 <strong>Digitized Brains</strong></div>
    </div>
    """)

    # Status Box
    gr.HTML(f"""
    <div class="status-box">
        <h4>🤖 AI Pipeline Status</h4>
        <div style="display: flex; justify-content: center; gap: 20px; flex-wrap: wrap;">
            <span><strong>🧠 Gemini Speech Recognition:</strong> {'🟢 Ready' if agent.gemini_configured else '🔴 Not Ready'}</span>
            <span><strong>🧠 Gemini Translation:</strong> {'🟢 Ready' if agent.gemini_configured else '🔴 Not Ready'}</span>
            <span><strong>🔊 Edge TTS:</strong> {'🟢 Ready' if EDGE_TTS_AVAILABLE else '🔴 Not Ready'}</span>
        </div>
    </div>
    """)

    with gr.Tabs():
        # Person A Tab - Only shows translation FROM Person B
        with gr.TabItem("Person A View"):
            gr.Markdown("### Person A receives translations from Person B")

            # Conversation History for Person A
            conversation_display_a = gr.Textbox(
                label="Full Conversation",
                lines=8,
                interactive=False,
                placeholder="Conversation will appear here...",
                value=""
            )

            with gr.Row():
                with gr.Column(scale=2):
                    # Audio component with custom recording interface
                    mic_a = gr.Audio(
                        sources=["microphone"],
                        type="filepath",
                        label="Person A: Record Your Voice",
                        elem_id="mic_a"
                    )

                    # Talk/Stop button for Person A
                    talk_button_a = gr.Button(
                        "🎤 Talk",
                        elem_classes=["talk-button"],
                        size="lg",
                        elem_id="talk_button_a"
                    )

                    # Status display for Person A
                    status_a = gr.HTML(
                        "<div style='text-align: center; padding: 10px; color: #666;'>Ready to record</div>",
                        elem_id="status_a"
                    )

                with gr.Column(scale=1):
                    # Person A's input language selection
                    input_lang_a = gr.Dropdown(
                        choices=get_input_language_options(),
                        label="Person A's Input Language",
                        value="auto | Auto-detect Language (Recommended)",
                        info="Select your speaking language or use auto-detect"
                    )

                    # Person B's output settings
                    country_b_for_a = gr.Dropdown(
                        choices=get_country_options(),
                        label="Person B's Language",
                        value="vi-vn | Vietnamese (Vietnam)"
                    )
                    voice_b_for_a = gr.Dropdown(
                        choices=["HoaiMy (Female)", "NamMinh (Male)"],
                        label="Person B's Voice",
                        value="HoaiMy (Female)"
                    )

            # Only show audio FROM Person B (Person A hears this)
            audio_from_b = gr.Audio(
                label="🔊 Translation Audio from Person B",
                interactive=False,
                value=None,
                autoplay=True
            )

        # Person B Tab - Only shows translation FROM Person A
        with gr.TabItem("Person B View"):
            gr.Markdown("### Person B receives translations from Person A")

            # Conversation History for Person B
            conversation_display_b = gr.Textbox(
                label="Full Conversation",
                lines=8,
                interactive=False,
                placeholder="Conversation will appear here...",
                value=""
            )

            with gr.Row():
                with gr.Column(scale=2):
                    # Audio component with custom recording interface
                    mic_b = gr.Audio(
                        sources=["microphone"],
                        type="filepath",
                        label="Person B: Record Your Voice",
                        elem_id="mic_b"
                    )

                    # Talk/Stop button for Person B
                    talk_button_b = gr.Button(
                        "🎤 Talk",
                        elem_classes=["talk-button"],
                        size="lg",
                        elem_id="talk_button_b"
                    )

                    # Status display for Person B
                    status_b = gr.HTML(
                        "<div style='text-align: center; padding: 10px; color: #666;'>Ready to record</div>",
                        elem_id="status_b"
                    )

                with gr.Column(scale=1):
                    # Person B's input language selection
                    input_lang_b = gr.Dropdown(
                        choices=get_input_language_options(),
                        label="Person B's Input Language",
                        value="auto | Auto-detect Language (Recommended)",
                        info="Select your speaking language or use auto-detect"
                    )

                    # Person A's output settings
                    country_a_for_b = gr.Dropdown(
                        choices=get_country_options(),
                        label="Person A's Language",
                        value="en-us | English (United States)"
                    )
                    voice_a_for_b = gr.Dropdown(
                        choices=["Jenny (Female)", "Guy (Male)"],
                        label="Person A's Voice",
                        value="Jenny (Female)"
                    )

            # Only show audio FROM Person A (Person B hears this)
            audio_from_a = gr.Audio(
                label="🔊 Translation Audio from Person A",
                interactive=False,
                value=None,
                autoplay=True
            )

        # User Guide Tab
        with gr.TabItem("📚 User Guide"):
            gr.HTML("""
            <div class="guide-box">
                <h2 style="color: #4A90E2; margin-bottom: 20px;">🎙️ Two-Way Translation App User Guide</h2>
                <p style="font-size: 16px; margin-bottom: 20px;">This application enables two people to communicate in different languages through automatic translation.</p>
            </div>
            """)

            gr.HTML("""
            <div class="step-card">
                <h3>🚀 Step 1: Preparation</h3>
                <ul>
                    <li><strong>Check microphone:</strong> Ensure your microphone works properly</li>
                    <li><strong>Quiet environment:</strong> Find a location with minimal background noise</li>
                    <li><strong>Stable internet:</strong> Internet connection required for AI processing</li>
                    <li><strong>Speakers/headphones:</strong> To hear translated audio output</li>
                </ul>
            </div>
            """)

            gr.HTML("""
            <div class="step-card">
                <h3>👥 Step 2: Choose Your Tab</h3>
                <ul>
                    <li><strong>Person A View:</strong> For the first person</li>
                    <li><strong>Person B View:</strong> For the second person</li>
                    <li><strong>Each person only needs to focus on their own tab</strong></li>
                </ul>
            </div>
            """)

            gr.HTML("""
            <div class="step-card">
                <h3>🗣️ Step 3: Language Setup</h3>
                <ul>
                    <li><strong>Input Language:</strong> Select the language you will speak (or Auto-detect)</li>
                    <li><strong>Target Language:</strong> Choose the language to translate to</li>
                    <li><strong>Voice:</strong> Select voice for translated audio output</li>
                    <li><strong>Recommendation:</strong> Choose a specific language instead of Auto-detect for better accuracy</li>
                </ul>
            </div>
            """)

            gr.HTML("""
            <div class="step-card">
                <h3>🎤 Step 4: One-Click Talk Button Recording</h3>
                <ul>
                    <li><strong>Click "🎤 Talk"</strong> - Button turns green and starts recording automatically</li>
                    <li><strong>Speak clearly for 3-7 seconds</strong> - Button shows "🛑 Stop" with red pulse animation</li>
                    <li><strong>Click "🛑 Stop"</strong> - Automatically stops recording and processes translation</li>
                    <li><strong>Audio automatically plays</strong> - Translation appears in the other person's view</li>
                    <li><strong>Ready for next recording</strong> - System automatically resets for the next conversation</li>
                </ul>
            </div>
            """)

            gr.HTML("""
            <div class="step-card">
                <h3>✨ Key Features</h3>
                <ul>
                    <li><strong>🎯 One-Click Operation:</strong> No need to manually start/stop recording - the Talk button handles everything</li>
                    <li><strong>🔄 Auto-Clear:</strong> System automatically clears previous recordings for seamless conversation flow</li>
                    <li><strong>🎵 Auto-Play:</strong> Translated audio plays immediately when ready</li>
                    <li><strong>🎨 Visual Feedback:</strong> Button colors and animations show the current recording state</li>
                    <li><strong>🚀 Instant Translation:</strong> From speech to translation in one click</li>
                </ul>
            </div>
            """)

            gr.HTML("""
            <div class="tips-card">
                <h3>💡 Tips for Best Results</h3>
                <ul>
                    <li><strong>🎤 Microphone:</strong> Speak close to the mic, not too loud or quiet</li>
                    <li><strong>⏱️ Duration:</strong> 3-7 seconds is ideal (not too short/long)</li>
                    <li><strong>🗣️ Speaking style:</strong> Clear, not too fast, natural punctuation</li>
                    <li><strong>🔇 Environment:</strong> Minimize background noise</li>
                    <li><strong>🌍 Language:</strong> Select the correct input language instead of auto-detect</li>
                    <li><strong>🔄 Retry:</strong> If unsuccessful, try again with a different approach</li>
                </ul>
            </div>
            """)

            gr.HTML("""
            <div class="step-card">
                <h3>🔧 Common Troubleshooting</h3>
                <ul>
                    <li><strong>"Could not understand speech":</strong> Speak more clearly, check your microphone</li>
                    <li><strong>No audio output:</strong> Check speakers/headphones</li>
                    <li><strong>Incorrect translation:</strong> Select a specific input language</li>
                    <li><strong>Slow processing:</strong> Check your internet connection</li>
                </ul>
            </div>
            """)

+ js_control_recording = """
1337
+ function(button_text, elem_classes) {
1338
+ // Get the button element and audio component
1339
+ const button = arguments[2]; // The button that was clicked
1340
+ const audio_component = document.querySelector('audio');
1341
+
1342
+ if (button_text.includes('Talk')) {
1343
+ // Start recording
1344
+ if (audio_component) {
1345
+ const startButton = audio_component.querySelector('[aria-label="Record from microphone"]');
1346
+ if (startButton) startButton.click();
1347
+ }
1348
+ return ["πŸ›‘ Stop", "stop-button"];
1349
+ } else {
1350
+ // Stop recording
1351
+ if (audio_component) {
1352
+ const stopButton = audio_component.querySelector('[aria-label="Stop recording"]');
1353
+ if (stopButton) stopButton.click();
1354
+ }
1355
+ return ["🎀 Talk", "talk-button"];
1356
+ }
1357
+ }
1358
+ """
1359
+
1360
+     # Event Handlers
+     # Voice options update: refresh the voice dropdown whenever a country changes.
+     country_b_for_a.change(
+         fn=update_voice_options,
+         inputs=[country_b_for_a],
+         outputs=[voice_b_for_a]
+     )
+
+     country_a_for_b.change(
+         fn=update_voice_options,
+         inputs=[country_a_for_b],
+         outputs=[voice_a_for_b]
+     )
+
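+     # update_voice_options is defined earlier in this file; a handler wired like
+     # this would typically return a dropdown update, roughly (hypothetical sketch,
+     # assuming a VOICES_BY_COUNTRY lookup table):
+     #
+     #     def update_voice_options(country):
+     #         voices = VOICES_BY_COUNTRY.get(country, [])
+     #         return gr.Dropdown(choices=voices, value=voices[0] if voices else None)
+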
+     # JavaScript-powered Talk/Stop button handlers.
+     # The Python callbacks are deliberate no-ops: all recording control happens
+     # in the browser via the inline js handlers attached below.
+     def handle_person_a_recording():
+         return None
+
+     def handle_person_b_recording():
+         return None
+
+     talk_button_a.click(
+         fn=handle_person_a_recording,
+         js="""
+         () => {
+             console.log("🎤 Person A Talk button clicked");
+
+             // Find Gradio's recording buttons (search globally after the recording state changes)
+             const micContainer = document.getElementById('mic_a');
+             if (!micContainer) {
+                 console.error("❌ mic_a container not found");
+                 return;
+             }
+             let recordBtn = micContainer.querySelector('.record-button');
+             let stopBtn = micContainer.querySelector('.stop-button');
+
+             // If not found in the container, search globally (happens after recording)
+             if (!recordBtn || !stopBtn) {
+                 const allRecordBtns = document.querySelectorAll('.record-button');
+                 const allStopBtns = document.querySelectorAll('.stop-button');
+
+                 // Find Person A's buttons by checking parent containers
+                 for (let i = 0; i < allRecordBtns.length; i++) {
+                     const candidate = allRecordBtns[i];
+                     const container = candidate.closest('[id*="mic_a"]');
+                     if (container && container.id === 'mic_a') {
+                         recordBtn = candidate;
+                         stopBtn = allStopBtns[i];
+                         console.log("🔍 Found Person A buttons globally:", container.id);
+                         break;
+                     }
+                 }
+
+                 // Fall back to the first buttons if the specific search fails
+                 if (!recordBtn) {
+                     recordBtn = allRecordBtns[0];
+                     stopBtn = allStopBtns[0];
+                     console.log("🔍 Using fallback buttons");
+                 }
+             }
+             const statusEl = document.getElementById('status_a');
+             const btn = document.getElementById('talk_button_a');
+
+             // Initialize the state object if it does not exist yet
+             if (!window.personARecording) {
+                 window.personARecording = { isRecording: false };
+             }
+
+             if (!window.personARecording.isRecording) {
+                 // Clear any existing audio first to restore the record button
+                 const clearBtn = micContainer.querySelector('button[aria-label="Clear"]');
+                 if (clearBtn) {
+                     console.log("🧹 Clearing existing audio for Person A");
+                     clearBtn.click();
+
+                     // Wait a moment for the UI to update, then find the record button
+                     setTimeout(() => {
+                         const newRecordBtn = micContainer.querySelector('.record-button') ||
+                                              document.querySelectorAll('.record-button')[0];
+                         if (newRecordBtn) {
+                             console.log("✅ Clicking Gradio's Record button for Person A");
+                             newRecordBtn.click();
+                             window.personARecording.isRecording = true;
+
+                             // Update the UI
+                             if (btn) {
+                                 btn.textContent = '🛑 Stop';
+                                 btn.className = btn.className.replace('talk-button', 'stop-button');
+                             }
+
+                             if (statusEl) {
+                                 statusEl.innerHTML = '<div style="text-align: center; padding: 10px; color: #f44336;">🔴 Recording... Click Stop when done</div>';
+                             }
+                         }
+                     }, 100);
+                 } else if (recordBtn) {
+                     // No clear needed; the record button is available
+                     console.log("✅ Clicking Gradio's Record button for Person A");
+                     recordBtn.click();
+                     window.personARecording.isRecording = true;
+
+                     // Update the UI
+                     if (btn) {
+                         btn.textContent = '🛑 Stop';
+                         btn.className = btn.className.replace('talk-button', 'stop-button');
+                     }
+
+                     if (statusEl) {
+                         statusEl.innerHTML = '<div style="text-align: center; padding: 10px; color: #f44336;">🔴 Recording... Click Stop when done</div>';
+                     }
+                 }
+
+             } else if (window.personARecording.isRecording && stopBtn) {
+                 // Stop recording by clicking Gradio's stop button
+                 console.log("⏹️ Clicking Gradio's Stop button for Person A");
+                 stopBtn.click();
+                 window.personARecording.isRecording = false;
+
+                 // Update the UI
+                 if (btn) {
+                     btn.textContent = '🎤 Talk';
+                     btn.className = btn.className.replace('stop-button', 'talk-button');
+                 }
+
+                 if (statusEl) {
+                     statusEl.innerHTML = '<div style="text-align: center; padding: 10px; color: #4CAF50;">✅ Processing translation...</div>';
+                 }
+             } else {
+                 console.error("❌ Gradio recording buttons not found");
+                 if (statusEl) {
+                     statusEl.innerHTML = '<div style="color: red;">❌ Recording interface not available</div>';
+                 }
+             }
+         }
+         """
+     )
+
+     talk_button_b.click(
+         fn=handle_person_b_recording,
+         js="""
+         () => {
+             console.log("🎤 Person B Talk button clicked");
+
+             // Find Gradio's recording buttons (search globally after the recording state changes)
+             const micContainer = document.getElementById('mic_b');
+             if (!micContainer) {
+                 console.error("❌ mic_b container not found");
+                 return;
+             }
+             let recordBtn = micContainer.querySelector('.record-button');
+             let stopBtn = micContainer.querySelector('.stop-button');
+
+             // If not found in the container, search globally (happens after recording)
+             if (!recordBtn || !stopBtn) {
+                 const allRecordBtns = document.querySelectorAll('.record-button');
+                 const allStopBtns = document.querySelectorAll('.stop-button');
+
+                 // Find Person B's buttons by checking parent containers
+                 for (let i = 0; i < allRecordBtns.length; i++) {
+                     const candidate = allRecordBtns[i];
+                     const container = candidate.closest('[id*="mic_b"]');
+                     if (container && container.id === 'mic_b') {
+                         recordBtn = candidate;
+                         stopBtn = allStopBtns[i];
+                         console.log("🔍 Found Person B buttons globally:", container.id);
+                         break;
+                     }
+                 }
+
+                 // Fall back to the second buttons if the specific search fails
+                 if (!recordBtn) {
+                     recordBtn = allRecordBtns[1] || allRecordBtns[0];
+                     stopBtn = allStopBtns[1] || allStopBtns[0];
+                     console.log("🔍 Using fallback buttons for Person B");
+                 }
+             }
+             const statusEl = document.getElementById('status_b');
+             const btn = document.getElementById('talk_button_b');
+
+             // Initialize the state object if it does not exist yet
+             if (!window.personBRecording) {
+                 window.personBRecording = { isRecording: false };
+             }
+
+             if (!window.personBRecording.isRecording) {
+                 // Clear any existing audio first to restore the record button
+                 const clearBtn = micContainer.querySelector('button[aria-label="Clear"]');
+                 if (clearBtn) {
+                     console.log("🧹 Clearing existing audio for Person B");
+                     clearBtn.click();
+
+                     // Wait a moment for the UI to update, then find the record button
+                     setTimeout(() => {
+                         const newRecordBtn = micContainer.querySelector('.record-button') ||
+                                              document.querySelectorAll('.record-button')[1];
+                         if (newRecordBtn) {
+                             console.log("✅ Clicking Gradio's Record button for Person B");
+                             newRecordBtn.click();
+                             window.personBRecording.isRecording = true;
+
+                             // Update the UI
+                             if (btn) {
+                                 btn.textContent = '🛑 Stop';
+                                 btn.className = btn.className.replace('talk-button', 'stop-button');
+                             }
+
+                             if (statusEl) {
+                                 statusEl.innerHTML = '<div style="text-align: center; padding: 10px; color: #f44336;">🔴 Recording... Click Stop when done</div>';
+                             }
+                         }
+                     }, 100);
+                 } else if (recordBtn) {
+                     // No clear needed; the record button is available
+                     console.log("✅ Clicking Gradio's Record button for Person B");
+                     recordBtn.click();
+                     window.personBRecording.isRecording = true;
+
+                     // Update the UI
+                     if (btn) {
+                         btn.textContent = '🛑 Stop';
+                         btn.className = btn.className.replace('talk-button', 'stop-button');
+                     }
+
+                     if (statusEl) {
+                         statusEl.innerHTML = '<div style="text-align: center; padding: 10px; color: #f44336;">🔴 Recording... Click Stop when done</div>';
+                     }
+                 }
+
+             } else if (window.personBRecording.isRecording && stopBtn) {
+                 // Stop recording by clicking Gradio's stop button
+                 console.log("⏹️ Clicking Gradio's Stop button for Person B");
+                 stopBtn.click();
+                 window.personBRecording.isRecording = false;
+
+                 // Update the UI
+                 if (btn) {
+                     btn.textContent = '🎤 Talk';
+                     btn.className = btn.className.replace('stop-button', 'talk-button');
+                 }
+
+                 if (statusEl) {
+                     statusEl.innerHTML = '<div style="text-align: center; padding: 10px; color: #4CAF50;">✅ Processing translation...</div>';
+                 }
+             } else {
+                 console.error("❌ Gradio recording buttons not found");
+                 if (statusEl) {
+                     statusEl.innerHTML = '<div style="color: red;">❌ Recording interface not available</div>';
+                 }
+             }
+         }
+         """
+     )
+
+     # JavaScript to implement direct recording from the Talk buttons.
+     # (Note: browsers do not execute <script> tags injected via innerHTML, so
+     # whether this block runs depends on how the Gradio version mounts gr.HTML.)
+     gr.HTML("""
+     <script>
+     console.log("🎤 Setting up direct recording control...");
+
+     let personARecording = false;
+     let personBRecording = false;
+     let personAMediaRecorder = null;
+     let personBMediaRecorder = null;
+     let personAStream = null;
+     let personBStream = null;
+     let personAAudioChunks = [];
+     let personBAudioChunks = [];
+
+     function findGradioAudioInputs() {
+         // Find Gradio audio input components to send our recorded audio to
+         // (assumes the Audio components render a hidden file <input>)
+         console.log("🔍 Finding Gradio audio inputs...");
+         const audioInputA = document.querySelector('#mic_a input[type="file"]');
+         const audioInputB = document.querySelector('#mic_b input[type="file"]');
+
+         console.log("Audio Input A found:", !!audioInputA);
+         console.log("Audio Input B found:", !!audioInputB);
+
+         return { audioInputA, audioInputB };
+     }
+
+     function createAudioFile(audioBlob, filename) {
+         // Create a File object from the audio blob, preserving its real MIME
+         // type (the recordings are webm/opus, not wav)
+         const file = new File([audioBlob], filename, {
+             type: audioBlob.type || 'audio/webm',
+             lastModified: Date.now()
+         });
+         return file;
+     }
+
+     function sendAudioToGradio(audioFile, audioInput) {
+         // Send the recorded audio file to Gradio's file input
+         try {
+             if (audioInput) {
+                 // Build a genuine FileList via DataTransfer; a plain
+                 // FileList-like object is not accepted by file inputs
+                 const dataTransfer = new DataTransfer();
+                 dataTransfer.items.add(audioFile);
+                 audioInput.files = dataTransfer.files;
+
+                 // Trigger a change event so Gradio picks up the new file
+                 const event = new Event('change', { bubbles: true });
+                 audioInput.dispatchEvent(event);
+
+                 console.log("✅ Audio file sent to Gradio:", audioFile.name);
+                 return true;
+             }
+         } catch (error) {
+             console.error("❌ Failed to send audio to Gradio:", error);
+         }
+         return false;
+     }
+
+     function setupDirectRecording() {
+         setTimeout(() => {
+             const talkButtonA = document.getElementById('talk_button_a');
+             const talkButtonB = document.getElementById('talk_button_b');
+             const statusA = document.getElementById('status_a');
+             const statusB = document.getElementById('status_b');
+
+             console.log("Talk Button A found:", !!talkButtonA);
+             console.log("Talk Button B found:", !!talkButtonB);
+
+             // Find Gradio audio inputs
+             const { audioInputA, audioInputB } = findGradioAudioInputs();
+
+             // Set up Person A direct recording
+             if (talkButtonA) {
+                 // Clone the button to remove any existing event listeners
+                 const newTalkButtonA = talkButtonA.cloneNode(true);
+                 talkButtonA.parentNode.replaceChild(newTalkButtonA, talkButtonA);
+
+                 newTalkButtonA.addEventListener('click', async function(e) {
+                     e.preventDefault();
+                     e.stopPropagation();
+
+                     console.log("🎤 Person A Talk button clicked, currently recording:", personARecording);
+
+                     if (!personARecording) {
+                         // Start direct recording
+                         try {
+                             console.log("🔴 Starting Person A direct recording...");
+
+                             // Request microphone access (16 kHz mono suits speech
+                             // models; browsers treat these values as hints)
+                             personAStream = await navigator.mediaDevices.getUserMedia({
+                                 audio: {
+                                     sampleRate: 16000,
+                                     channelCount: 1,
+                                     echoCancellation: true,
+                                     noiseSuppression: true
+                                 }
+                             });
+
+                             console.log("✅ Microphone access granted");
+
+                             // Create the MediaRecorder; webm/opus is not supported
+                             // everywhere (notably Safari) - a
+                             // MediaRecorder.isTypeSupported() check could pick a
+                             // fallback type here
+                             personAMediaRecorder = new MediaRecorder(personAStream, {
+                                 mimeType: 'audio/webm;codecs=opus'
+                             });
+
+                             personAAudioChunks = [];
+
+                             // Set up event handlers
+                             personAMediaRecorder.ondataavailable = function(event) {
+                                 if (event.data.size > 0) {
+                                     personAAudioChunks.push(event.data);
+                                 }
+                             };
+
+                             personAMediaRecorder.onstop = function() {
+                                 console.log("🎵 Person A recording stopped, processing...");
+
+                                 // Create the audio blob
+                                 const audioBlob = new Blob(personAAudioChunks, { type: 'audio/webm' });
+                                 console.log("Audio blob created:", audioBlob.size, "bytes");
+
+                                 // Convert to a file and send it to Gradio
+                                 const audioFile = createAudioFile(audioBlob, `person_a_recording_${Date.now()}.webm`);
+
+                                 if (sendAudioToGradio(audioFile, audioInputA)) {
+                                     console.log("✅ Audio sent to Gradio successfully");
+                                     if (statusA) {
+                                         statusA.innerHTML = '<div style="text-align: center; padding: 10px; color: #4CAF50;">✅ Processing translation...</div>';
+                                     }
+                                 } else {
+                                     console.error("❌ Failed to send audio to Gradio");
+                                     if (statusA) {
+                                         statusA.innerHTML = '<div style="text-align: center; padding: 10px; color: #f44336;">❌ Failed to process audio</div>';
+                                     }
+                                 }
+
+                                 // Cleanup
+                                 personAStream.getTracks().forEach(track => track.stop());
+                                 personAStream = null;
+                                 personAMediaRecorder = null;
+                             };
+
+                             // Start recording
+                             personAMediaRecorder.start();
+                             personARecording = true;
+
+                             // Update the status display
+                             if (statusA) {
+                                 statusA.innerHTML = '<div style="text-align: center; padding: 10px; color: #f44336;">🔴 Recording... Click Stop when done</div>';
+                             }
+
+                             console.log("✅ Person A recording started successfully");
+
+                         } catch (error) {
+                             console.error("❌ Error starting Person A recording:", error);
+                             if (statusA) {
+                                 statusA.innerHTML = '<div style="text-align: center; padding: 10px; color: #f44336;">❌ Microphone access denied or failed</div>';
+                             }
+                         }
+
+                     } else {
+                         // Stop recording
+                         console.log("⏹️ Stopping Person A recording...");
+
+                         if (personAMediaRecorder && personAMediaRecorder.state === 'recording') {
+                             personAMediaRecorder.stop();
+                             personARecording = false;
+
+                             if (statusA) {
+                                 statusA.innerHTML = '<div style="text-align: center; padding: 10px; color: #4CAF50;">✅ Processing audio...</div>';
+                             }
+                         }
+                     }
+                 });
+             }
+
+             // Set up Person B direct recording
+             if (talkButtonB) {
+                 // Clone the button to remove any existing event listeners
+                 const newTalkButtonB = talkButtonB.cloneNode(true);
+                 talkButtonB.parentNode.replaceChild(newTalkButtonB, talkButtonB);
+
+                 newTalkButtonB.addEventListener('click', async function(e) {
+                     e.preventDefault();
+                     e.stopPropagation();
+
+                     console.log("🎤 Person B Talk button clicked, currently recording:", personBRecording);
+
+                     if (!personBRecording) {
+                         // Start direct recording
+                         try {
+                             console.log("🔴 Starting Person B direct recording...");
+
+                             // Request microphone access
+                             personBStream = await navigator.mediaDevices.getUserMedia({
+                                 audio: {
+                                     sampleRate: 16000,
+                                     channelCount: 1,
+                                     echoCancellation: true,
+                                     noiseSuppression: true
+                                 }
+                             });
+
+                             console.log("✅ Microphone access granted for Person B");
+
+                             // Create the MediaRecorder (same format note as Person A)
+                             personBMediaRecorder = new MediaRecorder(personBStream, {
+                                 mimeType: 'audio/webm;codecs=opus'
+                             });
+
+                             personBAudioChunks = [];
+
+                             // Set up event handlers
+                             personBMediaRecorder.ondataavailable = function(event) {
+                                 if (event.data.size > 0) {
+                                     personBAudioChunks.push(event.data);
+                                 }
+                             };
+
+                             personBMediaRecorder.onstop = function() {
+                                 console.log("🎵 Person B recording stopped, processing...");
+
+                                 // Create the audio blob
+                                 const audioBlob = new Blob(personBAudioChunks, { type: 'audio/webm' });
+                                 console.log("Audio blob created:", audioBlob.size, "bytes");
+
+                                 // Convert to a file and send it to Gradio
+                                 const audioFile = createAudioFile(audioBlob, `person_b_recording_${Date.now()}.webm`);
+
+                                 if (sendAudioToGradio(audioFile, audioInputB)) {
+                                     console.log("✅ Audio sent to Gradio successfully");
+                                     if (statusB) {
+                                         statusB.innerHTML = '<div style="text-align: center; padding: 10px; color: #4CAF50;">✅ Processing translation...</div>';
+                                     }
+                                 } else {
+                                     console.error("❌ Failed to send audio to Gradio");
+                                     if (statusB) {
+                                         statusB.innerHTML = '<div style="text-align: center; padding: 10px; color: #f44336;">❌ Failed to process audio</div>';
+                                     }
+                                 }
+
+                                 // Cleanup
+                                 personBStream.getTracks().forEach(track => track.stop());
+                                 personBStream = null;
+                                 personBMediaRecorder = null;
+                             };
+
+                             // Start recording
+                             personBMediaRecorder.start();
+                             personBRecording = true;
+
+                             // Update the status display
+                             if (statusB) {
+                                 statusB.innerHTML = '<div style="text-align: center; padding: 10px; color: #f44336;">🔴 Recording... Click Stop when done</div>';
+                             }
+
+                             console.log("✅ Person B recording started successfully");
+
+                         } catch (error) {
+                             console.error("❌ Error starting Person B recording:", error);
+                             if (statusB) {
+                                 statusB.innerHTML = '<div style="text-align: center; padding: 10px; color: #f44336;">❌ Microphone access denied or failed</div>';
+                             }
+                         }
+
+                     } else {
+                         // Stop recording
+                         console.log("⏹️ Stopping Person B recording...");
+
+                         if (personBMediaRecorder && personBMediaRecorder.state === 'recording') {
+                             personBMediaRecorder.stop();
+                             personBRecording = false;
+
+                             if (statusB) {
+                                 statusB.innerHTML = '<div style="text-align: center; padding: 10px; color: #4CAF50;">✅ Processing audio...</div>';
+                             }
+                         }
+                     }
+                 });
+             }
+
+             console.log("✅ Direct recording setup complete");
+
+         }, 3000); // Wait for Gradio to fully load
+     }
+
+     // Initialize with a retry mechanism
+     function initDirectRecording() {
+         setupDirectRecording();
+
+         // Retry in case the interface isn't ready yet
+         setTimeout(() => {
+             console.log("🔄 Retrying direct recording setup...");
+             setupDirectRecording();
+         }, 5000);
+
+         setTimeout(() => {
+             console.log("🔄 Final retry for direct recording setup...");
+             setupDirectRecording();
+         }, 8000);
+     }
+
+     // Start initialization
+     if (document.readyState === 'loading') {
+         document.addEventListener('DOMContentLoaded', initDirectRecording);
+     } else {
+         initDirectRecording();
+     }
+
+     console.log("🎤 Direct recording script loaded");
+
+     </script>
+     """)
+
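+     # A simpler alternative to the DOM-level control above, assuming Gradio 4's
+     # built-in recording events, would be to listen on the Audio components
+     # directly instead of clicking their buttons from JavaScript, e.g.:
+     #
+     #     mic_a.stop_recording(
+     #         fn=handle_person_a_audio_change,
+     #         inputs=[mic_a, country_b_for_a, voice_b_for_a, input_lang_a],
+     #         outputs=[conversation_display_b, audio_from_a, talk_button_a, status_a],
+     #     )
+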
+     # Enhanced audio change handlers with JavaScript state sync
+     def handle_person_a_audio_change(audio, country_b, voice_b, input_lang_a_val):
+         # Always process audio when it arrives (JavaScript controls when recording happens)
+         if audio:
+             # Process the translation
+             conversation, audio_output = translate_person_a_to_b(audio, country_b, voice_b, input_lang_a_val)
+             # Reset the recording state and button
+             recording_state["person_a_recording"] = False
+             reset_button = gr.Button("🎤 Talk", elem_classes=["talk-button"])
+             # The inline <script> is a best-effort reset; browsers do not always
+             # execute scripts injected through HTML updates.
+             reset_status = """<div style='text-align: center; padding: 10px; color: #4CAF50;'>✅ Translation complete! Ready for the next recording</div>
+             <script>
+             // Reset JavaScript state
+             if (typeof personARecording !== 'undefined') {
+                 personARecording = false;
+                 console.log("🔄 Person A recording state reset");
+             }
+             </script>"""
+             return conversation, audio_output, reset_button, reset_status
+         return "", None, gr.Button("🎤 Talk", elem_classes=["talk-button"]), "<div style='text-align: center; padding: 10px; color: #666;'>Ready to record</div>"
+
+     def handle_person_b_audio_change(audio, country_a, voice_a, input_lang_b_val):
+         # Always process audio when it arrives (JavaScript controls when recording happens)
+         if audio:
+             # Process the translation
+             conversation, audio_output = translate_person_b_to_a(audio, country_a, voice_a, input_lang_b_val)
+             # Reset the recording state and button
+             recording_state["person_b_recording"] = False
+             reset_button = gr.Button("🎤 Talk", elem_classes=["talk-button"])
+             reset_status = """<div style='text-align: center; padding: 10px; color: #4CAF50;'>✅ Translation complete! Ready for the next recording</div>
+             <script>
+             // Reset JavaScript state
+             if (typeof personBRecording !== 'undefined') {
+                 personBRecording = false;
+                 console.log("🔄 Person B recording state reset");
+             }
+             </script>"""
+             return conversation, audio_output, reset_button, reset_status
+         return "", None, gr.Button("🎤 Talk", elem_classes=["talk-button"]), "<div style='text-align: center; padding: 10px; color: #666;'>Ready to record</div>"
+
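+     # translate_person_a_to_b / translate_person_b_to_a are defined earlier in
+     # this file; from the call sites above they take (audio, target country,
+     # voice, input language) and return (conversation_html, synthesized_audio).
+     # A hypothetical skeleton, for orientation only:
+     #
+     #     def translate_person_a_to_b(audio, country, voice, input_lang):
+     #         text = transcribe(audio, input_lang)        # assumed helper
+     #         translated = translate(text, country)       # assumed helper
+     #         return render_conversation(text, translated), synthesize(translated, voice)
+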
+     mic_a.change(
+         fn=handle_person_a_audio_change,
+         inputs=[mic_a, country_b_for_a, voice_b_for_a, input_lang_a],
+         outputs=[conversation_display_b, audio_from_a, talk_button_a, status_a]
+     )
+
+     mic_b.change(
+         fn=handle_person_b_audio_change,
+         inputs=[mic_b, country_a_for_b, voice_a_for_b, input_lang_b],
+         outputs=[conversation_display_a, audio_from_b, talk_button_b, status_b]
+     )
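+
+     # The wiring above assumes the components were created earlier in this file
+     # with elem_ids matching the selectors used in the JavaScript, along the
+     # lines of (hypothetical sketch):
+     #
+     #     mic_a = gr.Audio(sources=["microphone"], type="filepath", elem_id="mic_a")
+     #     talk_button_a = gr.Button("🎤 Talk", elem_classes=["talk-button"], elem_id="talk_button_a")
+     #     status_a = gr.HTML(elem_id="status_a")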
+
+     # Footer
+     gr.HTML("""
+     <div class="footer">
+         <div style="display: flex; align-items: center; justify-content: center; gap: 10px; margin-bottom: 10px;">
+             <span style="font-size: 24px;">🧠</span>
+             <h3 style="margin: 0; font-size: 20px; font-weight: 600; background: linear-gradient(45deg, #fff, #e0e0e0); -webkit-background-clip: text; -webkit-text-fill-color: transparent; background-clip: text;">
+                 Digitized Brains - AI Translation
+             </h3>
+         </div>
+         <div style="height: 1px; background: linear-gradient(90deg, transparent, rgba(255,255,255,0.3), transparent); margin: 15px 0;"></div>
+         <p style="margin: 0; font-size: 14px; opacity: 0.8; font-style: italic;">
+             Intelligent Communication Solutions
+         </p>
+     </div>
+     """)
+
+ if __name__ == "__main__":
+     print("===== Two-Person Live Translation Startup =====")
+     print("Starting Two-Person Live Translation with Google Gemini")
+     print(f"Google Gemini API Status: {'Ready' if agent.gemini_configured else 'Missing - Set GOOGLE_API_KEY'}")
+     print(f"Edge TTS Status: {'Ready' if EDGE_TTS_AVAILABLE else 'Not Available'}")
+
+     if agent.gemini_configured:
+         print("Production Mode - Full Gemini AI Translation enabled")
+         print("Speech Recognition: Google Gemini Flash 2.0")
+         print("Language Detection: Google Gemini Flash 2.0")
+         print("Translation Model: Google Gemini Flash 2.0")
+         print("🧠 All AI processing powered by Gemini Flash 2.0!")
+     else:
+         print("Demo Mode - Configure GOOGLE_API_KEY for full functionality")
+
+     # Use the environment port or the default (7860 is the Hugging Face standard)
+     port = int(os.environ.get("PORT", os.environ.get("GRADIO_SERVER_PORT", 7860)))
+     demo.launch(
+         server_name="0.0.0.0",
+         server_port=port,
+         share=False,
+         show_error=True,
+         ssr_mode=False,  # Disable SSR for better container compatibility
+         show_api=False   # Reduce overhead
+     )
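+
+ # To run locally (assuming this Space's dependencies are installed):
+ #   export GOOGLE_API_KEY=...   # enables full Gemini-powered translation
+ #   python app.py               # serves on http://0.0.0.0:7860 by default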