Ttspro7

Sleeping

App Files Files Community

Hamed744 committed on Dec 13, 2025

Commit

45831db

verified ·

1 Parent(s): ff24def

Update app.py

Browse files

Files changed (1) hide show

app.py +56 -59

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# app.py - نسخه Worker با پشتیبانی از Gemini Live و Standard
 import os
 import sys
@@ -56,7 +56,7 @@ def get_random_api_key_and_client():
     return key_to_use, client
 FIXED_MODEL_NAME_STANDARD = "gemini-2.5-flash-preview-tts"
-FIXED_MODEL_NAME_LIVE = "models/gemini-2.5-flash-native-audio-preview-12-2025" # مدل لایف
 DEFAULT_MAX_CHUNK_SIZE = 3800
 DEFAULT_SLEEP_BETWEEN_REQUESTS = 5
@@ -117,14 +117,9 @@ def merge_audio_files_func(file_paths, output_path):
         return True
     except Exception as e: logging.error(f"❌ خطا در ادغام فایل‌های صوتی: {e}"); return False
-# --- منطق Gemini Live (جدید) ---
 async def generate_audio_live_with_retry(text, prompt, voice, session_id):
-    """
-    اتصال به مدل لایف با استفاده از وب‌سوکت و دریافت صدا.
-    """
-    MAX_RETRIES = 50
-    # تنظیمات مدل لایف
     live_config = types.LiveConnectConfig(
         response_modalities=["AUDIO"],
         speech_config=types.SpeechConfig(
@@ -133,68 +128,53 @@ async def generate_audio_live_with_retry(text, prompt, voice, session_id):
             )
         ),
     )
     for attempt in range(MAX_RETRIES):
         selected_api_key, _ = get_random_api_key_and_client()
         if not selected_api_key: break
-        # برای لایف نیاز به کلاینت Async جدید داریم که با کلید خاص ساخته شود
-        # چون کلاینت‌های کش شده ممکن است سینک باشند یا تنظیمات متفاوتی داشته باشند
         client = genai.Client(http_options={"api_version": "v1beta"}, api_key=selected_api_key)
         unique_id_for_req = str(uuid.uuid4())[:8]
         tts_prompt = f"Please read the following text naturally: '{text}' [ID: {unique_id_for_req}]"
-        if prompt:
-            tts_prompt = f"With a {prompt} tone, please read: '{text}'"
         try:
             logging.info(f"[{session_id}] (Live) تلاش {attempt+1} با کلید ...{selected_api_key[-4:]}")
             audio_buffer = bytearray()
             async with client.aio.live.connect(model=FIXED_MODEL_NAME_LIVE, config=live_config) as session:
                 await session.send(input=tts_prompt, end_of_turn=True)
-                # دریافت استریم
                 async for response in session.receive():
-                    if response.data:
-                        audio_buffer.extend(response.data)
-                    if response.text:
-                        pass # متن را نادیده می‌گیریم
             if len(audio_buffer) > 0:
-                logging.info(f"[{session_id}] ✅ (Live) دریافت موفقیت‌آمیز {len(audio_buffer)} بایت.")
                 return audio_buffer
-            else:
-                raise Exception("بافر صوتی خالی بود.")
         except Exception as e:
             logging.warning(f"[{session_id}] ⚠️ (Live) خطا در تلاش {attempt+1}: {e}")
             time.sleep(0.5)
     return None
 def save_pcm_to_wav(pcm_data, output_path):
-    """ذخیره دیتای خام PCM مدل لایف به فرمت WAV استاندارد"""
     try:
         with wave.open(output_path, 'wb') as wf:
-            wf.setnchannels(1)     # Mono
-            wf.setsampwidth(2)     # 16-bit
-            wf.setframerate(24000) # 24kHz (استاندارد مدل لایف)
             wf.writeframes(pcm_data)
         return True
     except Exception as e:
         logging.error(f"خطا در تبدیل PCM به WAV: {e}")
         return False
-# --- منطق Gemini Standard (قدیمی) ---
-def generate_audio_chunk_standard_with_retry(chunk_text, prompt_text, voice, temp, session_id):
-    if not ALL_API_KEYS: raise Exception("هیچ کلید API برای پردازش در دسترس نیست.")
-    MAX_RETRIES = 50
     for attempt in range(MAX_RETRIES):
         selected_api_key, client = get_random_api_key_and_client()
         if not client: break
         try:
             final_text = f'{chunk_text}({prompt_text})' if prompt_text and prompt_text.strip() else chunk_text
             contents = [types.Content(role="user", parts=[types.Part.from_text(text=final_text)])]
             config = types.GenerateContentConfig(temperature=temp, response_modalities=["audio"],
@@ -203,42 +183,39 @@ def generate_audio_chunk_standard_with_retry(chunk_text, prompt_text, voice, tem
             response = client.models.generate_content(model=FIXED_MODEL_NAME_STANDARD, contents=contents, config=config)
             if response.candidates and response.candidates[0].content and response.candidates[0].content.parts and response.candidates[0].content.parts[0].inline_data:
-                logging.info(f"[{session_id}] ✅ (Standard) قطعه تولید شد.")
                 return response.candidates[0].content.parts[0].inline_data
         except Exception as e:
             logging.warning(f"[{session_id}] ⚠️ (Standard) خطا در تلاش {attempt+1}: {e}")
             time.sleep(0.5)
     return None
-def core_generate_audio(text_input, prompt_input, selected_voice, temperature_val, session_id, use_live_model=False):
-    logging.info(f"[{session_id}] 🚀 شروع پردازش (Live Mode: {use_live_model})")
     temp_dir = f"temp_{session_id}"
     os.makedirs(temp_dir, exist_ok=True)
     output_base_name = f"{temp_dir}/audio_session_{session_id}"
     final_output_path = f"output_{session_id}.wav"
     try:
-        # --- مسیر ۱: استفاده از مدل لایف ---
         if use_live_model:
-            # در مدل لایف، متن را تکه تکه نمی‌کنیم (چون شرط <500 کاراکتر چک شده)
-            # باید تابع async را در محیط sync اجرا کنیم
             pcm_data = asyncio.run(generate_audio_live_with_retry(text_input, prompt_input, selected_voice, session_id))
-            if pcm_data:
-                if save_pcm_to_wav(pcm_data, final_output_path):
-                    logging.info(f"[{session_id}] ✅ فایل لایف ذخیره شد.")
-                    return final_output_path
-                else:
-                    raise Exception("خطا در ذخیره فایل WAV لایف.")
             else:
-                raise Exception("تولید صدا با مدل لایف پس از تلاش‌های مکرر ناموفق بود.")
-        # --- مسیر ۲: استفاده از مدل استاندارد ---
         else:
             text_chunks = smart_text_split(text_input, DEFAULT_MAX_CHUNK_SIZE)
             generated_files = []
             for i, chunk in enumerate(text_chunks):
-                inline_data = generate_audio_chunk_standard_with_retry(chunk, prompt_input, selected_voice, temperature_val, session_id)
                 if inline_data:
                     data_buffer = inline_data.data
                     ext = mimetypes.guess_extension(inline_data.mime_type) or ".wav"
@@ -248,9 +225,25 @@ def core_generate_audio(text_input, prompt_input, selected_voice, temperature_va
                     fpath = save_binary_file(f"{output_base_name}_part{i+1:03d}{ext}", data_buffer)
                     if fpath: generated_files.append(fpath)
                 else:
-                    raise Exception(f"تولید قطعه {i+1} استاندارد ناموفق بود.")
-                if i < len(text_chunks) - 1: time.sleep(DEFAULT_SLEEP_BETWEEN_REQUESTS)
             if not generated_files: raise Exception("هیچ فایلی تولید نشد.")
             if len(generated_files) > 1:
@@ -276,7 +269,9 @@ class TTSRequest(BaseModel):
     prompt: str | None = ""
     speaker: str
     temperature: float
-    use_live_model: bool = False # پارامتر جدید
 @app.post("/generate")
 def generate_audio_endpoint(request: TTSRequest):
@@ -288,7 +283,9 @@ def generate_audio_endpoint(request: TTSRequest):
             selected_voice=request.speaker,
             temperature_val=request.temperature,
             session_id=session_id,
-            use_live_model=request.use_live_model
         )
         if final_path and os.path.exists(final_path):
             from fastapi.responses import FileResponse

+# app.py - نسخه Worker با پشتیبانی از Fallback و Retry Limit
 import os
 import sys
     return key_to_use, client
 FIXED_MODEL_NAME_STANDARD = "gemini-2.5-flash-preview-tts"
+FIXED_MODEL_NAME_LIVE = "models/gemini-2.5-flash-native-audio-preview-12-2025"
 DEFAULT_MAX_CHUNK_SIZE = 3800
 DEFAULT_SLEEP_BETWEEN_REQUESTS = 5
         return True
     except Exception as e: logging.error(f"❌ خطا در ادغام فایل‌های صوتی: {e}"); return False
+# --- منطق Gemini Live ---
 async def generate_audio_live_with_retry(text, prompt, voice, session_id):
+    MAX_RETRIES = 50
     live_config = types.LiveConnectConfig(
         response_modalities=["AUDIO"],
         speech_config=types.SpeechConfig(
             )
         ),
     )
     for attempt in range(MAX_RETRIES):
         selected_api_key, _ = get_random_api_key_and_client()
         if not selected_api_key: break
         client = genai.Client(http_options={"api_version": "v1beta"}, api_key=selected_api_key)
         unique_id_for_req = str(uuid.uuid4())[:8]
         tts_prompt = f"Please read the following text naturally: '{text}' [ID: {unique_id_for_req}]"
+        if prompt: tts_prompt = f"With a {prompt} tone, please read: '{text}'"
         try:
             logging.info(f"[{session_id}] (Live) تلاش {attempt+1} با کلید ...{selected_api_key[-4:]}")
             audio_buffer = bytearray()
             async with client.aio.live.connect(model=FIXED_MODEL_NAME_LIVE, config=live_config) as session:
                 await session.send(input=tts_prompt, end_of_turn=True)
                 async for response in session.receive():
+                    if response.data: audio_buffer.extend(response.data)
             if len(audio_buffer) > 0:
+                logging.info(f"[{session_id}] ✅ (Live) موفقیت‌آمیز.")
                 return audio_buffer
+            else: raise Exception("بافر صوتی خالی بود.")
         except Exception as e:
             logging.warning(f"[{session_id}] ⚠️ (Live) خطا در تلاش {attempt+1}: {e}")
             time.sleep(0.5)
     return None
 def save_pcm_to_wav(pcm_data, output_path):
     try:
         with wave.open(output_path, 'wb') as wf:
+            wf.setnchannels(1)
+            wf.setsampwidth(2)
+            wf.setframerate(24000)
             wf.writeframes(pcm_data)
         return True
     except Exception as e:
         logging.error(f"خطا در تبدیل PCM به WAV: {e}")
         return False
+# --- منطق Gemini Standard (اصلاح شده با retry_limit) ---
+def generate_audio_chunk_standard_with_retry(chunk_text, prompt_text, voice, temp, session_id, retry_limit):
+    if not ALL_API_KEYS: raise Exception("هیچ کلید API در دسترس نیست.")
+    # استفاده از محدودیت تعیین شده توسط Manager
+    MAX_RETRIES = retry_limit
     for attempt in range(MAX_RETRIES):
         selected_api_key, client = get_random_api_key_and_client()
         if not client: break
         try:
+            # logging.info(f"[{session_id}] (Standard) تلاش {attempt+1}/{MAX_RETRIES} با کلید ...{selected_api_key[-4:]}")
             final_text = f'{chunk_text}({prompt_text})' if prompt_text and prompt_text.strip() else chunk_text
             contents = [types.Content(role="user", parts=[types.Part.from_text(text=final_text)])]
             config = types.GenerateContentConfig(temperature=temp, response_modalities=["audio"],
             response = client.models.generate_content(model=FIXED_MODEL_NAME_STANDARD, contents=contents, config=config)
             if response.candidates and response.candidates[0].content and response.candidates[0].content.parts and response.candidates[0].content.parts[0].inline_data:
+                logging.info(f"[{session_id}] ✅ (Standard) موفقیت در تلاش {attempt+1}.")
                 return response.candidates[0].content.parts[0].inline_data
         except Exception as e:
             logging.warning(f"[{session_id}] ⚠️ (Standard) خطا در تلاش {attempt+1}: {e}")
             time.sleep(0.5)
     return None
+def core_generate_audio(text_input, prompt_input, selected_voice, temperature_val, session_id, use_live_model=False, retry_limit=50, fallback_to_live=False):
+    logging.info(f"[{session_id}] 🚀 شروع: Live={use_live_model}, Retry={retry_limit}, Fallback={fallback_to_live}")
     temp_dir = f"temp_{session_id}"
     os.makedirs(temp_dir, exist_ok=True)
     output_base_name = f"{temp_dir}/audio_session_{session_id}"
     final_output_path = f"output_{session_id}.wav"
     try:
+        # 1. اگر دستور مستقیم استفاده از لایف باشد (مثلاً کاربر رایگان)
         if use_live_model:
             pcm_data = asyncio.run(generate_audio_live_with_retry(text_input, prompt_input, selected_voice, session_id))
+            if pcm_data and save_pcm_to_wav(pcm_data, final_output_path):
+                return final_output_path
             else:
+                raise Exception("تولید صدا با مدل لایف ناموفق بود.")
+        # 2. استفاده از مدل استاندارد
         else:
             text_chunks = smart_text_split(text_input, DEFAULT_MAX_CHUNK_SIZE)
             generated_files = []
+            standard_failed = False
             for i, chunk in enumerate(text_chunks):
+                # تلاش با مدل استاندارد به تعداد retry_limit
+                inline_data = generate_audio_chunk_standard_with_retry(chunk, prompt_input, selected_voice, temperature_val, session_id, retry_limit)
                 if inline_data:
                     data_buffer = inline_data.data
                     ext = mimetypes.guess_extension(inline_data.mime_type) or ".wav"
                     fpath = save_binary_file(f"{output_base_name}_part{i+1:03d}{ext}", data_buffer)
                     if fpath: generated_files.append(fpath)
                 else:
+                    standard_failed = True
+                    break # شکست در تولید یکی از چانک‌ها
+            # 3. بررسی شکست و Fallback
+            if standard_failed:
+                if fallback_to_live:
+                    logging.info(f"[{session_id}] 🔄 مدل استاندارد شکست خورد. سوییچ به مدل لایف (Fallback)...")
+                    # پاکسازی فایل‌های ناقص قبلی
+                    generated_files = []
+                    # فراخوانی مدل لایف برای کل متن
+                    pcm_data = asyncio.run(generate_audio_live_with_retry(text_input, prompt_input, selected_voice, session_id))
+                    if pcm_data and save_pcm_to_wav(pcm_data, final_output_path):
+                        return final_output_path
+                    else:
+                        raise Exception("هم مدل استاندارد و هم مدل لایف (Fallback) شکست خوردند.")
+                else:
+                    raise Exception(f"تولید صدا با مدل استاندارد پس از {retry_limit} تلاش ناموفق بود.")
+            # اگر استاندارد موفق بود، فایل‌ها را ادغام کن
             if not generated_files: raise Exception("هیچ فایلی تولید نشد.")
             if len(generated_files) > 1:
     prompt: str | None = ""
     speaker: str
     temperature: float
+    use_live_model: bool = False
+    retry_limit: int = 50       # پارامتر جدید
+    fallback_to_live: bool = False # پارامتر جدید
 @app.post("/generate")
 def generate_audio_endpoint(request: TTSRequest):
             selected_voice=request.speaker,
             temperature_val=request.temperature,
             session_id=session_id,
+            use_live_model=request.use_live_model,
+            retry_limit=request.retry_limit,
+            fallback_to_live=request.fallback_to_live
         )
         if final_path and os.path.exists(final_path):
             from fastapi.responses import FileResponse