Ttspro7

Sleeping

App Files Files Community

Hamed744 committed on Dec 13, 2025

Commit

ff24def

verified ·

1 Parent(s): 54a4d17

Update app.py

Browse files

Files changed (1) hide show

app.py +131 -68

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# app.py - نسخه نهایی: انتخاب تصادفی + تلاش ۵۰ باره + بدون حذف کلیدها
 import os
 import sys
@@ -12,6 +12,8 @@ import logging
 import mimetypes
 import threading
 import random
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 from google import genai
@@ -36,35 +38,27 @@ def _init_api_keys():
     global ALL_API_KEYS
     all_keys_string = os.environ.get("ALL_GEMINI_API_KEYS")
     if all_keys_string:
-        # کلیدها را فقط می‌خوانیم و در لیست نگه می‌داریم
         ALL_API_KEYS = [key.strip() for key in all_keys_string.split(',') if key.strip()]
     logging.info(f"✅ تعداد {len(ALL_API_KEYS)} کلید API جیمینای شناسایی و بارگذاری شد.")
     if not ALL_API_KEYS:
         logging.warning("⛔️ هشدار: هیچ Secret با نام ALL_GEMINI_API_KEYS یافت نشد!")
 def get_random_api_key_and_client():
     """Pick one configured API key at random and return it with its client.

     A client is built the first time a key is drawn and then reused from
     ``GEMINI_CLIENTS_CACHE``; cache access is guarded by
     ``CLIENT_CACHE_LOCK``.

     Returns:
         A ``(key, client)`` tuple, or ``(None, None)`` when ``ALL_API_KEYS``
         is empty.
     """
     if not ALL_API_KEYS:
         return None, None

     # Uniform random pick from the fixed key list.
     chosen_key = random.choice(ALL_API_KEYS)
     with CLIENT_CACHE_LOCK:
         if chosen_key not in GEMINI_CLIENTS_CACHE:
             # First use of this key: build its client and remember it.
             GEMINI_CLIENTS_CACHE[chosen_key] = genai.Client(api_key=chosen_key)
         gemini_client = GEMINI_CLIENTS_CACHE[chosen_key]
     return chosen_key, gemini_client
-FIXED_MODEL_NAME = "gemini-2.5-flash-preview-tts" # Model id passed to client.models.generate_content
 DEFAULT_MAX_CHUNK_SIZE = 3800 # Size limit handed to smart_text_split
-DEFAULT_SLEEP_BETWEEN_REQUESTS = 5 # Seconds between chunks; slightly reduced because keys are picked at random
 def save_binary_file(file_name, data):
     try:
@@ -123,82 +117,152 @@ def merge_audio_files_func(file_paths, output_path):
         return True
     except Exception as e: logging.error(f"❌ خطا در ادغام فایل‌های صوتی: {e}"); return False
-# --- منطق اصلی با انتخاب تصادفی و حفظ کلیدها ---
def generate_audio_chunk_with_retry(chunk_text, prompt_text, voice, temp, session_id):
    """Synthesize one text chunk, retrying with randomly chosen API keys.

    Each attempt draws a key via ``get_random_api_key_and_client`` and calls
    ``generate_content`` on ``FIXED_MODEL_NAME``. Keys are never removed from
    the pool after a failure.

    Args:
        chunk_text: Text to be spoken.
        prompt_text: Optional style hint; when non-blank it is appended to the
            text in parentheses.
        voice: Prebuilt voice name.
        temp: Sampling temperature.
        session_id: Short id used only to tag log lines.

    Returns:
        The ``inline_data`` of the first response part on success, or ``None``
        when every attempt failed or no client could be obtained.

    Raises:
        Exception: If no API keys are configured at all.
    """
    if not ALL_API_KEYS:
        raise Exception("هیچ کلید API برای پردازش در دسترس نیست.")

    MAX_RETRIES = 50
    RETRY_DELAY_SECONDS = 0.5

    # The request text does not depend on the attempt, so build it once.
    final_text = f'{chunk_text}({prompt_text})' if prompt_text and prompt_text.strip() else chunk_text

    for attempt in range(MAX_RETRIES):
        selected_api_key, client = get_random_api_key_and_client()
        if not client:
            logging.error(f"[{session_id}] کلاینت یافت نشد.")
            break

        try:
            contents = [types.Content(role="user", parts=[types.Part.from_text(text=final_text)])]
            config = types.GenerateContentConfig(temperature=temp, response_modalities=["audio"],
                speech_config=types.SpeechConfig(voice_config=types.VoiceConfig(
                    prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name=voice))))
            response = client.models.generate_content(model=FIXED_MODEL_NAME, contents=contents, config=config)

            candidates = response.candidates
            if candidates and candidates[0].content and candidates[0].content.parts and candidates[0].content.parts[0].inline_data:
                logging.info(f"[{session_id}] ✅ قطعه با موفقیت در تلاش {attempt+1} تولید شد.")
                return candidates[0].content.parts[0].inline_data

            # A reply without audio used to be retried immediately and
            # silently; record it and fall through to the shared back-off.
            logging.warning(f"[{session_id}] ⚠️ attempt {attempt+1} returned no audio (key ...{selected_api_key[-4:]})")
        except Exception as e:
            # Log and move on to another random key; the key stays in the pool.
            logging.warning(f"[{session_id}] ⚠️ خطا در تلاش {attempt+1} (کلید ...{selected_api_key[-4:]}): {e}")

        # Short pause before the next random pick (pointless after the last try).
        if attempt < MAX_RETRIES - 1:
            time.sleep(RETRY_DELAY_SECONDS)
    return None
def core_generate_audio(text_input, prompt_input, selected_voice, temperature_val, session_id):
    """Turn *text_input* into a single audio file and return its path.

    The text is split with ``smart_text_split``, each chunk is synthesized via
    ``generate_audio_chunk_with_retry`` and written into a per-session temp
    directory, and the parts are merged (or the single part moved) to
    ``output_<session_id>.wav``. The temp directory is always removed.

    Args:
        text_input: Text to synthesize; must not be blank.
        prompt_input: Optional style hint forwarded to the chunk generator.
        selected_voice: Prebuilt voice name.
        temperature_val: Sampling temperature.
        session_id: Short id used for file names and log tags.

    Returns:
        Path of the final audio file.

    Raises:
        ValueError: If the text is blank or cannot be split into chunks.
        Exception: If a chunk cannot be generated or no file was produced.
    """
    logging.info(f"[{session_id}] 🚀 شروع فرآیند تولید صدا.")

    # Validate before touching the filesystem: these checks raise outside the
    # try/finally below, so creating the temp dir first would leak it.
    if not text_input or not text_input.strip():
        raise ValueError("متن ورودی خالی است.")
    text_chunks = smart_text_split(text_input, DEFAULT_MAX_CHUNK_SIZE)
    if not text_chunks:
        raise ValueError("متن قابل پردازش به قطعات کوچکتر نیست.")

    temp_dir = f"temp_{session_id}"
    os.makedirs(temp_dir, exist_ok=True)
    output_base_name = f"{temp_dir}/audio_session_{session_id}"
    generated_files = []

    try:
        for i, chunk in enumerate(text_chunks):
            logging.info(f"[{session_id}] 🔊 پردازش قطعه {i+1}/{len(text_chunks)}...")
            inline_data = generate_audio_chunk_with_retry(chunk, prompt_input, selected_voice, temperature_val, session_id)
            if not inline_data:
                # All random-key attempts for this chunk were exhausted.
                raise Exception(f"تولید قطعه {i+1} پس از ۵۰ بار تلاش تصادفی ناموفق بود. ترافیک سرورها بسیار بالاست.")

            data_buffer = inline_data.data
            ext = mimetypes.guess_extension(inline_data.mime_type) or ".wav"
            if "audio/L" in inline_data.mime_type and ext == ".wav":
                # "audio/L..." payloads are passed through convert_to_wav first.
                data_buffer = convert_to_wav(data_buffer, inline_data.mime_type)
            if not ext.startswith("."):
                ext = "." + ext

            fpath = save_binary_file(f"{output_base_name}_part{i+1:03d}{ext}", data_buffer)
            if fpath:
                generated_files.append(fpath)
            else:
                # Keep the best-effort behaviour, but make the gap visible.
                logging.warning(f"[{session_id}] ⚠️ chunk {i+1} could not be saved and will be missing from the output.")

            if i < len(text_chunks) - 1:
                time.sleep(DEFAULT_SLEEP_BETWEEN_REQUESTS)

        if not generated_files:
            raise Exception("هیچ فایل صوتی تولید نشد.")

        final_output_path = f"output_{session_id}.wav"
        merged = (
            len(generated_files) > 1
            and PYDUB_AVAILABLE
            and merge_audio_files_func(generated_files, final_output_path)
        )
        if not merged:
            if len(generated_files) > 1:
                logging.warning(f"[{session_id}] ⚠️ merge unavailable or failed; returning only the first of {len(generated_files)} parts.")
            shutil.move(generated_files[0], final_output_path)

        logging.info(f"[{session_id}] ✅ فایل صوتی نهایی با موفقیت تولید شد: {os.path.basename(final_output_path)}")
        return final_output_path
    finally:
        if os.path.exists(temp_dir):
            shutil.rmtree(temp_dir)
@@ -212,34 +276,33 @@ class TTSRequest(BaseModel):
     prompt: str | None = ""
     speaker: str
     temperature: float
 @app.post("/generate")
 def generate_audio_endpoint(request: TTSRequest):
     """Handle ``POST /generate``: synthesize the request and stream the WAV.

     Args:
         request: Parsed request body (text, prompt, speaker, temperature).

     Returns:
         A ``FileResponse`` for the generated file; the file is deleted after
         the response has been sent.

     Raises:
         HTTPException: 500 when generation fails or no file was produced.
     """
     from fastapi import BackgroundTasks
     from fastapi.responses import FileResponse

     session_id = str(uuid.uuid4())[:8]
     logging.info(f"[{session_id}] 🏁 درخواست جدید API در این Worker دریافت شد.")

     try:
         final_path = core_generate_audio(
             text_input=request.text,
             prompt_input=request.prompt,
             selected_voice=request.speaker,
             temperature_val=request.temperature,
             session_id=session_id
         )
     except Exception as e:
         logging.error(f"[{session_id}] ❌ خطای کلی در Worker: {e}")
         raise HTTPException(status_code=500, detail=str(e)) from e

     if not final_path or not os.path.exists(final_path):
         # Raised outside the try so it is not caught and re-wrapped.
         detail = "خطا در تولید فایل صوتی در Worker."
         logging.error(f"[{session_id}] ❌ خطای کلی در Worker: {detail}")
         raise HTTPException(status_code=500, detail=detail)

     def _remove_output_file():
         """Delete the served file; a failure here must not affect the response."""
         try:
             os.remove(final_path)
         except OSError as cleanup_error:
             logging.warning(f"[{session_id}] could not remove {final_path}: {cleanup_error}")

     # `background` must be a task object that runs AFTER the body is sent.
     # The previous code called shutil.rmtree(...) eagerly and passed its
     # return value (None), so nothing was ever cleaned up.
     cleanup = BackgroundTasks()
     cleanup.add_task(_remove_output_file)
     return FileResponse(path=final_path, media_type='audio/wav', filename=os.path.basename(final_path), background=cleanup)
 @app.get("/")
 def health_check():
     """Answer ``GET /`` with a fixed payload showing the worker is up."""
     status_payload = dict(status="ok", message="TTS Worker is running.")
     return status_payload
# Logged once at import time, after all module-level setup above has run.
logging.info("✅✅✅ Application logic initialized successfully. Starting Uvicorn server...")

if __name__ == "__main__":
    # Listening port comes from the PORT environment variable (default 7860).
    listen_port = int(os.getenv("PORT", "7860"))
    uvicorn.run(app, host="0.0.0.0", port=listen_port, reload=False)

+# app.py - نسخه Worker با پشتیبانی از Gemini Live و Standard
 import os
 import sys
 import mimetypes
 import threading
 import random
+import asyncio
+import wave
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 from google import genai
     global ALL_API_KEYS
     all_keys_string = os.environ.get("ALL_GEMINI_API_KEYS")
     if all_keys_string:
         ALL_API_KEYS = [key.strip() for key in all_keys_string.split(',') if key.strip()]
     logging.info(f"✅ تعداد {len(ALL_API_KEYS)} کلید API جیمینای شناسایی و بارگذاری شد.")
     if not ALL_API_KEYS:
         logging.warning("⛔️ هشدار: هیچ Secret با نام ALL_GEMINI_API_KEYS یافت نشد!")
 def get_random_api_key_and_client():
     """Return a randomly selected API key together with its cached client.

     Clients live in ``GEMINI_CLIENTS_CACHE`` keyed by API key and are created
     on first use while holding ``CLIENT_CACHE_LOCK``.

     Returns:
         ``(key, client)``, or ``(None, None)`` if ``ALL_API_KEYS`` is empty.
     """
     if not ALL_API_KEYS:
         return None, None

     picked_key = random.choice(ALL_API_KEYS)
     with CLIENT_CACHE_LOCK:
         try:
             picked_client = GEMINI_CLIENTS_CACHE[picked_key]
         except KeyError:
             # Not cached yet: create the client, then store it for reuse.
             picked_client = genai.Client(api_key=picked_key)
             GEMINI_CLIENTS_CACHE[picked_key] = picked_client
     return picked_key, picked_client
+FIXED_MODEL_NAME_STANDARD = "gemini-2.5-flash-preview-tts" # Model id used by the generate_content (standard) path
+FIXED_MODEL_NAME_LIVE = "models/gemini-2.5-flash-native-audio-preview-12-2025" # Live model
 DEFAULT_MAX_CHUNK_SIZE = 3800 # Size limit handed to smart_text_split
+DEFAULT_SLEEP_BETWEEN_REQUESTS = 5 # Seconds slept between consecutive chunks
 def save_binary_file(file_name, data):
     try:
         return True
     except Exception as e: logging.error(f"❌ خطا در ادغام فایل‌های صوتی: {e}"); return False
+# --- منطق Gemini Live (جدید) ---
async def generate_audio_live_with_retry(text, prompt, voice, session_id):
    """Fetch speech for *text* from the Gemini Live model, retrying on failure.

    Each attempt draws a random API key, opens a Live session with
    ``client.aio.live.connect``, sends one text turn and concatenates every
    audio payload received from the stream.

    Args:
        text: Text to be read aloud (sent in a single turn, not chunked).
        prompt: Optional tone description; ignored when ``None`` or blank.
        voice: Prebuilt voice name.
        session_id: Short id used only to tag log lines.

    Returns:
        A ``bytearray`` with the received audio bytes, or ``None`` when every
        attempt failed or no API key is available.
        NOTE(review): ``save_pcm_to_wav`` treats these bytes as 24 kHz 16-bit
        mono PCM; confirm against the model's output format.
    """
    MAX_RETRIES = 50
    RETRY_DELAY_SECONDS = 0.5

    # Live-session settings: audio-only replies in the requested voice.
    live_config = types.LiveConnectConfig(
        response_modalities=["AUDIO"],
        speech_config=types.SpeechConfig(
            voice_config=types.VoiceConfig(
                prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name=voice)
            )
        ),
    )

    # Same rule as the standard path: a whitespace-only prompt is no prompt.
    has_tone_prompt = bool(prompt and prompt.strip())

    for attempt in range(MAX_RETRIES):
        selected_api_key, _ = get_random_api_key_and_client()
        if not selected_api_key:
            break

        # The Live path builds its own client pinned to v1beta instead of
        # reusing the cached one, which is created without http_options.
        client = genai.Client(http_options={"api_version": "v1beta"}, api_key=selected_api_key)

        if has_tone_prompt:
            tts_prompt = f"With a {prompt} tone, please read: '{text}'"
        else:
            unique_id_for_req = str(uuid.uuid4())[:8]
            tts_prompt = f"Please read the following text naturally: '{text}' [ID: {unique_id_for_req}]"

        try:
            logging.info(f"[{session_id}] (Live) تلاش {attempt+1} با کلید ...{selected_api_key[-4:]}")

            audio_buffer = bytearray()
            async with client.aio.live.connect(model=FIXED_MODEL_NAME_LIVE, config=live_config) as session:
                await session.send(input=tts_prompt, end_of_turn=True)

                # Collect the audio stream; any text in the replies is ignored.
                async for response in session.receive():
                    if response.data:
                        audio_buffer.extend(response.data)

            if audio_buffer:
                logging.info(f"[{session_id}] ✅ (Live) دریافت موفقیت‌آمیز {len(audio_buffer)} بایت.")
                return audio_buffer
            raise Exception("بافر صوتی خالی بود.")
        except Exception as e:
            logging.warning(f"[{session_id}] ⚠️ (Live) خطا در تلاش {attempt+1}: {e}")
            # Inside a coroutine the pause must yield to the event loop;
            # time.sleep would block it. No pause is needed after the last try.
            if attempt < MAX_RETRIES - 1:
                await asyncio.sleep(RETRY_DELAY_SECONDS)
    return None
def save_pcm_to_wav(pcm_data, output_path, channels=1, sample_width=2, frame_rate=24000):
    """Wrap raw PCM bytes in a WAV container and write it to *output_path*.

    The defaults (mono, 16-bit, 24 kHz) are the values this function used to
    hard-code for Live-model audio; they can now be overridden per call.

    Args:
        pcm_data: Raw PCM frames (``bytes`` or ``bytearray``).
        output_path: Destination file path.
        channels: Number of audio channels.
        sample_width: Bytes per sample (2 means 16-bit).
        frame_rate: Samples per second.

    Returns:
        ``True`` when the file was written, ``False`` (after logging the
        error) on any failure.
    """
    try:
        with wave.open(output_path, 'wb') as wf:
            wf.setnchannels(channels)
            wf.setsampwidth(sample_width)
            wf.setframerate(frame_rate)
            wf.writeframes(pcm_data)
        return True
    except Exception as e:
        logging.error(f"خطا در تبدیل PCM به WAV: {e}")
        return False
+# --- منطق Gemini Standard (قدیمی) ---
def generate_audio_chunk_standard_with_retry(chunk_text, prompt_text, voice, temp, session_id):
    """Synthesize one chunk with the standard TTS model, retrying on failure.

    Each attempt draws a key via ``get_random_api_key_and_client`` and calls
    ``generate_content`` on ``FIXED_MODEL_NAME_STANDARD``.

    Args:
        chunk_text: Text to be spoken.
        prompt_text: Optional style hint; when non-blank it is appended to the
            text in parentheses.
        voice: Prebuilt voice name.
        temp: Sampling temperature.
        session_id: Short id used only to tag log lines.

    Returns:
        The ``inline_data`` of the first response part on success, or ``None``
        when every attempt failed or no client could be obtained.

    Raises:
        Exception: If no API keys are configured at all.
    """
    if not ALL_API_KEYS:
        raise Exception("هیچ کلید API برای پردازش در دسترس نیست.")

    MAX_RETRIES = 50
    RETRY_DELAY_SECONDS = 0.5

    # The request text does not depend on the attempt, so build it once.
    final_text = f'{chunk_text}({prompt_text})' if prompt_text and prompt_text.strip() else chunk_text

    for attempt in range(MAX_RETRIES):
        selected_api_key, client = get_random_api_key_and_client()
        if not client:
            break

        try:
            contents = [types.Content(role="user", parts=[types.Part.from_text(text=final_text)])]
            config = types.GenerateContentConfig(temperature=temp, response_modalities=["audio"],
                speech_config=types.SpeechConfig(voice_config=types.VoiceConfig(
                    prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name=voice))))
            response = client.models.generate_content(model=FIXED_MODEL_NAME_STANDARD, contents=contents, config=config)

            candidates = response.candidates
            if candidates and candidates[0].content and candidates[0].content.parts and candidates[0].content.parts[0].inline_data:
                logging.info(f"[{session_id}] ✅ (Standard) قطعه تولید شد.")
                return candidates[0].content.parts[0].inline_data

            # A reply without audio used to be retried immediately and
            # silently; record it and fall through to the shared back-off.
            logging.warning(f"[{session_id}] ⚠️ (Standard) attempt {attempt+1} returned no audio (key ...{selected_api_key[-4:]})")
        except Exception as e:
            logging.warning(f"[{session_id}] ⚠️ (Standard) خطا در تلاش {attempt+1}: {e}")

        # Brief pause between attempts (pointless after the last one).
        if attempt < MAX_RETRIES - 1:
            time.sleep(RETRY_DELAY_SECONDS)
    return None
def core_generate_audio(text_input, prompt_input, selected_voice, temperature_val, session_id, use_live_model=False):
    """Produce ``output_<session_id>.wav`` for *text_input* and return its path.

    Two routes are supported:

    * Live (``use_live_model=True``): the whole text is sent in one Live
      session and the returned bytes are written with ``save_pcm_to_wav``.
    * Standard: the text is split with ``smart_text_split``, each chunk is
      synthesized and saved into a per-session temp directory, then the parts
      are merged (or the single part moved) to the output path.

    The temp directory is removed on every exit path.

    Args:
        text_input: Text to synthesize; must not be blank.
        prompt_input: Optional style/tone hint.
        selected_voice: Prebuilt voice name.
        temperature_val: Sampling temperature (standard route only).
        session_id: Short id used for file names and log tags.
        use_live_model: Select the Live route instead of the standard one.

    Returns:
        Path of the final WAV file.

    Raises:
        ValueError: If the text is blank.
        Exception: If synthesis or saving fails on either route.
    """
    logging.info(f"[{session_id}] 🚀 شروع پردازش (Live Mode: {use_live_model})")

    # Reject blank input up front (before any temp dir exists) so neither
    # route spends its retry budget on an empty request.
    if not text_input or not text_input.strip():
        raise ValueError("متن ورودی خالی است.")

    temp_dir = f"temp_{session_id}"
    os.makedirs(temp_dir, exist_ok=True)
    output_base_name = f"{temp_dir}/audio_session_{session_id}"
    final_output_path = f"output_{session_id}.wav"

    try:
        # --- Route 1: Live model ---
        if use_live_model:
            # The text is not chunked here; the caller is expected to have
            # enforced the length limit already. The coroutine is driven with
            # asyncio.run because this function is synchronous.
            pcm_data = asyncio.run(generate_audio_live_with_retry(text_input, prompt_input, selected_voice, session_id))
            if not pcm_data:
                raise Exception("تولید صدا با مدل لایف پس از تلاش‌های مکرر ناموفق بود.")
            if not save_pcm_to_wav(pcm_data, final_output_path):
                raise Exception("خطا در ذخیره فایل WAV لایف.")
            logging.info(f"[{session_id}] ✅ فایل لایف ذخیره شد.")
            return final_output_path

        # --- Route 2: standard model ---
        text_chunks = smart_text_split(text_input, DEFAULT_MAX_CHUNK_SIZE)
        generated_files = []

        for i, chunk in enumerate(text_chunks):
            inline_data = generate_audio_chunk_standard_with_retry(chunk, prompt_input, selected_voice, temperature_val, session_id)
            if not inline_data:
                raise Exception(f"تولید قطعه {i+1} استاندارد ناموفق بود.")

            data_buffer = inline_data.data
            ext = mimetypes.guess_extension(inline_data.mime_type) or ".wav"
            if "audio/L" in inline_data.mime_type and ext == ".wav":
                # "audio/L..." payloads are passed through convert_to_wav first.
                data_buffer = convert_to_wav(data_buffer, inline_data.mime_type)
            if not ext.startswith("."):
                ext = "." + ext

            fpath = save_binary_file(f"{output_base_name}_part{i+1:03d}{ext}", data_buffer)
            if fpath:
                generated_files.append(fpath)
            else:
                # Keep the best-effort behaviour, but make the gap visible.
                logging.warning(f"[{session_id}] ⚠️ chunk {i+1} could not be saved and will be missing from the output.")

            if i < len(text_chunks) - 1:
                time.sleep(DEFAULT_SLEEP_BETWEEN_REQUESTS)

        if not generated_files:
            raise Exception("هیچ فایلی تولید نشد.")

        merged = (
            len(generated_files) > 1
            and PYDUB_AVAILABLE
            and merge_audio_files_func(generated_files, final_output_path)
        )
        if not merged:
            if len(generated_files) > 1:
                logging.warning(f"[{session_id}] ⚠️ merge unavailable or failed; returning only the first of {len(generated_files)} parts.")
            shutil.move(generated_files[0], final_output_path)

        return final_output_path
    finally:
        if os.path.exists(temp_dir):
            shutil.rmtree(temp_dir)
     prompt: str | None = ""
     speaker: str
     temperature: float
+    use_live_model: bool = False # پارامتر جدید
 @app.post("/generate")
 def generate_audio_endpoint(request: TTSRequest):
     """Handle ``POST /generate``: synthesize the request and stream the WAV.

     Args:
         request: Parsed request body (text, prompt, speaker, temperature,
             use_live_model).

     Returns:
         A ``FileResponse`` for the generated file; the file is deleted after
         the response has been sent.

     Raises:
         HTTPException: 500 when generation fails or no file was produced.
     """
     from fastapi import BackgroundTasks
     from fastapi.responses import FileResponse

     session_id = str(uuid.uuid4())[:8]

     try:
         final_path = core_generate_audio(
             text_input=request.text,
             prompt_input=request.prompt,
             selected_voice=request.speaker,
             temperature_val=request.temperature,
             session_id=session_id,
             use_live_model=request.use_live_model
         )
     except Exception as e:
         logging.error(f"[{session_id}] ❌ خطا: {e}")
         raise HTTPException(status_code=500, detail=str(e)) from e

     if not final_path or not os.path.exists(final_path):
         # Raised outside the try so it is not caught and re-wrapped.
         detail = "خطا در تولید فایل صوتی."
         logging.error(f"[{session_id}] ❌ خطا: {detail}")
         raise HTTPException(status_code=500, detail=detail)

     def _remove_output_file():
         """Delete the served file; a failure here must not affect the response."""
         try:
             os.remove(final_path)
         except OSError as cleanup_error:
             logging.warning(f"[{session_id}] could not remove {final_path}: {cleanup_error}")

     # `background` must be a task object that runs AFTER the body is sent.
     # The previous code called shutil.rmtree(...) eagerly and passed its
     # return value (None), so nothing was ever cleaned up.
     cleanup = BackgroundTasks()
     cleanup.add_task(_remove_output_file)
     return FileResponse(path=final_path, media_type='audio/wav', filename=os.path.basename(final_path), background=cleanup)
 @app.get("/")
 def health_check():
     """Liveness probe for ``GET /``; always returns the same static payload."""
     payload = {}
     payload["status"] = "ok"
     payload["message"] = "TTS Worker is running."
     return payload
 if __name__ == "__main__":
     # Serve on all interfaces; the port is taken from PORT (default 7860).
     server_port = int(os.getenv("PORT", "7860"))
     uvicorn.run(app, host="0.0.0.0", port=server_port, reload=False)