| |
|
|
| import os |
| import sys |
| import traceback |
| import re |
| import struct |
| import time |
| import uuid |
| import shutil |
| import logging |
| import mimetypes |
| import threading |
| import random |
| import asyncio |
| import wave |
| from fastapi import FastAPI, HTTPException |
| from pydantic import BaseModel |
| from google import genai |
| from google.genai import types |
| import uvicorn |
|
|
| try: |
| from pydub import AudioSegment |
| PYDUB_AVAILABLE = True |
| except ImportError: |
| PYDUB_AVAILABLE = False |
|
|
# Root logger: INFO level, timestamped lines.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
)

# Maps an API key to its genai client; accessed under CLIENT_CACHE_LOCK.
GEMINI_CLIENTS_CACHE = {}
CLIENT_CACHE_LOCK = threading.Lock()

# Populated by _init_api_keys() from the ALL_GEMINI_API_KEYS environment variable.
ALL_API_KEYS: list[str] = []
|
|
def _init_api_keys():
    """Load the Gemini API keys from the ALL_GEMINI_API_KEYS environment variable.

    The variable is read as a comma-separated list; surrounding whitespace is
    stripped and empty entries are dropped. The result replaces the
    module-level ALL_API_KEYS. When the variable is unset or empty the list is
    left untouched, and a warning is logged if it ends up empty.
    """
    global ALL_API_KEYS
    raw_value = os.environ.get("ALL_GEMINI_API_KEYS")
    if raw_value:
        trimmed = (piece.strip() for piece in raw_value.split(','))
        ALL_API_KEYS = [piece for piece in trimmed if piece]
        logging.info(f"✅ تعداد {len(ALL_API_KEYS)} کلید API جیمینای شناسایی و بارگذاری شد.")
    if not ALL_API_KEYS:
        logging.warning("⛔️ هشدار: هیچ Secret با نام ALL_GEMINI_API_KEYS یافت نشد!")
|
|
def get_random_api_key_and_client():
    """Pick a random API key and return it together with its genai client.

    Clients are created on first use and kept in GEMINI_CLIENTS_CACHE; the
    cache is only touched while CLIENT_CACHE_LOCK is held.

    Returns:
        (key, client), or (None, None) when ALL_API_KEYS is empty.
    """
    if not ALL_API_KEYS:
        return None, None

    chosen_key = random.choice(ALL_API_KEYS)
    with CLIENT_CACHE_LOCK:
        if chosen_key not in GEMINI_CLIENTS_CACHE:
            GEMINI_CLIENTS_CACHE[chosen_key] = genai.Client(api_key=chosen_key)
        client = GEMINI_CLIENTS_CACHE[chosen_key]
    return chosen_key, client
|
|
# Model passed to client.models.generate_content on the standard path.
FIXED_MODEL_NAME_STANDARD = "gemini-2.5-flash-preview-tts"
# Model passed to client.aio.live.connect on the Live path.
FIXED_MODEL_NAME_LIVE = "models/gemini-2.5-flash-native-audio-preview-12-2025"
# Chunk size handed to smart_text_split by core_generate_audio.
DEFAULT_MAX_CHUNK_SIZE = 3800
# NOTE(review): not referenced anywhere in the visible code; presumably seconds.
DEFAULT_SLEEP_BETWEEN_REQUESTS = 5
|
|
def save_binary_file(file_name, data):
    """Write *data* to *file_name* in binary mode.

    Returns:
        file_name on success; None if anything goes wrong (the error is
        logged rather than raised).
    """
    try:
        handle = open(file_name, "wb")
        with handle:
            handle.write(data)
    except Exception as exc:
        logging.error(f"❌ خطا در ذخیره فایل {file_name}: {exc}")
        return None
    return file_name
|
|
def convert_to_wav(audio_data: bytes, mime_type: str) -> bytes:
    """Prefix raw PCM bytes with a 44-byte RIFF/WAVE header.

    Sample width and rate come from parse_audio_mime_type(mime_type); the
    channel count is fixed at 1 and the format tag is 1 (PCM).

    Returns:
        The header followed by the unmodified audio_data.
    """
    fmt = parse_audio_mime_type(mime_type)
    sample_bits = fmt["bits_per_sample"]
    sample_rate = fmt["rate"]
    channels = 1
    payload_len = len(audio_data)
    frame_bytes = channels * (sample_bits // 8)

    riff_header = struct.pack(
        "<4sI4s4sIHHIIHH4sI",
        b"RIFF",
        36 + payload_len,  # RIFF chunk size: rest of header plus payload
        b"WAVE",
        b"fmt ",
        16,  # size of the fmt sub-chunk
        1,  # audio format tag
        channels,
        sample_rate,
        sample_rate * frame_bytes,  # byte rate
        frame_bytes,  # block align
        sample_bits,
        b"data",
        payload_len,
    )
    return riff_header + audio_data
|
|
def _parse_positive_int(text, fallback):
    """Return int(text) when it is a positive integer, otherwise *fallback*."""
    try:
        value = int(text)
    except ValueError:
        return fallback
    return value if value > 0 else fallback


def parse_audio_mime_type(mime_type: str) -> dict[str, int]:
    """Extract sample width and sample rate from a MIME type such as
    ``audio/L16;codec=pcm;rate=24000``.

    Both the ``audio/L<bits>`` prefix and the ``rate=<hz>`` parameter are
    matched case-insensitively. Missing, malformed, zero or negative values
    keep the defaults (16 bits, 24000 Hz) so a WAV header built from the
    result is always well-formed.

    Args:
        mime_type: MIME type string; None or empty yields the defaults.

    Returns:
        ``{"bits_per_sample": int, "rate": int}``.
    """
    bits, rate = 16, 24000
    for raw_param in (mime_type or "").split(";"):
        param = raw_param.strip().lower()
        if param.startswith("rate="):
            rate = _parse_positive_int(param[len("rate="):], rate)
        elif param.startswith("audio/l"):
            bits = _parse_positive_int(param[len("audio/l"):], bits)
    return {"bits_per_sample": bits, "rate": rate}
|
|
def smart_text_split(text, max_size=3800):
    """Return *text* wrapped in a one-element list.

    max_size is accepted but not used by the current body, so no splitting
    takes place regardless of the text length.
    """
    chunks = [text]
    return chunks
|
|
def merge_audio_files_func(file_paths, output_path):
    """Concatenate audio files into one WAV written to *output_path*.

    A 150 ms silence follows every file except the one at the last index.
    Paths that do not exist are skipped with a warning.

    Returns:
        True when the export succeeded; False when pydub is unavailable or
        any step raised (the error is logged).
    """
    if not PYDUB_AVAILABLE:
        logging.warning("⚠️ pydub برای ادغام در دسترس نیست.")
        return False

    try:
        merged = AudioSegment.empty()
        for index, path in enumerate(file_paths):
            if not os.path.exists(path):
                logging.warning(f"⚠️ فایل برای ادغام پیدا نشد: {path}")
                continue
            segment = AudioSegment.from_file(path)
            if index < len(file_paths) - 1:
                gap = AudioSegment.silent(duration=150)
            else:
                gap = AudioSegment.empty()
            merged += segment + gap
        merged.export(output_path, format="wav")
        return True
    except Exception as exc:
        logging.error(f"❌ خطا در ادغام فایلهای صوتی: {exc}")
        return False
|
|
| |
async def generate_audio_live_with_retry(text, prompt, voice, session_id):
    """Synthesise *text* through the Live model, retrying with random API keys.

    Each attempt opens a fresh Live session, sends one turn and collects every
    ``response.data`` payload until the receive stream ends.

    Args:
        text: Text to be read aloud.
        prompt: Optional tone description; when truthy it replaces the default
            "read naturally" instruction.
        voice: Prebuilt voice name for the speech config.
        session_id: Identifier used as a log prefix.

    Returns:
        A bytearray of the received audio, or None when no key is available
        or all attempts failed.
    """
    MAX_RETRIES = 50
    live_config = types.LiveConnectConfig(
        response_modalities=["AUDIO"],
        speech_config=types.SpeechConfig(
            voice_config=types.VoiceConfig(
                prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name=voice)
            )
        ),
    )

    for attempt in range(MAX_RETRIES):
        # Only the key is used here; the Live API needs a client built with
        # the v1beta http option, so a dedicated client is created below.
        selected_api_key, _ = get_random_api_key_and_client()
        if not selected_api_key:
            break
        client = genai.Client(http_options={"api_version": "v1beta"}, api_key=selected_api_key)

        if prompt:
            tts_prompt = f"With a {prompt} tone, please read: '{text}'"
        else:
            unique_id_for_req = str(uuid.uuid4())[:8]
            tts_prompt = f"Please read the following text naturally: '{text}' [ID: {unique_id_for_req}]"

        try:
            logging.info(f"[{session_id}] (Live) تلاش {attempt+1} با کلید ...{selected_api_key[-4:]}")
            audio_buffer = bytearray()
            async with client.aio.live.connect(model=FIXED_MODEL_NAME_LIVE, config=live_config) as session:
                await session.send(input=tts_prompt, end_of_turn=True)
                async for response in session.receive():
                    if response.data:
                        audio_buffer.extend(response.data)
            if not audio_buffer:
                # Routed through the except branch so an empty reply is
                # logged and retried like any other failure.
                raise Exception("بافر صوتی خالی بود.")
            logging.info(f"[{session_id}] ✅ (Live) موفقیتآمیز.")
            return audio_buffer
        except Exception as e:
            logging.warning(f"[{session_id}] ⚠️ (Live) خطا در تلاش {attempt+1}: {e}")
            if attempt < MAX_RETRIES - 1:
                # Non-blocking pause: time.sleep would stall the event loop.
                await asyncio.sleep(0.5)
    return None
|
|
def save_pcm_to_wav(pcm_data, output_path):
    """Write raw PCM frames to *output_path* as a mono, 16-bit, 24 kHz WAV.

    Returns:
        True on success; False if writing raised (the error is logged).
    """
    try:
        writer = wave.open(output_path, 'wb')
        with writer:
            writer.setframerate(24000)
            writer.setsampwidth(2)
            writer.setnchannels(1)
            writer.writeframes(pcm_data)
    except Exception as exc:
        logging.error(f"خطا در تبدیل PCM به WAV: {exc}")
        return False
    return True
|
|
| |
def _first_inline_audio(response):
    """Return the inline_data of the first part of the first candidate, or None."""
    if not response.candidates:
        return None
    content = response.candidates[0].content
    if not content or not content.parts:
        return None
    return content.parts[0].inline_data or None


def generate_audio_chunk_standard_with_retry(chunk_text, prompt_text, voice, temp, session_id, retry_limit):
    """Synthesise one text chunk with the standard TTS model, retrying on failure.

    Every attempt picks a random API key/client. An attempt fails when the
    call raises or when the response carries no inline audio; both cases are
    logged and followed by a 0.5 s pause.

    Args:
        chunk_text: Text to synthesise.
        prompt_text: Optional style hint; when non-blank it is appended to the
            text in parentheses.
        voice: Prebuilt voice name.
        temp: Sampling temperature for the request.
        session_id: Identifier used as a log prefix.
        retry_limit: Maximum number of attempts.

    Returns:
        The inline_data object of the first response part, or None when all
        attempts failed.

    Raises:
        Exception: when ALL_API_KEYS is empty.
    """
    if not ALL_API_KEYS:
        raise Exception("هیچ کلید API در دسترس نیست.")

    MAX_RETRIES = retry_limit

    for attempt in range(MAX_RETRIES):
        selected_api_key, client = get_random_api_key_and_client()
        if not client:
            break
        try:
            final_text = f'{chunk_text}({prompt_text})' if prompt_text and prompt_text.strip() else chunk_text
            contents = [types.Content(role="user", parts=[types.Part.from_text(text=final_text)])]
            config = types.GenerateContentConfig(
                temperature=temp,
                response_modalities=["audio"],
                speech_config=types.SpeechConfig(
                    voice_config=types.VoiceConfig(
                        prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name=voice)
                    )
                ),
            )
            response = client.models.generate_content(
                model=FIXED_MODEL_NAME_STANDARD, contents=contents, config=config
            )
            inline_data = _first_inline_audio(response)
            if inline_data:
                logging.info(f"[{session_id}] ✅ (Standard) موفقیت در تلاش {attempt+1}.")
                return inline_data
            # Previously this case was retried without any trace in the logs.
            logging.warning(f"[{session_id}] ⚠️ (Standard) تلاش {attempt+1}: پاسخ فاقد داده صوتی بود.")
        except Exception as e:
            logging.warning(f"[{session_id}] ⚠️ (Standard) خطا در تلاش {attempt+1}: {e}")
        time.sleep(0.5)
    return None
|
|
def _synthesize_with_live_model(text_input, prompt_input, selected_voice, session_id, final_output_path):
    """Run the Live model and write its PCM output to final_output_path; return success."""
    pcm_data = asyncio.run(
        generate_audio_live_with_retry(text_input, prompt_input, selected_voice, session_id)
    )
    return bool(pcm_data) and save_pcm_to_wav(pcm_data, final_output_path)


def _write_chunk_audio(inline_data, target_stem):
    """Save one chunk's inline audio next to target_stem; return the path or None."""
    # mime_type may be missing; treat that like an unknown type instead of crashing.
    mime_type = inline_data.mime_type or ""
    data_buffer = inline_data.data
    ext = mimetypes.guess_extension(mime_type) or ".wav"
    if "audio/L" in mime_type and ext == ".wav":
        # Raw linear PCM has no container; add a WAV header before saving.
        data_buffer = convert_to_wav(data_buffer, mime_type)
    return save_binary_file(f"{target_stem}{ext}", data_buffer)


def core_generate_audio(text_input, prompt_input, selected_voice, temperature_val, session_id, use_live_model=False, retry_limit=50, fallback_to_live=False):
    """Produce ``output_<session_id>.wav`` for the given text and return its path.

    With use_live_model the Live model is used directly. Otherwise the text is
    split with smart_text_split, each chunk is synthesised with the standard
    model, and the resulting files are merged (or the single file is moved)
    into the output path. If a chunk fails and fallback_to_live is set, the
    whole text is retried once through the Live model.

    The per-session ``temp_<session_id>`` directory is always removed on exit.

    Args:
        text_input: Text to synthesise.
        prompt_input: Optional style hint forwarded to the model helpers.
        selected_voice: Prebuilt voice name.
        temperature_val: Sampling temperature for the standard model.
        session_id: Identifier used in file names and log prefixes.
        use_live_model: Skip the standard model and use the Live model.
        retry_limit: Attempts per chunk for the standard model.
        fallback_to_live: Try the Live model when the standard model fails.

    Returns:
        Path of the generated WAV file.

    Raises:
        Exception: when no audio could be produced by the selected path(s).
    """
    logging.info(f"[{session_id}] 🚀 شروع: Live={use_live_model}, Retry={retry_limit}, Fallback={fallback_to_live}")
    temp_dir = f"temp_{session_id}"
    os.makedirs(temp_dir, exist_ok=True)
    output_base_name = f"{temp_dir}/audio_session_{session_id}"
    final_output_path = f"output_{session_id}.wav"

    try:
        if use_live_model:
            if _synthesize_with_live_model(text_input, prompt_input, selected_voice, session_id, final_output_path):
                return final_output_path
            raise Exception("تولید صدا با مدل لایف ناموفق بود.")

        text_chunks = smart_text_split(text_input, DEFAULT_MAX_CHUNK_SIZE)
        generated_files = []
        standard_failed = False

        for i, chunk in enumerate(text_chunks):
            inline_data = generate_audio_chunk_standard_with_retry(
                chunk, prompt_input, selected_voice, temperature_val, session_id, retry_limit
            )
            if not inline_data:
                standard_failed = True
                break
            fpath = _write_chunk_audio(inline_data, f"{output_base_name}_part{i+1:03d}")
            if fpath:
                generated_files.append(fpath)

        if standard_failed:
            if not fallback_to_live:
                raise Exception(f"تولید صدا با مدل استاندارد پس از {retry_limit} تلاش ناموفق بود.")
            logging.info(f"[{session_id}] 🔄 مدل استاندارد شکست خورد. سوییچ به مدل لایف (Fallback)...")
            if _synthesize_with_live_model(text_input, prompt_input, selected_voice, session_id, final_output_path):
                return final_output_path
            raise Exception("هم مدل استاندارد و هم مدل لایف (Fallback) شکست خوردند.")

        if not generated_files:
            raise Exception("هیچ فایلی تولید نشد.")

        if len(generated_files) > 1:
            if PYDUB_AVAILABLE and merge_audio_files_func(generated_files, final_output_path):
                return final_output_path
            # Merge unavailable or failed: only the first chunk is returned,
            # so make the truncation visible in the logs.
            logging.warning(f"[{session_id}] ⚠️ ادغام قطعات ناموفق بود؛ فقط قطعه اول بازگردانده می‌شود.")

        shutil.move(generated_files[0], final_output_path)
        return final_output_path
    finally:
        if os.path.exists(temp_dir):
            shutil.rmtree(temp_dir)
|
|
# Load the API keys once, at import time.
_init_api_keys()

# ASGI application served by uvicorn.
app = FastAPI(title="Alpha TTS Worker API")
|
|
class TTSRequest(BaseModel):
    # Request body of POST /generate. Documented with comments rather than a
    # docstring so the generated API schema stays exactly as before.

    # Text to synthesise.
    text: str
    # Optional style hint forwarded as prompt_input.
    prompt: str | None = ""
    # Voice name forwarded as selected_voice.
    speaker: str
    # Sampling temperature forwarded as temperature_val.
    temperature: float
    # Use the Live model instead of the standard one.
    use_live_model: bool = False
    # Attempts per chunk for the standard model.
    retry_limit: int = 50
    # Retry through the Live model when the standard model fails.
    fallback_to_live: bool = False
|
|
def _discard_output_file(path):
    """Best-effort removal of an audio file after it has been served."""
    try:
        os.remove(path)
    except OSError as exc:
        logging.warning(f"⚠️ حذف فایل خروجی {path} ناموفق بود: {exc}")


# POST /generate: synthesise the requested text and stream back the WAV file.
# Any failure is reported as HTTP 500 with the error text as detail.
# (Comments are used instead of a docstring so the API schema is unchanged.)
@app.post("/generate")
def generate_audio_endpoint(request: TTSRequest):
    from fastapi import BackgroundTasks
    from fastapi.responses import FileResponse

    session_id = str(uuid.uuid4())[:8]
    try:
        final_path = core_generate_audio(
            text_input=request.text,
            prompt_input=request.prompt,
            selected_voice=request.speaker,
            temperature_val=request.temperature,
            session_id=session_id,
            use_live_model=request.use_live_model,
            retry_limit=request.retry_limit,
            fallback_to_live=request.fallback_to_live,
        )
    except Exception as e:
        logging.error(f"[{session_id}] ❌ خطا: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e

    # Raised outside the try block so it is not caught and re-wrapped.
    if not (final_path and os.path.exists(final_path)):
        logging.error(f"[{session_id}] ❌ خطا: خطا در تولید فایل صوتی.")
        raise HTTPException(status_code=500, detail="خطا در تولید فایل صوتی.")

    # Delete the file only after the response has been sent. The earlier
    # code ran shutil.rmtree eagerly and passed its None result as the
    # background task, so the output file was never removed.
    cleanup = BackgroundTasks()
    cleanup.add_task(_discard_output_file, final_path)
    return FileResponse(
        path=final_path,
        media_type='audio/wav',
        filename=os.path.basename(final_path),
        background=cleanup,
    )
|
|
# GET /: liveness probe returning a fixed JSON payload.
@app.get("/")
def health_check():
    payload = {"status": "ok", "message": "TTS Worker is running."}
    return payload
|
|
if __name__ == "__main__":
    # Listen on the port named by the PORT environment variable, else 7860.
    listen_port = int(os.environ.get("PORT", 7860))
    uvicorn.run(app, host="0.0.0.0", port=listen_port, reload=False)