gemini_tts / app.py
amedcj's picture
Update app.py
b72c5a4 verified
import os
import gradio as gr
from google import genai
from google.genai import types
import wave
import io
# --- Configuration ---
# Prebuilt voice requested for every synthesis call.
VOICE_NAME = 'Fenrir'

# --- API Client Initialization ---
# Start from "no client" and only replace it when construction succeeds, so
# the rest of the module can test `client` for truthiness.
client = None
try:
    client = genai.Client(api_key=os.environ.get("GEMINI_API_KEY"))
except Exception as init_error:
    print(f"Error initializing Gemini client: {init_error}. Ensure GEMINI_API_KEY secret is set.")
# --- Helper Function for Saving Audio ---
def wave_file(filename, pcm, channels=1, rate=24000, sample_width=2):
    """Write raw PCM frames to ``filename`` as an uncompressed WAV file.

    Args:
        filename: Target passed straight to ``wave.open`` in ``"wb"`` mode.
        pcm: Frame data written after the header.
        channels: Number of audio channels recorded in the header.
        rate: Frame rate (frames per second) recorded in the header.
        sample_width: Bytes per sample recorded in the header.
    """
    # setparams() takes (nchannels, sampwidth, framerate, nframes, comptype,
    # compname); nframes=0 lets the writer fill in the real count on close.
    header = (channels, sample_width, rate, 0, "NONE", "not compressed")
    with wave.open(filename, "wb") as wav_out:
        wav_out.setparams(header)
        wav_out.writeframes(pcm)
# --- The Gradio Interface Function ---
def _extract_audio_bytes(response):
    """Return the first non-empty inline audio payload found in ``response``.

    Every level of the response (candidates, content, parts, inline data) is
    treated as possibly missing, so an empty or blocked response produces a
    readable ``gr.Error`` rather than an attribute/index failure.

    Raises:
        gr.Error: If no part of the first candidate carries inline data.
    """
    candidates = getattr(response, "candidates", None) or []
    candidate = candidates[0] if candidates else None
    content = getattr(candidate, "content", None)
    for part in getattr(content, "parts", None) or []:
        payload = getattr(getattr(part, "inline_data", None), "data", None)
        if payload:
            return payload

    if candidate is None:
        block_reason = "NO_CANDIDATE"
    else:
        # finish_reason is normally an enum, but may be absent.
        finish_reason = getattr(candidate, "finish_reason", None)
        block_reason = getattr(finish_reason, "name", None) or str(finish_reason)
    raise gr.Error(
        "TTS Generation Failed. Reason: The model returned an empty response. "
        f"The finish reason was: {block_reason}. "
        "This may indicate a quota limit, or the model could not generate the requested Kurmanji speech."
    )


def gemini_tts_kurmanji(kurmanji_text: str) -> str:
    """Synthesize Kurmanji speech for ``kurmanji_text`` and return a WAV path.

    Args:
        kurmanji_text: Text entered by the user in the Gradio textbox.

    Returns:
        Path of a newly written WAV file containing the generated audio.

    Raises:
        gr.Error: If the client is unavailable, the input is blank, the model
            returns no audio, or the API call / file write fails.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import list.
    import tempfile

    if not client:
        raise gr.Error("Gemini API Client failed to initialize. Check the GEMINI_API_KEY secret.")
    # Avoid spending an API call on an empty prompt.
    if not kurmanji_text or not kurmanji_text.strip():
        raise gr.Error("Please enter some Kurmanji text to convert.")

    print(f"Attempting to generate Kurmanji speech: '{kurmanji_text}' with voice {VOICE_NAME}")
    try:
        prompt = f"Speak the following text in Kurdish Kurmanji: {kurmanji_text}"
        response = client.models.generate_content(
            model="gemini-2.5-flash-preview-tts",
            contents=prompt,
            config=types.GenerateContentConfig(
                response_modalities=["AUDIO"],
                speech_config=types.SpeechConfig(
                    voice_config=types.VoiceConfig(
                        prebuilt_voice_config=types.PrebuiltVoiceConfig(
                            voice_name=VOICE_NAME,
                        )
                    )
                ),
            ),
        )
        data = _extract_audio_bytes(response)

        # A unique file per request: a single shared name would let
        # concurrent requests overwrite each other's audio.
        # NOTE(review): files are left on disk (delete=False) because the
        # caller reads the path after this function returns; add periodic
        # cleanup if disk usage matters.
        with tempfile.NamedTemporaryFile(
            prefix="kurmanji_", suffix=".wav", delete=False
        ) as tmp:
            file_name = tmp.name
        wave_file(file_name, data)
        return file_name
    except gr.Error:
        # Already a user-facing message; do not re-wrap it as an "API error".
        raise
    except Exception as e:
        error_message = f"An API error occurred during TTS generation: {e}"
        print(error_message)
        # Display the error in the Gradio interface, keeping the cause chained.
        raise gr.Error(error_message) from e
# --- Gradio Interface Definition ---
# Input widget: a three-line textbox with a Kurmanji sample as placeholder.
_text_input = gr.Textbox(
    lines=3,
    placeholder="Mînak: Silav, roj baş. Ez dixwazim Kurdî biaxivim.",
    label="Kurmanji Text to Convert",
)

# Output widget: plays the audio file whose path the handler returns.
_audio_output = gr.Audio(
    type="filepath",
    label="Generated Kurmanji Speech",
)

# Wire the handler and both widgets into a single-function interface.
demo = gr.Interface(
    fn=gemini_tts_kurmanji,
    inputs=_text_input,
    outputs=_audio_output,
    title=f"🗣️ Gemini TTS for Kurdish Kurmanji (Voice: {VOICE_NAME})",
    description="Uses Gemini 2.5 Flash and natural language prompting. Check the logs for specific failure reasons.",
)

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()