Spaces:

amedcj
/

gemini_tts

Running

App Files Files Community

gemini_tts / app.py

amedcj

Update app.py

b72c5a4 verified 3 months ago

raw

history blame contribute delete

3.51 kB

	import os
	import gradio as gr
	from google import genai
	from google.genai import types
	import wave
	import io

	# --- Configuration ---
	# Prebuilt voice requested for every speech synthesis call.
	VOICE_NAME = "Fenrir"

	# --- API Client Initialization ---
	# Default to "no client"; the request handler checks this sentinel and
	# reports a configuration error instead of crashing on first use.
	client = None
	try:
	    client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
	except Exception as e:
	    # Startup must not abort: log the cause and keep the None sentinel.
	    print(f"Error initializing Gemini client: {e}. Ensure GEMINI_API_KEY secret is set.")

	# --- Helper Function for Saving Audio ---
	def wave_file(filename, pcm, channels=1, rate=24000, sample_width=2):
	    """Write raw PCM frames to ``filename`` as a WAV file.

	    Args:
	        filename: Destination passed straight to ``wave.open`` in "wb" mode.
	        pcm: Raw PCM frame bytes, written unchanged.
	        channels: Number of audio channels recorded in the header.
	        rate: Frame rate (samples per second) recorded in the header.
	        sample_width: Bytes per sample recorded in the header.
	    """
	    # Tuple layout expected by setparams:
	    # (nchannels, sampwidth, framerate, nframes, comptype, compname).
	    # nframes=0 and "NONE" compression are the writer's own defaults.
	    header = (channels, sample_width, rate, 0, "NONE", "not compressed")
	    with wave.open(filename, "wb") as writer:
	        writer.setparams(header)
	        writer.writeframes(pcm)

	# --- The Gradio Interface Function ---
	def gemini_tts_kurmanji(kurmanji_text: str) -> str:
	    """Generate Kurmanji speech for ``kurmanji_text`` and return a WAV path.

	    The text is wrapped in a natural-language instruction, sent to the
	    Gemini TTS model with the ``VOICE_NAME`` prebuilt voice, and the audio
	    bytes found in the response are written to disk with ``wave_file``
	    (using that helper's default channel/rate/width settings).

	    Args:
	        kurmanji_text: Text to be spoken.

	    Returns:
	        Path of a newly created WAV file holding the generated audio.

	    Raises:
	        gr.Error: If the client was not initialized, the input is blank,
	            the response contains no audio payload, or the API call or
	            file write fails.
	    """
	    # Function-scope import keeps this block self-contained.
	    import tempfile

	    if not client:
	        raise gr.Error("Gemini API Client failed to initialize. Check the GEMINI_API_KEY secret.")

	    # Reject blank input up front instead of spending an API call on it.
	    if not kurmanji_text or not kurmanji_text.strip():
	        raise gr.Error("Please enter some Kurmanji text to convert.")

	    print(f"Attempting to generate Kurmanji speech: '{kurmanji_text}' with voice {VOICE_NAME}")

	    try:
	        prompt = f"Speak the following text in Kurdish Kurmanji: {kurmanji_text}"

	        response = client.models.generate_content(
	            model="gemini-2.5-flash-preview-tts",
	            contents=prompt,
	            config=types.GenerateContentConfig(
	                response_modalities=["AUDIO"],
	                speech_config=types.SpeechConfig(
	                    voice_config=types.VoiceConfig(
	                        prebuilt_voice_config=types.PrebuiltVoiceConfig(
	                            voice_name=VOICE_NAME,
	                        )
	                    )
	                ),
	            ),
	        )

	        # Walk the response defensively: candidates, content, parts and
	        # inline_data can each be missing or None on a failed generation.
	        candidates = response.candidates or []
	        first_candidate = candidates[0] if candidates else None
	        content = getattr(first_candidate, "content", None)
	        parts = getattr(content, "parts", None) or []

	        data = None
	        for part in parts:
	            inline = getattr(part, "inline_data", None)
	            if inline is not None and inline.data:
	                data = inline.data
	                break

	        if data is None:
	            # Report why generation stopped; finish_reason may itself be None.
	            if first_candidate is None:
	                block_reason = "NO_CANDIDATE"
	            else:
	                finish_reason = getattr(first_candidate, "finish_reason", None)
	                if finish_reason is None:
	                    block_reason = "UNKNOWN"
	                else:
	                    block_reason = getattr(finish_reason, "name", None) or str(finish_reason)
	            raise gr.Error(
	                f"TTS Generation Failed. Reason: The model returned an empty response. "
	                f"The finish reason was: {block_reason}. "
	                f"This may indicate a quota limit, or the model could not generate the requested Kurmanji speech."
	            )

	        # Use a unique file per request: a fixed name would let concurrent
	        # requests overwrite each other's audio.
	        with tempfile.NamedTemporaryFile(prefix="kurmanji_", suffix=".wav", delete=False) as tmp:
	            file_name = tmp.name
	        wave_file(file_name, data)

	        return file_name

	    except gr.Error:
	        # Already a user-facing message; do not re-wrap it as an API error.
	        raise
	    except Exception as e:
	        error_message = f"An API error occurred during TTS generation: {e}"
	        print(error_message)
	        # Display the error in the Gradio interface, keeping the cause chained.
	        raise gr.Error(error_message) from e


	# --- Gradio Interface Definition ---
	# Input widget: a three-line textbox with a Kurmanji sample as placeholder.
	_text_input = gr.Textbox(
	    lines=3,
	    placeholder="Mînak: Silav, roj baş. Ez dixwazim Kurdî biaxivim.",
	    label="Kurmanji Text to Convert",
	)

	# Output widget: audio component that receives the file path returned by
	# the handler.
	_audio_output = gr.Audio(
	    type="filepath",
	    label="Generated Kurmanji Speech",
	)

	demo = gr.Interface(
	    fn=gemini_tts_kurmanji,
	    inputs=_text_input,
	    outputs=_audio_output,
	    title=f"🗣️ Gemini TTS for Kurdish Kurmanji (Voice: {VOICE_NAME})",
	    description="Uses Gemini 2.5 Flash and natural language prompting. Check the logs for specific failure reasons.",
	)

	# Launch the app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()