Spaces:
Running
Running
| import tempfile | |
| import time | |
| from pathlib import Path | |
| from typing import Optional, Tuple, List, Dict | |
| import spaces # Not strictly needed if not using @spaces.GPU decorator for Piper | |
| import re | |
| import os | |
| import json # For Piper voice config (though Piper library handles it mostly) | |
| import gradio as gr | |
| import numpy as np | |
| import soundfile as sf | |
| # import torch # Keep if other parts might need it | |
# --- Piper TTS ---
# Module-level Piper state: these defaults are flipped/populated below once the
# library imports cleanly and (later) a voice model loads.
PIPER_TTS_AVAILABLE = False
PiperVoice = None  # Placeholder for the class
piper_voice_instance = None  # Placeholder for the loaded voice instance
PIPER_VOICE_DIR = Path("./piper_voices")  # Directory to store Piper voices
DEFAULT_PIPER_VOICE_REPO_ID = "rhasspy/piper-voices"
# Choose a specific voice: e.g., en_US-lessac-medium from rhasspy/piper-voices
# The path within the repo is often like: en/en_US/lessac/medium/
DEFAULT_PIPER_VOICE_SUBPATH = "en/en_US/lessac/medium/"  # Trailing slash is important
DEFAULT_PIPER_VOICE_NAME = "en_US-lessac-medium"  # Filename base
try:
    from piper import PiperVoice
    from huggingface_hub import hf_hub_download  # For downloading voices
    PIPER_TTS_AVAILABLE = True
    print("Piper TTS library and huggingface_hub loaded.")
except ImportError:
    print("CRITICAL ERROR: piper-tts or huggingface_hub library not found. Piper TTS features will be disabled. Ensure 'piper-tts', 'onnxruntime', 'huggingface_hub' are in requirements.txt")
def ensure_piper_voice(voice_name_base=DEFAULT_PIPER_VOICE_NAME, voice_subpath=DEFAULT_PIPER_VOICE_SUBPATH):
    """Ensure the Piper voice files (.onnx model and .onnx.json config) exist locally.

    Looks for the files in PIPER_VOICE_DIR (both the flat layout and the
    repo-subpath layout that hf_hub_download produces) and downloads them from
    the Hugging Face Hub if they are missing.

    Args:
        voice_name_base: Filename base of the voice, e.g. "en_US-lessac-medium".
        voice_subpath: Path of the voice inside the repo, e.g. "en/en_US/lessac/medium/".

    Returns:
        (onnx_path, json_path) as Path objects pointing at existing files, or
        (None, None) if the download failed.
    """
    PIPER_VOICE_DIR.mkdir(parents=True, exist_ok=True)
    onnx_filename = f"{voice_name_base}.onnx"
    json_filename = f"{voice_name_base}.onnx.json"
    subpath = voice_subpath.strip("/")
    # BUGFIX: hf_hub_download(local_dir=...) preserves the repo-relative path,
    # so downloaded files land under PIPER_VOICE_DIR/<subpath>/, NOT directly
    # in PIPER_VOICE_DIR. The old code returned the flat paths, which never
    # existed after a download, so the caller's existence check always failed.
    # Check both layouts (flat = manually placed files, nested = hub download).
    candidate_pairs = [
        (PIPER_VOICE_DIR / onnx_filename, PIPER_VOICE_DIR / json_filename),
        (PIPER_VOICE_DIR / subpath / onnx_filename, PIPER_VOICE_DIR / subpath / json_filename),
    ]
    for onnx_path, json_path in candidate_pairs:
        if onnx_path.exists() and json_path.exists():
            print(f"Piper voice '{voice_name_base}' found locally at {onnx_path.parent}.")
            return onnx_path, json_path
    print(f"Downloading Piper voice '{voice_name_base}' from repo '{DEFAULT_PIPER_VOICE_REPO_ID}' subpath '{voice_subpath}'...")
    try:
        # hf_hub_download returns the actual on-disk path of each file; return
        # those so the caller gets paths that really exist.
        onnx_local = hf_hub_download(repo_id=DEFAULT_PIPER_VOICE_REPO_ID,
                                     filename=f"{subpath}/{onnx_filename}",
                                     local_dir=PIPER_VOICE_DIR,
                                     local_dir_use_symlinks=False,  # Recommended for Spaces
                                     repo_type="model")  # Explicitly model type
        json_local = hf_hub_download(repo_id=DEFAULT_PIPER_VOICE_REPO_ID,
                                     filename=f"{subpath}/{json_filename}",
                                     local_dir=PIPER_VOICE_DIR,
                                     local_dir_use_symlinks=False,
                                     repo_type="model")
        print(f"Piper voice '{voice_name_base}' downloaded successfully.")
        return Path(onnx_local), Path(json_local)
    except Exception as e:
        print(f"ERROR downloading Piper voice '{voice_name_base}': {e}")
        gr.Warning(f"Failed to download Piper voice {voice_name_base}. TTS might not work.")
        return None, None  # Signal failure to the caller
# Load the default Piper voice once at import time so the UI can synthesize
# immediately. Any failure here flips PIPER_TTS_AVAILABLE off for the whole
# process (run_inference and the UI both key off this flag).
if PIPER_TTS_AVAILABLE:
    onnx_path, json_path = ensure_piper_voice()
    if onnx_path and json_path and onnx_path.exists() and json_path.exists():  # Double check existence after potential download
        try:
            piper_voice_instance = PiperVoice.load(str(onnx_path), config_path=str(json_path))
            print("Default Piper TTS voice loaded successfully.")
        except Exception as e:
            print(f"ERROR loading Piper voice from {onnx_path} and {json_path}: {e}")
            PIPER_TTS_AVAILABLE = False  # Disable if voice loading fails
    else:
        print("Piper voice files not found or not downloaded correctly. Piper TTS disabled.")
        PIPER_TTS_AVAILABLE = False
from pydub import AudioSegment  # For background sound mixing
# --- Gemini Configuration (Keep your existing Gemini setup) ---
try:
    import google.generativeai as genai
    GOOGLE_GEMINI_AVAILABLE = True
except ImportError:
    GOOGLE_GEMINI_AVAILABLE = False
    print("Warning: google-generativeai library not found. Gemini features will be disabled.")
# API key comes from the environment (Space secrets); when absent the UI
# offers a manual key field instead.
GEMINI_API_KEY = os.environ.get("GOOGLE_API_KEY")
gemini_configured_successfully = False  # True once configure + model creation succeed below
gemini_model_instance = None  # Shared GenerativeModel instance reused across requests
GEMINI_MODEL_NAME = "gemini-1.5-flash-latest"  # Or your preferred Gemini model
if GOOGLE_GEMINI_AVAILABLE and GEMINI_API_KEY:
    try:
        genai.configure(api_key=GEMINI_API_KEY)
        gemini_model_instance = genai.GenerativeModel(GEMINI_MODEL_NAME)
        # Optional: Test call to confirm API key and model access if desired (can slow startup)
        # _ = gemini_model_instance.generate_content("test", generation_config={'candidate_count': 1})
        gemini_configured_successfully = True
        print(f"Google Gemini configured successfully with model: {GEMINI_MODEL_NAME}")
    except Exception as e:
        print(f"Error configuring Google Gemini at startup: {e}. Gemini features might not work correctly.")
elif GOOGLE_GEMINI_AVAILABLE and not GEMINI_API_KEY:
    print("INFO: GOOGLE_API_KEY not found in environment variables/secrets for Gemini. Manual input will be required if feature is used.")
else:
    # NOTE(review): this branch is only reachable when GOOGLE_GEMINI_AVAILABLE
    # is False (the two branches above cover the SDK-available cases), so the
    # first sub-branch below is dead code in practice.
    if GOOGLE_GEMINI_AVAILABLE:  # SDK is there, but no key
        print("INFO: Gemini SDK is available, but no API key found (neither in secrets nor manually provided yet).")
    else:  # SDK itself is missing
        print("INFO: Gemini features disabled (SDK not available).")
def enhance_text_with_gemini(text_to_enhance: str, enhancement_prompt: str, api_key_manual: Optional[str] = None) -> str:
    """Enhance text using Google Gemini, falling back to the original text on any failure.

    Args:
        text_to_enhance: The user's raw input text.
        enhancement_prompt: Instruction prepended to the text (e.g. "Make this formal:").
        api_key_manual: Optional API key to use for this single request,
            temporarily overriding the key configured at startup.

    Returns:
        The Gemini-enhanced text, or ``text_to_enhance`` unchanged if the SDK
        is missing, no key is available, the request fails, or the response is
        blocked/empty.
    """
    if not GOOGLE_GEMINI_AVAILABLE:
        gr.Warning("Gemini SDK not available. Cannot enhance text.")
        return text_to_enhance
    active_gemini_model_instance = None
    temp_gemini_configured_manually = False
    # BUGFIX: the previous code read genai.API_KEY, an attribute the SDK does
    # not expose, so the revert key was always None and the post-request revert
    # never ran -- the global SDK config stayed stuck on the manual key. Revert
    # instead to the key this module actually configured at startup (if any).
    original_global_api_key_for_revert = GEMINI_API_KEY if gemini_configured_successfully else None
    if api_key_manual:
        gr.Info("Attempting to use manually provided API key for this Gemini request...")
        try:
            # genai.configure is process-global; reverted in the finally below.
            genai.configure(api_key=api_key_manual)
            active_gemini_model_instance = genai.GenerativeModel(GEMINI_MODEL_NAME)
            _ = active_gemini_model_instance.generate_content("test", generation_config={'candidate_count': 1})  # Test call
            temp_gemini_configured_manually = True
            print("Gemini temporarily configured and tested with manual API key for this request.")
        except Exception as e:
            gr.Error(f"Failed to configure or test Gemini with provided manual API key: {e}")
            if original_global_api_key_for_revert and hasattr(genai, 'configure'):  # Revert if there was an original key
                try: genai.configure(api_key=original_global_api_key_for_revert)
                except Exception: pass
            return text_to_enhance
    elif gemini_configured_successfully and gemini_model_instance:
        active_gemini_model_instance = gemini_model_instance
    else:
        gr.Warning("Google API Key for Gemini not provided or Gemini not configured correctly. Cannot enhance text.")
        return text_to_enhance
    if not active_gemini_model_instance:
        gr.Error("Gemini model instance could not be initialized. Cannot enhance text.")
        return text_to_enhance
    full_prompt = f"{enhancement_prompt}:\n\n---\n{text_to_enhance}\n---"
    print(f"Sending to Gemini: First 100 chars of prompt: {full_prompt[:100]}...")
    try:
        gen_config = {'candidate_count': 1}
        response = active_gemini_model_instance.generate_content(full_prompt, generation_config=gen_config)
        if response.parts:
            enhanced_text = response.text
            print(f"Gemini enhanced text (first 100 chars): {enhanced_text[:100]}")
            return enhanced_text
        else:  # Handle no parts / blocked response
            block_reason = "Unknown"; safety_ratings_str = "N/A"
            if hasattr(response, 'prompt_feedback') and response.prompt_feedback:
                block_reason = response.prompt_feedback.block_reason or "Not Blocked but no parts"
                if hasattr(response.prompt_feedback, 'safety_ratings'):
                    safety_ratings_str = ", ".join([f"{rating.category.name}: {rating.probability.name}" for rating in response.prompt_feedback.safety_ratings])
            elif hasattr(response, 'candidates') and response.candidates and hasattr(response.candidates[0], 'finish_reason') and response.candidates[0].finish_reason != 1:  # 1 is typically "STOP"
                block_reason = f"Finish Reason: {response.candidates[0].finish_reason.name if hasattr(response.candidates[0].finish_reason, 'name') else response.candidates[0].finish_reason}"
            gr.Warning(f"Gemini returned no content. Block Reason: {block_reason}. Safety: {safety_ratings_str}. Using original text.")
            return text_to_enhance
    except Exception as e:
        gr.Error(f"Error calling Gemini API: {type(e).__name__}: {e}")
        return text_to_enhance
    finally:  # Revert manual key config if it was used and there was an original key
        if temp_gemini_configured_manually and original_global_api_key_for_revert and hasattr(genai, 'configure'):
            try:
                genai.configure(api_key=original_global_api_key_for_revert)
                print("Reverted Gemini configuration to use original API key (if it was set).")
            except Exception as revert_e:
                print(f"Warning: Could not revert Gemini config: {revert_e}")
        elif temp_gemini_configured_manually and not original_global_api_key_for_revert and hasattr(genai, 'configure'):
            # With no original global key there is nothing to restore; the
            # manual key simply remains the active global configuration until
            # another configure() call replaces it.
            print("Manual Gemini API key was used. Global API key (from secrets) remains unset if it wasn't there before.")
# --- Text Parsing for Piper TTS and Background Sounds ---
def parse_text_for_piper_and_sounds(text: str) -> Tuple[str, List[Dict]]:
    """Prepare raw input text for Piper TTS.

    Removes `S1=(...)`/`S2=(...)` speaker tags and `{characteristic}` tags
    (Piper would read them out literally), and extracts every
    `[sound: name, volume: X]` directive into a request list. Non-verbal cues
    such as `(laughs)` are deliberately left in the text, so Piper will speak
    them as plain words.

    Returns:
        `(tts_text, sound_requests)` -- the cleaned, stripped text plus a list
        of dicts with keys "name" (lowercased) and "volume_factor" (clamped to
        [0, 1], defaulting to 0.5 when no volume is given).
    """
    collected_sounds: List[Dict] = []
    # Drop the tags Piper has no concept of.
    without_speakers = re.sub(r"S[12]=\([^)]+\)", "", text)
    cleaned = re.sub(r"\{[^}]+\}", "", without_speakers)

    def _consume_sound_tag(match):
        inner = match.group(1)  # everything between "[sound:" and "]"
        try:
            # Rebuild the full tag so the name/volume patterns anchor on the
            # literal "sound:"/"volume:" markers, exactly as typed.
            rebuilt_tag = f"[sound:{inner}]"
            name_hit = re.search(r"sound:\s*([\w\-\_]+)", rebuilt_tag)
            if not name_hit:
                gr.Warning(f"Malformed sound tag (missing sound name): '[sound:{inner}]'. Will be spoken as text.")
                return match.group(0)  # keep the malformed tag so it is spoken
            vol_hit = re.search(r"volume:\s*([0-9\.]+)", rebuilt_tag)
            requested_volume = float(vol_hit.group(1)) if vol_hit else 0.5
            collected_sounds.append({
                "name": name_hit.group(1).lower(),
                "volume_factor": max(0.0, min(1.0, requested_volume)),
            })
            return ""  # strip the directive from the spoken text
        except Exception as e:
            gr.Warning(f"Could not parse sound tag '[sound:{inner}]': {e}. Will be spoken as text.")
            return match.group(0)  # fall back to speaking the tag verbatim

    tts_text = re.sub(r"\[sound:([^\]]+)\]", _consume_sound_tag, cleaned)
    return tts_text.strip(), collected_sounds
# --- Background Sound Configuration ---
SOUND_EFFECTS_DIR = Path("./sounds")  # Directory scanned for [sound: name] files
SUPPORTED_SOUND_FORMATS_BG = [".wav", ".mp3"]  # Extensions tried, in this order
DEFAULT_BACKGROUND_VOLUME_ADJUST_DB = -18  # Made background sounds a bit quieter
# --- Main Inference Function ---
# @spaces.GPU # Not needed for Piper as it's CPU-fast
def run_inference(
    text_input_original: str,
    piper_length_scale: float,
    piper_noise_scale: float,
    piper_noise_w: float,
    speed_factor_post_tts: float,
    enable_gemini_enhancement: bool,
    gemini_enhancement_prompt: str,
    gemini_api_key_manual: Optional[str]
) -> Tuple[int, np.ndarray]:
    """Full pipeline: optional Gemini rewrite -> tag parsing -> Piper TTS ->
    post-TTS speed resample -> optional background-sound mixing.

    Returns a Gradio-compatible ``(sample_rate, int16 numpy audio)`` tuple.
    Raises gr.Error only when the input is empty and no sound was requested.
    """
    if not PIPER_TTS_AVAILABLE or not piper_voice_instance:
        gr.Error("Piper TTS is not available or a voice model is not loaded. Cannot synthesize speech.")
        return (22050, np.zeros(100, dtype=np.int16))  # Default SR, short silence
    # 1. Gemini Enhancement (if enabled)
    processed_text = text_input_original
    if enable_gemini_enhancement:
        if gemini_enhancement_prompt and gemini_enhancement_prompt.strip():
            gr.Info("Enhancing text with Gemini...")
            processed_text = enhance_text_with_gemini(
                text_input_original,
                gemini_enhancement_prompt,
                gemini_api_key_manual
            )
        else:
            gr.Warning("Gemini enhancement enabled, but the enhancement prompt is empty. Using original text.")
    # 2. Parse for Piper TTS text and [sound:...] directives
    text_for_piper, sound_requests = parse_text_for_piper_and_sounds(processed_text)
    print(f"Text for Piper TTS: '{text_for_piper}'")
    print(f"Sound requests: {sound_requests}")
    # 3. Piper TTS Generation
    generated_speech_np = np.array([], dtype=np.int16)  # Piper outputs int16
    output_sr = piper_voice_instance.config.sample_rate if piper_voice_instance.config else 22050  # Default if config missing
    if text_for_piper:
        gr.Info(f"Synthesizing with Piper TTS...")
        synthesis_kwargs = {
            'length_scale': piper_length_scale,
            'noise_scale': piper_noise_scale,
            'noise_w': piper_noise_w,
        }
        start_time = time.time()
        # Piper's synthesize method directly gives audio bytes or chunks of bytes
        # NOTE(review): this assumes PiperVoice.synthesize yields raw int16 PCM
        # chunks -- confirm against the installed piper-tts version (some
        # versions instead write to a wav file and expose a separate
        # synthesize_stream_raw for raw bytes).
        try:
            audio_bytes = b"".join(piper_voice_instance.synthesize(text_for_piper, **synthesis_kwargs))
            end_time = time.time()
            print(f"Piper TTS Generation finished in {end_time - start_time:.2f} seconds.")
            if audio_bytes:
                generated_speech_np = np.frombuffer(audio_bytes, dtype=np.int16)
            else:
                gr.Warning("Piper TTS produced no audio bytes for the input text.")
        except Exception as e_piper:
            gr.Error(f"Error during Piper TTS synthesis: {e_piper}")
            # Fallback to silence if Piper fails
            generated_speech_np = np.zeros(int(output_sr * 0.1), dtype=np.int16)
    elif sound_requests:  # Only sound requests, no text for Piper
        gr.Info("No text for Piper TTS. Will generate background sound over silence if requested.")
        generated_speech_np = np.zeros(int(output_sr * 0.1), dtype=np.int16)  # 0.1 sec silence for mixing
    else:  # No text and no sound requests
        if not (text_input_original and text_input_original.strip()):  # Check if original input was also empty
            raise gr.Error("Input text is empty and no sound was requested.")
        else:  # Original text was there but got stripped to nothing, and no sound requests
            gr.Warning("After processing, no text remained for TTS and no background sound was requested.")
            generated_speech_np = np.zeros(int(output_sr * 0.1), dtype=np.int16)
    # 4. Post-TTS Speed Adjustment (Applied to the speech generated by Piper)
    # Linear-interpolation resample: changes duration AND pitch together.
    if speed_factor_post_tts != 1.0 and len(generated_speech_np) > 0:
        # Convert to float for interpolation, then back to int16
        # Ensure normalization to avoid clipping if values are already near max int16
        max_val = np.iinfo(np.int16).max
        generated_speech_float = generated_speech_np.astype(np.float32) / max_val
        original_len = len(generated_speech_float)
        speed_factor_clamped = max(0.5, min(2.0, speed_factor_post_tts))
        target_len = int(original_len / speed_factor_clamped)
        if target_len != original_len and target_len > 0:
            x_original = np.arange(original_len)
            x_resampled = np.linspace(0, original_len - 1, target_len)
            resampled_float = np.interp(x_resampled, x_original, generated_speech_float)
            # Clip before converting back to int16
            generated_speech_np = (np.clip(resampled_float, -1.0, 1.0) * max_val).astype(np.int16)
            print(f"Applied post-TTS speed factor {speed_factor_clamped}")
        else:
            print("Skipped post-TTS speed adjustment (target length invalid or no change).")
    # 5. Background Sound Mixing
    final_audio_data_np = generated_speech_np  # Start with (potentially speed-adjusted) speech
    final_sr = output_sr
    if sound_requests:  # Only one overall background sound is handled from the first [sound:] tag
        overall_sound_effect_to_mix = sound_requests[0]
        sound_name = overall_sound_effect_to_mix["name"]
        volume_factor = overall_sound_effect_to_mix["volume_factor"]
        # First existing <name>.wav / <name>.mp3 in SOUND_EFFECTS_DIR, else None
        bg_sound_file_path = next((p for fmt in SUPPORTED_SOUND_FORMATS_BG for p in [(SOUND_EFFECTS_DIR / (sound_name + fmt))] if p.exists()), None)
        if bg_sound_file_path:
            print(f"Mixing with background sound: {bg_sound_file_path}")
            try:
                # Convert current final_audio_data_np (which is speech) to AudioSegment
                speech_segment = AudioSegment(
                    data=final_audio_data_np.tobytes(),
                    sample_width=final_audio_data_np.dtype.itemsize,  # Should be 2 for int16
                    frame_rate=final_sr,
                    channels=1  # Piper output is mono
                )
                # Load background sound
                bg_segment = AudioSegment.from_file(bg_sound_file_path).set_channels(1).set_frame_rate(final_sr)
                # Adjust background sound volume: convert the 0-1 volume factor
                # to a dB gain (20*log10), with a -60 dB floor for near-zero.
                db_reduction = 0.0
                if volume_factor > 0.001 and volume_factor < 1.0:
                    db_reduction = 20 * np.log10(volume_factor)
                elif volume_factor <= 0.001:  # Make it very quiet if volume is near zero
                    db_reduction = -60  # Effectively very quiet
                bg_segment_adjusted = bg_segment + db_reduction + DEFAULT_BACKGROUND_VOLUME_ADJUST_DB
                # Ensure speech_segment is not shorter than bg_segment if only bg sound is effectively playing
                if len(speech_segment) < 100 and len(bg_segment_adjusted) > 0:  # If speech is very short (e.g. default silence)
                    # Make the "speech" part long enough to hear the background sound for a bit
                    target_duration_ms = max(len(speech_segment), min(len(bg_segment_adjusted), 5000))  # cap at 5s
                    speech_segment = AudioSegment.silent(duration=target_duration_ms, frame_rate=final_sr)
                # Overlay (loop background if shorter than speech)
                # Ensure bg_segment_adjusted is not zero length before division
                loop_times = int(len(speech_segment) / len(bg_segment_adjusted)) + 1 if len(bg_segment_adjusted) > 0 else 1
                mixed_segment = speech_segment.overlay(bg_segment_adjusted, loop=True, times=loop_times)
                final_audio_data_np = np.array(mixed_segment.get_array_of_samples()).astype(np.int16)
                final_sr = mixed_segment.frame_rate  # Should be same as output_sr
            except Exception as e:
                gr.Warning(f"Could not mix background sound '{sound_name}': {e}")
                print(f"Error mixing sound: {e}"); import traceback; traceback.print_exc()
                # Fallback to just the speech if mixing fails (already in final_audio_data_np)
        else:
            gr.Warning(f"Background sound '{sound_name}' not found in {SOUND_EFFECTS_DIR}. Using speech only.")
    # Ensure there's some audio, even if it's short silence, if all inputs were valid but produced no sound
    if len(final_audio_data_np) == 0:
        print("Warning: Final audio data is empty. Returning short silence.")
        final_audio_data_np = np.zeros(int(final_sr * 0.1), dtype=np.int16)
    return (final_sr, final_audio_data_np)
# --- Gradio UI ---
css = """
#col-container {max-width: 90%; margin-left: auto; margin-right: auto;}
.gr-prose {font-size: 100% !important;} .gr-prose h1 {font-size: 2.5em !important;}
.gr-prose h2 {font-size: 1.8em !important;} .gr-prose p {font-size: 1.1em !important; margin-bottom: 0.5em !important;}
.gr-prose ul {font-size: 1.0em !important; margin-left: 20px !important;} .gr-prose li { margin-bottom: 0.3em !important;}
"""
# Built-in demo text; overridden by ./example.txt below if that file exists.
default_text_piper = """Welcome to EverySpeech with the speedy Piper TTS!
S1=(This speaker tag will be removed) {excited} This characteristic tag will also be removed.
(laughs) This (laughs) tag will be read out by Piper if not stripped by a more aggressive parser.
You can add background sound too: [sound: city, volume: 0.3] I am speaking over city noise.
"""
example_txt_path = Path("./example.txt")  # You can create this if you want
if example_txt_path.exists():
    try:
        loaded_default_text = example_txt_path.read_text(encoding="utf-8").strip()
        if loaded_default_text: default_text_piper = loaded_default_text
    except Exception as e: print(f"Warning: Could not read example.txt: {e}")
# Markdown help text shown at the top of the UI (f-string: interpolates the
# default voice name).
how_to_use_text_piper = f"""
## How to Use EverySpeech (Piper TTS Edition):
**Fast CPU-based Text-to-Speech!**
- Write text in **Input Text**.
- `S1=(description)` and `{{characteristic}}` tags are currently **stripped** before sending to Piper TTS.
- Non-verbal tags like `(laughs)` or `(coughs)` will be **read as text** by Piper (e.g., it will say the word "laughs").
- **Background Music/Sound:** Use `[sound: name, volume: X]` (e.g., `[sound: rain, volume:0.3]`). The first such tag applies to the whole output. Sound files from `sounds` directory.
**Optional Gemini Text Enhancement:**
- Enable, then provide a prompt like "Make this text more formal:". Your original text is appended.
- Provide API key if not in Space secrets.
**Piper TTS Parameters:** Control voice speed/style. Current voice: `{DEFAULT_PIPER_VOICE_NAME}`
"""
# Prepend a prominent warning when TTS failed to initialize at import time.
if not PIPER_TTS_AVAILABLE:
    how_to_use_text_piper = "## WARNING: Piper TTS FAILED TO LOAD. Speech synthesis will not work. Check Space logs and `requirements.txt` for `piper-tts` and `onnxruntime`.\n\n" + how_to_use_text_piper
# Declarative UI layout: left column = text input + Gemini accordion,
# right column = audio output + run button, then Piper parameter sliders.
with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
    gr.Markdown("# EverySpeech by Anand (Piper TTS - Fast CPU)")
    gr.Markdown(how_to_use_text_piper)
    with gr.Row(equal_height=False):
        with gr.Column(scale=2):
            text_input_original_ui = gr.Textbox(
                label="Input Text",
                placeholder="Enter text here...", value=default_text_piper, lines=7,
            )
            # Gemini controls are hidden entirely when the SDK is missing.
            with gr.Accordion("✨ Optional: Gemini Text Enhancement", open=False):
                enable_gemini_ui = gr.Checkbox(label="Enable Gemini Text Enhancement", value=False,
                                               visible=GOOGLE_GEMINI_AVAILABLE,
                                               info="If checked, your input text will first be processed by Gemini.")
                gemini_enhancement_prompt_ui = gr.Textbox(label="Gemini Enhancement Prompt",
                                                          placeholder="e.g., Make this more descriptive:", lines=2,
                                                          info="Your 'Input Text' above will be appended to this prompt for Gemini.",
                                                          visible=GOOGLE_GEMINI_AVAILABLE)
                gemini_api_key_manual_ui = gr.Textbox(label="Google API Key (if not in secrets or startup config failed)", type="password",
                                                      placeholder="Paste Google AI API key if needed",
                                                      visible=GOOGLE_GEMINI_AVAILABLE,  # Show always if SDK there
                                                      info="Needed if GOOGLE_API_KEY secret isn't set or failed at startup.")
            # No audio_prompt_input for this basic Piper setup
        with gr.Column(scale=1):
            # type="numpy" matches run_inference's (sr, ndarray) return value.
            audio_output_ui = gr.Audio(label="Generated Audio", type="numpy", autoplay=False)
            run_button = gr.Button("Generate Audio", variant="primary", scale=1)
    with gr.Accordion("Piper TTS Generation Parameters", open=True, visible=PIPER_TTS_AVAILABLE):
        gr.Markdown(f"Using Piper voice: `{DEFAULT_PIPER_VOICE_NAME}`. Adjust parameters for style.")
        piper_length_scale_ui = gr.Slider(label="Length Scale (Prosody/Speed)", minimum=0.5, maximum=2.0, value=1.0, step=0.05, info="Smaller is faster. Affects prosody.")
        piper_noise_scale_ui = gr.Slider(label="Noise Scale (Variability)", minimum=0.0, maximum=1.5, value=0.667, step=0.05, info="Adds noise for expressiveness. Default: 0.667")
        piper_noise_w_ui = gr.Slider(label="Noise W (Phoneme Duration Noise)", minimum=0.0, maximum=1.5, value=0.8, step=0.05, info="Noise for phoneme pronunciation/duration. Default: 0.8")
        speed_factor_post_tts_ui = gr.Slider(label="Global Speed Factor (Post-TTS Resample)", minimum=0.5, maximum=2.0, value=1.0, step=0.05, info="Overall speed change after TTS generation by resampling.")
    # Order here must match run_inference's positional parameters exactly.
    gradio_inputs_piper = [
        text_input_original_ui,
        piper_length_scale_ui, piper_noise_scale_ui, piper_noise_w_ui,
        speed_factor_post_tts_ui,
        enable_gemini_ui, gemini_enhancement_prompt_ui, gemini_api_key_manual_ui
    ]
    run_button.click(
        fn=run_inference,
        inputs=gradio_inputs_piper,
        outputs=[audio_output_ui],
        api_name="generate_audio_every_speech_piper",
    )
    # Each example row supplies one value per component in gradio_inputs_piper.
    gr.Examples(
        examples=[
            ["Hello, this is Piper TTS. It should be very fast on CPU.", 1.0, 0.667, 0.8, 1.0, False, "", ""],
            ["S1=(this will be stripped) {excited} This will also be stripped. Piper will say this part. (laughs) And it will say laughs.", 1.0, 0.7, 0.9, 1.0, False, "", ""],
            ["This text will be made more formal by Gemini. [sound: city, volume:0.2]", 1.2, 0.6, 0.7, 0.9, True, "Make this text extremely formal and verbose:", ""],
            ["[sound: rain]", 1.0, 0.667, 0.8, 1.0, False, "", ""],  # Only sound
        ],
        inputs=gradio_inputs_piper,  # Ensure this matches the function inputs
        outputs=[audio_output_ui],
        fn=run_inference,
        cache_examples=False,  # "lazy" or False is safer with dynamic/file-based inputs
        label="Examples (Piper TTS - Click to Run)",
    )
# Entry point: report environment status (sound dir, Piper, Gemini) then
# launch the Gradio app. All heavy initialization already happened at import.
if __name__ == "__main__":
    print(f"Looking for sound effects in: {SOUND_EFFECTS_DIR.resolve()}")
    if not SOUND_EFFECTS_DIR.exists():
        try:
            SOUND_EFFECTS_DIR.mkdir(parents=True, exist_ok=True)
            print(f"Created sound effects directory: '{SOUND_EFFECTS_DIR}'. Please upload sound files there for [sound:] tags.")
        except Exception as e_mkdir:
            print(f"Error creating sound directory {SOUND_EFFECTS_DIR}: {e_mkdir}")
    else:
        print(f"Sound effects directory '{SOUND_EFFECTS_DIR}' found.")
    if not PIPER_TTS_AVAILABLE:
        print("\nCRITICAL WARNING: Piper TTS FAILED TO LOAD or voice files are missing. SPEECH SYNTHESIS WILL NOT WORK.")
        print("Ensure 'piper-tts' and 'onnxruntime' are in requirements.txt and that voice files can be downloaded/found.")
    # Gemini status summary, most severe condition first.
    if not GOOGLE_GEMINI_AVAILABLE:
        print("\nWARNING: 'google-generativeai' package not installed. Gemini features disabled.")
    elif not GEMINI_API_KEY and not gemini_configured_successfully :
        print("\nINFO: GOOGLE_API_KEY secret not found. Gemini features will require manual API key input in the UI if used.")
    elif not gemini_configured_successfully:
        print("\nWARNING: Gemini was not configured successfully at startup (e.g. API key invalid or model access issue). Check logs. Gemini features may not work as expected if used.")
    elif gemini_configured_successfully:
        print("\nINFO: Gemini configured with API key from secrets (or at least key was found).")
    demo.launch()