# app.py
# debug-friendly gradio space entrypoint for persian tts
# this script prints environment info, lists repo files, logs to /tmp/startup.log
# comments are english and start with lowercase

import os
import sys
import tempfile
import glob
import traceback
from typing import Optional, Tuple

# external libs
from hazm import Normalizer
from huggingface_hub import snapshot_download
from TTS.api import TTS
import gradio as gr

LOG_PATH = "/tmp/startup.log"


def log(msg: str, flush: bool = True) -> None:
    """write message to stdout and append to startup log file.

    logging must never take the app down, so failures to write the log
    file are deliberately swallowed (best-effort).
    """
    ts = f"[startup] {msg}"
    print(ts)
    try:
        with open(LOG_PATH, "a", encoding="utf-8") as f:
            f.write(ts + "\n")
    except OSError:
        # e.g. read-only filesystem; stdout still got the message
        pass
    if flush:
        try:
            sys.stdout.flush()
        except Exception:
            pass


def _launch_error_ui(
    title: str,
    details: Optional[str],
    status_text: str,
    unavailable_msg: str,
    placeholder: Optional[str] = None,
) -> None:
    """launch a minimal gradio ui that only reports a startup failure.

    the generate button is wired to a stub that returns no audio plus
    `unavailable_msg`, so the space stays up and inspectable instead of
    crashing.
    """
    with gr.Blocks() as demo:
        gr.Markdown(title)
        if details:
            gr.Markdown(details)
        txt = gr.Textbox(label="persian text", lines=4, placeholder=placeholder)
        btn = gr.Button("generate (disabled)")
        audio = gr.Audio(label="output audio", type="filepath")
        status = gr.Markdown(status_text)
        btn.click(lambda t: (None, unavailable_msg), inputs=txt, outputs=[audio, status])
    demo.launch()


# clear previous log from any earlier run (best-effort)
try:
    with open(LOG_PATH, "w"):
        pass
except OSError:
    pass

log("starting app - debug mode enabled")
log(f"python executable: {sys.executable}")
# sys.version contains a newline; collapse it so the log stays one line
log(f"python version: {sys.version.replace(chr(10), ' ')}")
log(f"cwd: {os.getcwd()}")
log("environment variables (selected):")
for k in ["HF_TOKEN", "HUGGINGFACE_HUB_TOKEN", "CUDA_VISIBLE_DEVICES", "PYTHONPATH"]:
    v = os.environ.get(k)
    # security fix: never print secret token values into stdout or the log file
    if v is not None and "TOKEN" in k:
        v = "<set>"
    log(f" {k}={v}")

# list repo root files (first-level) to help debugging missing files
try:
    root_files = os.listdir(".")
    log("files in repo root (first 100 entries):")
    for name in root_files[:100]:
        log(f" - {name}")
except Exception as e:
    log(f"error listing repo root: {e}")

# basic config (edit as needed)
HF_REPO_ID = "Kamtera/persian-tts-female-vits"
HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_HUB_TOKEN")
MAX_INPUT_LENGTH = 1200

normalizer = Normalizer()


def find_model_files(
    model_dir: str,
) -> Tuple[Optional[str], Optional[str], Optional[str], Optional[str]]:
    """try to discover model and config files under model_dir.

    returns (model_path, config_path, vocoder_path, vocoder_config_path);
    any entry is None when no candidate matched. the shallowest match
    (fewest path components, then lexicographic) wins per pattern list.

    NOTE(review): the generic "**/*.pth" pattern appears in both the model
    and the vocoder lists, so a repo with a single .pth may report it as
    both — preserved from the original behavior.
    """
    model_patterns = ["**/model.pth", "**/model.pt", "**/*.pth", "**/*.pt"]
    config_patterns = ["**/config.json", "**/model_config.json", "**/config*.json"]
    vocoder_patterns = ["**/vocoder.pth", "**/vocoder.pt", "**/hifi-gan*.pth", "**/*.pth"]
    vocoder_config_patterns = ["**/vocoder_config.json", "**/vocoder-config.json", "**/*vocoder*.json"]

    def glob_first(root: str, patterns) -> Optional[str]:
        # first pattern that matches anything decides; prefer shallow paths
        for pat in patterns:
            matches = glob.glob(os.path.join(root, pat), recursive=True)
            if matches:
                matches.sort(key=lambda p: (len(p.split(os.sep)), p))
                return matches[0]
        return None

    model_path = glob_first(model_dir, model_patterns)
    config_path = glob_first(model_dir, config_patterns)
    vocoder_path = glob_first(model_dir, vocoder_patterns)
    vocoder_config_path = glob_first(model_dir, vocoder_config_patterns)
    log("discovered model files:")
    log(f" model_path: {model_path}")
    log(f" config_path: {config_path}")
    log(f" vocoder_path: {vocoder_path}")
    log(f" vocoder_config_path: {vocoder_config_path}")
    return model_path, config_path, vocoder_path, vocoder_config_path


# main: attempt to download and initialize model, but catch and log everything
local_model_dir = None
try:
    log(f"attempting to snapshot_download repo: {HF_REPO_ID}")
    # fix: `token=` replaces the deprecated `use_auth_token=` kwarg
    local_model_dir = snapshot_download(repo_id=HF_REPO_ID, token=HF_TOKEN)
    log(f"snapshot_download returned: {local_model_dir}")
except Exception:
    log("snapshot_download raised an exception:")
    log(traceback.format_exc())
    local_model_dir = None

if local_model_dir is None:
    log("failed to download model repo. please ensure HF_TOKEN secret is set if repo is private.")
    # continue to start gradio with a minimal interface that reports the error
    _launch_error_ui(
        "## persian tts (debug) - model not loaded",
        None,
        "model repo not downloaded. check logs.",
        "model not available",
        placeholder="enter text...",
    )
    sys.exit(0)

# locate model files
try:
    model_path, config_path, vocoder_path, vocoder_config_path = find_model_files(local_model_dir)
except Exception:
    log("error during find_model_files:")
    log(traceback.format_exc())
    model_path = config_path = vocoder_path = vocoder_config_path = None

# if not found, print a short tree to aid debugging
if not model_path or not config_path:
    log("model checkpoint or config.json not found automatically - printing repo tree (top levels):")
    try:
        for root, dirs, files in os.walk(local_model_dir):
            rel = os.path.relpath(root, local_model_dir)
            log(f"dir: {rel} - files: {files[:20]}")
            # limit depth printed
            if len(rel.split(os.sep)) > 3:
                break
    except Exception:
        log("error while printing tree:")
        log(traceback.format_exc())
    log("cannot proceed to load tts. please inspect the repo structure and share the printed tree.")
    _launch_error_ui(
        "## persian tts (debug) - missing model files",
        "model checkpoint or config.json not found in the downloaded repo. see /tmp/startup.log for details.",
        "model files missing. check logs.",
        "model not available",
    )
    sys.exit(0)

# prepare tts kwargs and attempt load
tts_kwargs = {"model_path": model_path, "config_path": config_path, "gpu": False}
if vocoder_path:
    tts_kwargs["vocoder_path"] = vocoder_path
if vocoder_config_path:
    tts_kwargs["vocoder_config_path"] = vocoder_config_path

log("initializing TTS with kwargs:")
for k, v in tts_kwargs.items():
    log(f" {k}: {v}")

try:
    tts = TTS(**tts_kwargs)
    log("tts initialized successfully")
except Exception:
    log("tts initialization failed:")
    log(traceback.format_exc())
    _launch_error_ui(
        "## persian tts (debug) - tts init failed",
        "see /tmp/startup.log for stacktrace",
        "tts init failed. check logs.",
        "tts not available",
    )
    sys.exit(0)


def synthesize(text: str) -> Tuple[Optional[str], str]:
    """normalize persian text and synthesize it into a temporary wav file.

    returns (audio_path_or_none, status_message); the caller (gradio)
    serves the file at audio_path.
    """
    if not text or not text.strip():
        return None, "please enter some text."
    if len(text) > MAX_INPUT_LENGTH:
        # hard-truncate overly long input; trailing dot closes the sentence
        text = text[:MAX_INPUT_LENGTH] + "."
    text = normalizer.normalize(text)
    out_fd, out_path = tempfile.mkstemp(suffix=".wav")
    os.close(out_fd)
    try:
        tts.tts_to_file(text=text, file_path=out_path)
    except Exception as e:
        log("tts generation error:")
        log(traceback.format_exc())
        # fix: remove the orphaned temp file so failures don't leak disk space
        try:
            os.remove(out_path)
        except OSError:
            pass
        return None, f"error during synthesis: {e}"
    return out_path, "speech generated successfully."


# gradio ui (normal)
with gr.Blocks() as demo:
    gr.Markdown("## persian tts — debug-enabled")
    text_input = gr.Textbox(
        label="persian text (max ~1200 chars)",
        lines=6,
        placeholder="enter your persian text here...",
    )
    generate_btn = gr.Button("generate speech")
    audio_output = gr.Audio(label="output audio", type="filepath")
    status = gr.Markdown("")
    # synthesize already returns the (audio, status) pair gradio expects
    generate_btn.click(fn=synthesize, inputs=text_input, outputs=[audio_output, status])

log("launching gradio app now")
demo.launch()