# ptts / app.py
# (hugging face hub page residue kept as comments: "mkfallah's picture",
#  "Update app.py", commit "7791cdf verified")
# app.py
# debug-friendly gradio space entrypoint for persian tts
# this script prints environment info, lists repo files, logs to /tmp/startup.log
# comments are english and start with lowercase
import os
import sys
import tempfile
import glob
import traceback
from typing import Optional, Tuple
# external libs
from hazm import Normalizer
from huggingface_hub import snapshot_download
from TTS.api import TTS
import gradio as gr
# persistent startup log, used for post-mortem debugging of the space
LOG_PATH = "/tmp/startup.log"


def log(msg: str, flush: bool = True):
    """print a startup message and best-effort append it to LOG_PATH.

    logging must never crash the app, so file-write and flush errors
    are deliberately swallowed.
    """
    line = f"[startup] {msg}"
    print(line)
    try:
        with open(LOG_PATH, "a", encoding="utf-8") as handle:
            handle.write(line + "\n")
    except Exception:
        # log file may be unwritable; the stdout print above is enough
        pass
    if not flush:
        return
    try:
        sys.stdout.flush()
    except Exception:
        pass
# truncate any log left over from a previous boot (best effort)
try:
    open(LOG_PATH, "w").close()
except Exception:
    pass

log("starting app - debug mode enabled")
log(f"python executable: {sys.executable}")
log(f"python version: {sys.version.replace(chr(10), ' ')}")
log(f"cwd: {os.getcwd()}")

# dump the environment variables that matter for model download / gpu setup
log("environment variables (selected):")
for key in ("HF_TOKEN", "HUGGINGFACE_HUB_TOKEN", "CUDA_VISIBLE_DEVICES", "PYTHONPATH"):
    log(f" {key}={os.environ.get(key)}")

# list repo root files (first-level) to help debugging missing files
try:
    entries = os.listdir(".")
    log("files in repo root (first 100 entries):")
    for entry in entries[:100]:
        log(f" - {entry}")
except Exception as exc:
    log(f"error listing repo root: {exc}")

# basic config (edit as needed)
HF_REPO_ID = "Kamtera/persian-tts-female-vits"
HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_HUB_TOKEN")
MAX_INPUT_LENGTH = 1200
normalizer = Normalizer()
def find_model_files(model_dir: str) -> Tuple[Optional[str], Optional[str], Optional[str], Optional[str]]:
    """try to discover model and config files under model_dir.

    returns (model_path, config_path, vocoder_path, vocoder_config_path),
    each None when nothing matched. patterns are tried in priority order
    and, within a pattern, the shallowest (then lexicographically first)
    match wins.
    """

    def first_match(root: str, patterns) -> Optional[str]:
        # recursive glob per pattern; ties broken by path depth, then name
        for pattern in patterns:
            hits = glob.glob(os.path.join(root, pattern), recursive=True)
            if hits:
                return min(hits, key=lambda p: (len(p.split(os.sep)), p))
        return None

    model_path = first_match(model_dir, ["**/model.pth", "**/model.pt", "**/*.pth", "**/*.pt"])
    config_path = first_match(model_dir, ["**/config.json", "**/model_config.json", "**/config*.json"])
    # NOTE(review): the generic "**/*.pth" fallback means vocoder_path can
    # resolve to the same checkpoint as model_path when no dedicated vocoder
    # file exists — presumably intentional, confirm against TTS loader
    vocoder_path = first_match(model_dir, ["**/vocoder.pth", "**/vocoder.pt", "**/hifi-gan*.pth", "**/*.pth"])
    vocoder_config_path = first_match(model_dir, ["**/vocoder_config.json", "**/vocoder-config.json", "**/*vocoder*.json"])

    log("discovered model files:")
    log(f" model_path: {model_path}")
    log(f" config_path: {config_path}")
    log(f" vocoder_path: {vocoder_path}")
    log(f" vocoder_config_path: {vocoder_config_path}")
    return model_path, config_path, vocoder_path, vocoder_config_path
# main: attempt to download the model repo; catch and log every failure so
# the space still boots with a diagnostic ui instead of crashing
local_model_dir = None
try:
    log(f"attempting to snapshot_download repo: {HF_REPO_ID}")
    # "token" replaces the deprecated "use_auth_token" kwarg in huggingface_hub
    local_model_dir = snapshot_download(repo_id=HF_REPO_ID, token=HF_TOKEN)
    log(f"snapshot_download returned: {local_model_dir}")
except Exception:
    log("snapshot_download raised an exception:")
    log(traceback.format_exc())
    local_model_dir = None

if local_model_dir is None:
    log("failed to download model repo. please ensure HF_TOKEN secret is set if repo is private.")

    # continue to start gradio with a minimal interface that returns the error message
    def synthesize_error(text: str):
        # always reports failure; keeps the (audio, status) output contract
        return None, "model repo not available - check space logs and HF_TOKEN"

    with gr.Blocks() as demo:
        gr.Markdown("## persian tts (debug) - model not loaded")
        txt = gr.Textbox(label="persian text", lines=4, placeholder="enter text...")
        btn = gr.Button("generate (disabled)")
        audio = gr.Audio(label="output audio", type="filepath")
        status = gr.Markdown("model repo not downloaded. check logs.")
        # wire the named handler defined above; previously an inline lambda
        # was used and synthesize_error was dead code
        btn.click(synthesize_error, inputs=txt, outputs=[audio, status])
    demo.launch()
    sys.exit(0)
# locate model checkpoint/config inside the downloaded snapshot
try:
    model_path, config_path, vocoder_path, vocoder_config_path = find_model_files(local_model_dir)
except Exception:
    log("error during find_model_files:")
    log(traceback.format_exc())
    model_path = config_path = vocoder_path = vocoder_config_path = None

# if not found, print a short tree to aid debugging
if not model_path or not config_path:
    log("model checkpoint or config.json not found automatically - printing repo tree (top levels):")
    try:
        for root, dirs, files in os.walk(local_model_dir):
            rel = os.path.relpath(root, local_model_dir)
            log(f"dir: {rel} - files: {files[:20]}")
            # limit printed depth by pruning subdirectories in-place; the old
            # "break" aborted the entire walk at the first deep directory,
            # hiding every remaining sibling branch
            if len(rel.split(os.sep)) > 3:
                dirs[:] = []
    except Exception:
        log("error while printing tree:")
        log(traceback.format_exc())
    log("cannot proceed to load tts. please inspect the repo structure and share the printed tree.")

    # start a minimal ui showing the problem
    with gr.Blocks() as demo:
        gr.Markdown("## persian tts (debug) - missing model files")
        gr.Markdown("model checkpoint or config.json not found in the downloaded repo. see /tmp/startup.log for details.")
        txt = gr.Textbox(label="persian text", lines=4)
        btn = gr.Button("generate (disabled)")
        audio = gr.Audio(label="output audio", type="filepath")
        status = gr.Markdown("model files missing. check logs.")
        btn.click(lambda t: (None, "model not available"), inputs=txt, outputs=[audio, status])
    demo.launch()
    sys.exit(0)
# prepare tts kwargs and attempt load (cpu only: gpu=False)
tts_kwargs = {"model_path": model_path, "config_path": config_path, "gpu": False}
if vocoder_path:
    tts_kwargs["vocoder_path"] = vocoder_path
if vocoder_config_path:
    tts_kwargs["vocoder_config_path"] = vocoder_config_path

log("initializing TTS with kwargs:")
for k, v in tts_kwargs.items():
    log(f" {k}: {v}")

try:
    tts = TTS(**tts_kwargs)
    log("tts initialized successfully")
except Exception:
    # unused "as e" removed; the full stacktrace is logged instead
    log("tts initialization failed:")
    log(traceback.format_exc())
    # start a minimal ui showing the init error
    with gr.Blocks() as demo:
        gr.Markdown("## persian tts (debug) - tts init failed")
        gr.Markdown("see /tmp/startup.log for stacktrace")
        txt = gr.Textbox(label="persian text", lines=4)
        btn = gr.Button("generate (disabled)")
        audio = gr.Audio(label="output audio", type="filepath")
        status = gr.Markdown("tts init failed. check logs.")
        btn.click(lambda t: (None, "tts not available"), inputs=txt, outputs=[audio, status])
    demo.launch()
    sys.exit(0)
# normal synth function
def synthesize(text: str):
    """normalize persian text and render it to a temporary wav file.

    returns (wav_path, status_message); wav_path is None on failure.
    """
    if not text or not text.strip():
        return None, "please enter some text."
    if len(text) > MAX_INPUT_LENGTH:
        # hard-truncate overly long input; the trailing dot closes the sentence
        text = text[:MAX_INPUT_LENGTH] + "."
    text = normalizer.normalize(text)
    out_fd, out_path = tempfile.mkstemp(suffix=".wav")
    os.close(out_fd)
    try:
        tts.tts_to_file(text=text, file_path=out_path)
    except Exception as e:
        log("tts generation error:")
        log(traceback.format_exc())
        # remove the (empty) temp file so failed requests do not leak files
        try:
            os.remove(out_path)
        except OSError:
            pass
        return None, f"error during synthesis: {e}"
    return out_path, "speech generated successfully."
# gradio ui (normal)
with gr.Blocks() as demo:
    gr.Markdown("## persian tts — debug-enabled")
    text_input = gr.Textbox(label="persian text (max ~1200 chars)", lines=6, placeholder="enter your persian text here...")
    generate_btn = gr.Button("generate speech")
    audio_output = gr.Audio(label="output audio", type="filepath")
    status = gr.Markdown("")
    # synthesize already has the (text) -> (audio_path, message) signature,
    # so it is wired directly; the former run_tts wrapper added nothing
    generate_btn.click(fn=synthesize, inputs=text_input, outputs=[audio_output, status])

log("launching gradio app now")
demo.launch()