Spaces:

mkfallah
/

ptts

Sleeping

File size: 8,275 Bytes

# app.py
# debug-friendly gradio space entrypoint for persian tts
# this script prints environment info, lists repo files, logs to /tmp/startup.log
# comments are english and start with lowercase

import os
import sys
import tempfile
import glob
import traceback
from typing import Optional, Tuple

# external libs
from hazm import Normalizer
from huggingface_hub import snapshot_download
from TTS.api import TTS
import gradio as gr

# file that mirrors every startup message printed by log()
LOG_PATH = "/tmp/startup.log"

def log(msg: str, flush: bool = True):
    """print msg with a "[startup] " prefix and append the same line to LOG_PATH.

    args:
        msg: text to report.
        flush: when true, stdout is flushed after the message is emitted.

    errors raised while writing the log file or flushing stdout are ignored
    on purpose, so logging itself can never abort startup.
    """
    line = "[startup] {}".format(msg)
    print(line)

    # best-effort mirror into the log file
    try:
        with open(LOG_PATH, mode="a", encoding="utf-8") as handle:
            handle.write(f"{line}\n")
    except Exception:
        pass

    if not flush:
        return

    # best-effort flush so the line shows up in the container logs right away
    try:
        sys.stdout.flush()
    except Exception:
        pass

# clear previous log so every start begins with an empty file (best-effort)
try:
    open(LOG_PATH, "w").close()
except Exception:
    pass

log("starting app - debug mode enabled")
log(f"python executable: {sys.executable}")
log(f"python version: {sys.version.replace(chr(10), ' ')}")
log(f"cwd: {os.getcwd()}")
log("environment variables (selected):")
for k in ["HF_TOKEN", "HUGGINGFACE_HUB_TOKEN", "CUDA_VISIBLE_DEVICES", "PYTHONPATH"]:
    value = os.environ.get(k)
    # never write secret values to stdout or the log file; a non-empty token
    # is reported as redacted so it is still visible whether it is set
    if value and "TOKEN" in k:
        value = "[redacted]"
    log(f"  {k}={value}")

# list the first-level entries of the current working directory to help
# debugging missing files
try:
    root_files = os.listdir(".")
    log("files in repo root (first 100 entries):")
    for name in root_files[:100]:
        log(f"  - {name}")
except Exception as e:
    log(f"error listing repo root: {e}")

# basic config (edit as needed)
HF_REPO_ID = "Kamtera/persian-tts-female-vits"
# either environment variable may carry the hub token; HF_TOKEN wins when both are set
HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_HUB_TOKEN")
# longer inputs are truncated to this many characters before synthesis
MAX_INPUT_LENGTH = 1200

normalizer = Normalizer()

def find_model_files(model_dir: str) -> Tuple[Optional[str], Optional[str], Optional[str], Optional[str]]:
    """discover the tts checkpoint, its config and optional vocoder files under model_dir.

    every kind of file is searched recursively with an ordered list of glob
    patterns. the first pattern that yields a candidate wins; among its
    candidates the shallowest path is chosen, ties broken alphabetically.

    vocoder files are resolved first and only by vocoder-specific names. the
    chosen vocoder paths are then excluded from the generic model/config
    fallbacks, so one file is never reported both as model and as vocoder.

    args:
        model_dir: directory to search.

    returns:
        (model_path, config_path, vocoder_path, vocoder_config_path); an
        entry is None when nothing matched.
    """
    model_patterns = ["**/model.pth", "**/model.pt", "**/*.pth", "**/*.pt"]
    config_patterns = ["**/config.json", "**/model_config.json", "**/config*.json"]
    # no catch-all "**/*.pth" here: it would select the main checkpoint as the vocoder
    vocoder_patterns = ["**/vocoder.pth", "**/vocoder.pt", "**/hifi-gan*.pth"]
    vocoder_config_patterns = ["**/vocoder_config.json", "**/vocoder-config.json", "**/*vocoder*.json"]

    def glob_first(root, patterns, exclude=()):
        """return the best match of the first productive pattern, skipping paths in exclude."""
        for pat in patterns:
            matches = [
                p
                for p in glob.glob(os.path.join(root, pat), recursive=True)
                if p not in exclude
            ]
            if matches:
                # shallowest path first, then alphabetical order
                return min(matches, key=lambda p: (len(p.split(os.sep)), p))
        return None

    vocoder_path = glob_first(model_dir, vocoder_patterns)
    vocoder_config_path = glob_first(model_dir, vocoder_config_patterns)
    # keep files already identified as vocoder parts out of the generic fallbacks
    model_path = glob_first(model_dir, model_patterns, exclude=(vocoder_path,))
    config_path = glob_first(model_dir, config_patterns, exclude=(vocoder_config_path,))

    log("discovered model files:")
    log(f"  model_path: {model_path}")
    log(f"  config_path: {config_path}")
    log(f"  vocoder_path: {vocoder_path}")
    log(f"  vocoder_config_path: {vocoder_config_path}")

    return model_path, config_path, vocoder_path, vocoder_config_path

# main: attempt to download and initialize model, but catch and log everything
local_model_dir = None
try:
    log(f"attempting to snapshot_download repo: {HF_REPO_ID}")
    # NOTE(review): newer huggingface_hub releases appear to name this argument
    # `token` - confirm against the version installed in the space
    local_model_dir = snapshot_download(repo_id=HF_REPO_ID, use_auth_token=HF_TOKEN)
    log(f"snapshot_download returned: {local_model_dir}")
except Exception:
    log("snapshot_download raised an exception:")
    log(traceback.format_exc())
    local_model_dir = None

if local_model_dir is None:
    log("failed to download model repo. please ensure HF_TOKEN secret is set if repo is private.")

    # continue to start gradio with a minimal interface that returns the error message
    def synthesize_error(text: str):
        """click handler of the fallback ui: no audio, plus the reason shown as status."""
        return None, "model repo not available - check space logs and HF_TOKEN"

    with gr.Blocks() as demo:
        gr.Markdown("## persian tts (debug) - model not loaded")
        txt = gr.Textbox(label="persian text", lines=4, placeholder="enter text...")
        btn = gr.Button("generate (disabled)")
        audio = gr.Audio(label="output audio", type="filepath")
        status = gr.Markdown("model repo not downloaded. check logs.")
        # use the dedicated handler so the user sees the actionable message
        btn.click(synthesize_error, inputs=txt, outputs=[audio, status])
    demo.launch()
    # exit once the fallback ui has been served so the normal flow below never runs
    sys.exit(0)

# locate model files
try:
    model_path, config_path, vocoder_path, vocoder_config_path = find_model_files(local_model_dir)
except Exception:
    log("error during find_model_files:")
    log(traceback.format_exc())
    model_path = config_path = vocoder_path = vocoder_config_path = None

# if not found, print a short tree to aid debugging
if not model_path or not config_path:
    log("model checkpoint or config.json not found automatically - printing repo tree (top levels):")
    try:
        for root, dirs, files in os.walk(local_model_dir):
            rel = os.path.relpath(root, local_model_dir)
            log(f"dir: {rel} - files: {files[:20]}")
            # limit depth printed: prune in place so os.walk does not descend
            # below this directory, while sibling directories are still
            # visited (a break here would stop the whole walk)
            if len(rel.split(os.sep)) > 3:
                dirs[:] = []
    except Exception:
        log("error while printing tree:")
        log(traceback.format_exc())

    log("cannot proceed to load tts. please inspect the repo structure and share the printed tree.")
    # start a minimal ui showing the problem
    with gr.Blocks() as demo:
        gr.Markdown("## persian tts (debug) - missing model files")
        gr.Markdown("model checkpoint or config.json not found in the downloaded repo. see /tmp/startup.log for details.")
        txt = gr.Textbox(label="persian text", lines=4)
        btn = gr.Button("generate (disabled)")
        audio = gr.Audio(label="output audio", type="filepath")
        status = gr.Markdown("model files missing. check logs.")
        btn.click(lambda t: (None, "model not available"), inputs=txt, outputs=[audio, status])
    demo.launch()
    # exit once the fallback ui has been served so tts loading below never runs
    sys.exit(0)

# assemble the TTS constructor arguments; vocoder entries are added only when
# the corresponding file was discovered
tts_kwargs = {"model_path": model_path, "config_path": config_path, "gpu": False}
for option, located in (
    ("vocoder_path", vocoder_path),
    ("vocoder_config_path", vocoder_config_path),
):
    if located:
        tts_kwargs[option] = located

log("initializing TTS with kwargs:")
for option, setting in tts_kwargs.items():
    log(f"  {option}: {setting}")

try:
    tts = TTS(**tts_kwargs)
    log("tts initialized successfully")
except Exception:
    log("tts initialization failed:")
    log(traceback.format_exc())
    # serve a minimal ui that reports the init error instead of the real app
    with gr.Blocks() as demo:
        gr.Markdown("## persian tts (debug) - tts init failed")
        gr.Markdown("see /tmp/startup.log for stacktrace")
        fallback_text = gr.Textbox(label="persian text", lines=4)
        fallback_btn = gr.Button("generate (disabled)")
        fallback_audio = gr.Audio(label="output audio", type="filepath")
        fallback_status = gr.Markdown("tts init failed. check logs.")
        fallback_btn.click(
            lambda _text: (None, "tts not available"),
            inputs=fallback_text,
            outputs=[fallback_audio, fallback_status],
        )
    demo.launch()
    sys.exit(0)

# normal synth function
def synthesize(text: str):
    """synthesize speech for text into a temporary wav file.

    args:
        text: raw user input. input longer than MAX_INPUT_LENGTH characters
            is cut to that length and terminated with "." before it is
            normalized.

    returns:
        (audio_path, status_message). audio_path is None when the input is
        blank or when synthesis fails; otherwise it is the path of the
        generated wav file.
    """
    if not text or not text.strip():
        return None, "please enter some text."

    if len(text) > MAX_INPUT_LENGTH:
        text = text[:MAX_INPUT_LENGTH] + "."

    text = normalizer.normalize(text)
    # reserve a unique output path; the descriptor is closed right away
    # because only the path is handed to the tts engine
    out_fd, out_path = tempfile.mkstemp(suffix=".wav")
    os.close(out_fd)

    try:
        tts.tts_to_file(text=text, file_path=out_path)
    except Exception as e:
        log("tts generation error:")
        log(traceback.format_exc())
        # the reserved file is useless after a failure; remove it so failed
        # requests do not pile up temp files
        try:
            os.remove(out_path)
        except OSError:
            pass
        return None, f"error during synthesis: {e}"

    return out_path, "speech generated successfully."

# gradio ui (normal)
def run_tts(text):
    """click handler: pass the textbox content to synthesize and return its (audio path, status) pair."""
    result_path, result_msg = synthesize(text)
    return result_path, result_msg


with gr.Blocks() as demo:
    gr.Markdown("## persian tts — debug-enabled")
    text_box = gr.Textbox(
        label="persian text (max ~1200 chars)",
        lines=6,
        placeholder="enter your persian text here...",
    )
    speak_btn = gr.Button("generate speech")
    wav_out = gr.Audio(label="output audio", type="filepath")
    status_md = gr.Markdown("")
    # the handler's two return values fill the audio player and the status line
    speak_btn.click(fn=run_tts, inputs=text_box, outputs=[wav_out, status_md])

log("launching gradio app now")
demo.launch()