Spaces:

mkfallah
/

ptts

Sleeping

App Files Files Community

mkfallah committed on Oct 6, 2025

Commit

7791cdf

verified ·

1 Parent(s): 580b4a2

Update app.py

Browse files

Files changed (1) hide show

app.py +173 -38

app.py CHANGED Viewed

@@ -1,47 +1,188 @@
 # app.py
-# simple gradio space for Persian TTS using kamtera/persian-tts-female-vits (coqui tts)
-# loads model by first downloading the HuggingFace repo to a local folder,
-# then passes the local path to TTS to avoid Coqui's "model_name parsing" error.
 import os
 import tempfile
 from hazm import Normalizer
 from TTS.api import TTS
 import gradio as gr
-# add huggingface_hub to requirements and import here
-from huggingface_hub import snapshot_download
-# -------------------------
-# configuration
-HF_REPO_ID = "Kamtera/persian-tts-female-vits"  # huggingface repo id
-HF_TOKEN = os.environ.get("HF_TOKEN", None)     # optional token for private models
-MAX_INPUT_LENGTH = 1200                          # safety limit for long text
-# -------------------------
 normalizer = Normalizer()
-# download the HuggingFace repo to a local folder (cached by HF Hub)
-print("downloading model repo from huggingface:", HF_REPO_ID)
 try:
     local_model_dir = snapshot_download(repo_id=HF_REPO_ID, use_auth_token=HF_TOKEN)
-    print("model downloaded to:", local_model_dir)
 except Exception as e:
-    print("error while downloading model repo:", e)
     local_model_dir = None
 if local_model_dir is None:
-    raise RuntimeError("failed to download model repo. set HF_TOKEN if repo is private or check repo id.")
-# now load model from local dir (coqui expects either a coqui id or a local path)
-print("loading tts model from local folder:", local_model_dir)
-tts = TTS(model_name=local_model_dir, progress_bar=False, gpu=False)
 def synthesize(text: str):
-    """
-    text: Persian text input
-    returns: tuple(output_path_or_none, status_message)
-    """
     if not text or not text.strip():
         return None, "please enter some text."
@@ -49,35 +190,29 @@ def synthesize(text: str):
         text = text[:MAX_INPUT_LENGTH] + "."
     text = normalizer.normalize(text)
     out_fd, out_path = tempfile.mkstemp(suffix=".wav")
     os.close(out_fd)
     try:
         tts.tts_to_file(text=text, file_path=out_path)
     except Exception as e:
-        print("tts generation error:", e)
-        return None, f"error: {e}"
     return out_path, "speech generated successfully."
-# gradio ui
-with gr.Blocks(css=".gradio-container {background-color: #fafafa}") as demo:
-    gr.Markdown("## persian tts — kamtera / persian-tts-female-vits")
-    text_input = gr.Textbox(
-        label="persian text (max ~1200 chars)",
-        lines=6,
-        placeholder="enter your Persian text here..."
-    )
     generate_btn = gr.Button("generate speech")
     audio_output = gr.Audio(label="output audio", type="filepath")
     status = gr.Markdown("")
     def run_tts(text):
         audio_path, msg = synthesize(text)
         return audio_path, msg
     generate_btn.click(fn=run_tts, inputs=text_input, outputs=[audio_output, status])
-if __name__ == "__main__":
-    demo.launch()

 # app.py
+# debug-friendly gradio space entrypoint for persian tts
+# this script prints environment info, lists repo files, logs to /tmp/startup.log
+# comments are english and start with lowercase
 import os
+import sys
 import tempfile
+import glob
+import traceback
+from typing import Optional, Tuple
+# external libs
 from hazm import Normalizer
+from huggingface_hub import snapshot_download
 from TTS.api import TTS
 import gradio as gr
+LOG_PATH = "/tmp/startup.log"
+def log(msg: str, flush: bool = True):
+    """write message to stdout and append to startup log file"""
+    ts = f"[startup] {msg}"
+    print(ts)
+    try:
+        with open(LOG_PATH, "a", encoding="utf-8") as f:
+            f.write(ts + "\n")
+    except Exception:
+        pass
+    if flush:
+        try:
+            sys.stdout.flush()
+        except Exception:
+            pass
+# clear previous log
+try:
+    open(LOG_PATH, "w").close()
+except Exception:
+    pass
+log("starting app - debug mode enabled")
+log(f"python executable: {sys.executable}")
+log(f"python version: {sys.version.replace(chr(10), ' ')}")
+log(f"cwd: {os.getcwd()}")
+log("environment variables (selected):")
+for k in ["HF_TOKEN", "HUGGINGFACE_HUB_TOKEN", "CUDA_VISIBLE_DEVICES", "PYTHONPATH"]:
+    log(f"  {k}={os.environ.get(k)}")
+# list repo root files (first-level) to help debugging missing files
+try:
+    root_files = os.listdir(".")
+    log("files in repo root (first 100 entries):")
+    for i, name in enumerate(root_files[:100]):
+        log(f"  - {name}")
+except Exception as e:
+    log(f"error listing repo root: {e}")
+# basic config (edit as needed)
+HF_REPO_ID = "Kamtera/persian-tts-female-vits"
+HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_HUB_TOKEN")
+MAX_INPUT_LENGTH = 1200
 normalizer = Normalizer()
+def find_model_files(model_dir: str) -> Tuple[Optional[str], Optional[str], Optional[str], Optional[str]]:
+    """try to discover model and config files under model_dir"""
+    model_patterns = ["**/model.pth", "**/model.pt", "**/*.pth", "**/*.pt"]
+    config_patterns = ["**/config.json", "**/model_config.json", "**/config*.json"]
+    vocoder_patterns = ["**/vocoder.pth", "**/vocoder.pt", "**/hifi-gan*.pth", "**/*.pth"]
+    vocoder_config_patterns = ["**/vocoder_config.json", "**/vocoder-config.json", "**/*vocoder*.json"]
+    def glob_first(root, patterns):
+        for pat in patterns:
+            matches = glob.glob(os.path.join(root, pat), recursive=True)
+            if matches:
+                matches.sort(key=lambda p: (len(p.split(os.sep)), p))
+                return matches[0]
+        return None
+    model_path = glob_first(model_dir, model_patterns)
+    config_path = glob_first(model_dir, config_patterns)
+    vocoder_path = glob_first(model_dir, vocoder_patterns)
+    vocoder_config_path = glob_first(model_dir, vocoder_config_patterns)
+    log("discovered model files:")
+    log(f"  model_path: {model_path}")
+    log(f"  config_path: {config_path}")
+    log(f"  vocoder_path: {vocoder_path}")
+    log(f"  vocoder_config_path: {vocoder_config_path}")
+    return model_path, config_path, vocoder_path, vocoder_config_path
+# main: attempt to download and initialize model, but catch and log everything
+# local_model_dir stays None when the download fails; the branch below then
+# serves a placeholder ui instead of the real app
+local_model_dir = None
 try:
+    log(f"attempting to snapshot_download repo: {HF_REPO_ID}")
+    # NOTE(review): newer huggingface_hub releases name this argument `token`;
+    # confirm which spelling the installed version accepts
     local_model_dir = snapshot_download(repo_id=HF_REPO_ID, use_auth_token=HF_TOKEN)
+    log(f"snapshot_download returned: {local_model_dir}")
 except Exception as e:
+    log("snapshot_download raised an exception:")
+    # the full stack trace goes to stdout and the startup log
+    log(traceback.format_exc())
     local_model_dir = None
 if local_model_dir is None:
+    log("failed to download model repo. please ensure HF_TOKEN secret is set if repo is private.")
+    # continue to start gradio with a minimal interface that returns the error message
+    # NOTE(review): synthesize_error is defined here but the button below is
+    # wired to an inline lambda with a different message - confirm which is intended
+    def synthesize_error(text: str):
+        return None, "model repo not available - check space logs and HF_TOKEN"
+    with gr.Blocks() as demo:
+        gr.Markdown("## persian tts (debug) - model not loaded")
+        txt = gr.Textbox(label="persian text", lines=4, placeholder="enter text...")
+        btn = gr.Button("generate (disabled)")
+        audio = gr.Audio(label="output audio", type="filepath")
+        status = gr.Markdown("model repo not downloaded. check logs.")
+        # every click clears the audio output and shows a fixed status message
+        btn.click(lambda t: (None, "model not available"), inputs=txt, outputs=[audio, status])
+    # presumably launch() blocks while the ui is served - TODO confirm;
+    # once it returns the script exits so the normal startup path never runs
+    demo.launch()
+    sys.exit(0)
+# locate model files
+try:
+    model_path, config_path, vocoder_path, vocoder_config_path = find_model_files(local_model_dir)
+except Exception:
+    log("error during find_model_files:")
+    log(traceback.format_exc())
+    model_path = config_path = vocoder_path = vocoder_config_path = None
+# if not found, print a short tree to aid debugging
+if not model_path or not config_path:
+    log("model checkpoint or config.json not found automatically - printing repo tree (top levels):")
+    try:
+        for root, dirs, files in os.walk(local_model_dir):
+            rel = os.path.relpath(root, local_model_dir)
+            log(f"dir: {rel} - files: {files[:20]}")
+            # limit depth printed
+            if len(rel.split(os.sep)) > 3:
+                break
+    except Exception:
+        log("error while printing tree:")
+        log(traceback.format_exc())
+    log("cannot proceed to load tts. please inspect the repo structure and share the printed tree.")
+    # start a minimal ui showing the problem
+    with gr.Blocks() as demo:
+        gr.Markdown("## persian tts (debug) - missing model files")
+        gr.Markdown("model checkpoint or config.json not found in the downloaded repo. see /tmp/startup.log for details.")
+        txt = gr.Textbox(label="persian text", lines=4)
+        btn = gr.Button("generate (disabled)")
+        audio = gr.Audio(label="output audio", type="filepath")
+        status = gr.Markdown("model files missing. check logs.")
+        btn.click(lambda t: (None, "model not available"), inputs=txt, outputs=[audio, status])
+    demo.launch()
+    sys.exit(0)
+# prepare tts kwargs and attempt load
+tts_kwargs = {"model_path": model_path, "config_path": config_path, "gpu": False}
+if vocoder_path:
+    tts_kwargs["vocoder_path"] = vocoder_path
+if vocoder_config_path:
+    tts_kwargs["vocoder_config_path"] = vocoder_config_path
+log("initializing TTS with kwargs:")
+for k, v in tts_kwargs.items():
+    log(f"  {k}: {v}")
+try:
+    tts = TTS(**tts_kwargs)
+    log("tts initialized successfully")
+except Exception as e:
+    log("tts initialization failed:")
+    log(traceback.format_exc())
+    # start a minimal ui showing the init error
+    with gr.Blocks() as demo:
+        gr.Markdown("## persian tts (debug) - tts init failed")
+        gr.Markdown("see /tmp/startup.log for stacktrace")
+        txt = gr.Textbox(label="persian text", lines=4)
+        btn = gr.Button("generate (disabled)")
+        audio = gr.Audio(label="output audio", type="filepath")
+        status = gr.Markdown("tts init failed. check logs.")
+        btn.click(lambda t: (None, "tts not available"), inputs=txt, outputs=[audio, status])
+    demo.launch()
+    sys.exit(0)
+# normal synth function
 def synthesize(text: str):
     if not text or not text.strip():
         return None, "please enter some text."
         text = text[:MAX_INPUT_LENGTH] + "."
     text = normalizer.normalize(text)
     out_fd, out_path = tempfile.mkstemp(suffix=".wav")
     os.close(out_fd)
     try:
         tts.tts_to_file(text=text, file_path=out_path)
     except Exception as e:
+        log("tts generation error:")
+        log(traceback.format_exc())
+        return None, f"error during synthesis: {e}"
     return out_path, "speech generated successfully."
+# gradio ui (normal)
+# one text box in, one audio player and one status line out
+with gr.Blocks() as demo:
+    gr.Markdown("## persian tts — debug-enabled")
+    text_input = gr.Textbox(label="persian text (max ~1200 chars)", lines=6, placeholder="enter your persian text here...")
     generate_btn = gr.Button("generate speech")
     audio_output = gr.Audio(label="output audio", type="filepath")
     status = gr.Markdown("")
+    # thin wrapper: forwards the text to synthesize and returns its
+    # (audio_path, message) pair unchanged
     def run_tts(text):
         audio_path, msg = synthesize(text)
         return audio_path, msg
+    # the two returned values fill audio_output and status, in that order
     generate_btn.click(fn=run_tts, inputs=text_input, outputs=[audio_output, status])
+log("launching gradio app now")
+demo.launch()