File size: 8,275 Bytes
b2b1119 7791cdf b2b1119 7791cdf b2b1119 7791cdf b2b1119 7791cdf b2b1119 7791cdf 58dac37 7791cdf b2b1119 7791cdf 58dac37 7791cdf 58dac37 7791cdf 58dac37 7791cdf 58dac37 7791cdf 58dac37 7791cdf b2b1119 7791cdf b2b1119 7791cdf b2b1119 7791cdf b2b1119 7791cdf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 |
# app.py
# debug-friendly gradio space entrypoint for persian tts
# this script prints environment info, lists repo files, logs to /tmp/startup.log
# comments are english and start with lowercase
import os
import sys
import tempfile
import glob
import traceback
from typing import Optional, Tuple
# external libs
from hazm import Normalizer
from huggingface_hub import snapshot_download
from TTS.api import TTS
import gradio as gr
LOG_PATH = "/tmp/startup.log"


def log(msg: str, flush: bool = True):
    """write message to stdout and append to startup log file

    the message is prefixed with "[startup] " before it is emitted. logging is
    best-effort: a failure to write the log file or to flush stdout is ignored
    so that diagnostics never take the app down.

    args:
        msg: text to emit.
        flush: when true, flush stdout after printing.
    """
    ts = f"[startup] {msg}"
    print(ts)
    try:
        with open(LOG_PATH, "a", encoding="utf-8") as f:
            f.write(ts + "\n")
    except (OSError, UnicodeError):
        # log file not writable or text not encodable: stdout already has the message
        pass
    if flush:
        try:
            sys.stdout.flush()
        except (AttributeError, OSError, ValueError):
            # stdout can be None, closed, or a broken pipe
            pass
# clear previous log so every start begins with an empty file (best-effort)
try:
    with open(LOG_PATH, "w"):
        pass
except OSError:
    pass

log("starting app - debug mode enabled")
log(f"python executable: {sys.executable}")
log(f"python version: {sys.version.replace(chr(10), ' ')}")
log(f"cwd: {os.getcwd()}")

# token values must never reach stdout or the log file, so for these variables
# only the fact that they are set is reported
_SECRET_ENV_VARS = ("HF_TOKEN", "HUGGINGFACE_HUB_TOKEN")
log("environment variables (selected):")
for k in ["HF_TOKEN", "HUGGINGFACE_HUB_TOKEN", "CUDA_VISIBLE_DEVICES", "PYTHONPATH"]:
    v = os.environ.get(k)
    if k in _SECRET_ENV_VARS and v:
        v = "<set, value hidden>"
    log(f" {k}={v}")

# list repo root files (first-level) to help debugging missing files
try:
    root_files = os.listdir(".")
    log("files in repo root (first 100 entries):")
    for name in root_files[:100]:
        log(f" - {name}")
except OSError as e:
    log(f"error listing repo root: {e}")
# basic config (edit as needed)
# hugging face repo that is downloaded at startup and searched for model files
HF_REPO_ID = "Kamtera/persian-tts-female-vits"
# access token for the download; HF_TOKEN is read first, HUGGINGFACE_HUB_TOKEN is the fallback
HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_HUB_TOKEN")
# longer inputs are cut to this many characters before synthesis
MAX_INPUT_LENGTH = 1200
# hazm normalizer applied to the input text before it is synthesized
normalizer = Normalizer()
def find_model_files(model_dir: str) -> Tuple[Optional[str], Optional[str], Optional[str], Optional[str]]:
    """try to discover model and config files under model_dir

    every kind of file is looked up with an ordered list of glob patterns, from
    most to least specific. the first pattern that matches anything wins, and
    among its matches the shallowest path (then alphabetical order) is chosen.

    one file is never reported for two roles: the generic "*.pth" patterns used
    to hand the main checkpoint back as the vocoder as well (and could pick a
    vocoder file as the model), which makes the later tts load fail.

    args:
        model_dir: root directory that is searched recursively.

    returns:
        (model_path, config_path, vocoder_path, vocoder_config_path); an entry
        is None when nothing suitable was found.
    """
    model_patterns = ["**/model.pth", "**/model.pt", "**/*.pth", "**/*.pt"]
    config_patterns = ["**/config.json", "**/model_config.json", "**/config*.json"]
    # patterns that clearly name a vocoder checkpoint
    vocoder_patterns = ["**/vocoder.pth", "**/vocoder.pt", "**/hifi-gan*.pth"]
    # catch-all, only trusted when the repo also ships a vocoder config
    vocoder_fallback_patterns = ["**/*.pth"]
    vocoder_config_patterns = ["**/vocoder_config.json", "**/vocoder-config.json", "**/*vocoder*.json"]

    def glob_first(root, patterns, exclude=()):
        """return the best match of the first matching pattern, skipping excluded paths"""
        taken = {p for p in exclude if p}
        for pat in patterns:
            matches = [
                m
                for m in glob.glob(os.path.join(root, pat), recursive=True)
                if m not in taken
            ]
            if matches:
                # prefer files closest to the root, then a stable alphabetical order
                matches.sort(key=lambda p: (len(p.split(os.sep)), p))
                return matches[0]
        return None

    # resolve the unambiguous vocoder files first so they cannot be mistaken
    # for the main checkpoint or its config
    vocoder_config_path = glob_first(model_dir, vocoder_config_patterns)
    vocoder_path = glob_first(model_dir, vocoder_patterns)
    model_path = glob_first(model_dir, model_patterns, exclude=[vocoder_path])
    config_path = glob_first(model_dir, config_patterns, exclude=[vocoder_config_path])

    # NOTE(review): assumes a vocoder checkpoint is only usable together with its
    # config, so an arbitrary second .pth is accepted only when a config exists
    if vocoder_path is None and vocoder_config_path is not None:
        vocoder_path = glob_first(model_dir, vocoder_fallback_patterns, exclude=[model_path])

    log("discovered model files:")
    log(f" model_path: {model_path}")
    log(f" config_path: {config_path}")
    log(f" vocoder_path: {vocoder_path}")
    log(f" vocoder_config_path: {vocoder_config_path}")
    return model_path, config_path, vocoder_path, vocoder_config_path
# main: attempt to download and initialize model, but catch and log everything
local_model_dir = None
try:
    log(f"attempting to snapshot_download repo: {HF_REPO_ID}")
    try:
        # `token` is the current keyword for the access token
        local_model_dir = snapshot_download(repo_id=HF_REPO_ID, token=HF_TOKEN)
    except TypeError:
        # older huggingface_hub releases only accept the legacy keyword
        local_model_dir = snapshot_download(repo_id=HF_REPO_ID, use_auth_token=HF_TOKEN)
    log(f"snapshot_download returned: {local_model_dir}")
except Exception:
    log("snapshot_download raised an exception:")
    log(traceback.format_exc())
    local_model_dir = None

if local_model_dir is None:
    log("failed to download model repo. please ensure HF_TOKEN secret is set if repo is private.")

    # continue to start gradio with a minimal interface that returns the error message
    def synthesize_error(text: str):
        """click handler of the fallback ui: no audio, only the error message"""
        return None, "model repo not available - check space logs and HF_TOKEN"

    with gr.Blocks() as demo:
        gr.Markdown("## persian tts (debug) - model not loaded")
        txt = gr.Textbox(label="persian text", lines=4, placeholder="enter text...")
        btn = gr.Button("generate (disabled)")
        audio = gr.Audio(label="output audio", type="filepath")
        status = gr.Markdown("model repo not downloaded. check logs.")
        # the handler above was defined but never attached; use it so the click
        # reports the actionable message instead of a generic one
        btn.click(synthesize_error, inputs=txt, outputs=[audio, status])
    demo.launch()
    sys.exit(0)
# locate model files
try:
    model_path, config_path, vocoder_path, vocoder_config_path = find_model_files(local_model_dir)
except Exception:
    log("error during find_model_files:")
    log(traceback.format_exc())
    model_path = config_path = vocoder_path = vocoder_config_path = None

# if not found, print a short tree to aid debugging
if not model_path or not config_path:
    log("model checkpoint or config.json not found automatically - printing repo tree (top levels):")
    try:
        for root, dirs, files in os.walk(local_model_dir):
            rel = os.path.relpath(root, local_model_dir)
            log(f"dir: {rel} - files: {files[:20]}")
            # limit depth printed: stop descending below this directory but keep
            # walking its siblings (a break here would hide the rest of the tree)
            if len(rel.split(os.sep)) > 3:
                dirs.clear()
    except Exception:
        log("error while printing tree:")
        log(traceback.format_exc())
    log("cannot proceed to load tts. please inspect the repo structure and share the printed tree.")

    # start a minimal ui showing the problem
    with gr.Blocks() as demo:
        gr.Markdown("## persian tts (debug) - missing model files")
        gr.Markdown("model checkpoint or config.json not found in the downloaded repo. see /tmp/startup.log for details.")
        txt = gr.Textbox(label="persian text", lines=4)
        btn = gr.Button("generate (disabled)")
        audio = gr.Audio(label="output audio", type="filepath")
        status = gr.Markdown("model files missing. check logs.")
        btn.click(lambda t: (None, "model not available"), inputs=txt, outputs=[audio, status])
    demo.launch()
    sys.exit(0)
# prepare tts kwargs and attempt load
tts_kwargs = {"model_path": model_path, "config_path": config_path, "gpu": False}
# vocoder entries are optional: pass them on only when they were discovered
optional_entries = (
    ("vocoder_path", vocoder_path),
    ("vocoder_config_path", vocoder_config_path),
)
for entry_name, entry_value in optional_entries:
    if entry_value:
        tts_kwargs[entry_name] = entry_value

log("initializing TTS with kwargs:")
for entry_name in tts_kwargs:
    log(f" {entry_name}: {tts_kwargs[entry_name]}")

try:
    tts = TTS(**tts_kwargs)
    log("tts initialized successfully")
except Exception:
    log("tts initialization failed:")
    log(traceback.format_exc())

    # start a minimal ui showing the init error
    with gr.Blocks() as demo:
        gr.Markdown("## persian tts (debug) - tts init failed")
        gr.Markdown("see /tmp/startup.log for stacktrace")
        txt = gr.Textbox(label="persian text", lines=4)
        btn = gr.Button("generate (disabled)")
        audio = gr.Audio(label="output audio", type="filepath")
        status = gr.Markdown("tts init failed. check logs.")
        # the click only reports that synthesis is unavailable
        fallback_outputs = [audio, status]
        btn.click(lambda t: (None, "tts not available"), inputs=txt, outputs=fallback_outputs)
    demo.launch()
    sys.exit(0)
# normal synth function
def synthesize(text: str):
    """synthesize text into a temporary wav file

    empty or whitespace-only input is rejected. input longer than
    MAX_INPUT_LENGTH is cut to that length and terminated with a period, then
    the text is normalized and handed to the tts engine.

    args:
        text: raw text from the ui.

    returns:
        (wav_path, status_message); wav_path is None when nothing was generated.
    """
    if not text or not text.strip():
        return None, "please enter some text."
    if len(text) > MAX_INPUT_LENGTH:
        text = text[:MAX_INPUT_LENGTH] + "."
    text = normalizer.normalize(text)
    # reserve a file name; the descriptor is closed so the engine can write the file itself
    out_fd, out_path = tempfile.mkstemp(suffix=".wav")
    os.close(out_fd)
    try:
        tts.tts_to_file(text=text, file_path=out_path)
    except Exception as e:
        log("tts generation error:")
        log(traceback.format_exc())
        # the reserved file is never returned on failure, so do not leave it behind
        try:
            os.remove(out_path)
        except OSError:
            pass
        return None, f"error during synthesis: {e}"
    return out_path, "speech generated successfully."
# gradio ui (normal)
def run_tts(text):
    """click handler: pass the textbox content to synthesize and return its (audio, status) pair"""
    return synthesize(text)


with gr.Blocks() as demo:
    gr.Markdown("## persian tts — debug-enabled")
    text_input = gr.Textbox(
        label="persian text (max ~1200 chars)",
        lines=6,
        placeholder="enter your persian text here...",
    )
    generate_btn = gr.Button("generate speech")
    audio_output = gr.Audio(label="output audio", type="filepath")
    status = gr.Markdown("")
    # one click fills both the audio player and the status line
    generate_btn.click(
        fn=run_tts,
        inputs=text_input,
        outputs=[audio_output, status],
    )

log("launching gradio app now")
demo.launch()
|