|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os |
|
|
import sys |
|
|
import tempfile |
|
|
import glob |
|
|
import traceback |
|
|
from typing import Optional, Tuple |
|
|
|
|
|
|
|
|
from hazm import Normalizer |
|
|
from huggingface_hub import snapshot_download |
|
|
from TTS.api import TTS |
|
|
import gradio as gr |
|
|
|
|
|
LOG_PATH = "/tmp/startup.log"


def log(msg: str, flush: bool = True):
    """Print a "[startup]"-prefixed message and best-effort append it to LOG_PATH.

    Any failure while writing the log file or flushing stdout is swallowed:
    diagnostics must never take the app down.
    """
    line = "[startup] " + msg
    print(line)
    try:
        with open(LOG_PATH, "a", encoding="utf-8") as fh:
            fh.write(line + "\n")
    except Exception:
        pass  # log file is best-effort only
    if not flush:
        return
    try:
        sys.stdout.flush()
    except Exception:
        pass
|
|
|
|
|
|
|
|
# Truncate any log left over from a previous run so each startup gets a
# fresh file; ignore failures (e.g. read-only filesystem).
try:
    open(LOG_PATH, "w").close()
except Exception:
    pass

# Record basic interpreter/environment facts for debugging the Space.
log("starting app - debug mode enabled")
log(f"python executable: {sys.executable}")
# chr(10) is '\n' — collapse the multi-line version string onto one log line.
log(f"python version: {sys.version.replace(chr(10), ' ')}")
log(f"cwd: {os.getcwd()}")
log("environment variables (selected):")
for k in ["HF_TOKEN", "HUGGINGFACE_HUB_TOKEN", "CUDA_VISIBLE_DEVICES", "PYTHONPATH"]:
    log(f" {k}={os.environ.get(k)}")
|
|
|
|
|
|
|
|
# Dump the repo root contents to the startup log to aid remote debugging.
try:
    root_files = os.listdir(".")
    log("files in repo root (first 100 entries):")
    # The enumerate index was never used; iterate the names directly.
    for name in root_files[:100]:
        log(f" - {name}")
except Exception as e:
    log(f"error listing repo root: {e}")
|
|
|
|
|
|
|
|
# Hugging Face Hub repo that hosts the Persian VITS model files.
HF_REPO_ID = "Kamtera/persian-tts-female-vits"
# Auth token, read from either env var name; only required if the repo is private.
HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_HUB_TOKEN")
# Hard cap on input text length (characters) to keep synthesis time bounded.
MAX_INPUT_LENGTH = 1200

# hazm text normalizer, applied to all input text before synthesis.
normalizer = Normalizer()
|
|
|
|
|
def find_model_files(model_dir: str) -> Tuple[Optional[str], Optional[str], Optional[str], Optional[str]]:
    """Discover model and config files under *model_dir*.

    Globs for the checkpoint, its config, and optional vocoder files,
    preferring exact names over wildcard fallbacks and shallower paths over
    deeper ones.

    Returns:
        (model_path, config_path, vocoder_path, vocoder_config_path) — each
        a path string, or None when nothing under *model_dir* matched.
    """
    model_patterns = ["**/model.pth", "**/model.pt", "**/*.pth", "**/*.pt"]
    config_patterns = ["**/config.json", "**/model_config.json", "**/config*.json"]
    vocoder_patterns = ["**/vocoder.pth", "**/vocoder.pt", "**/hifi-gan*.pth", "**/*.pth"]
    vocoder_config_patterns = ["**/vocoder_config.json", "**/vocoder-config.json", "**/*vocoder*.json"]

    def glob_first(root, patterns, exclude=()):
        # Return the best match for the FIRST pattern that matches anything,
        # skipping paths listed in *exclude*. "Best" = shallowest directory
        # depth, then lexicographic order, so top-level files win.
        for pat in patterns:
            matches = [p for p in glob.glob(os.path.join(root, pat), recursive=True)
                       if p not in exclude]
            if matches:
                matches.sort(key=lambda p: (len(p.split(os.sep)), p))
                return matches[0]
        return None

    model_path = glob_first(model_dir, model_patterns)
    config_path = glob_first(model_dir, config_patterns)
    # BUGFIX: the broad "**/*.pth" fallback in vocoder_patterns used to
    # re-match the acoustic model checkpoint itself, so the model could be
    # handed to TTS as its own vocoder. Exclude the chosen model path from
    # vocoder discovery; repos with a single .pth now correctly get no vocoder.
    exclude = (model_path,) if model_path else ()
    vocoder_path = glob_first(model_dir, vocoder_patterns, exclude=exclude)
    vocoder_config_path = glob_first(model_dir, vocoder_config_patterns)

    log("discovered model files:")
    log(f" model_path: {model_path}")
    log(f" config_path: {config_path}")
    log(f" vocoder_path: {vocoder_path}")
    log(f" vocoder_config_path: {vocoder_config_path}")

    return model_path, config_path, vocoder_path, vocoder_config_path
|
|
|
|
|
|
|
|
# Download a full snapshot of the model repo; on any failure, leave
# local_model_dir as None so the fallback UI below takes over.
local_model_dir = None
try:
    log(f"attempting to snapshot_download repo: {HF_REPO_ID}")
    local_model_dir = snapshot_download(repo_id=HF_REPO_ID, use_auth_token=HF_TOKEN)
    log(f"snapshot_download returned: {local_model_dir}")
except Exception as e:  # NOTE(review): `e` unused; full traceback is logged instead
    log("snapshot_download raised an exception:")
    log(traceback.format_exc())
    local_model_dir = None
|
|
|
|
|
# If the snapshot never arrived, bring up a disabled placeholder UI (so the
# Space still responds and points users at the log), then exit cleanly.
if local_model_dir is None:
    log("failed to download model repo. please ensure HF_TOKEN secret is set if repo is private.")

    # NOTE(review): defined but never wired to the button below — the click
    # handler uses an inline lambda instead; confirm which is intended.
    def synthesize_error(text: str):
        return None, "model repo not available - check space logs and HF_TOKEN"

    with gr.Blocks() as demo:
        gr.Markdown("## persian tts (debug) - model not loaded")
        txt = gr.Textbox(label="persian text", lines=4, placeholder="enter text...")
        btn = gr.Button("generate (disabled)")
        audio = gr.Audio(label="output audio", type="filepath")
        status = gr.Markdown("model repo not downloaded. check logs.")
        btn.click(lambda t: (None, "model not available"), inputs=txt, outputs=[audio, status])

    demo.launch()
    # Stop here: the rest of the file assumes a downloaded model.
    sys.exit(0)
|
|
|
|
|
|
|
|
# Locate the checkpoint/config (and optional vocoder files) in the snapshot;
# on error, null everything out so the missing-files branch below triggers.
try:
    model_path, config_path, vocoder_path, vocoder_config_path = find_model_files(local_model_dir)
except Exception:
    log("error during find_model_files:")
    log(traceback.format_exc())
    model_path = config_path = vocoder_path = vocoder_config_path = None
|
|
|
|
|
|
|
|
# Checkpoint or config missing: dump a shallow repo tree to the log for
# diagnosis, show a disabled placeholder UI, and exit.
if not model_path or not config_path:
    log("model checkpoint or config.json not found automatically - printing repo tree (top levels):")
    try:
        for root, dirs, files in os.walk(local_model_dir):
            rel = os.path.relpath(root, local_model_dir)
            log(f"dir: {rel} - files: {files[:20]}")

            # Keep the dump shallow. NOTE(review): this breaks out of the
            # entire walk at the first directory deeper than ~3 levels rather
            # than just skipping that subtree — confirm that is intended.
            if len(rel.split(os.sep)) > 3:
                break
    except Exception:
        log("error while printing tree:")
        log(traceback.format_exc())

    log("cannot proceed to load tts. please inspect the repo structure and share the printed tree.")

    with gr.Blocks() as demo:
        gr.Markdown("## persian tts (debug) - missing model files")
        gr.Markdown("model checkpoint or config.json not found in the downloaded repo. see /tmp/startup.log for details.")
        txt = gr.Textbox(label="persian text", lines=4)
        btn = gr.Button("generate (disabled)")
        audio = gr.Audio(label="output audio", type="filepath")
        status = gr.Markdown("model files missing. check logs.")
        btn.click(lambda t: (None, "model not available"), inputs=txt, outputs=[audio, status])

    demo.launch()
    sys.exit(0)
|
|
|
|
|
|
|
|
# Build the TTS constructor kwargs; vocoder entries are only passed when the
# corresponding files were actually discovered. CPU inference (gpu=False).
tts_kwargs = {"model_path": model_path, "config_path": config_path, "gpu": False}
if vocoder_path:
    tts_kwargs["vocoder_path"] = vocoder_path
if vocoder_config_path:
    tts_kwargs["vocoder_config_path"] = vocoder_config_path

log("initializing TTS with kwargs:")
for k, v in tts_kwargs.items():
    log(f" {k}: {v}")
|
|
|
|
|
# Instantiate the Coqui TTS engine. On failure: log the traceback, serve a
# disabled placeholder UI, and exit instead of crashing the Space.
try:
    tts = TTS(**tts_kwargs)
    log("tts initialized successfully")
except Exception as e:  # NOTE(review): `e` unused; full traceback is logged instead
    log("tts initialization failed:")
    log(traceback.format_exc())

    with gr.Blocks() as demo:
        gr.Markdown("## persian tts (debug) - tts init failed")
        gr.Markdown("see /tmp/startup.log for stacktrace")
        txt = gr.Textbox(label="persian text", lines=4)
        btn = gr.Button("generate (disabled)")
        audio = gr.Audio(label="output audio", type="filepath")
        status = gr.Markdown("tts init failed. check logs.")
        btn.click(lambda t: (None, "tts not available"), inputs=txt, outputs=[audio, status])

    demo.launch()
    sys.exit(0)
|
|
|
|
|
|
|
|
def synthesize(text: str):
    """Normalize *text*, synthesize speech, and return (wav_path, status_msg).

    Args:
        text: Persian input text; truncated to MAX_INPUT_LENGTH characters.

    Returns:
        (path_to_wav, success_message) on success, or (None, error_message)
        for empty input or a synthesis failure.
    """
    if not text or not text.strip():
        return None, "please enter some text."

    # Bound synthesis time; the trailing "." closes the truncated sentence.
    if len(text) > MAX_INPUT_LENGTH:
        text = text[:MAX_INPUT_LENGTH] + "."

    text = normalizer.normalize(text)

    # Reserve a unique output path; close our fd so TTS can write the file.
    out_fd, out_path = tempfile.mkstemp(suffix=".wav")
    os.close(out_fd)

    try:
        tts.tts_to_file(text=text, file_path=out_path)
    except Exception as e:
        log("tts generation error:")
        log(traceback.format_exc())
        # BUGFIX: mkstemp already created the file on disk — remove it on
        # failure so each failed request no longer leaks an empty .wav.
        try:
            os.remove(out_path)
        except OSError:
            pass
        return None, f"error during synthesis: {e}"

    return out_path, "speech generated successfully."
|
|
|
|
|
|
|
|
# Main UI: text box -> generate button -> audio player + status markdown.
with gr.Blocks() as demo:
    gr.Markdown("## persian tts — debug-enabled")
    text_input = gr.Textbox(label="persian text (max ~1200 chars)", lines=6, placeholder="enter your persian text here...")
    generate_btn = gr.Button("generate speech")
    audio_output = gr.Audio(label="output audio", type="filepath")
    status = gr.Markdown("")

    # Thin wrapper so the click callback has a plain (text) -> (audio, msg)
    # signature matching the declared inputs/outputs.
    def run_tts(text):
        audio_path, msg = synthesize(text)
        return audio_path, msg

    generate_btn.click(fn=run_tts, inputs=text_input, outputs=[audio_output, status])

log("launching gradio app now")
demo.launch()
|
|
|