File size: 8,275 Bytes
b2b1119
7791cdf
 
 
b2b1119
 
7791cdf
b2b1119
7791cdf
 
 
 
 
b2b1119
7791cdf
b2b1119
 
 
7791cdf
58dac37
7791cdf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b2b1119
 
 
7791cdf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58dac37
7791cdf
58dac37
7791cdf
58dac37
7791cdf
 
58dac37
 
 
7791cdf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58dac37
7791cdf
 
 
b2b1119
7791cdf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b2b1119
 
 
 
 
 
 
 
 
 
 
 
 
 
7791cdf
 
 
b2b1119
 
 
7791cdf
 
 
 
b2b1119
 
 
 
 
 
 
 
7791cdf
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
# app.py
# debug-friendly gradio space entrypoint for persian tts
# this script prints environment info, lists repo files, logs to /tmp/startup.log
# comments are english and start with lowercase

import os
import sys
import tempfile
import glob
import traceback
from typing import Optional, Tuple

# external libs
from hazm import Normalizer
from huggingface_hub import snapshot_download
from TTS.api import TTS
import gradio as gr

LOG_PATH = "/tmp/startup.log"

def log(msg: str, flush: bool = True):
    """print a tagged startup message and mirror it into the startup log file.

    args:
        msg: text to report; it is emitted with a "[startup] " prefix.
        flush: when true, stdout is flushed after the message is written.

    errors raised while appending to the log file or while flushing stdout
    are ignored on purpose, so logging never interrupts startup.
    """
    line = f"[startup] {msg}"
    print(line)

    # best-effort mirror into the log file; any failure here is ignored
    try:
        with open(LOG_PATH, "a", encoding="utf-8") as log_file:
            print(line, file=log_file)
    except Exception:
        pass

    if not flush:
        return

    # flushing is best effort as well
    try:
        sys.stdout.flush()
    except Exception:
        pass

# clear previous log so every start begins with an empty file (best effort)
try:
    with open(LOG_PATH, "w", encoding="utf-8"):
        pass
except OSError:
    pass

log("starting app - debug mode enabled")
log(f"python executable: {sys.executable}")
# chr(10) is a newline; it is replaced so the version stays on one log line
log(f"python version: {sys.version.replace(chr(10), ' ')}")
log(f"cwd: {os.getcwd()}")
log("environment variables (selected):")
# credential values must never reach stdout or the log file, so for token
# variables only the fact that they are set is reported
_SECRET_ENV_NAMES = ("HF_TOKEN", "HUGGINGFACE_HUB_TOKEN")
for k in ("HF_TOKEN", "HUGGINGFACE_HUB_TOKEN", "CUDA_VISIBLE_DEVICES", "PYTHONPATH"):
    v = os.environ.get(k)
    if k in _SECRET_ENV_NAMES and v:
        v = "<redacted>"
    log(f"  {k}={v}")

# list repo root files (first-level) to help debugging missing files
try:
    root_files = os.listdir(".")
    log("files in repo root (first 100 entries):")
    for name in root_files[:100]:
        log(f"  - {name}")
except Exception as e:
    # kept broad on purpose: diagnostics must not stop the app from starting
    log(f"error listing repo root: {e}")

# basic config (edit as needed)
# hub repo id that is passed to snapshot_download at startup
HF_REPO_ID = "Kamtera/persian-tts-female-vits"
# access token read from HF_TOKEN, falling back to HUGGINGFACE_HUB_TOKEN; None when neither is set
HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_HUB_TOKEN")
# input longer than this many characters is truncated before synthesis
MAX_INPUT_LENGTH = 1200

# shared hazm normalizer; synthesize() runs the input text through it
normalizer = Normalizer()

def find_model_files(model_dir: str) -> Tuple[Optional[str], Optional[str], Optional[str], Optional[str]]:
    """try to discover model and config files under model_dir

    every kind of file is searched with an ordered list of recursive glob
    patterns. the first pattern with a usable match wins and, among its
    matches, the shallowest path (ties broken alphabetically) is returned.

    args:
        model_dir: root directory of the downloaded model repo.

    returns:
        a tuple (model_path, config_path, vocoder_path, vocoder_config_path);
        an entry is None when nothing suitable was found for it.
    """
    model_patterns = ["**/model.pth", "**/model.pt", "**/*.pth", "**/*.pt"]
    config_patterns = ["**/config.json", "**/model_config.json", "**/config*.json"]
    # vocoder patterns must name a vocoder explicitly: a catch-all such as
    # "**/*.pth" would return the main checkpoint again as its own vocoder
    vocoder_patterns = [
        "**/vocoder.pth",
        "**/vocoder.pt",
        "**/hifi-gan*.pth",
        "**/*vocoder*.pth",
        "**/*vocoder*.pt",
    ]
    vocoder_config_patterns = ["**/vocoder_config.json", "**/vocoder-config.json", "**/*vocoder*.json"]

    def looks_like_vocoder(path):
        # true when the file name itself marks the file as a vocoder artifact
        base = os.path.basename(path).lower()
        return "vocoder" in base or base.startswith("hifi-gan")

    def glob_first(root, patterns, skip=None):
        # return the best match of the first pattern that has one, else None
        for pat in patterns:
            matches = glob.glob(os.path.join(root, pat), recursive=True)
            if skip is not None:
                matches = [m for m in matches if not skip(m)]
            if matches:
                # fewest path components first, then alphabetical order
                matches.sort(key=lambda p: (len(p.split(os.sep)), p))
                return matches[0]
        return None

    # the generic fallbacks for the main model and its config must not pick
    # up files that belong to a vocoder
    model_path = glob_first(model_dir, model_patterns, skip=looks_like_vocoder)
    config_path = glob_first(model_dir, config_patterns, skip=looks_like_vocoder)
    vocoder_path = glob_first(model_dir, vocoder_patterns)
    vocoder_config_path = glob_first(model_dir, vocoder_config_patterns)

    log("discovered model files:")
    log(f"  model_path: {model_path}")
    log(f"  config_path: {config_path}")
    log(f"  vocoder_path: {vocoder_path}")
    log(f"  vocoder_config_path: {vocoder_config_path}")

    return model_path, config_path, vocoder_path, vocoder_config_path

# main: attempt to download and initialize model, but catch and log everything
local_model_dir = None
try:
    log(f"attempting to snapshot_download repo: {HF_REPO_ID}")
    try:
        # `token` is the current keyword for passing credentials
        local_model_dir = snapshot_download(repo_id=HF_REPO_ID, token=HF_TOKEN)
    except TypeError:
        # presumably an older huggingface_hub that only knows the legacy
        # keyword; retry with it so both old and new releases work
        local_model_dir = snapshot_download(repo_id=HF_REPO_ID, use_auth_token=HF_TOKEN)
    log(f"snapshot_download returned: {local_model_dir}")
except Exception:
    log("snapshot_download raised an exception:")
    log(traceback.format_exc())
    local_model_dir = None

if local_model_dir is None:
    log("failed to download model repo. please ensure HF_TOKEN secret is set if repo is private.")

    # continue to start gradio with a minimal interface that returns the error message
    def synthesize_error(text: str):
        """click handler of the fallback ui: no audio, only the error hint."""
        return None, "model repo not available - check space logs and HF_TOKEN"

    with gr.Blocks() as demo:
        gr.Markdown("## persian tts (debug) - model not loaded")
        txt = gr.Textbox(label="persian text", lines=4, placeholder="enter text...")
        btn = gr.Button("generate (disabled)")
        audio = gr.Audio(label="output audio", type="filepath")
        status = gr.Markdown("model repo not downloaded. check logs.")
        # the handler above was previously defined but never connected
        btn.click(synthesize_error, inputs=txt, outputs=[audio, status])
    demo.launch()
    sys.exit(0)

# locate model files
try:
    model_path, config_path, vocoder_path, vocoder_config_path = find_model_files(local_model_dir)
except Exception:
    log("error during find_model_files:")
    log(traceback.format_exc())
    model_path = config_path = vocoder_path = vocoder_config_path = None

# if not found, print a short tree to aid debugging
if not model_path or not config_path:
    log("model checkpoint or config.json not found automatically - printing repo tree (top levels):")
    try:
        for root, dirs, files in os.walk(local_model_dir):
            rel = os.path.relpath(root, local_model_dir)
            log(f"dir: {rel} - files: {files[:20]}")
            # limit depth printed: emptying `dirs` in place stops os.walk from
            # descending below this directory while its siblings are still
            # visited (a `break` here would end the whole walk)
            if len(rel.split(os.sep)) > 3:
                dirs[:] = []
    except Exception:
        log("error while printing tree:")
        log(traceback.format_exc())

    log("cannot proceed to load tts. please inspect the repo structure and share the printed tree.")
    # start a minimal ui showing the problem
    with gr.Blocks() as demo:
        gr.Markdown("## persian tts (debug) - missing model files")
        gr.Markdown("model checkpoint or config.json not found in the downloaded repo. see /tmp/startup.log for details.")
        txt = gr.Textbox(label="persian text", lines=4)
        btn = gr.Button("generate (disabled)")
        audio = gr.Audio(label="output audio", type="filepath")
        status = gr.Markdown("model files missing. check logs.")
        # the button only reports that no model is available
        btn.click(lambda t: (None, "model not available"), inputs=txt, outputs=[audio, status])
    demo.launch()
    sys.exit(0)

# prepare tts kwargs and attempt load
tts_kwargs = {"model_path": model_path, "config_path": config_path, "gpu": False}

# a vocoder is only passed on as a complete checkpoint + config pair, and never
# when the "vocoder" checkpoint is the main model file itself (a catch-all
# search pattern can return the same file twice)
# NOTE(review): presumably TTS cannot load a vocoder checkpoint without its
# config - confirm against the installed TTS version
vocoder_is_model = bool(vocoder_path) and (
    os.path.normpath(vocoder_path) == os.path.normpath(model_path)
)
if vocoder_path and vocoder_config_path and not vocoder_is_model:
    tts_kwargs["vocoder_path"] = vocoder_path
    tts_kwargs["vocoder_config_path"] = vocoder_config_path
elif vocoder_path or vocoder_config_path:
    log("ignoring incomplete or ambiguous vocoder files:")
    log(f"  vocoder_path: {vocoder_path}")
    log(f"  vocoder_config_path: {vocoder_config_path}")

log("initializing TTS with kwargs:")
for k, v in tts_kwargs.items():
    log(f"  {k}: {v}")

try:
    tts = TTS(**tts_kwargs)
    log("tts initialized successfully")
except Exception:
    log("tts initialization failed:")
    log(traceback.format_exc())
    # start a minimal ui showing the init error
    with gr.Blocks() as demo:
        gr.Markdown("## persian tts (debug) - tts init failed")
        gr.Markdown("see /tmp/startup.log for stacktrace")
        txt = gr.Textbox(label="persian text", lines=4)
        btn = gr.Button("generate (disabled)")
        audio = gr.Audio(label="output audio", type="filepath")
        status = gr.Markdown("tts init failed. check logs.")
        # the button only reports that tts is unavailable
        btn.click(lambda t: (None, "tts not available"), inputs=txt, outputs=[audio, status])
    demo.launch()
    sys.exit(0)

# normal synth function
def synthesize(text: str):
    """convert text to speech and return (wav_path, status_message).

    args:
        text: raw user input. empty or whitespace-only input is rejected;
            input longer than MAX_INPUT_LENGTH characters is truncated and
            closed with a period before it is normalized.

    returns:
        (path of the generated wav file, success message) on success, or
        (None, message) when the input is empty or synthesis fails.
    """
    if not text or not text.strip():
        return None, "please enter some text."

    if len(text) > MAX_INPUT_LENGTH:
        text = text[:MAX_INPUT_LENGTH] + "."

    text = normalizer.normalize(text)
    out_fd, out_path = tempfile.mkstemp(suffix=".wav")
    # only the path is handed to tts_to_file, so the descriptor is closed right away
    os.close(out_fd)

    try:
        tts.tts_to_file(text=text, file_path=out_path)
    except Exception as e:
        log("tts generation error:")
        log(traceback.format_exc())
        # do not leave the unused temp file behind after a failed synthesis
        try:
            os.remove(out_path)
        except OSError:
            pass
        return None, f"error during synthesis: {e}"

    return out_path, "speech generated successfully."

# gradio ui (normal)
def run_tts(text):
    """click handler: pass the textbox content to synthesize.

    returns the (audio_path, message) pair from synthesize, which feeds the
    audio output and the status markdown.
    """
    result_path, result_msg = synthesize(text)
    return result_path, result_msg


with gr.Blocks() as demo:
    gr.Markdown("## persian tts — debug-enabled")
    text_input = gr.Textbox(
        label="persian text (max ~1200 chars)",
        lines=6,
        placeholder="enter your persian text here...",
    )
    generate_btn = gr.Button("generate speech")
    audio_output = gr.Audio(label="output audio", type="filepath")
    status = gr.Markdown("")
    generate_btn.click(
        fn=run_tts,
        inputs=text_input,
        outputs=[audio_output, status],
    )

log("launching gradio app now")
demo.launch()