mkfallah committed on
Commit
b2b1119
·
verified ·
1 Parent(s): 0bb82d5

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -0
app.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ # simple gradio space for Persian TTS using kamtera/persian-tts-female-vits (coqui tts)
3
+ # all ui messages and comments are in English
4
+
5
+ import os
6
+ import tempfile
7
+ from hazm import Normalizer
8
+ from TTS.api import TTS
9
+ import gradio as gr
10
+
11
# ---------------------------------------------------------------
# settings
# model identifier handed to the TTS constructor
MODEL_ID = "Kamtera/persian-tts-female-vits"
# optional access token read from the environment; None when the variable is unset
HF_TOKEN = os.getenv("HF_TOKEN")
# inputs longer than this many characters are cut before synthesis
MAX_INPUT_LENGTH = 1200
# ---------------------------------------------------------------
17
+
18
# hazm normalizer applied to the input text before synthesis
normalizer = Normalizer()

# load Coqui TTS model once at import time (gpu disabled, no progress bar)
print("loading tts model:", MODEL_ID)
_tts_kwargs = {"model_name": MODEL_ID, "progress_bar": False, "gpu": False}
if HF_TOKEN:
    # forward the token only when one is configured
    # NOTE(review): confirm the installed TTS version accepts `use_auth_token`
    _tts_kwargs["use_auth_token"] = HF_TOKEN
tts = TTS(**_tts_kwargs)
26
+
27
def synthesize(text: str):
    """Convert Persian text to speech and write it to a temporary WAV file.

    Args:
        text: Persian text to synthesize. Input longer than
            ``MAX_INPUT_LENGTH`` characters is truncated and a period is
            appended after the cut.

    Returns:
        A ``(audio_path, status_message)`` tuple. ``audio_path`` is the path
        of the generated WAV file, or ``None`` when the input is empty or
        synthesis fails; ``status_message`` is a short text for the UI.
    """
    if not text or not text.strip():
        return None, "please enter some text."

    # limit input length to avoid high latency
    if len(text) > MAX_INPUT_LENGTH:
        text = text[:MAX_INPUT_LENGTH] + "."

    # normalize persian text
    text = normalizer.normalize(text)

    # reserve a temporary output path; the descriptor is closed right away
    # because tts_to_file is given the path, not the open descriptor
    out_fd, out_path = tempfile.mkstemp(suffix=".wav")
    os.close(out_fd)

    # generate audio
    try:
        tts.tts_to_file(text=text, file_path=out_path)
    except Exception as e:
        print("tts generation error:", e)
        # do not leave the empty/partial temp file behind on failure
        try:
            os.remove(out_path)
        except OSError:
            pass
        return None, f"error: {e}"

    return out_path, "speech generated successfully."
54
+
55
# gradio ui: a text box and a button in, an audio player and a status line out
with gr.Blocks(css=".gradio-container {background-color: #fafafa}") as demo:
    gr.Markdown("## persian tts — kamtera / persian-tts-female-vits")
    text_input = gr.Textbox(
        lines=6,
        label="persian text (max ~1200 chars)",
        placeholder="enter your Persian text here...",
    )
    generate_btn = gr.Button("generate speech")
    audio_output = gr.Audio(type="filepath", label="output audio")
    status = gr.Markdown("")

    def run_tts(text):
        """Click handler: pass the text box content to synthesize().

        Returns the (audio_path, message) pair that fills the audio player
        and the status line.
        """
        return synthesize(text)

    generate_btn.click(
        fn=run_tts,
        inputs=text_input,
        outputs=[audio_output, status],
    )

# start the web server only when the file is executed as a script
if __name__ == "__main__":
    demo.launch()