arjunbroepic committed on
Commit
23e8911
·
verified ·
1 Parent(s): b403299

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -22
app.py CHANGED
@@ -1,28 +1,77 @@
 
1
  import gradio as gr
2
- import subprocess
3
  import os
4
 
5
- def generate_speech(text):
6
- output_file = "output.wav"
7
- # Adjust this command based on the binary's usage (e.g., ./moss_nano -t "text" -o output.wav)
8
- # Check the repo's README for exact CLI arguments
9
- cmd = ["./moss_nano", "--text", text, "--output", output_file]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
- try:
12
- subprocess.run(cmd, check=True)
13
- if os.path.exists(output_file):
14
- return output_file
15
- else:
16
- return None
17
- except Exception as e:
18
- return f"Error: {str(e)}"
19
-
20
- demo = gr.Interface(
21
- fn=generate_speech,
22
- inputs=gr.Textbox(label="Enter Text", placeholder="Hello, I am MossTTS Nano."),
23
- outputs=gr.Audio(label="Generated Speech"),
24
- title="MossTTS-Nano (Pure C Optimized)",
25
- description="This space runs the ultra-fast C implementation of MossTTS-Nano."
26
- )
 
 
 
 
 
 
 
 
 
 
 
27
 
28
  demo.launch()
 
1
+ import ctypes
2
  import gradio as gr
 
3
  import os
4
 
5
# Bind the native inference engine from the shared object in the working directory.
lib = ctypes.CDLL("./libnanotts.so")

# Shorthand for the pointer types that recur in the C signatures below.
_float_ptr = ctypes.POINTER(ctypes.c_float)
_int_ptr = ctypes.POINTER(ctypes.c_int)

# load_model() is declared with an int return; no argument types are declared for it.
lib.load_model.restype = ctypes.c_int

# generate_wav_from_ref(char *, char *, float **, int *, int *, int *, int)
lib.generate_wav_from_ref.argtypes = [
    ctypes.c_char_p,
    ctypes.c_char_p,
    ctypes.POINTER(_float_ptr),
    _int_ptr,
    _int_ptr,
    _int_ptr,
    ctypes.c_int,
]

# save_wav(char *, float *, int, int, int)
lib.save_wav.argtypes = [
    ctypes.c_char_p,
    _float_ptr,
    ctypes.c_int,
    ctypes.c_int,
    ctypes.c_int,
]

# Load the model once at import time so every request reuses it.
print("Loading model...")
lib.load_model()
26
+
27
def tts_inference(ref_audio, text):
    """Synthesize ``text`` using ``ref_audio`` as the voice reference.

    Args:
        ref_audio: Filesystem path of the reference recording.
        text: The text to synthesize.

    Returns:
        The path of a freshly written WAV file, or ``None`` when either
        input is missing or blank, or when the engine returns a non-zero
        status.
    """
    import tempfile  # local import: only this function needs it

    # Nothing to do without both inputs; whitespace-only text counts as empty.
    if not ref_audio or not text or not text.strip():
        return None

    # Out-parameters filled in by the engine.
    wav_ptr = ctypes.POINTER(ctypes.c_float)()
    samples = ctypes.c_int()
    channels = ctypes.c_int()
    sr = ctypes.c_int()

    status = lib.generate_wav_from_ref(
        os.fsencode(ref_audio),  # paths use the filesystem encoding
        text.encode("utf-8"),
        ctypes.byref(wav_ptr),
        ctypes.byref(samples),
        ctypes.byref(channels),
        ctypes.byref(sr),
        1,  # flag value carried over unchanged (annotated "Stereo" originally)
    )
    if status != 0:
        return None

    # A unique file per request: a single shared "output.wav" would be
    # overwritten when two requests overlap.
    fd, output_path = tempfile.mkstemp(prefix="nanotts_", suffix=".wav")
    os.close(fd)  # the engine reopens the file by name

    try:
        lib.save_wav(os.fsencode(output_path), wav_ptr, samples, channels, sr)
    finally:
        # Release the engine-allocated sample buffer with the C library's
        # free(), even if saving failed. free(NULL) is a no-op.
        libc = ctypes.CDLL("libc.so.6")
        libc.free.argtypes = [ctypes.c_void_p]
        libc.free.restype = None
        libc.free(ctypes.cast(wav_ptr, ctypes.c_void_p))

    return output_path
60
+
61
# Gradio UI: one row with an input column (text, reference audio, button)
# followed by an output column (synthesized audio).
with gr.Blocks() as demo:
    gr.Markdown("# 🎙️ NanoTTS C Inference Engine")
    gr.Markdown("Ultra-fast voice cloning using the MOSS-TTS-Nano-100M model optimized in C.")

    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(
                label="Text to Synthesize",
                placeholder="Hello, how are you today?",
            )
            # type="filepath" hands the callback a path string, not raw samples.
            input_ref = gr.Audio(
                label="Reference Audio (Voice to Clone)",
                type="filepath",
            )
            btn = gr.Button("Generate Speech", variant="primary")

        with gr.Column():
            output_audio = gr.Audio(label="Synthesized Output")

    # The callback takes (ref_audio, text), so the reference audio is listed first.
    btn.click(
        fn=tts_inference,
        inputs=[input_ref, input_text],
        outputs=output_audio,
    )

demo.launch()