Spaces:

arjunbroepic
/

mossttsnano

Running

App Files Files Community

arjunbroepic committed 19 days ago

Commit

23e8911

verified ·

1 Parent(s): b403299

Update app.py

Browse files

Files changed (1) hide show

app.py +71 -22

app.py CHANGED Viewed

@@ -1,28 +1,77 @@
 import gradio as gr
-import subprocess
 import os
-def generate_speech(text):
-    output_file = "output.wav"
-    # Adjust this command based on the binary's usage (e.g., ./moss_nano -t "text" -o output.wav)
-    # Check the repo's README for exact CLI arguments
-    cmd = ["./moss_nano", "--text", text, "--output", output_file]
-    try:
-        subprocess.run(cmd, check=True)
-        if os.path.exists(output_file):
-            return output_file
-        else:
-            return None
-    except Exception as e:
-        return f"Error: {str(e)}"
-demo = gr.Interface(
-    fn=generate_speech,
-    inputs=gr.Textbox(label="Enter Text", placeholder="Hello, I am MossTTS Nano."),
-    outputs=gr.Audio(label="Generated Speech"),
-    title="MossTTS-Nano (Pure C Optimized)",
-    description="This space runs the ultra-fast C implementation of MossTTS-Nano."
-)
 demo.launch()

+import ctypes
 import gradio as gr
 import os
+# Load the compiled shared library; the "./" path is resolved against the
+# current working directory.
+lib = ctypes.CDLL("./libnanotts.so")
+# Declare the C signatures so ctypes converts arguments correctly.
+lib.load_model.restype = ctypes.c_int
+# save_wav(path, float buffer, int, int, int)
+lib.save_wav.argtypes = (
+    [ctypes.c_char_p, ctypes.POINTER(ctypes.c_float)]
+    + [ctypes.c_int] * 3
+)
+# generate_wav_from_ref(two C strings, float** out-buffer, three int*
+# out-parameters, one int flag)
+lib.generate_wav_from_ref.argtypes = (
+    [ctypes.c_char_p] * 2
+    + [ctypes.POINTER(ctypes.POINTER(ctypes.c_float))]
+    + [ctypes.POINTER(ctypes.c_int)] * 3
+    + [ctypes.c_int]
+)
+# Initialize the model once, at import time.
+print("Loading model...")
+lib.load_model()
+def tts_inference(ref_audio, text):
+    """Synthesize ``text`` in the voice of ``ref_audio`` and write a WAV file.
+
+    Args:
+        ref_audio: Path to the reference recording, passed to the engine as
+            an encoded C string.
+        text: Text to synthesize.
+
+    Returns:
+        Path of the WAV file that was written, or ``None`` when an input is
+        missing, the engine returns a non-zero status, or it hands back a
+        NULL sample buffer.
+    """
+    import tempfile
+
+    if not ref_audio or not text:
+        return None
+    wav_ptr = ctypes.POINTER(ctypes.c_float)()
+    samples = ctypes.c_int()
+    channels = ctypes.c_int()
+    sr = ctypes.c_int()
+    # Run Inference
+    result = lib.generate_wav_from_ref(
+        ref_audio.encode(),
+        text.encode(),
+        ctypes.byref(wav_ptr),
+        ctypes.byref(samples),
+        ctypes.byref(channels),
+        ctypes.byref(sr),
+        1 # Stereo
+    )
+    # A NULL buffer must never reach save_wav, even if the status says success.
+    if result != 0 or not wav_ptr:
+        return None
+    try:
+        # Each request gets its own file so that concurrent requests cannot
+        # overwrite each other's audio (a fixed "output.wav" was shared).
+        fd, output_path = tempfile.mkstemp(prefix="nanotts_", suffix=".wav")
+        os.close(fd)
+        # Save to file
+        lib.save_wav(output_path.encode(), wav_ptr, samples, channels, sr)
+    finally:
+        # We use the standard C library to free the memory allocated by the
+        # engine; doing it in `finally` releases the buffer even when saving
+        # raises.
+        libc = ctypes.CDLL("libc.so.6")
+        # Declare free(void *) -> void so the pointer is passed at full width.
+        libc.free.argtypes = [ctypes.c_void_p]
+        libc.free.restype = None
+        libc.free(wav_ptr)
+    return output_path
+# Gradio UI
+with gr.Blocks() as demo:
+    # Page heading and one-line description.
+    gr.Markdown("# 🎙️ NanoTTS — C Inference Engine")
+    gr.Markdown("Ultra-fast voice cloning using the MOSS-TTS-Nano-100M model optimized in C.")
+    with gr.Row():
+        # Left column: inputs and the trigger button.
+        with gr.Column():
+            input_text = gr.Textbox(
+                label="Text to Synthesize",
+                placeholder="Hello, how are you today?",
+            )
+            input_ref = gr.Audio(
+                label="Reference Audio (Voice to Clone)",
+                type="filepath",
+            )
+            btn = gr.Button("Generate Speech", variant="primary")
+        # Right column: the synthesized result.
+        with gr.Column():
+            output_audio = gr.Audio(label="Synthesized Output")
+    # tts_inference takes (ref_audio, text), so the audio input is listed first.
+    btn.click(
+        fn=tts_inference,
+        inputs=[input_ref, input_text],
+        outputs=output_audio,
+    )
 demo.launch()