"""Gradio demo that synthesizes speech from text with an ONNX model on CPU."""

from typing import Tuple

import gradio as gr
import numpy as np
import onnxruntime as ort
import soundfile as sf

# Location of the ONNX model, relative to the working directory.
MODEL_PATH = "assets/model.onnx"

# Sample rate (Hz) reported to Gradio alongside the waveform.
# NOTE(review): assumed to match the model's native output rate -- confirm.
SAMPLE_RATE = 44100

# Load ONNX model once at import time; CPU is the only execution provider.
session = ort.InferenceSession(MODEL_PATH, providers=["CPUExecutionProvider"])


def tts(text: str) -> Tuple[int, np.ndarray]:
    """Synthesize audio for *text* with the loaded ONNX session.

    The text is encoded with a placeholder scheme: each UTF-8 byte value
    is used directly as one input ID.

    Args:
        text: The text to synthesize. An empty string yields a
            zero-length ``(1, 0)`` ID array, which is passed to the model
            as-is.

    Returns:
        A ``(sample_rate, audio)`` tuple where ``sample_rate`` is
        ``SAMPLE_RATE`` and ``audio`` is a float32 NumPy array.
    """
    # Encode text to IDs (dummy simple encoding: byte values), with a
    # leading axis of size one: shape (1, number_of_bytes).
    input_ids = np.array([list(text.encode("utf-8"))], dtype=np.int64)

    # Run the model requesting all outputs; keep the first output and the
    # first entry along its leading axis.
    # NOTE(review): presumably that leading axis is the batch dimension and
    # the model's input really is named "text" -- confirm against the model.
    outputs = session.run(None, {"text": input_ids})
    audio = outputs[0][0]

    # Only the dtype is changed here; amplitudes are not rescaled.
    audio = audio.astype(np.float32)
    return (SAMPLE_RATE, audio)


def infer(text: str) -> Tuple[int, np.ndarray]:
    """Gradio callback: validate the textbox value and synthesize it.

    Args:
        text: The current content of the input textbox.

    Returns:
        The ``(sample_rate, audio)`` tuple produced by :func:`tts`.

    Raises:
        gr.Error: If *text* is empty, so the user sees a readable message
            instead of a failure from running the model on zero IDs.
    """
    if not text:
        raise gr.Error("Please enter some text to synthesize.")
    return tts(text)


iface = gr.Interface(
    fn=infer,
    inputs=gr.Textbox(label="Input text"),
    outputs=gr.Audio(label="Output audio"),
    title="Supertonic TTS (CPU mode)",
)

# Start the web UI as soon as the script is executed.
iface.launch()