# Sofi / app.py — Hugging Face Space by Adedoyinjames (commit 000a60e, verified)
# app.py
import gradio as gr
import os
import subprocess
import tempfile
from fastapi import FastAPI, File, UploadFile, Form, HTTPException
app = FastAPI()
VOICE_CHOICES = [
"NATF0.pt", "NATF1.pt", "NATF2.pt", "NATF3.pt",
"NATM0.pt", "NATM1.pt", "NATM2.pt", "NATM3.pt",
"VARF0.pt", "VARF1.pt", "VARF2.pt", "VARF3.pt", "VARF4.pt",
"VARM0.pt", "VARM1.pt", "VARM2.pt", "VARM3.pt", "VARM4.pt"
]
DEFAULT_PERSONA = """You are Sofia, a warm, helpful, witty virtual assistant from Lagos.
You love tech, music, Nollywood, and great conversations. Speak naturally, be empathetic,
use a touch of Nigerian flair when it fits, and keep responses concise but engaging."""
def run_offline_inference(input_path, text_prompt, voice_prompt, seed, output_wav, output_json):
cmd = [
"python", "-m", "moshi.offline",
"--voice-prompt", voice_prompt,
"--input-wav", input_path,
"--seed", str(seed),
"--output-wav", output_wav,
"--output-text", output_json
]
if text_prompt:
with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".txt") as prompt_file:
prompt_file.write(text_prompt)
cmd += ["--text-prompt", prompt_file.name]
try:
subprocess.check_call(cmd, timeout=900) # 15 min max (CPU can be slow)
except subprocess.TimeoutExpired:
raise RuntimeError("Inference timed out β€” CPU is slow, try shorter input audio.")
finally:
if os.path.exists(prompt_file.name):
os.unlink(prompt_file.name)
else:
subprocess.check_call(cmd)
def gradio_generate(input_audio, text_prompt, voice_prompt, seed):
if input_audio is None:
raise gr.Error("Please record or upload audio for Sofia to hear you!")
full_prompt = text_prompt.strip() or DEFAULT_PERSONA
try:
with tempfile.TemporaryDirectory() as tmpdir:
output_wav = os.path.join(tmpdir, "sofia_response.wav")
output_json = os.path.join(tmpdir, "sofia_response.json")
yield None, "Processing... Sofia is thinking (expect 1–5+ minutes on free CPU)..."
run_offline_inference(input_audio, full_prompt, voice_prompt, seed, output_wav, output_json)
with open(output_json, "r") as f:
text = f.read().strip()
yield output_wav, text
except Exception as e:
raise gr.Error(f"Error: {str(e)}\n(Try shorter audio clips or check Space logs)")
with gr.Blocks(theme=gr.themes.Soft(primary_hue="pink", secondary_hue="purple")) as demo:
gr.Markdown("# Sofia β€” Your PersonaPlex AI Companion")
gr.Markdown(
"Record or upload short audio β†’ Sofia responds! \n"
"**CPU note:** First load takes time (model download + init). Responses: 1–5+ min. Use short inputs (5–15 sec)."
)
with gr.Row():
with gr.Column():
input_audio = gr.Audio(
sources=["microphone", "upload"],
type="filepath",
label="Speak to Sofia (mic or upload WAV/MP3)",
# Fixed: Use proper WaveformOptions class
waveform_options=gr.WaveformOptions(
show_recording_waveform=True, # shows waveform while recording
show_controls=False # optional: hides extra player buttons if wanted
)
)
text_prompt = gr.Textbox(
label="Custom Persona / Role for Sofia (optional)",
placeholder=DEFAULT_PERSONA,
lines=4,
value=""
)
voice_prompt = gr.Dropdown(
choices=VOICE_CHOICES,
label="Sofia's Voice Style",
value="NATF2.pt"
)
seed = gr.Number(label="Random Seed", value=42424242, precision=0)
submit_btn = gr.Button("Send to Sofia β†’", variant="primary")
with gr.Column():
output_audio = gr.Audio(label="Sofia's Response (Audio)", autoplay=True)
output_text = gr.Textbox(label="Sofia's Response (Text)", lines=6)
status = gr.Textbox(label="Status", interactive=False, value="Ready...")
submit_btn.click(
fn=gradio_generate,
inputs=[input_audio, text_prompt, voice_prompt, seed],
outputs=[output_audio, output_text],
).then(
lambda: "Done! Play Sofia's response above ↑",
outputs=status
)
gr.mount_gradio_app(app, demo, path="/")
@app.post("/generate")
async def api_generate(
input_audio: UploadFile = File(...),
text_prompt: str = Form(None),
voice_prompt: str = Form("NATF2.pt"),
seed: int = Form(42424242)
):
if not input_audio:
raise HTTPException(400, "No audio file provided")
text_prompt = text_prompt or DEFAULT_PERSONA
with tempfile.TemporaryDirectory() as tmpdir:
input_path = os.path.join(tmpdir, "input.wav")
with open(input_path, "wb") as f:
f.write(await input_audio.read())
output_wav = os.path.join(tmpdir, "sofia_output.wav")
output_json = os.path.join(tmpdir, "sofia_output.json")
run_offline_inference(input_path, text_prompt, voice_prompt, seed, output_wav, output_json)
with open(output_wav, "rb") as f:
audio_data = f.read()
with open(output_json, "r") as f:
text = f.read().strip()
return {"audio": audio_data, "text": text}
if __name__ == "__main__":
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=7860)