# app.py — Sofia: Gradio UI + FastAPI backend for a Hugging Face Space
# (Space status banner "Spaces: Running" was scraping residue and has been
#  folded into this header comment.)
import base64
import os
import subprocess
import tempfile

import gradio as gr
from fastapi import FastAPI, File, UploadFile, Form, HTTPException
# Top-level ASGI application; the Gradio UI is mounted onto it further down.
app = FastAPI()
# Voice-prompt checkpoint filenames accepted by the model.
# Presumably NAT* = natural voices, VAR* = variants, F/M = female/male,
# trailing digit = variant index — confirm against the model card.
VOICE_CHOICES = (
    [f"NAT{sex}{idx}.pt" for sex in ("F", "M") for idx in range(4)]
    + [f"VAR{sex}{idx}.pt" for sex in ("F", "M") for idx in range(5)]
)

# Fallback system persona, used whenever the caller supplies no prompt text.
DEFAULT_PERSONA = """You are Sofia, a warm, helpful, witty virtual assistant from Lagos.
You love tech, music, Nollywood, and great conversations. Speak naturally, be empathetic,
use a touch of Nigerian flair when it fits, and keep responses concise but engaging."""
def run_offline_inference(input_path, text_prompt, voice_prompt, seed, output_wav, output_json):
    """Run the ``moshi.offline`` model CLI as a subprocess.

    Args:
        input_path: Path to the user's input WAV file.
        text_prompt: Optional persona/system text. Falsy values skip the
            ``--text-prompt`` flag entirely; otherwise the text is written to a
            temp file because the CLI takes the prompt as a file path.
        voice_prompt: Voice checkpoint filename (one of ``VOICE_CHOICES``).
        seed: Random seed forwarded to the model.
        output_wav: Path the subprocess writes the response audio to.
        output_json: Path the subprocess writes the response text to.

    Raises:
        RuntimeError: If the subprocess exceeds the 15-minute timeout.
        subprocess.CalledProcessError: If the subprocess exits non-zero.
    """
    cmd = [
        "python", "-m", "moshi.offline",
        "--voice-prompt", voice_prompt,
        "--input-wav", input_path,
        "--seed", str(seed),
        "--output-wav", output_wav,
        "--output-text", output_json,
    ]
    prompt_path = None
    try:
        if text_prompt:
            # Persist the prompt: the CLI expects a file path, not raw text.
            with tempfile.NamedTemporaryFile(
                mode="w", delete=False, suffix=".txt"
            ) as prompt_file:
                prompt_file.write(text_prompt)
                prompt_path = prompt_file.name
            cmd += ["--text-prompt", prompt_path]
        # 15 min max — CPU inference can be very slow. (The original only
        # applied the timeout when a text prompt was given; now both paths
        # share one invocation.)
        subprocess.check_call(cmd, timeout=900)
    except subprocess.TimeoutExpired as exc:
        raise RuntimeError(
            "Inference timed out - CPU is slow, try shorter input audio."
        ) from exc
    finally:
        # Always remove the temp prompt file, even when inference failed.
        if prompt_path and os.path.exists(prompt_path):
            os.unlink(prompt_path)
def gradio_generate(input_audio, text_prompt, voice_prompt, seed):
    """Gradio streaming handler: yields ``(audio_path, text)`` updates.

    Yields an interim ``(None, status_message)`` pair first so the UI shows
    progress, then the final ``(response_wav_path, response_text)`` pair once
    inference finishes.

    Raises:
        gr.Error: With a user-facing message when input is missing or
            inference fails.
    """
    if input_audio is None:
        raise gr.Error("Please record or upload audio for Sofia to hear you!")
    # Empty (or None) textbox falls back to the built-in persona.
    full_prompt = (text_prompt or "").strip() or DEFAULT_PERSONA
    try:
        with tempfile.TemporaryDirectory() as tmpdir:
            output_wav = os.path.join(tmpdir, "sofia_response.wav")
            output_json = os.path.join(tmpdir, "sofia_response.json")
            yield None, "Processing... Sofia is thinking (expect 1-5+ minutes on free CPU)..."
            run_offline_inference(input_audio, full_prompt, voice_prompt, seed, output_wav, output_json)
            with open(output_json, "r") as f:
                text = f.read().strip()
            # NOTE(review): tmpdir is deleted once this generator finishes;
            # this relies on Gradio copying the yielded file into its cache
            # while the generator is still suspended — confirm.
            yield output_wav, text
    except Exception as e:
        raise gr.Error(f"Error: {str(e)}\n(Try shorter audio clips or check Space logs)") from e
# --- Gradio UI ------------------------------------------------------------
# Declarative Blocks layout: left column = inputs, right column = outputs.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="pink", secondary_hue="purple")) as demo:
    gr.Markdown("# Sofia β Your PersonaPlex AI Companion")
    gr.Markdown(
        "Record or upload short audio β Sofia responds! \n"
        "**CPU note:** First load takes time (model download + init). Responses: 1β5+ min. Use short inputs (5β15 sec)."
    )
    with gr.Row():
        with gr.Column():
            # Mic/file input; type="filepath" hands gradio_generate a path.
            input_audio = gr.Audio(
                sources=["microphone", "upload"],
                type="filepath",
                label="Speak to Sofia (mic or upload WAV/MP3)",
                # Fixed: Use proper WaveformOptions class
                waveform_options=gr.WaveformOptions(
                    show_recording_waveform=True,  # shows waveform while recording
                    show_controls=False  # optional: hides extra player buttons if wanted
                )
            )
            # Optional persona override; empty string falls back to
            # DEFAULT_PERSONA inside gradio_generate.
            text_prompt = gr.Textbox(
                label="Custom Persona / Role for Sofia (optional)",
                placeholder=DEFAULT_PERSONA,
                lines=4,
                value=""
            )
            voice_prompt = gr.Dropdown(
                choices=VOICE_CHOICES,
                label="Sofia's Voice Style",
                value="NATF2.pt"
            )
            seed = gr.Number(label="Random Seed", value=42424242, precision=0)
            submit_btn = gr.Button("Send to Sofia β", variant="primary")
        with gr.Column():
            output_audio = gr.Audio(label="Sofia's Response (Audio)", autoplay=True)
            output_text = gr.Textbox(label="Sofia's Response (Text)", lines=6)
            status = gr.Textbox(label="Status", interactive=False, value="Ready...")
    # NOTE(review): gradio_generate's interim "Processing..." yield lands in
    # output_text (second element of `outputs`), not in the `status` box —
    # confirm whether that is intended.
    submit_btn.click(
        fn=gradio_generate,
        inputs=[input_audio, text_prompt, voice_prompt, seed],
        outputs=[output_audio, output_text],
    ).then(
        lambda: "Done! Play Sofia's response above β",
        outputs=status
    )
# Serve the Gradio UI at "/" on the FastAPI app defined above.
gr.mount_gradio_app(app, demo, path="/")
# NOTE(review): the original function was never registered as a route (no
# decorator — likely lost in extraction); restored here. Confirm the path.
@app.post("/api/generate")
async def api_generate(
    input_audio: UploadFile = File(...),
    text_prompt: str = Form(None),
    voice_prompt: str = Form("NATF2.pt"),
    seed: int = Form(42424242)
):
    """REST endpoint mirroring the Gradio UI.

    Accepts a multipart form with an audio file plus optional persona text,
    voice checkpoint, and seed; runs offline inference and returns JSON:
    ``{"audio": <base64-encoded WAV>, "text": <response text>}``.

    Raises:
        HTTPException: 400 when no audio file is provided.
    """
    if not input_audio:
        raise HTTPException(400, "No audio file provided")
    # Missing/empty prompt falls back to the built-in persona.
    text_prompt = text_prompt or DEFAULT_PERSONA
    with tempfile.TemporaryDirectory() as tmpdir:
        input_path = os.path.join(tmpdir, "input.wav")
        with open(input_path, "wb") as f:
            f.write(await input_audio.read())
        output_wav = os.path.join(tmpdir, "sofia_output.wav")
        output_json = os.path.join(tmpdir, "sofia_output.json")
        run_offline_inference(input_path, text_prompt, voice_prompt, seed, output_wav, output_json)
        with open(output_wav, "rb") as f:
            audio_data = f.read()
        with open(output_json, "r") as f:
            text = f.read().strip()
    # Raw bytes are not JSON-serializable by FastAPI's default encoder
    # (the original returned `audio_data` directly, which would fail at
    # response-serialization time) — base64-encode for the JSON body.
    return {"audio": base64.b64encode(audio_data).decode("ascii"), "text": text}
if __name__ == "__main__":
    # Local/dev entry point: serve the combined FastAPI + Gradio app.
    # Port 7860 is the conventional Hugging Face Spaces port.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)