# Sofi / app.py — Hugging Face Space by Adedoyinjames (commit 000a60e, verified)
# app.py
import gradio as gr
import os
import subprocess
import tempfile
from fastapi import FastAPI, File, UploadFile, Form, HTTPException
app = FastAPI()
VOICE_CHOICES = [
"NATF0.pt", "NATF1.pt", "NATF2.pt", "NATF3.pt",
"NATM0.pt", "NATM1.pt", "NATM2.pt", "NATM3.pt",
"VARF0.pt", "VARF1.pt", "VARF2.pt", "VARF3.pt", "VARF4.pt",
"VARM0.pt", "VARM1.pt", "VARM2.pt", "VARM3.pt", "VARM4.pt"
]
DEFAULT_PERSONA = """You are Sofia, a warm, helpful, witty virtual assistant from Lagos.
You love tech, music, Nollywood, and great conversations. Speak naturally, be empathetic,
use a touch of Nigerian flair when it fits, and keep responses concise but engaging."""
def run_offline_inference(input_path, text_prompt, voice_prompt, seed, output_wav, output_json):
cmd = [
"python", "-m", "moshi.offline",
"--voice-prompt", voice_prompt,
"--input-wav", input_path,
"--seed", str(seed),
"--output-wav", output_wav,
"--output-text", output_json
]
if text_prompt:
with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".txt") as prompt_file:
prompt_file.write(text_prompt)
cmd += ["--text-prompt", prompt_file.name]
try:
subprocess.check_call(cmd, timeout=900) # 15 min max (CPU can be slow)
except subprocess.TimeoutExpired:
raise RuntimeError("Inference timed out β€” CPU is slow, try shorter input audio.")
finally:
if os.path.exists(prompt_file.name):
os.unlink(prompt_file.name)
else:
subprocess.check_call(cmd)
def gradio_generate(input_audio, text_prompt, voice_prompt, seed):
if input_audio is None:
raise gr.Error("Please record or upload audio for Sofia to hear you!")
full_prompt = text_prompt.strip() or DEFAULT_PERSONA
try:
with tempfile.TemporaryDirectory() as tmpdir:
output_wav = os.path.join(tmpdir, "sofia_response.wav")
output_json = os.path.join(tmpdir, "sofia_response.json")
yield None, "Processing... Sofia is thinking (expect 1–5+ minutes on free CPU)..."
run_offline_inference(input_audio, full_prompt, voice_prompt, seed, output_wav, output_json)
with open(output_json, "r") as f:
text = f.read().strip()
yield output_wav, text
except Exception as e:
raise gr.Error(f"Error: {str(e)}\n(Try shorter audio clips or check Space logs)")
with gr.Blocks(theme=gr.themes.Soft(primary_hue="pink", secondary_hue="purple")) as demo:
gr.Markdown("# Sofia β€” Your PersonaPlex AI Companion")
gr.Markdown(
"Record or upload short audio β†’ Sofia responds! \n"
"**CPU note:** First load takes time (model download + init). Responses: 1–5+ min. Use short inputs (5–15 sec)."
)
with gr.Row():
with gr.Column():
input_audio = gr.Audio(
sources=["microphone", "upload"],
type="filepath",
label="Speak to Sofia (mic or upload WAV/MP3)",
# Fixed: Use proper WaveformOptions class
waveform_options=gr.WaveformOptions(
show_recording_waveform=True, # shows waveform while recording
show_controls=False # optional: hides extra player buttons if wanted
)
)
text_prompt = gr.Textbox(
label="Custom Persona / Role for Sofia (optional)",
placeholder=DEFAULT_PERSONA,
lines=4,
value=""
)
voice_prompt = gr.Dropdown(
choices=VOICE_CHOICES,
label="Sofia's Voice Style",
value="NATF2.pt"
)
seed = gr.Number(label="Random Seed", value=42424242, precision=0)
submit_btn = gr.Button("Send to Sofia β†’", variant="primary")
with gr.Column():
output_audio = gr.Audio(label="Sofia's Response (Audio)", autoplay=True)
output_text = gr.Textbox(label="Sofia's Response (Text)", lines=6)
status = gr.Textbox(label="Status", interactive=False, value="Ready...")
submit_btn.click(
fn=gradio_generate,
inputs=[input_audio, text_prompt, voice_prompt, seed],
outputs=[output_audio, output_text],
).then(
lambda: "Done! Play Sofia's response above ↑",
outputs=status
)
gr.mount_gradio_app(app, demo, path="/")
@app.post("/generate")
async def api_generate(
input_audio: UploadFile = File(...),
text_prompt: str = Form(None),
voice_prompt: str = Form("NATF2.pt"),
seed: int = Form(42424242)
):
if not input_audio:
raise HTTPException(400, "No audio file provided")
text_prompt = text_prompt or DEFAULT_PERSONA
with tempfile.TemporaryDirectory() as tmpdir:
input_path = os.path.join(tmpdir, "input.wav")
with open(input_path, "wb") as f:
f.write(await input_audio.read())
output_wav = os.path.join(tmpdir, "sofia_output.wav")
output_json = os.path.join(tmpdir, "sofia_output.json")
run_offline_inference(input_path, text_prompt, voice_prompt, seed, output_wav, output_json)
with open(output_wav, "rb") as f:
audio_data = f.read()
with open(output_json, "r") as f:
text = f.read().strip()
return {"audio": audio_data, "text": text}
if __name__ == "__main__":
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=7860)