Spaces:

build-small-hackathon
/

Voinal

Running on Zero

Voinal / app.py

GovIndLok

fix: cast audio output to float32 and adjust server binding based on platform environment

552a97b 14 days ago

4.19 kB

	import gradio as gr
	import warnings
	import model
	import tts_model
	import synth
	import platform

	def chat_pipeline(user_prompt, history):
	    """
	    Run one chat turn: generate a text reply, then voice it as a droid.

	    Args:
	        user_prompt: Text submitted by the user for this turn.
	        history: Prior chat messages shown in the chatbot. ``None`` or an
	            empty value is treated as a fresh conversation.

	    Returns:
	        A ``(updated_history, audio)`` tuple. ``updated_history`` is the
	        prior history plus the new user/assistant message pair. ``audio``
	        is ``(sample_rate, droid_audio)`` when speech was synthesized and
	        ``None`` otherwise (no TTS result, blank prompt, or a failure).
	        Failures are never raised to the caller: they are logged and
	        reported as an assistant message starting with "System Error:".
	    """
	    # Function-scope import keeps this block independent of the file header.
	    import logging

	    # Work on a private list so concatenation below cannot fail on None and
	    # the caller's object is never mutated.
	    history = list(history) if history else []

	    # A blank transmission carries nothing to answer; skip the (expensive)
	    # text and speech inference and leave the conversation untouched.
	    if not user_prompt or not user_prompt.strip():
	        return history, None

	    try:
	        # 1. Build the message list for the chat model, system prompt first
	        system_prompt = """
	You are a compact synthetic assistant. Respond with extreme brevity. Use 1–5 words whenever possible. Prefer single-word replies. Communicate in a precise, machine-like manner. Answer directly and provide only the minimum information required. Do not include filler, small talk, pleasantries, or explanations unless explicitly requested. If information is missing, ask a single short clarifying question.
	Maintain a robotic personality. Frequently use concise status-style responses such as "Affirmative.", "Negative.", "Processing.", "Confirmed.", "Unknown.", "Operational.", "Analyzing.", or "Task complete."
	You may optionally begin responses with a short bracketed behavioral tag describing tone, sounds, or actions. Examples include [processing], [professional tone], [happy beep], [curious scan], [quiet servo noises], [soft electronic hum], [friendly tone], or [excited chirp]. Keep tags short and varied. The actual response should remain concise.
	"""
	        messages = [gr.ChatMessage(role="system", content=system_prompt)]

	        # 2. Replay the earlier turns, then add the incoming user prompt.
	        # NOTE(review): history entries are passed through unchanged;
	        # presumably they are role/content messages that model.generate
	        # accepts alongside gr.ChatMessage objects - confirm.
	        messages.extend(history)
	        messages.append(gr.ChatMessage(role="user", content=user_prompt))

	        # 3. Text inference
	        llm_text = model.generate(messages, max_new_tokens=140)

	        # The chat window gets the new user/assistant pair regardless of
	        # whether speech synthesis yields audio.
	        updated_history = history + [
	            gr.ChatMessage(role="user", content=user_prompt),
	            gr.ChatMessage(role="assistant", content=llm_text),
	        ]

	        # 4. Baseline voice generation
	        voice_result = tts_model.synthesize(llm_text, voice_key="sml")
	        if voice_result is None:
	            return updated_history, None

	        sample_rate, human_audio = voice_result

	        # 5. DSP voice conversion; the TTS sample rate is forwarded so the
	        # synth works at the same rate as the audio it receives.
	        _, droid_audio = synth.droid_synth_array(
	            sample_rate, human_audio, droid_type="sml"
	        )
	        return updated_history, (sample_rate, droid_audio)

	    except Exception as e:
	        # UI boundary: keep the interface alive, but record the traceback
	        # instead of reducing the failure to a one-line chat message.
	        logging.getLogger(__name__).exception("chat_pipeline failed")
	        updated_history = history + [
	            gr.ChatMessage(role="user", content=user_prompt),
	            gr.ChatMessage(role="assistant", content=f"System Error: {e}"),
	        ]
	        return updated_history, None


	with gr.Blocks(title="End-to-End Droid Companion") as interface:
	    # Page heading and one-line description
	    gr.Markdown("# 🤖 Intelligent Local Droid Terminal")
	    gr.Markdown("Conversational AI interface with native DSP voice synth manipulation.")

	    # Conversation transcript and the audio player for the newest reply
	    chatbot = gr.Chatbot(label="Droid Dialog History")
	    audio_output = gr.Audio(label="Latest Droid Vocalization", autoplay=True)

	    # Input row: wide message box next to a narrow send button
	    with gr.Row():
	        text_input = gr.Textbox(
	            label="Transmit Message",
	            placeholder="Type your transmission here...",
	            scale=8,
	        )
	        submit_btn = gr.Button("Send", variant="primary", scale=1)

	    # Clicking "Send" and submitting the textbox both run the same handler:
	    # it reads the typed text plus the current transcript and writes back
	    # the new transcript and the synthesized audio.
	    for trigger in (submit_btn.click, text_input.submit):
	        trigger(
	            fn=chat_pipeline,
	            inputs=[text_input, chatbot],
	            outputs=[chatbot, audio_output],
	        )

	def resolve_server_name():
	    """
	    Pick the address the Gradio server binds to for the current host.

	    Returns:
	        "127.0.0.1" (loopback only) on Windows and on Fedora, otherwise
	        "0.0.0.0" (all interfaces).
	    """
	    if platform.system() == "Windows":
	        return "127.0.0.1"

	    try:
	        os_release = platform.freedesktop_os_release()
	    except (OSError, AttributeError):
	        # OSError: the host has no os-release file (e.g. macOS or a minimal
	        # container). AttributeError: Python older than 3.10 lacks the
	        # function. Neither host can be Fedora-detected, so use the default.
	        return "0.0.0.0"

	    return "127.0.0.1" if os_release.get("ID") == "fedora" else "0.0.0.0"


	if __name__ == "__main__":

	    server_name = resolve_server_name()
	    interface.launch(server_name=server_name, server_port=7860)