Voinal / app.py
GovIndLok
fix: cast audio output to float32 and adjust server binding based on platform environment
552a97b
Raw
History Blame Contribute Delete
4.19 kB
import gradio as gr
import warnings
import model
import tts_model
import synth
import platform
# System prompt placed at the head of every conversation sent to the LLM.
# Kept at module level so the text is built once and stays byte-exact.
_SYSTEM_PROMPT = """
You are a compact synthetic assistant. Respond with extreme brevity. Use 1–5 words whenever possible. Prefer single-word replies. Communicate in a precise, machine-like manner. Answer directly and provide only the minimum information required. Do not include filler, small talk, pleasantries, or explanations unless explicitly requested. If information is missing, ask a single short clarifying question.
Maintain a robotic personality. Frequently use concise status-style responses such as "Affirmative.", "Negative.", "Processing.", "Confirmed.", "Unknown.", "Operational.", "Analyzing.", or "Task complete."
You may optionally begin responses with a short bracketed behavioral tag describing tone, sounds, or actions. Examples include [processing], [professional tone], [happy beep], [curious scan], [quiet servo noises], [soft electronic hum], [friendly tone], or [excited chirp]. Keep tags short and varied. The actual response should remain concise.
"""


def chat_pipeline(user_prompt, history):
    """Run one chat turn: text completion, speech synthesis, droid voice DSP.

    Args:
        user_prompt: Text submitted by the user. An empty or whitespace-only
            prompt is treated as a no-op and no model is invoked.
        history: Prior chat messages as delivered by the chatbot component.
            ``None`` is treated as an empty conversation.

    Returns:
        A ``(updated_history, audio)`` tuple. ``updated_history`` is the prior
        history plus the new user and assistant messages. ``audio`` is
        ``(sample_rate, droid_audio)`` when speech synthesis produced a
        result, otherwise ``None``. If any step raises, the assistant message
        carries ``"System Error: ..."`` and ``audio`` is ``None``.
    """
    # The component may hand over None before any message exists; normalise
    # up front so both the happy path and the error handler can concatenate.
    history = list(history) if history else []

    # Nothing to transmit: skip the LLM/TTS round trip entirely.
    if not user_prompt or not user_prompt.strip():
        return history, None

    try:
        # 1. Reconstruct the conversation for MiniCPM5-1B's chat template:
        #    system prompt, prior turns, then the incoming user prompt.
        # NOTE(review): prior turns are passed through unchanged, so the list
        # may mix ChatMessage objects with plain dicts - confirm that
        # model.generate accepts both.
        messages = [gr.ChatMessage(role="system", content=_SYSTEM_PROMPT)]
        messages.extend(history)
        messages.append(gr.ChatMessage(role="user", content=user_prompt))

        # 2. Complete text inference
        llm_text = model.generate(messages, max_new_tokens=140)

        # Update text chat history representation
        updated_history = history + [
            gr.ChatMessage(role="user", content=user_prompt),
            gr.ChatMessage(role="assistant", content=llm_text),
        ]

        # 3. Baseline voice generation via ZeroGPU
        voice_result = tts_model.synthesize(llm_text, voice_key="sml")
        if voice_result is None:
            return updated_history, None

        sample_rate, human_audio = voice_result

        # 4. DSP poly-voice conversion layer.
        # The exact sampling rate is passed through to keep filter
        # frequencies stable; the rate returned by the synth is discarded.
        _, droid_audio = synth.droid_synth_array(
            sample_rate, human_audio, droid_type="sml"
        )
        return updated_history, (sample_rate, droid_audio)
    except Exception as e:
        # UI boundary: surface the failure inside the chat layout instead of
        # letting the callback crash.
        updated_history = history + [
            gr.ChatMessage(role="user", content=user_prompt),
            gr.ChatMessage(role="assistant", content=f"System Error: {str(e)}"),
        ]
        return updated_history, None
# Page layout and event wiring for the droid chat terminal.
with gr.Blocks(title="End-to-End Droid Companion") as interface:
    # Header text
    gr.Markdown("# 🤖 Intelligent Local Droid Terminal")
    gr.Markdown("Conversational AI interface with native DSP voice synth manipulation.")

    # Conversation view and the player for the most recent synthesized reply
    chatbot = gr.Chatbot(label="Droid Dialog History")
    audio_output = gr.Audio(label="Latest Droid Vocalization", autoplay=True)

    # Input row: wide textbox next to a narrow send button
    with gr.Row():
        text_input = gr.Textbox(
            label="Transmit Message",
            placeholder="Type your transmission here...",
            scale=8,
        )
        submit_btn = gr.Button("Send", variant="primary", scale=1)

    # Clicking "Send" and pressing Enter in the textbox trigger the same
    # pipeline with the same inputs and outputs; register the button first,
    # then the textbox.
    for _register in (submit_btn.click, text_input.submit):
        _register(
            fn=chat_pipeline,
            inputs=[text_input, chatbot],
            outputs=[chatbot, audio_output],
        )
def detect_distro_id():
    """Return the ``ID`` field of the host's os-release data, or ``None``.

    ``platform.freedesktop_os_release()`` raises ``OSError`` when no
    os-release file can be read (for example on macOS or Windows) and is
    missing entirely on Python versions before 3.10. Both situations are
    reported as ``None`` instead of aborting start-up.
    """
    reader = getattr(platform, "freedesktop_os_release", None)
    if reader is None:
        return None
    try:
        return reader().get("ID")
    except OSError:
        return None


def select_server_name(system_name, distro_id):
    """Choose the address the web server binds to.

    Args:
        system_name: Value of ``platform.system()``.
        distro_id: os-release ``ID`` of the host, or ``None`` if unknown.

    Returns:
        ``"127.0.0.1"`` (loopback only) on Windows or Fedora hosts,
        ``"0.0.0.0"`` (all interfaces) everywhere else.
    """
    if system_name == "Windows" or distro_id == "fedora":
        return "127.0.0.1"
    return "0.0.0.0"


if __name__ == "__main__":
    interface.launch(
        server_name=select_server_name(platform.system(), detect_distro_id()),
        server_port=7860,
    )