import platform
import warnings

import gradio as gr

import model
import synth
import tts_model

# System prompt prepended to every conversation sent to the language model.
# Kept at module level so it is built once rather than on every request.
_SYSTEM_PROMPT = """
You are a compact synthetic assistant.

Respond with extreme brevity. Use 1–5 words whenever possible. Prefer single-word replies.

Communicate in a precise, machine-like manner. Answer directly and provide only the minimum information required. Do not include filler, small talk, pleasantries, or explanations unless explicitly requested.

If information is missing, ask a single short clarifying question.

Maintain a robotic personality. Frequently use concise status-style responses such as "Affirmative.", "Negative.", "Processing.", "Confirmed.", "Unknown.", "Operational.", "Analyzing.", or "Task complete."

You may optionally begin responses with a short bracketed behavioral tag describing tone, sounds, or actions. Examples include [processing], [professional tone], [happy beep], [curious scan], [quiet servo noises], [soft electronic hum], [friendly tone], or [excited chirp].

Keep tags short and varied. The actual response should remain concise.
"""

_LOOPBACK_HOST = "127.0.0.1"
_ALL_INTERFACES_HOST = "0.0.0.0"


def chat_pipeline(user_prompt, history):
    """
    Run one chat turn: generate a text reply, then synthesize and transform its audio.

    Args:
        user_prompt: Text typed by the user for this turn.
        history: Existing conversation as supplied by the Chatbot component.
            ``None`` or an empty value is treated as an empty conversation.

    Returns:
        A ``(updated_history, audio)`` tuple. ``updated_history`` is the
        previous history plus the new user and assistant messages. ``audio``
        is ``(sample_rate, droid_audio)`` when speech synthesis produced a
        result, otherwise ``None``.

    Any exception raised during generation or synthesis is caught and shown
    to the user as an assistant message prefixed with ``System Error:``; in
    that case ``audio`` is ``None``.
    """
    # Normalise before the try-block so the error handler below can always
    # concatenate onto a real list, even if the component handed us None.
    history = list(history) if history else []

    try:
        # 1. Build the message list for the model: system prompt, prior
        #    turns, then the incoming user prompt.
        # NOTE(review): prior turns are passed through as received while the
        # new entries are gr.ChatMessage objects — confirm model.generate
        # accepts that mix.
        messages = [gr.ChatMessage(role="system", content=_SYSTEM_PROMPT)]
        messages.extend(history)
        messages.append(gr.ChatMessage(role="user", content=user_prompt))

        # 2. Text inference.
        llm_text = model.generate(messages, max_new_tokens=140)

        updated_history = history + [
            gr.ChatMessage(role="user", content=user_prompt),
            gr.ChatMessage(role="assistant", content=llm_text),
        ]

        # 3. Baseline voice generation.
        voice_result = tts_model.synthesize(llm_text, voice_key="sml")
        if voice_result is None:
            return updated_history, None

        sample_rate, human_audio = voice_result

        # 4. DSP voice conversion. The sample rate of the synthesized audio is
        #    passed through so the converter works at the matching rate.
        _, droid_audio = synth.droid_synth_array(
            sample_rate, human_audio, droid_type="sml"
        )
        return updated_history, (sample_rate, droid_audio)

    except Exception as e:
        # UI boundary: surface the failure inside the chat instead of
        # letting the event handler crash.
        updated_history = history + [
            gr.ChatMessage(role="user", content=user_prompt),
            gr.ChatMessage(role="assistant", content=f"System Error: {e}"),
        ]
        return updated_history, None


def _resolve_server_name():
    """Return the bind address: loopback on Windows and Fedora, all interfaces otherwise."""
    if platform.system() == "Windows":
        return _LOOPBACK_HOST

    try:
        os_id = platform.freedesktop_os_release().get("ID")
    except (OSError, AttributeError):
        # OSError: no os-release file on this host (e.g. macOS).
        # AttributeError: freedesktop_os_release() needs Python 3.10+.
        return _ALL_INTERFACES_HOST

    return _LOOPBACK_HOST if os_id == "fedora" else _ALL_INTERFACES_HOST


with gr.Blocks(title="End-to-End Droid Companion") as interface:
    gr.Markdown("# 🤖 Intelligent Local Droid Terminal")
    gr.Markdown("Conversational AI interface with native DSP voice synth manipulation.")

    # Conversation display and the audio player for the latest reply.
    chatbot = gr.Chatbot(label="Droid Dialog History")
    audio_output = gr.Audio(label="Latest Droid Vocalization", autoplay=True)

    with gr.Row():
        text_input = gr.Textbox(
            label="Transmit Message",
            placeholder="Type your transmission here...",
            scale=8,
        )
        submit_btn = gr.Button("Send", variant="primary", scale=1)

    # Event wiring: the Send button and pressing Enter in the textbox both
    # run the same pipeline with the same inputs and outputs.
    _pipeline_event = dict(
        fn=chat_pipeline,
        inputs=[text_input, chatbot],
        outputs=[chatbot, audio_output],
    )
    submit_btn.click(**_pipeline_event)
    text_input.submit(**_pipeline_event)


if __name__ == "__main__":
    interface.launch(server_name=_resolve_server_name(), server_port=7860)