Spaces:

hudaakram
/

Voice_Agent

Sleeping

App Files Files Community

hudaakram committed on Sep 11, 2025

Commit

5590cf9

verified ·

1 Parent(s): 1c5bf59

Create app.py

Browse files

Files changed (1) hide show

app.py +111 -0

app.py ADDED Viewed

	@@ -0,0 +1,111 @@

+import gradio as gr
+from transformers import pipeline
+import time
# Hugging Face model ids used by the two pipelines below.
ASR_MODEL = "openai/whisper-tiny"  # small/medium if you switch to ZeroGPU
ZSC_MODEL = "facebook/bart-large-mnli"  # for multilingual use: "MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7"

# Both pipelines are built once at module import and reused by every call.
asr = pipeline(task="automatic-speech-recognition", model=ASR_MODEL)
zsc = pipeline(task="zero-shot-classification", model=ZSC_MODEL)
# Candidate labels used when the intents textbox is left empty; also the
# initial content of that textbox.
DEFAULT_INTENTS = [
    # lights
    "turn_on_lights",
    "turn_off_lights",
    # volume
    "volume_up",
    "volume_down",
    # music
    "start_music",
    "pause_music",
    # timer
    "set_timer",
    "cancel_timer",
    # productivity
    "open_calendar",
    "create_note",
    # recording
    "start_recording",
    "stop_recording",
]
def tool_turn_on_lights() -> str:
    """Return the status message for the lights-on intent (no device is touched)."""
    status = "Lights → ON"
    return status
def tool_turn_off_lights() -> str:
    """Return the status message for the lights-off intent (no device is touched)."""
    status = "Lights → OFF"
    return status
def tool_volume_up() -> str:
    """Return the status message for the volume-up intent."""
    status = "Volume → UP"
    return status
def tool_volume_down() -> str:
    """Return the status message for the volume-down intent."""
    status = "Volume → DOWN"
    return status
def tool_start_music() -> str:
    """Return the status message for the start-music intent."""
    status = "Music → PLAY"
    return status
def tool_pause_music() -> str:
    """Return the status message for the pause-music intent."""
    status = "Music → PAUSE"
    return status
def tool_set_timer() -> str:
    """Return the status message for the set-timer intent.

    The 5-minute duration is hard-coded in the message; no timer is started.
    """
    status = "Timer → 5 min (demo)"
    return status
def tool_cancel_timer() -> str:
    """Return the status message for the cancel-timer intent."""
    status = "Timer → CANCELLED"
    return status
def tool_open_calendar() -> str:
    """Return the status message for the open-calendar intent."""
    status = "Calendar → OPENED"
    return status
def tool_create_note(text):
    """Return a confirmation message quoting the note text.

    Args:
        text: Note content; only its first 60 characters appear in the message.

    Returns:
        The confirmation string. Nothing is stored anywhere.
    """
    preview = text[:60]
    return f"Note saved: '{preview}'"
def tool_start_recording() -> str:
    """Return the status message for the start-recording intent."""
    status = "Recording → STARTED"
    return status
def tool_stop_recording() -> str:
    """Return the status message for the stop-recording intent."""
    status = "Recording → STOPPED"
    return status
# Dispatch table from intent label to the callable that handles it.
# Every handler takes no arguments except tool_create_note, which takes the
# note text.
TOOLS = dict(
    turn_on_lights=tool_turn_on_lights,
    turn_off_lights=tool_turn_off_lights,
    volume_up=tool_volume_up,
    volume_down=tool_volume_down,
    start_music=tool_start_music,
    pause_music=tool_pause_music,
    set_timer=tool_set_timer,
    cancel_timer=tool_cancel_timer,
    open_calendar=tool_open_calendar,
    create_note=tool_create_note,
    start_recording=tool_start_recording,
    stop_recording=tool_stop_recording,
)
def parse_intents(custom):
    """Turn the comma-separated intents text into a list of candidate labels.

    Args:
        custom: Raw text such as "a, b, c"; may be None, empty, or blank.

    Returns:
        A new list of stripped, non-empty labels in first-seen order with
        duplicates removed. When no usable label remains (None, blank text,
        or only separators such as ", ,"), a copy of DEFAULT_INTENTS is
        returned instead.
    """
    labels = []
    if custom:
        stripped = (piece.strip() for piece in custom.split(","))
        # dict.fromkeys drops repeated labels while keeping the user's order,
        # so the same label is never scored twice.
        labels = list(dict.fromkeys(label for label in stripped if label))
    if not labels:
        # Zero-shot classification needs at least one candidate label, so an
        # empty result falls back to the defaults. Return a copy so callers
        # cannot mutate the shared module-level list.
        return list(DEFAULT_INTENTS)
    return labels
def agent(audio_path, custom_intents, history):
    """Transcribe an audio clip, classify its intent, and run the bound tool.

    Args:
        audio_path: Path of the audio file to transcribe; falsy when no audio
            was supplied.
        custom_intents: Comma-separated candidate labels, parsed with
            parse_intents.
        history: List of [user_message, agent_message] pairs logged so far.
            None is treated as an empty log when a new entry is appended.

    Returns:
        A 4-tuple (top3, chosen, result, history):
        - top3: dict mapping up to three labels to float scores.
        - chosen: the first label returned by the classifier.
        - result: the string returned by the tool, or a "No tool bound"
          message when the label has no entry in TOOLS.
        - history: a new list with one entry appended.
        When there is no audio or the transcript is empty, the first two
        slots are gr.update() with no arguments, the third is a short
        message, and history is returned as received.
    """
    if not audio_path:
        return gr.update(), gr.update(), "No audio.", history

    transcript = asr(audio_path)["text"].strip()
    if not transcript:
        return gr.update(), gr.update(), "No speech detected.", history

    prediction = zsc(
        transcript,
        candidate_labels=parse_intents(custom_intents),
        multi_label=False,
    )
    labels = prediction["labels"]
    scores = prediction["scores"]
    top3 = {label: float(score) for label, score in zip(labels[:3], scores[:3])}

    # The first returned label is taken as the winning intent.
    chosen = labels[0]
    tool = TOOLS.get(chosen)
    if tool is None:
        # Custom labels typed by the user may have no handler.
        result = f"No tool bound: {chosen}"
    elif chosen == "create_note":
        # The only tool that takes an argument: the transcript is the note.
        result = tool(transcript)
    else:
        result = tool()

    entry = [f"User: {transcript}", f"Agent: {chosen} → {result}"]
    # Build a new list instead of mutating the caller's, and tolerate None.
    return top3, chosen, result, list(history or []) + [entry]
# UI: audio and intent inputs, a Run button, and four result widgets.
with gr.Blocks(title="Voice Agent: ASR → Intent → Tools") as demo:
    gr.Markdown("# 🎙️ Voice Agent\nSpeak or upload audio → transcript via Whisper → zero-shot intent → tool execution.")
    with gr.Row():
        audio = gr.Audio(sources=["microphone","upload"], type="filepath", label="Audio")
        intents_box = gr.Textbox(label="Intents (comma-separated)", value=", ".join(DEFAULT_INTENTS))
    run = gr.Button("Run")
    topk = gr.Label(num_top_classes=3, label="Top-k Intents")
    chosen = gr.Textbox(label="Chosen Intent")
    result = gr.Textbox(label="Action Result")
    chat = gr.Chatbot(label="Execution Log")
    state = gr.State([])

    def _run_and_remember(audio_path, custom_intents, history):
        """Call agent and return its history twice: for the chat and the state."""
        top3, intent, outcome, log = agent(audio_path, custom_intents, history)
        return top3, intent, outcome, log, log

    # `state` must be an output as well as an input. Otherwise it keeps its
    # initial empty list and the log only ever shows the latest entry.
    run.click(
        _run_and_remember,
        inputs=[audio, intents_box, state],
        outputs=[topk, chosen, result, chat, state],
        queue=True,
    )

if __name__ == "__main__":
    demo.launch()