Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import tempfile | |
| import torch | |
| # ========================= | |
| # ASR (Faster Whisper - CPU) | |
| # ========================= | |
| from faster_whisper import WhisperModel | |
| asr_model = WhisperModel( | |
| "base", | |
| device="cpu", | |
| compute_type="int8" | |
| ) | |
| def transcribe(audio): | |
| if audio is None: | |
| return "" | |
| segments, _ = asr_model.transcribe(audio) | |
| text = " ".join([seg.text for seg in segments]) | |
| return text.strip() | |
| # ========================= | |
| # LLM (Qwen 0.5B - CPU) | |
| # ========================= | |
| from transformers import AutoTokenizer, AutoModelForCausalLM | |
| MODEL_NAME = "Qwen/Qwen2.5-0.5B-Instruct" | |
| tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME) | |
| model = AutoModelForCausalLM.from_pretrained( | |
| MODEL_NAME, | |
| dtype=torch.float32, | |
| low_cpu_mem_usage=True | |
| ) | |
| def generate_response(text): | |
| if not text: | |
| return "Say something..." | |
| prompt = f"User: {text}\nAssistant:" | |
| inputs = tokenizer(prompt, return_tensors="pt") | |
| with torch.no_grad(): | |
| output = model.generate( | |
| **inputs, | |
| max_new_tokens=120, | |
| do_sample=True, | |
| temperature=0.7, | |
| top_p=0.9 | |
| ) | |
| response = tokenizer.decode(output[0], skip_special_tokens=True) | |
| return response.split("Assistant:")[-1].strip() | |
| # ========================= | |
| # TTS (CPU - pyttsx3) | |
| # ========================= | |
| import pyttsx3 | |
| engine = pyttsx3.init() | |
| engine.setProperty("rate", 170) | |
| def text_to_speech(text): | |
| with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f: | |
| path = f.name | |
| engine.save_to_file(text, path) | |
| engine.runAndWait() | |
| return path | |
| # ========================= | |
| # FULL PIPELINE | |
| # ========================= | |
| def full_pipeline(audio): | |
| text = transcribe(audio) | |
| response = generate_response(text) | |
| audio_out = text_to_speech(response) | |
| return text, response, audio_out | |
| # ========================= | |
| # GRADIO UI | |
| # ========================= | |
| with gr.Blocks() as demo: | |
| gr.Markdown("# 🎙️ CPU Voice Agent (ASR + LLM + TTS)") | |
| audio_input = gr.Audio( | |
| sources=["microphone"], | |
| type="filepath", | |
| label="Speak" | |
| ) | |
| btn = gr.Button("Run") | |
| text_out = gr.Textbox(label="Transcription") | |
| response_out = gr.Textbox(label="LLM Response") | |
| audio_out = gr.Audio(label="Response Audio") | |
| btn.click( | |
| fn=full_pipeline, | |
| inputs=audio_input, | |
| outputs=[text_out, response_out, audio_out] | |
| ) | |
| demo.launch() |