MANI2003 committed on
Commit
50d8efe
·
verified ·
1 Parent(s): b51beaa

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -0
app.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torchaudio
3
+ from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
4
+ import gradio as gr
5
+
6
+ # 1️⃣ Load Whisper model for Speech-to-Text
7
asr_pipe = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-small",
)

# Text generation: a small Qwen chat checkpoint and its matching tokenizer.
model_name = "Qwen/Qwen1.5-0.5B-Chat"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Half precision only when a CUDA device is available; float32 otherwise.
if torch.cuda.is_available():
    _model_dtype = torch.float16
else:
    _model_dtype = torch.float32

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=_model_dtype,
)
13
+
14
+ # Bot reply generator
15
def generate_response(user_text):
    """Generate the assistant's reply to a single user message.

    Args:
        user_text: The user's message as plain text.

    Returns:
        The text the model generated for the assistant turn, with
        surrounding whitespace stripped.
    """
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": user_text},
    ]

    # add_generation_prompt=True appends the assistant-turn header, so the
    # model answers the user instead of continuing the user's own message.
    input_ids = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)

    with torch.no_grad():
        output = model.generate(
            input_ids,
            max_new_tokens=150,
            pad_token_id=tokenizer.eos_token_id,
        )

    # The generated sequence starts with the prompt tokens; keep only what
    # follows them. Slicing by prompt length avoids text-based extraction
    # (splitting on the word "assistant", deleting ":"), which would corrupt
    # any reply that itself contains that word or a colon.
    new_tokens = output[0][input_ids.shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
33
+
34
+
35
+ # Complete pipeline: Audio → Text → Response
36
def audio_to_bot_response(audio_path):
    """Transcribe a recorded audio file and return the bot's reply to it.

    Args:
        audio_path: Path of the recorded audio file, or None / an empty
            string when nothing was recorded.

    Returns:
        A two-line string holding the transcription and the bot's reply, or
        a short notice when there is no audio or no speech was recognised.
    """
    # The UI may submit without a recording; bail out before the ASR
    # pipeline is handed a missing path.
    if not audio_path:
        return "No audio received. Please record a message and try again."

    print("[INFO] Transcribing audio...")
    result = asr_pipe(audio_path)
    user_text = result['text']

    # Silence or noise can transcribe to an empty string; do not ask the
    # language model to answer nothing.
    if not user_text.strip():
        return "No speech was recognised in the recording. Please try again."

    print(f"[INFO] Transcribed: {user_text}")
    response = generate_response(user_text)

    return f"πŸ‘€ You said: {user_text}\nπŸ€– Bot: {response}"
46
+
47
# Gradio UI: microphone recording in (handed to the callback as a file path),
# plain text out.
interface = gr.Interface(
    title="πŸŽ™ Voice to AI Bot Response",
    description="Speak into the mic. The AI will transcribe and respond.",
    fn=audio_to_bot_response,
    inputs=gr.Audio(type="filepath", sources=["microphone"]),
    outputs="text",
)

# Start serving the app.
interface.launch()