AliDaud committed on
Commit
65ee79b
·
verified ·
1 Parent(s): 375a9f9

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +95 -0
  2. requirements.txt +6 -0
app.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, sys, time, json, asyncio, tempfile
2
+ import gradio as gr
3
+ import soundfile as sf, edge_tts, requests
4
+ from groq import Groq
5
+ from dotenv import load_dotenv
6
+
7
+ # ───── Load .env and keys ───────────────────────────────────
8
# Load variables from a local .env file (if present) into the process
# environment before they are read below.
load_dotenv()

# Token sent as the Bearer credential to WHISPER_URL.
HF_TOKEN = os.getenv("Hugging_Face_API")
WHISPER_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3"
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
VOICE_ID = "ur-PK-UzmaNeural"  # edge-tts voice used for the spoken reply
MAX_TOKENS = 256  # max_tokens passed to every chat completion request

# Without a token the Authorization header would literally read "Bearer None";
# presumably the API rejects that, so surface the misconfiguration at startup
# instead of only seeing empty transcriptions later.
if not HF_TOKEN:
    print("⚠️ Hugging_Face_API is not set; transcription requests will likely fail.",
          file=sys.stderr)

client = Groq(api_key=GROQ_API_KEY)
16
+
17
+ # ───── Transcribe audio ─────────────────────────────────────
18
def whisper_transcribe(audio_path: str) -> str:
    """Transcribe the audio file at *audio_path* with the hosted Whisper model.

    The raw file bytes are POSTed to ``WHISPER_URL`` with ``HF_TOKEN`` as the
    Bearer token and a declared content type of ``audio/wav``.

    Args:
        audio_path: Path of the recording to upload.

    Returns:
        The ``"text"`` field of the JSON response, or ``""`` when the request
        fails, the body is not JSON, or the payload carries no text.

    Raises:
        OSError: If *audio_path* cannot be opened or read.
    """
    with open(audio_path, "rb") as f:
        audio = f.read()

    try:
        r = requests.post(
            WHISPER_URL,
            headers={
                "Authorization": f"Bearer {HF_TOKEN}",
                "Content-Type": "audio/wav",
            },
            data=audio,
            timeout=120,
        )
    except requests.RequestException as e:
        # Timeouts / connection failures must not crash the whole pipeline.
        print("❌ Whisper request error:", e, file=sys.stderr)
        return ""

    try:
        payload = r.json()
    except ValueError:
        # requests raises a JSON decode error that is always a ValueError, but
        # is not necessarily json.JSONDecodeError (e.g. when simplejson is used).
        print("❌ Whisper JSON error:", r.text, file=sys.stderr)
        return ""

    # Only a JSON object can carry the "text" field; anything else (or an
    # object without text, such as an error report) yields an empty result.
    txt = ""
    if isinstance(payload, dict):
        txt = payload.get("text") or ""
    if not txt:
        print("❌ Whisper returned no text:", r.status_code, payload, file=sys.stderr)

    print("📝 Transcription:", txt)
    return txt
35
+
36
+ # ───── Generate Urdu response ───────────────────────────────
37
def generate_urdu(prompt: str) -> str:
    """Ask the Groq chat model for an Urdu reply to *prompt*.

    Args:
        prompt: The user's (transcribed) question. Surrounding whitespace is
            ignored.

    Returns:
        The model's reply, stripped of surrounding whitespace. If generation
        stopped because the ``MAX_TOKENS`` limit was reached, the reply is
        trimmed back to its last Urdu full stop so it does not end
        mid-sentence. Returns ``""`` for a blank prompt or when the API call
        fails.
    """
    urdu_full_stop = "\u06d4"  # "۔", the Urdu sentence terminator

    question = prompt.strip()
    if not question:
        return ""

    messages = [
        # System prompt: "Please always answer in simple and eloquent Urdu."
        {"role": "system", "content": "براہ کرم ہمیشہ سادہ اور فصیح اردو میں جواب دیں۔"},
        {"role": "user", "content": question},
    ]
    try:
        res = client.chat.completions.create(
            model="llama-3.1-8b-instant",
            messages=messages,
            temperature=0.7,
            max_tokens=MAX_TOKENS,
        )
        choice = res.choices[0]
        # content can be None on an empty completion; treat that as no reply.
        reply = (choice.message.content or "").strip()
        truncated = getattr(choice, "finish_reason", None) == "length"
    except Exception as e:
        # Top-level boundary for the LLM call: report and degrade to "no reply".
        print("❌ Groq error:", e, file=sys.stderr)
        return ""

    # Only a reply cut off by the token limit needs trimming; a reply that
    # finished normally may legitimately end in "؟" or "!" and must be kept.
    if truncated and urdu_full_stop in reply:
        reply = reply.rsplit(urdu_full_stop, 1)[0] + urdu_full_stop

    print("🤖 Reply:", reply)
    return reply
59
+
60
+ # ───── Generate TTS audio ───────────────────────────────────
61
async def speak(text: str) -> str:
    """Synthesize *text* to an MP3 file with edge-tts.

    Args:
        text: The text to speak, using the ``VOICE_ID`` voice.

    Returns:
        Path of the generated ``.mp3`` temp file, or ``""`` when *text* is
        empty or only whitespace. The caller owns the file; it is not deleted
        here on success.

    Raises:
        Exception: Whatever edge-tts raises while synthesizing or saving; the
            partially written temp file is removed before re-raising.
    """
    if not text or not text.strip():
        return ""

    # Reserve a unique path. The handle is closed right away because edge-tts
    # writes to the file by name.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        tmp_path = tmp.name

    try:
        await edge_tts.Communicate(text=text, voice=VOICE_ID).save(tmp_path)
    except BaseException:
        # Also covers task cancellation: never leave an orphaned temp file.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise
    return tmp_path
69
+
70
+ # ───── Complete pipeline ────────────────────────────────────
71
async def process_pipeline(audio_file):
    """Run the full voice round trip: transcribe, generate a reply, speak it.

    Args:
        audio_file: Path of the recorded question, or a falsy value when
            nothing was recorded.

    Returns:
        A ``(reply_text, audio_path)`` tuple. ``audio_path`` is ``None`` when
        there is no input or no spoken reply was produced.
    """
    if not audio_file:
        return "📭 Empty input", None

    # The transcription and chat calls are blocking network requests; run them
    # in the default executor so the event loop stays free while they wait.
    loop = asyncio.get_running_loop()
    transcription = await loop.run_in_executor(None, whisper_transcribe, audio_file)
    reply = await loop.run_in_executor(None, generate_urdu, transcription)

    audio_output_path = await speak(reply)
    # speak() signals "nothing to say" with ""; hand the audio output None
    # instead so an empty string is not mistaken for a file path.
    return reply, audio_output_path or None
79
+
80
+ # ───── Gradio Interface ─────────────────────────────────────
81
# Two-column layout: recording + trigger on the left, text and spoken reply
# on the right.
with gr.Blocks() as demo:
    gr.Markdown("## 🗣️ Urdu Voice Chatbot with LLaMA-3.1 + Whisper + Edge TTS")

    with gr.Row():
        with gr.Column():
            # type="filepath" hands process_pipeline a path on disk.
            audio_input = gr.Audio(type="filepath", label="🎙️ Record your question")
            submit_btn = gr.Button("▶️ Process")
        with gr.Column():
            output_text = gr.Textbox(label="🤖 Urdu Response")
            output_audio = gr.Audio(label="🔊 Spoken Response", autoplay=True)

    # process_pipeline returns (reply_text, audio_path), matching the outputs.
    submit_btn.click(fn=process_pipeline, inputs=audio_input, outputs=[output_text, output_audio])

# Run only when executed as a script, so importing this module (e.g. to reuse
# `demo`) does not start a server.
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ gradio
2
+ groq
3
+ python-dotenv
4
+ soundfile
5
+ requests
6
+ edge-tts