File size: 2,759 Bytes
62cb196
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import json
import os
import tempfile
from datetime import datetime

import gradio as gr
import speech_recognition as sr
from fuzzywuzzy import fuzz
from gtts import gTTS
from pydub import AudioSegment

# Optional: uncomment to point pydub at a specific ffmpeg binary (example Windows path).
# AudioSegment.converter = r"C:\ffmpeg\ffmpeg-2025-04-17-git-7684243fbe-full_build\bin\ffmpeg.exe"

# Read the FAQ lookup table once at import time. The file is resolved
# relative to the current working directory and decoded as UTF-8.
with open('data.json', encoding='utf-8') as f:
    faq_data = json.loads(f.read())

def clean_reply(reply, max_words=50):
    """Shorten a reply to at most ``max_words`` words, ending on a clean break.

    Replies that already fit are returned as-is (minus surrounding
    whitespace), so a short answer is never clipped at an interior comma or
    period.

    When truncation is needed, the shortened text is cut back to the last
    sentence terminator (Latin "." or Urdu full stop); failing that, to the
    last comma (Latin "," or Urdu comma); failing that, the plain first
    ``max_words`` words are returned.

    Args:
        reply: The answer text. ``None`` or an empty string yields "".
        max_words: Maximum number of whitespace-separated words to keep.

    Returns:
        The cleaned reply with no leading or trailing whitespace.
    """
    if not reply:
        return ""

    words = reply.split()
    if len(words) <= max_words:
        # Nothing was cut off, so there is no dangling fragment to trim.
        return reply.strip()

    truncated = " ".join(words[:max_words])

    # Prefer ending on a full sentence; fall back to a clause boundary.
    # "\u06d4" is the Urdu full stop and "\u060c" is the Urdu/Arabic comma.
    boundary_groups = ((".", "\u06d4"), (",", "\u060c"))
    for marks in boundary_groups:
        cut = max(truncated.rfind(mark) for mark in marks)
        if cut != -1:
            return truncated[:cut + 1].strip()
    return truncated

def chat_with_ai(prompt):
    """Return the FAQ answer whose question best matches ``prompt``.

    Every question in ``faq_data`` is scored against the prompt with
    ``fuzz.partial_ratio``. The answer of the highest-scoring question
    (the earliest one on ties) is passed through ``clean_reply`` and
    returned when its score is at least 70; otherwise a fixed Urdu
    hand-off message is returned.
    """
    scored_answers = (
        (fuzz.partial_ratio(prompt, question), answer)
        for question, answer in faq_data.items()
    )
    # max() keeps the first of equally scored entries, and the default covers
    # an empty FAQ table.
    top_score, top_answer = max(
        scored_answers, key=lambda pair: pair[0], default=(0, None)
    )

    if top_score < 70:
        return "میں آپ کی کال اپنے سینئر کو منتقل کر رہا ہوں، وہ آپ کی مزید رہنمائی کریں گے۔"
    return clean_reply(top_answer)

def respond_to_audio(audio_path):
    """Transcribe a recorded question and return the path of a spoken reply.

    The recording is transcribed with Google speech recognition (language
    "ur-PK"), answered through ``chat_with_ai``, synthesised with gTTS
    (language "ur") and re-exported as an MP3 in the current working
    directory.

    Args:
        audio_path: Filesystem path of the recorded audio, or a falsy value
            when there is no recording.

    Returns:
        Path of the generated MP3, or ``None`` when there is no input,
        speech recognition fails, or there is no reply text to speak.
        Response files are not deleted by this function.
    """
    if not audio_path:
        return None

    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio_path) as source:
            audio = recognizer.record(source)
        # The audio is fully read at this point, so the file can be closed
        # before the (slow) network call.
        text = recognizer.recognize_google(audio, language="ur-PK")
    except Exception as e:
        # Best-effort boundary: any recognition problem yields "no answer".
        print("Speech Recognition Error:", e)
        return None

    print("📥 User said:", text)
    response_text = chat_with_ai(text)
    print("🤖 Bot responds:", response_text)

    if not response_text:
        # gTTS refuses empty text; there is nothing to speak.
        return None

    # Generate TTS audio in a per-request scratch file so that concurrent
    # requests cannot overwrite each other's intermediate MP3.
    fd, temp_path = tempfile.mkstemp(prefix="tts_", suffix=".mp3")
    os.close(fd)
    try:
        gTTS(response_text, lang="ur").save(temp_path)
        sound = AudioSegment.from_file(temp_path)
    finally:
        try:
            os.remove(temp_path)
        except OSError:
            pass

    # Playback speed multiplier; 1.0 leaves the speed unchanged.
    speed_factor = 1.0
    faster_sound = sound._spawn(sound.raw_data, overrides={
        "frame_rate": int(sound.frame_rate * speed_factor)
    }).set_frame_rate(sound.frame_rate)

    # mkstemp adds a random component after the timestamp, so two requests
    # within the same second still get distinct response files.
    stamp = datetime.now().strftime('%Y%m%d%H%M%S')
    fd, response_audio_path = tempfile.mkstemp(
        prefix=f"response_{stamp}_", suffix=".mp3", dir=os.getcwd()
    )
    os.close(fd)
    faster_sound.export(response_audio_path, format="mp3")

    return response_audio_path

# Gradio UI: one audio input, one auto-playing audio output.
app = gr.Blocks(title="🤖 Dany Tameerat Voice Assistant")
with app:
    gr.Markdown("## 🤖 Dany Tameerat Voice Assistant\n📢 مائیک سے سوال کریں، AI اردو میں جواب دے گا!")

    audio_input = gr.Audio(
        type="filepath",
        label="🎤 مائک سے بولیں",
        streaming=False,
    )
    audio_output = gr.Audio(
        label="🔊 جواب سنیں",
        autoplay=True,
    )

    # Run the pipeline whenever the input audio value changes; the returned
    # file path is fed to the output player.
    audio_input.change(respond_to_audio, inputs=audio_input, outputs=audio_output)

app.launch(share=True)