File size: 2,149 Bytes
7f08e4f
 
 
 
d06e8d0
7f08e4f
 
d06e8d0
7f08e4f
 
d06e8d0
7f08e4f
d06e8d0
7f08e4f
 
 
d06e8d0
7f08e4f
d06e8d0
7f08e4f
 
 
d06e8d0
7f08e4f
d06e8d0
 
 
 
 
 
716e25d
d06e8d0
7f08e4f
d06e8d0
7f08e4f
 
 
 
 
 
 
d06e8d0
 
 
 
7f08e4f
d06e8d0
 
 
 
7f08e4f
 
 
 
 
 
 
 
 
 
 
d06e8d0
7f08e4f
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import os

import gradio as gr
import whisper
from groq import Groq
from pydub import AudioSegment
from TTS.api import TTS

# Initialize models.
# "small" Whisper model is a deliberate speed/accuracy trade-off.
whisper_model = whisper.load_model("small")
tts_model = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False, gpu=False)

# Initialize Groq Client.
# SECURITY: an API key was previously hard-coded here; any key that has been
# committed to source control is compromised and must be revoked. Supply the
# key via the GROQ_API_KEY environment variable instead.
groq_api_key = os.environ.get("GROQ_API_KEY", "")
if not groq_api_key:
    raise RuntimeError("GROQ_API_KEY environment variable is not set")
client = Groq(api_key=groq_api_key)

# Functions for the Chatbot
def voice_to_text(audio_path):
    """Transcribe the audio file at *audio_path* with Whisper and return the text."""
    return whisper_model.transcribe(audio_path)["text"]

def process_text_with_groq(input_text):
    """Send *input_text* to the Groq LLM and return the reply text.

    On any failure the exception is converted to an "Error: ..." string
    rather than propagated, so the UI always gets something to display.
    """
    try:
        completion = client.chat.completions.create(
            model="llama3-8b-8192",
            messages=[{"role": "user", "content": input_text}],
            stream=False,
        )
        return completion.choices[0].message.content
    except Exception as e:
        return f"Error: {str(e)}"

def text_to_voice(output_text):
    """Synthesize *output_text* with Coqui TTS; return the path to the WAV file.

    NOTE(review): always writes to the same fixed path, so concurrent
    requests would overwrite each other — confirm single-user usage.
    """
    wav_path = "response.wav"
    tts_model.tts_to_file(text=output_text, file_path=wav_path)
    return wav_path

# Gradio Interface
def chatbot(audio_file):
    """Full pipeline: uploaded audio -> transcript -> LLM reply -> (text, audio path)."""
    # Transcribe the uploaded audio, then ask the LLM for a reply.
    transcript = voice_to_text(audio_file)
    reply = process_text_with_groq(transcript)
    # Return the reply both as text and as a synthesized audio file.
    return reply, text_to_voice(reply)

# Gradio front-end: one audio input, paired text + audio outputs.
ui = gr.Interface(
    fn=chatbot,
    title="Zeeshan Voice-to-Voice Chatbot",
    description="Upload an audio file to interact with Zeeshan. Zeeshan will listen, process your query using Groq's LLM, and respond with both text and voice.",
    inputs=gr.Audio(type="filepath", label="Upload Audio File"),
    outputs=[
        gr.Textbox(label="Chatbot Response"),
        gr.Audio(label="Chatbot Voice Response"),
    ],
)

# Launch the app only when run as a script (not on import).
if __name__ == "__main__":
    ui.launch()