File size: 3,130 Bytes
04e7bed
ad991be
04e7bed
ad991be
 
 
4f5e7bd
cbc6a41
04e7bed
ad991be
 
 
cbc6a41
 
 
ad991be
cbc6a41
 
 
ad991be
 
de7930c
 
 
 
 
ad991be
 
 
 
 
 
 
8238dbe
 
 
ad991be
 
0d643bf
 
 
 
 
 
 
 
 
0562541
0d643bf
 
 
0562541
0d643bf
 
 
 
 
8238dbe
0562541
 
 
 
 
 
 
 
 
 
8238dbe
0d643bf
 
ad991be
 
0d643bf
ad991be
 
0d643bf
 
de7930c
0d643bf
de7930c
 
 
8238dbe
 
 
 
 
 
de7930c
8238dbe
de7930c
 
 
 
ad991be
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import os
import gradio as gr
import whisper
from gtts import gTTS
from groq import Groq

# Read the Groq API key from the environment.
# BUG FIX: the original read "GORQ_API_KEY" (typo), so the key was never
# found and the client was constructed with api_key=None.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
client = Groq(api_key=GROQ_API_KEY)

# Load Whisper model once at import time ("base" trades accuracy for speed).
model = whisper.load_model("base")

def chatbot(audio=None):
    """Transcribe recorded audio, get an LLM reply, and speak it back.

    Args:
        audio: Filepath to the recorded audio clip (Gradio ``type="filepath"``),
            or ``None`` when the input was cleared.

    Returns:
        A ``(response_text, audio_path)`` tuple. ``audio_path`` is ``None``
        when there was nothing to process.
    """
    if audio is None:
        return "No input detected. Please provide an audio input.", None

    # Transcribe the audio input using Whisper.
    transcription = model.transcribe(audio)
    user_input = transcription.get("text", "").strip()

    # Robustness: don't send an empty prompt to the LLM when Whisper
    # produced no text (e.g. silence or unintelligible audio).
    if not user_input:
        return "Sorry, I could not understand the audio. Please try again.", None

    # Generate a response using Llama 8B via Groq API.
    chat_completion = client.chat.completions.create(
        messages=[{"role": "user", "content": user_input}],
        model="llama3-8b-8192",
    )
    response_text = chat_completion.choices[0].message.content

    # Convert the response text to speech using gTTS.
    # NOTE(review): fixed filename in the working directory — concurrent
    # requests would overwrite each other; acceptable for a single-user demo.
    tts = gTTS(text=response_text, lang='en')
    tts.save("response.mp3")

    return response_text, "response.mp3"

def clear_inputs():
    """Reset the three UI components (mic input, text reply, audio reply)."""
    return (None,) * 3

# Create a custom interface
def build_interface():
    """Assemble and return the Gradio Blocks app for the voice chatbot.

    Layout: a styled title, a row with the microphone input on the left and
    the text/audio responses on the right, plus a clear button. Event wiring:
    a new recording triggers ``chatbot``; the button triggers ``clear_inputs``.
    """
    custom_css = """
        .block-title {
            text-align: center; 
            color: white;
            background-color: #4CAF50; 
            padding: 10px;
            border-radius: 8px;
        }
        .gradio-row {
            background-color: #f9f9f9;
            border-radius: 8px;
            padding: 20px;
            margin: 10px;
            box-shadow: 0px 4px 12px rgba(0, 0, 0, 0.1);
        }
        .gradio-column {
            padding: 10px;
        }
        .gradio-button {
            background-color: #ff6347 !important;
            color: white !important;
            border-radius: 8px !important;
            padding: 10px 20px !important;
            font-size: 16px !important;
            border: none !important;
            cursor: pointer !important;
            box-shadow: 0px 4px 12px rgba(0, 0, 0, 0.2) !important;
            transition: background-color 0.3s ease !important;
        }
        .gradio-button:hover {
            background-color: #e5533d !important;
        }
    """
    with gr.Blocks(css=custom_css) as app:
        # Page title banner.
        gr.Markdown(
            """
            <h1 class="block-title">Voice-to-Voice AI Chatbot</h1>
            """
        )

        # Input on the left (narrow), responses on the right (wide).
        with gr.Row(elem_classes="gradio-row"):
            with gr.Column(elem_classes="gradio-column", scale=1):
                mic_in = gr.Audio(type="filepath", label="Record Your Voice")
            with gr.Column(elem_classes="gradio-column", scale=2):
                reply_text = gr.Textbox(label="Chatbot Response")
                reply_audio = gr.Audio(label="Audio Response")

        reset_btn = gr.Button("Clear", elem_classes="gradio-button")

        # A finished/changed recording kicks off the full pipeline.
        mic_in.change(
            fn=chatbot,
            inputs=[mic_in],
            outputs=[reply_text, reply_audio]
        )

        # The clear button wipes all three components.
        reset_btn.click(
            fn=clear_inputs,
            outputs=[mic_in, reply_text, reply_audio]
        )

    return app

# Launch the interface
# Launch the interface when run as a script.
if __name__ == "__main__":
    build_interface().launch()