File size: 3,551 Bytes
303c9bb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import asyncio
import os
import tempfile
from io import BytesIO

import gradio as gr
import whisper
from groq import Groq
from gtts import gTTS
from pydub import AudioSegment

# Read the Groq API key from the environment instead of embedding it in the
# source: a key committed to a file is readable by anyone who can see the file.
# NOTE(review): the key that was previously hard-coded here should be treated
# as leaked and revoked in the Groq console.
groq_api_key = os.environ.get("GROQ_API_KEY")
if not groq_api_key:
    # Fail at startup with an actionable message rather than on the first
    # chat request.
    raise RuntimeError(
        "GROQ_API_KEY environment variable is not set; "
        "export it before starting the app."
    )

# Initialize Groq client
client = Groq(api_key=groq_api_key)

# Load Whisper model
model = whisper.load_model("base")  # or "small", "medium", "large"

async def transcribe_audio_async(audio_file):
    """Transcribe an in-memory audio stream with the module-level Whisper model.

    The stream is written to a uniquely named temporary ``.wav`` file, the
    file's path is handed to ``model.transcribe``, and the temporary file is
    removed afterwards whether or not transcription succeeded.

    Args:
        audio_file: A ``BytesIO`` holding the audio data. It is rewound to the
            start before being read.

    Returns:
        The ``"text"`` entry of the transcription result on success. On any
        failure (including a non-``BytesIO`` argument) no exception is raised;
        instead a string starting with ``"Error in transcribing audio: "`` is
        returned.
    """
    temp_file_path = None
    try:
        if not isinstance(audio_file, BytesIO):
            raise ValueError("The provided audio file is not in the expected format.")

        audio_file.seek(0)  # Reset file pointer to the beginning

        # A unique file per call keeps concurrent requests from overwriting
        # each other's audio. delete=False because the file is closed here and
        # reopened by path during transcription.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
            temp_file_path = temp_file.name
            temp_file.write(audio_file.read())

        # Transcribe using Whisper
        result = model.transcribe(temp_file_path)
        return result["text"]
    except Exception as e:
        return f"Error in transcribing audio: {str(e)}"
    finally:
        # Always clean up, even when transcription raised.
        if temp_file_path is not None:
            try:
                os.remove(temp_file_path)
            except OSError:
                # Best-effort cleanup: a leftover temp file must not mask the
                # transcription result or error being returned.
                pass

def generate_response(text):
    """Ask the Groq-hosted ``llama3-8b-8192`` model to reply to ``text``.

    Args:
        text: The user's message; sent as a single ``"user"`` chat message.

    Returns:
        The content of the first completion choice. If ``text`` is empty or
        the request fails, a string starting with
        ``"Error in generating response: "`` is returned instead of raising.
    """
    try:
        if not text:
            raise ValueError("No text provided for response generation.")

        # Single-turn conversation: only the user's message is sent.
        conversation = [{"role": "user", "content": text}]
        completion = client.chat.completions.create(
            messages=conversation,
            model="llama3-8b-8192",
        )
        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as err:
        return f"Error in generating response: {str(err)}"

def text_to_speech(text):
    """Synthesize English speech for ``text`` and return it as WAV bytes.

    gTTS writes MP3 data into an in-memory buffer, which pydub then
    re-exports as WAV.

    Args:
        text: The text to speak.

    Returns:
        The WAV data as ``bytes`` on success. If ``text`` is empty or any step
        fails, a string starting with
        ``"Error in converting text to speech: "`` is returned instead, so
        callers must check the type of the result.
    """
    try:
        if not text:
            raise ValueError("No text provided for text-to-speech.")

        # Step 1: gTTS -> MP3 held in memory.
        mp3_buffer = BytesIO()
        gTTS(text, lang='en').write_to_fp(mp3_buffer)
        mp3_buffer.seek(0)

        # Step 2: MP3 -> WAV, also in memory.
        wav_buffer = BytesIO()
        AudioSegment.from_mp3(mp3_buffer).export(wav_buffer, format="wav")

        # getvalue() yields the whole buffer regardless of the stream position.
        return wav_buffer.getvalue()
    except Exception as err:
        return f"Error in converting text to speech: {str(err)}"

# Prefixes the helper functions put on the strings they return when they fail.
# Matching the whole prefix, rather than the bare word "Error", keeps a
# legitimate transcript or model reply that merely mentions "Error" from being
# mistaken for a failure.
_TRANSCRIBE_ERROR_PREFIX = "Error in transcribing audio:"
_RESPONSE_ERROR_PREFIX = "Error in generating response:"


async def chatbot(audio):
    """Run one voice-chat turn: speech -> text -> model reply -> speech.

    Args:
        audio: Either a filesystem path (``str``) to an audio file, which is
            read fully into memory, or an object passed straight through to
            ``transcribe_audio_async``. ``None`` means no audio was supplied.

    Returns:
        A ``(text, audio)`` tuple. ``text`` is the model's reply, or an error
        / status message if a step failed. ``audio`` is the reply as WAV
        ``bytes``, or ``None`` when no speech could be produced.
    """
    try:
        if audio is None:
            return "No audio file provided.", None

        # Handle Gradio audio input
        if isinstance(audio, str):
            # If the audio is a file path, convert it to BytesIO
            with open(audio, "rb") as file:
                audio = BytesIO(file.read())

        text = await transcribe_audio_async(audio)
        if text.startswith(_TRANSCRIBE_ERROR_PREFIX):
            return text, None

        response_text = generate_response(text)
        if response_text.startswith(_RESPONSE_ERROR_PREFIX):
            return response_text, None

        # text_to_speech returns bytes on success and an error string on
        # failure; on failure the text reply is still shown, without audio.
        audio_response = text_to_speech(response_text)
        if isinstance(audio_response, bytes):
            return response_text, audio_response
        return response_text, None
    except Exception as e:
        return f"Error in chatbot processing: {str(e)}", None

# Build the UI components: one audio input (delivered to the handler as a
# file path) and two outputs (the reply text and the spoken reply).
audio_input = gr.Audio(type="filepath")  # Allow file upload
reply_text_output = gr.Textbox()
reply_audio_output = gr.Audio(type="filepath")

# Wire the components to the chatbot. The handler is a coroutine, so each
# request drives it to completion with asyncio.run.
iface = gr.Interface(
    fn=lambda audio: asyncio.run(chatbot(audio)),
    inputs=audio_input,
    outputs=[reply_text_output, reply_audio_output],
)

# Start serving the app
iface.launch()