# Source: Hugging Face Spaces - Fawad97/voicechatbot (status at capture: Sleeping)
# File size: 3,551 Bytes
# Revision: 303c9bb

import gradio as gr
import whisper
from gtts import gTTS
from io import BytesIO
from pydub import AudioSegment
from groq import Groq
import os
import asyncio

# Read the Groq API key from the environment (for example a Space secret)
# instead of hard-coding it in the source. Any key that was ever committed to
# this file must be treated as leaked and should be revoked and rotated.
groq_api_key = os.environ.get("GROQ_API_KEY")
if not groq_api_key:
    raise RuntimeError(
        "GROQ_API_KEY environment variable is not set; "
        "configure it before starting the app."
    )

# Initialize Groq client
client = Groq(api_key=groq_api_key)

# Load Whisper model once at import time so every request reuses it
model = whisper.load_model("base")  # or "small", "medium", "large"

async def transcribe_audio_async(audio_file):
    """Transcribe in-memory audio to text using the module-level ``model``.

    Args:
        audio_file: A ``BytesIO`` holding the encoded audio. Any other type
            is rejected.

    Returns:
        The transcribed text on success. On any failure a string starting
        with ``"Error in transcribing audio: "`` is returned instead of an
        exception being raised.
    """
    import tempfile  # local import: only this function needs it

    try:
        if not isinstance(audio_file, BytesIO):
            raise ValueError("The provided audio file is not in the expected format.")

        audio_file.seek(0)  # Reset file pointer to the beginning

        # Use a unique temporary file per call: a fixed name such as
        # "temp.wav" would be shared by concurrent requests, which could
        # overwrite or delete each other's audio.
        temp_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
        temp_file_path = temp_file.name
        try:
            # Close the handle before transcription so the path can be
            # reopened by the transcriber on every platform.
            with temp_file:
                temp_file.write(audio_file.read())

            result = model.transcribe(temp_file_path)
        finally:
            # Always clean up, including when writing or transcription fails.
            os.remove(temp_file_path)

        return result["text"]
    except Exception as e:
        return f"Error in transcribing audio: {str(e)}"

def generate_response(text):
    """Ask the Groq-hosted chat model for a reply to ``text``.

    Args:
        text: The user's message. Must be non-empty.

    Returns:
        The content of the first completion choice, or a string starting
        with ``"Error in generating response: "`` when the input is empty or
        the request fails.
    """
    try:
        if not text:
            raise ValueError("No text provided for response generation.")

        # Single-turn conversation: only the current user message is sent.
        conversation = [{"role": "user", "content": text}]
        completion = client.chat.completions.create(
            messages=conversation,
            model="llama3-8b-8192",
        )
        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as exc:
        return f"Error in generating response: {exc}"

def text_to_speech(text):
    """Synthesize English speech for ``text`` and return it as WAV bytes.

    Args:
        text: The text to speak. Must be non-empty.

    Returns:
        The WAV-encoded audio as ``bytes``, or a string starting with
        ``"Error in converting text to speech: "`` when the input is empty
        or synthesis/conversion fails.
    """
    try:
        if not text:
            raise ValueError("No text provided for text-to-speech.")

        # Synthesize with gTTS into an in-memory buffer.
        speech = gTTS(text, lang='en')
        mp3_buffer = BytesIO()
        speech.write_to_fp(mp3_buffer)
        mp3_buffer.seek(0)

        # Re-encode the MP3 data as WAV, still entirely in memory.
        wav_buffer = BytesIO()
        AudioSegment.from_mp3(mp3_buffer).export(wav_buffer, format="wav")

        # getvalue() yields the whole buffer regardless of the read position.
        return wav_buffer.getvalue()
    except Exception as exc:
        return f"Error in converting text to speech: {exc}"

async def chatbot(audio):
    """Run one voice-chat turn: speech -> text -> model reply -> speech.

    Args:
        audio: ``None``, a path to an audio file (``str``), or an object that
            ``transcribe_audio_async`` accepts.

    Returns:
        A ``(text, audio)`` tuple. ``text`` is the model's reply, or an error
        or status message. ``audio`` is the spoken reply as ``bytes``, or
        ``None`` whenever any stage failed.
    """
    try:
        if audio is None:
            return "No audio file provided.", None

        # Handle Gradio audio input: a file path is loaded into memory first.
        if isinstance(audio, str):
            with open(audio, "rb") as file:
                audio = BytesIO(file.read())

        text = await transcribe_audio_async(audio)
        # The helpers signal failure by returning a message with a fixed
        # prefix. Match that exact prefix: looking for "Error" anywhere would
        # misclassify a genuine transcript that merely mentions the word.
        if text.startswith("Error in transcribing audio:"):
            return text, None

        response_text = generate_response(text)
        # Same reasoning: a model reply that talks about errors is still a
        # valid reply and must be spoken back.
        if response_text.startswith("Error in generating response:"):
            return response_text, None

        audio_response = text_to_speech(response_text)
        if isinstance(audio_response, bytes):
            return response_text, audio_response

        # Speech synthesis failed (a message string came back instead of
        # bytes); still show the text reply, just without audio.
        return response_text, None
    except Exception as e:
        return f"Error in chatbot processing: {str(e)}", None

# Build the Gradio UI: one audio input, and a text box plus an audio player
# as outputs.
audio_input = gr.Audio(type="filepath")  # Allow file upload
reply_outputs = [gr.Textbox(), gr.Audio(type="filepath")]

iface = gr.Interface(
    # chatbot is a coroutine function; run it to completion for each request.
    fn=lambda audio: asyncio.run(chatbot(audio)),
    inputs=audio_input,
    outputs=reply_outputs,
)

# Start serving the app.
iface.launch()