# Scraped from a Hugging Face Space file view (Space status: Sleeping).
# File size: 3,551 Bytes; revision 303c9bb.
import gradio as gr
import whisper
from gtts import gTTS
from io import BytesIO
from pydub import AudioSegment
from groq import Groq
import os
import asyncio
# Read the Groq API key from the environment instead of embedding it in the
# source file. A key committed to a published file must be treated as leaked:
# revoke it and provide the replacement through the GROQ_API_KEY variable
# (for example as a deployment secret).
groq_api_key = os.environ.get("GROQ_API_KEY")
if not groq_api_key:
    raise RuntimeError(
        "GROQ_API_KEY environment variable is not set; "
        "it is required to call the Groq API."
    )

# Initialize Groq client
client = Groq(api_key=groq_api_key)

# Load Whisper model once at import time so every request reuses it.
model = whisper.load_model("base")  # or "small", "medium", "large"
async def transcribe_audio_async(audio_file):
    """Transcribe in-memory audio using the module-level Whisper ``model``.

    Args:
        audio_file: A ``BytesIO`` containing the encoded audio data.

    Returns:
        The transcribed text on success. On any failure (including an
        argument that is not a ``BytesIO``) a string beginning with
        ``"Error in transcribing audio: "`` is returned instead; this
        function reports errors in-band and does not raise.
    """
    import tempfile  # local import so the block does not depend on file-level imports

    temp_file_path = None
    try:
        if not isinstance(audio_file, BytesIO):
            raise ValueError("The provided audio file is not in the expected format.")

        audio_file.seek(0)  # Reset file pointer to the beginning

        # Each call gets its own uniquely named file, so concurrent requests
        # cannot overwrite or delete one another's audio (a fixed "temp.wav"
        # in the working directory is shared by every request).
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
            temp_file_path = temp_file.name
            temp_file.write(audio_file.read())

        # The file is closed before transcription so it can be reopened by path.
        result = model.transcribe(temp_file_path)
        return result["text"]
    except Exception as e:
        return f"Error in transcribing audio: {str(e)}"
    finally:
        # Always remove the temporary file, even when transcription failed.
        if temp_file_path is not None:
            try:
                os.remove(temp_file_path)
            except OSError:
                # Best-effort cleanup: a file that is already gone must not
                # mask the transcription result.
                pass
def generate_response(text):
    """Ask the Groq-hosted LLaMA model for a reply to ``text``.

    Args:
        text: The user's message; must be non-empty.

    Returns:
        The content of the first completion choice, or a string beginning
        with ``"Error in generating response: "`` when the input is empty
        or the request fails. Errors are reported in-band, never raised.
    """
    try:
        if not text:
            raise ValueError("No text provided for response generation.")

        # Single-turn conversation: the whole input is sent as one user message.
        user_message = {"role": "user", "content": text}
        completion = client.chat.completions.create(
            messages=[user_message],
            model="llama3-8b-8192",
        )
        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as err:
        return "Error in generating response: " + str(err)
def text_to_speech(text):
    """Synthesize English speech for ``text`` and return it as WAV bytes.

    Args:
        text: The text to speak; must be non-empty.

    Returns:
        The WAV-encoded audio as ``bytes``, or a string beginning with
        ``"Error in converting text to speech: "`` when the input is empty
        or synthesis/conversion fails. Errors are reported in-band.
    """
    try:
        if not text:
            raise ValueError("No text provided for text-to-speech.")

        # gTTS writes MP3 data into the in-memory buffer.
        mp3_buffer = BytesIO()
        gTTS(text, lang='en').write_to_fp(mp3_buffer)
        mp3_buffer.seek(0)

        # Re-encode the MP3 as WAV, still entirely in memory.
        wav_buffer = BytesIO()
        AudioSegment.from_mp3(mp3_buffer).export(wav_buffer, format="wav")

        # getvalue() yields the full buffer contents regardless of position.
        return wav_buffer.getvalue()
    except Exception as err:
        return "Error in converting text to speech: " + str(err)
async def chatbot(audio):
    """Run one voice-chat turn: transcribe, generate a reply, synthesize it.

    Args:
        audio: A path to an audio file (``str``), a ``BytesIO`` with the
            audio data, or ``None`` when nothing was provided.

    Returns:
        A ``(text, audio_bytes)`` tuple. ``text`` is the model's reply, or an
        error/status message when a stage failed. ``audio_bytes`` is the
        spoken reply as bytes, or ``None`` when no audio could be produced.
        This function does not raise; unexpected failures are reported as
        ``"Error in chatbot processing: ..."``.
    """
    # The helper functions report failure in-band by returning a message with
    # a fixed prefix. Match that exact prefix: a plain substring test for
    # "Error" would misclassify any transcription or model reply that merely
    # mentions the word as a failure and drop the audio answer.
    transcription_error_prefix = "Error in transcribing audio:"
    response_error_prefix = "Error in generating response:"

    try:
        if audio is None:
            return "No audio file provided.", None

        # Handle Gradio audio input: a file path is loaded into memory because
        # the transcription helper expects a BytesIO.
        if isinstance(audio, str):
            with open(audio, "rb") as file:
                audio = BytesIO(file.read())

        text = await transcribe_audio_async(audio)
        if text.startswith(transcription_error_prefix):
            return text, None

        response_text = generate_response(text)
        if response_text.startswith(response_error_prefix):
            return response_text, None

        # text_to_speech returns bytes on success and an error string on
        # failure; in the latter case the text reply is still shown.
        audio_response = text_to_speech(response_text)
        if isinstance(audio_response, bytes):
            return response_text, audio_response
        return response_text, None
    except Exception as e:
        return f"Error in chatbot processing: {str(e)}", None
# Define Gradio interface: one audio input, and a text box plus an audio
# player for the reply.
audio_input = gr.Audio(type="filepath")  # Allow file upload
reply_text_output = gr.Textbox()
reply_audio_output = gr.Audio(type="filepath")

iface = gr.Interface(
    # chatbot is a coroutine function; each call is driven to completion
    # with its own event loop via asyncio.run.
    fn=lambda audio: asyncio.run(chatbot(audio)),
    inputs=audio_input,
    outputs=[reply_text_output, reply_audio_output],
)

# Launch the Gradio app
iface.launch()
|