# Hugging Face Spaces page-scrape residue (Space status: "Sleeping") —
# preserved as a comment so the file remains valid Python.
import asyncio
import os
import tempfile
from io import BytesIO

import gradio as gr
import whisper
from groq import Groq
from gtts import gTTS
from pydub import AudioSegment
# --- Configuration ---------------------------------------------------------
# Read the Groq API key from the environment instead of hard-coding it:
# a key committed to source control is effectively public and must be
# treated as compromised (any previously committed key should be rotated).
groq_api_key = os.environ.get("GROQ_API_KEY", "")

# Initialize the Groq chat-completion client used by generate_response().
client = Groq(api_key=groq_api_key)

# Load the local Whisper speech-to-text model.
# "base" trades accuracy for speed; "small"/"medium"/"large" are more accurate.
model = whisper.load_model("base")
async def transcribe_audio_async(audio_file):
    """Transcribe speech in *audio_file* (a BytesIO of audio data) to text.

    Returns the transcribed text on success, or a string starting with
    "Error in transcribing audio:" on failure (callers check that prefix
    rather than catching exceptions).
    """
    try:
        if not isinstance(audio_file, BytesIO):
            raise ValueError("The provided audio file is not in the expected format.")
        audio_file.seek(0)  # rewind in case the buffer was already read

        # Whisper's API takes a file path, so spill the buffer to a unique
        # temporary file. (A fixed name like "temp.wav" would race when two
        # requests are handled concurrently.)
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            tmp.write(audio_file.read())
            temp_file_path = tmp.name
        try:
            # model.transcribe is CPU-bound and blocking; run it in a worker
            # thread so this coroutine does not stall the event loop.
            result = await asyncio.to_thread(model.transcribe, temp_file_path)
        finally:
            # Always remove the temp file, even if transcription fails
            # (the original leaked it on any transcription error).
            os.remove(temp_file_path)
        return result["text"]
    except Exception as e:
        return f"Error in transcribing audio: {str(e)}"
def generate_response(text):
    """Send *text* to the LLaMA-3 8B model via Groq and return its reply.

    On any failure (including empty input) this returns a string starting
    with "Error in generating response:" instead of raising.
    """
    try:
        if not text:
            raise ValueError("No text provided for response generation.")
        # Single-turn chat completion against the Groq-hosted LLaMA 8b model.
        completion = client.chat.completions.create(
            model="llama3-8b-8192",
            messages=[{"role": "user", "content": text}],
        )
        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as exc:
        return f"Error in generating response: {str(exc)}"
def text_to_speech(text):
    """Synthesize *text* to WAV audio bytes using gTTS.

    Returns raw WAV bytes on success, or a string starting with
    "Error in converting text to speech:" on failure.
    """
    try:
        if not text:
            raise ValueError("No text provided for text-to-speech.")
        # gTTS emits MP3; render it into an in-memory buffer first.
        mp3_buffer = BytesIO()
        gTTS(text, lang='en').write_to_fp(mp3_buffer)
        mp3_buffer.seek(0)
        # Re-encode MP3 -> WAV in memory for Gradio compatibility.
        wav_buffer = BytesIO()
        AudioSegment.from_mp3(mp3_buffer).export(wav_buffer, format="wav")
        return wav_buffer.getvalue()
    except Exception as exc:
        return f"Error in converting text to speech: {str(exc)}"
async def chatbot(audio):
    """Full voice-chat pipeline: audio -> transcript -> LLM reply -> speech.

    Parameters
    ----------
    audio : str | BytesIO | None
        Gradio supplies a file path (str); None means no recording was made.

    Returns
    -------
    tuple
        (reply text or error message, WAV bytes or None on failure).
    """
    try:
        if audio is None:
            return "No audio file provided.", None
        # Gradio's filepath input hands us a path string; load it into memory.
        if isinstance(audio, str):
            with open(audio, "rb") as file:
                audio = BytesIO(file.read())
        text = await transcribe_audio_async(audio)
        # The helpers signal failure by returning a message that starts with
        # "Error in ...". Checking the prefix (instead of `"Error" in text`)
        # avoids misclassifying a genuine transcript or reply that merely
        # contains the word "Error" somewhere in the middle.
        if text.startswith("Error in"):
            return text, None
        response_text = generate_response(text)
        if response_text.startswith("Error in"):
            return response_text, None
        audio_response = text_to_speech(response_text)
        # text_to_speech returns bytes on success, an error string otherwise.
        if isinstance(audio_response, bytes):
            return response_text, audio_response
        return response_text, None
    except Exception as e:
        return f"Error in chatbot processing: {str(e)}", None
# Define the Gradio interface.
# chatbot() is a coroutine, so the sync lambda drives it with asyncio.run
# (one fresh event loop per request).
iface = gr.Interface(
    fn=lambda audio: asyncio.run(chatbot(audio)),
    inputs=gr.Audio(type="filepath"),  # recording/upload arrives as a temp file path
    # NOTE(review): chatbot() returns raw WAV *bytes* for the audio output,
    # while type="filepath" suggests a path on disk is expected — confirm
    # Gradio accepts bytes here, otherwise the audio reply may not render.
    outputs=[gr.Textbox(), gr.Audio(type="filepath")]
)
# Launch the Gradio app (blocking call; starts the local web server).
iface.launch()