# NOTE: "Spaces: Sleeping" is Hugging Face Spaces page-status residue left over
# from the web capture of this app; it is not part of the program itself.
import gradio as gr
import os
from groq import Groq
import whisper
import torch
from gtts import gTTS
import IPython.display as ipd  # NOTE(review): unused in this script; kept only in case a notebook cell relies on it

# --- Whisper speech-to-text setup ---
# "small" keeps memory use modest; "base" or "medium" are drop-in alternatives.
model_name = "small"
whisper_model = whisper.load_model(model_name)
# Prefer the GPU when one is available; otherwise run on CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
whisper_model.to(device)

# --- Groq API client setup ---
# SECURITY: never hard-code API keys in source. The key previously committed
# here is exposed and must be revoked. Supply the key via the GROQ_API_KEY
# environment variable instead.
client = Groq(
    api_key=os.environ.get("GROQ_API_KEY")
)
# Speech-to-text step of the pipeline.
def transcribe_audio(audio_path):
    """Transcribe the audio file at *audio_path* with the global Whisper model.

    Returns the recognized text on success. On any failure the error is
    printed and the fixed string "Error during transcription" is returned,
    so the surrounding app keeps running instead of crashing on a bad clip.
    """
    try:
        print(f"Audio file path received for transcription: {audio_path}")
        # Whisper accepts a file path directly and handles decoding itself.
        outcome = whisper_model.transcribe(audio_path)
        print(f"Transcription result: {outcome}")
        return outcome['text']
    except Exception as exc:
        print(f"Error during transcription: {exc}")
        return "Error during transcription"
# Language-model step of the pipeline.
def get_llama_response(transcription):
    """Send *transcription* as a single user message to LLaMA 3 8B via Groq.

    Returns the model's reply text on success, or the fixed string
    "Error during response generation" if the API call fails for any reason.
    """
    user_message = {
        "role": "user",
        "content": transcription,
    }
    try:
        completion = client.chat.completions.create(
            messages=[user_message],
            model="llama3-8b-8192",
        )
        return completion.choices[0].message.content
    except Exception as exc:
        print(f"Error during LLaMA response generation: {exc}")
        return "Error during response generation"
# Text-to-speech step of the pipeline.
def text_to_speech(text):
    """Synthesize *text* with gTTS and save it to "response.mp3".

    Returns the saved filename on success (each call overwrites the previous
    file), or the fixed string "Error during text-to-speech conversion" if
    synthesis or saving fails.
    """
    output_file = "response.mp3"
    try:
        speech = gTTS(text)
        speech.save(output_file)
        return output_file
    except Exception as exc:
        print(f"Error during text-to-speech conversion: {exc}")
        return "Error during text-to-speech conversion"
# Gradio entry point: wires the three pipeline steps together.
def chatbot(audio_path):
    """Run the full voice pipeline: audio in -> text -> LLM reply -> audio out.

    Returns a 3-tuple of (transcription, reply text, path to synthesized
    reply audio), matching the interface's three outputs.
    """
    heard = transcribe_audio(audio_path)
    reply = get_llama_response(heard)
    spoken_path = text_to_speech(reply)
    return heard, reply, spoken_path
# Gradio UI: one audio input, three outputs (transcription, reply text,
# synthesized reply audio).
interface = gr.Interface(
    fn=chatbot,
    # 'filepath' hands the handler a path on disk rather than raw samples,
    # which is what transcribe_audio expects.
    inputs=gr.Audio(type="filepath"),
    outputs=["text", "text", "audio"],
    live=True,
    description="Real-time Voice-to-Voice Chatbot",
)

# Start the web app.
interface.launch()