Spaces:
Sleeping
Sleeping
| import os | |
| import whisper | |
| import gradio as gr | |
| from gtts import gTTS | |
| from groq import Groq | |
# Read the Groq API key from the environment (e.g. a Hugging Face Space
# secret). "GROQ_API_KEY" is the name the error message below tells users to
# set, so it is checked first; the legacy "Groq_Api_Key" spelling is still
# honored so existing deployments keep working.
api_key = os.environ.get("GROQ_API_KEY") or os.environ.get("Groq_Api_Key")
if not api_key:
    raise ValueError(
        "GROQ_API_KEY is not set. Please add it in the Hugging Face Secrets."
    )

# Initialize Groq client
client = Groq(api_key=api_key)

# Load Whisper model
whisper_model = whisper.load_model("base")
| # Function to process audio input | |
def process_audio_realtime(audio_file):
    """Transcribe speech, get a reply from the Llama model, and voice it.

    Steps:
        1. Transcribe the audio with Whisper.
        2. Send the transcription to the Llama model via the Groq API.
        3. Convert the model's reply to speech with gTTS.

    Args:
        audio_file: Path to the input audio file. May be ``None`` or empty
            when there is no audio to process (e.g. the input was cleared).

    Returns:
        A ``(reply_text, audio_path)`` tuple. ``audio_path`` is the path of
        the generated MP3 file, or ``None`` when there was nothing to
        transcribe or nothing to speak.
    """
    # Local import keeps this function self-contained.
    import tempfile

    # Nothing was recorded/uploaded: return empty outputs instead of
    # letting Whisper fail on a missing path.
    if not audio_file:
        return "", None

    # Step 1: Transcribe the audio to text using Whisper
    transcription = whisper_model.transcribe(audio_file)["text"].strip()
    if not transcription:
        # No recognizable speech; skip the API call entirely.
        return "", None

    # Step 2: Process transcription using Llama model via Groq API
    completion = client.chat.completions.create(
        messages=[
            {
                "role": "system",
                "content": "You are a helpful assistant. Please provide a concise and accurate response.",
            },
            {"role": "user", "content": transcription},
        ],
        model="llama3-8b-8192",  # Replace with your actual Llama model name
        max_tokens=70,
        stream=False,
    )
    llama_response = completion.choices[0].message.content or ""
    if not llama_response.strip():
        # gTTS refuses empty text, so return the (empty) reply without audio.
        return llama_response, None

    # Step 3: Convert Llama response to audio using gTTS.
    # Each call writes to its own temporary file so that concurrent requests
    # do not overwrite one another's output. The file is intentionally not
    # deleted here because the caller still needs to read it.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        audio_output_path = tmp.name
    tts = gTTS(text=llama_response, lang="en")
    tts.save(audio_output_path)

    return llama_response, audio_output_path
# Build the Gradio UI for the real-time simulation.
# The audio input deliberately omits the `source` argument.
audio_input = gr.Audio(type="filepath", label="Input Audio")
result_components = [
    gr.Textbox(label="Processed Text"),  # shows the processed text
    gr.Audio(type="filepath", label="Generated Audio"),  # plays the generated audio
]

interface = gr.Interface(
    fn=process_audio_realtime,
    inputs=audio_input,
    outputs=result_components,
    live=True,  # Enable real-time behavior
    title="Real-Time Audio-to-Audio Application",
)

# Start the Gradio app.
interface.launch()