import os
import tempfile

import whisper
import gradio as gr
from gtts import gTTS
from groq import Groq

# The secret was historically stored as "Groq_Api_Key" while the error message
# told users to set "GROQ_API_KEY". Environment variables are case-sensitive on
# Linux, so accept either spelling to keep existing deployments working and to
# make the error message actionable.
api_key = os.environ.get("Groq_Api_Key") or os.environ.get("GROQ_API_KEY")
if not api_key:
    raise ValueError(
        "GROQ_API_KEY (or Groq_Api_Key) is not set. "
        "Please add it in the Hugging Face Secrets."
    )

# Initialize Groq client
client = Groq(api_key=api_key)

# Load Whisper model once at import time so every request reuses it.
whisper_model = whisper.load_model("base")


def process_audio_realtime(audio_file):
    """Transcribe speech, ask the Llama model for a reply, and voice the reply.

    Pipeline:
      1. Transcribe ``audio_file`` with Whisper.
      2. Send the transcription to the Llama model through the Groq API.
      3. Convert the model's reply to speech with gTTS.

    Args:
        audio_file: Path to the recorded/uploaded audio file, as supplied by
            the ``gr.Audio(type="filepath")`` input. May be ``None`` when the
            input component is cleared.

    Returns:
        A ``(text, audio_path)`` tuple: the model's reply and the path of the
        generated MP3. When there is nothing to process (no audio, empty
        transcription, or empty reply) ``audio_path`` is ``None`` so the
        output audio component is simply cleared.
    """
    # With live=True the callback also fires when the input is cleared;
    # there is no file to transcribe in that case.
    if not audio_file:
        return "", None

    # Step 1: Transcribe the audio to text using Whisper
    transcription = whisper_model.transcribe(audio_file)["text"]
    if not transcription or not transcription.strip():
        # Silence/noise only: nothing meaningful to send to the model.
        return "", None

    # Step 2: Process transcription using Llama model via Groq API
    completion = client.chat.completions.create(
        messages=[
            {
                "role": "system",
                "content": (
                    "You are a helpful assistant. "
                    "Please provide a concise and accurate response."
                ),
            },
            {"role": "user", "content": transcription},
        ],
        model="llama3-8b-8192",  # Replace with your actual Llama model name
        max_tokens=70,
        stream=False,
    )
    llama_response = completion.choices[0].message.content
    if not llama_response or not llama_response.strip():
        # gTTS refuses empty text, so skip speech synthesis.
        return llama_response or "", None

    # Step 3: Convert Llama response to audio using gTTS.
    # Write to a unique temporary file: a fixed filename would be overwritten
    # by concurrent requests, serving one user's audio to another.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        audio_output_path = tmp.name
    tts = gTTS(text=llama_response, lang="en")
    tts.save(audio_output_path)

    return llama_response, audio_output_path


# Create Gradio interface for real-time simulation
interface = gr.Interface(
    fn=process_audio_realtime,
    inputs=gr.Audio(type="filepath", label="Input Audio"),
    outputs=[
        gr.Textbox(label="Processed Text"),  # Display processed text
        gr.Audio(type="filepath", label="Generated Audio"),  # Output audio
    ],
    live=True,  # Re-run the pipeline whenever the input changes
    title="Real-Time Audio-to-Audio Application",
)

# Launch Gradio app
interface.launch()