import os
import tempfile

import assemblyai as aai
import gradio as gr
from cerebras.cloud.sdk import Cerebras
from gtts import gTTS

# API keys are read from the environment.
# NOTE(review): no validation here — if either variable is unset these are
# None and the first API call will fail downstream; confirm deployment sets both.
ASSEMBLYAI_API_KEY = os.getenv("AssemblyVoice")
CEREBRAS_API_KEY = os.getenv("CerebrasAI")

# Configure the two API clients.
aai.settings.api_key = ASSEMBLYAI_API_KEY
client = Cerebras(api_key=CEREBRAS_API_KEY)


def process_audio(audio):
    """Transcribe recorded audio, generate an LLM reply, and return it as speech.

    Pipeline: AssemblyAI transcription -> Cerebras Llama 3.3 chat completion
    (streamed) -> gTTS text-to-speech.

    Parameters
    ----------
    audio : str | file-like | None
        Path to the recorded audio file (Gradio passes a filepath string
        when the input component uses ``type="filepath"``), a file-like
        object exposing ``.read()``, or ``None`` when nothing was recorded.

    Returns
    -------
    str
        Path to an MP3 file with the spoken response on success; otherwise
        a human-readable error message string (the Gradio audio output will
        not be able to play the latter — it surfaces as a component error).
    """
    if audio is None:
        return "No audio file received."

    if isinstance(audio, str):
        # Gradio delivers a filepath string for type="filepath" inputs.
        audio_file_path = audio
    else:
        # File-like object: persist it to a temporary .mp3 file for upload.
        # NamedTemporaryFile replaces the deprecated, race-prone
        # tempfile.mktemp (name could be claimed between pick and open).
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
            f.write(audio.read())
            audio_file_path = f.name

    # Upload and transcribe with AssemblyAI.
    transcriber = aai.Transcriber()
    transcript = transcriber.transcribe(audio_file_path)
    if transcript.status == aai.TranscriptStatus.error:
        return f"Error transcribing audio: {transcript.error}"

    transcript_text = transcript.text
    print(f"Transcription: {transcript_text}")

    # Generate a response using Cerebras Llama 3.3 (streamed chunks).
    stream = client.chat.completions.create(
        messages=[
            {
                "role": "system",
                "content": "Conversation will be started in this chat. Try as much as possible to provide concise and informed responses to the prompt.",
            },
            {"role": "user", "content": transcript_text},
        ],
        model="llama-3.3-70b",
        stream=True,
        max_completion_tokens=1024,
        temperature=0.4,
        top_p=1,
    )
    # Delta content can be None on some chunks; coalesce to "".
    response_text = "".join(
        chunk.choices[0].delta.content or "" for chunk in stream
    )
    print(f"Response from LLM: {response_text}")

    # Synthesize speech with gTTS (Google Text-to-Speech).
    tts = gTTS(text=response_text, lang="en", slow=False)
    # Reserve the temp path first, then save AFTER the handle is closed:
    # saving while the NamedTemporaryFile is still open fails on Windows,
    # where an open handle locks the file against a second writer.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        audio_path = tmp_file.name
    tts.save(audio_path)
    return audio_path


# Gradio UI: record a voice message, receive a spoken LLM response.
interface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    outputs=gr.Audio(
        type="filepath",
        label="Generated Response Audio",
        show_download_button=True,
        waveform_options=gr.WaveformOptions(
            waveform_color="#01C6FF",
            waveform_progress_color="#0066B4",
            skip_length=2,
            show_controls=False,
        ),
    ),
    title="Xplayn: Voice-to-Audio AI",
    description="Record your voice, and the system will transcribe it, generate a response using Llama 3.3, and return the response as audio.",
)

interface.launch()