import os
import gradio as gr
import assemblyai as aai
from cerebras.cloud.sdk import Cerebras
from gtts import gTTS
import tempfile
# --- Credentials & client setup ---
# Keys are supplied via environment variables (e.g. Space secrets).
Voicekey = os.environ.get("AssemblyVoice")  # AssemblyAI speech-to-text key
CereAI = os.environ.get("CerebrasAI")       # Cerebras inference key

# Configure the AssemblyAI SDK and build the Cerebras chat client.
aai.settings.api_key = Voicekey
client = Cerebras(api_key=CereAI)
def process_audio(audio):
    """Transcribe recorded audio, generate an LLM reply, and speak it back.

    Pipeline: AssemblyAI (speech-to-text) -> Cerebras Llama 3.3 (response)
    -> gTTS (text-to-speech).

    Args:
        audio: Either a filesystem path (str) to the recorded clip — what
            Gradio passes with ``type="filepath"`` — or a file-like object
            exposing ``read()`` that yields raw audio bytes.

    Returns:
        str: Path to an MP3 file containing the spoken response, or an
        error-message string when no audio was received or transcription
        fails.
    """
    # Guard: Gradio passes None when nothing was recorded.
    if audio is None:
        return "No audio file received."
    if isinstance(audio, str):  # If audio is passed as a file path (string)
        audio_file_path = audio
    else:
        # File-like object: persist it so AssemblyAI can read it from disk.
        # NamedTemporaryFile(delete=False) avoids the filename race inherent
        # in the deprecated tempfile.mktemp().
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
            f.write(audio.read())  # Save audio data to the file
            audio_file_path = f.name
    # --- Speech-to-text: upload audio to AssemblyAI for transcription ---
    transcriber = aai.Transcriber()
    transcript = transcriber.transcribe(audio_file_path)  # Transcribe the uploaded file
    if transcript.status == aai.TranscriptStatus.error:
        return f"Error transcribing audio: {transcript.error}"
    transcript_text = transcript.text
    print(f"Transcription: {transcript_text}")
    # --- Generate response using Cerebras Llama 3.3 (streamed) ---
    stream = client.chat.completions.create(
        messages=[{
            "role": "system", "content": "Conversation will be started in this chat. Try as much as possible to provide concise and informed responses to the prompt."
        }, {
            "role": "user", "content": transcript_text
        }],
        model="llama-3.3-70b",
        stream=True,
        max_completion_tokens=1024,
        temperature=0.4,
        top_p=1
    )
    # Concatenate streamed deltas; some chunks carry content=None, hence `or ""`.
    response_text = "".join(chunk.choices[0].delta.content or "" for chunk in stream)
    print(f"Response from LLM: {response_text}")
    # --- Text-to-speech using gTTS (Google Text-to-Speech) ---
    tts = gTTS(text=response_text, lang='en', slow=False)
    # Save the spoken reply to a temporary MP3 file and hand its path to Gradio.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tts.save(tmp_file.name)
        audio_path = tmp_file.name
    return audio_path
# --- Gradio UI ---
# Microphone input is delivered to process_audio as a file path; the
# returned MP3 path is rendered in a downloadable audio player.
_output_audio = gr.Audio(
    type="filepath",
    label="Generated Response Audio",
    show_download_button=True,
    waveform_options=gr.WaveformOptions(
        waveform_color="#01C6FF",
        waveform_progress_color="#0066B4",
        skip_length=2,
        show_controls=False,
    ),
)
interface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    outputs=_output_audio,
    title="Xplayn: Voice-to-Audio AI",
    description="Record your voice, and the system will transcribe it, generate a response using Llama 3.3, and return the response as audio."
)
interface.launch()