Spaces:

CereusTech
/

Explayn

Sleeping

App Files Files Community

Afeezee committed on Jan 4, 2025

Commit

1b6a625

verified ·

1 Parent(s): 4bd1c34

Create app.py

Browse files

Files changed (1) hide show

app.py +84 -0

app.py ADDED Viewed

	@@ -0,0 +1,84 @@

+import os
+import gradio as gr
+import assemblyai as aai
+from cerebras.cloud.sdk import Cerebras
+from gtts import gTTS
+import tempfile
+Voicekey = os.getenv ("AssemblyVoice")
+CereAI = os.getenv ("CerebrasAI")
+# Set API keys
+aai.settings.api_key = AssemblyVoice
+client = Cerebras(
+    api_key= CereAI
+)
+def process_audio(audio):
+    # Check if audio is valid
+    if audio is None:
+        return "No audio file received."
+    # If the audio file doesn't have a name attribute, assign a temporary name
+    if isinstance(audio, str):  # If audio is passed as a file path (string)
+        audio_file_path = audio
+    else:
+        # Generate a temporary file name and save audio
+        audio_file_path = tempfile.mktemp(suffix=".mp3")  # .wav as default, you can change the format if needed
+        with open(audio_file_path, 'wb') as f:
+            f.write(audio.read())  # Save audio data to the file
+    # Upload audio to AssemblyAI for transcription
+    transcriber = aai.Transcriber()
+    transcript = transcriber.transcribe(audio_file_path)  # Transcribe the uploaded file
+    if transcript.status == aai.TranscriptStatus.error:
+        return f"Error transcribing audio: {transcript.error}"
+    transcript_text = transcript.text
+    print(f"Transcription: {transcript_text}")
+    # Generate response using Cerebras Llama 3.3
+    stream = client.chat.completions.create(
+        messages=[{
+            "role": "system", "content": "Conversation will be started in this chat. Try as much as possible to provide concise and informed responses to the prompt."
+        }, {
+            "role": "user", "content": transcript_text
+        }],
+        model="llama-3.3-70b",
+        stream=True,
+        max_completion_tokens=1024,
+        temperature=0.4,
+        top_p=1
+    )
+    response_text = "".join(chunk.choices[0].delta.content or "" for chunk in stream)
+    print(f"Response from LLM: {response_text}")
+    # Generate speech using gTTS (Google Text-to-Speech)
+    tts = gTTS(text=response_text, lang='en', slow=False)
+    # Save the audio to a temporary file
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
+        tts.save(tmp_file.name)
+        audio_path = tmp_file.name
+    return audio_path
+# Gradio Interface
+interface = gr.Interface(
+    fn=process_audio,
+    inputs=gr.Audio(sources=["microphone"], type="filepath"),  # Use 'file' to correctly handle the audio file
+    outputs=gr.Audio(type="filepath", label="Generated Response Audio", show_download_button=True,
+    waveform_options=gr.WaveformOptions(
+        waveform_color="#01C6FF",
+        waveform_progress_color="#0066B4",
+        skip_length=2,
+        show_controls=False,
+    )),
+    title="Xplayn: Voice-to-Audio AI",
+    description="Record your voice, and the system will transcribe it, generate a response using Llama 3.3, and return the response as audio."
+)
+interface.launch()