Afeezee committed on
Commit
1b6a625
·
verified ·
1 Parent(s): 4bd1c34

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +84 -0
app.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import assemblyai as aai
4
+ from cerebras.cloud.sdk import Cerebras
5
+ from gtts import gTTS
6
+ import tempfile
7
+
8
# API credentials are read from the environment variables "AssemblyVoice"
# and "CerebrasAI"; os.getenv returns None for any variable that is not set.
Voicekey = os.getenv("AssemblyVoice")
CereAI = os.getenv("CerebrasAI")

# Configure the AssemblyAI SDK with the key read above. This previously
# assigned the undefined name `AssemblyVoice` (the environment variable's
# name, not a Python variable), which raised NameError at import time.
aai.settings.api_key = Voicekey

# Cerebras client used for the chat-completion request in process_audio.
client = Cerebras(api_key=CereAI)
17
+
18
def process_audio(audio):
    """Transcribe recorded speech, ask the LLM for a reply, and voice it.

    Args:
        audio: Path to the recorded audio file (a ``str``), a binary
            file-like object exposing ``read()``, or ``None`` when nothing
            was recorded.

    Returns:
        On success, the path of a temporary ``.mp3`` file holding the spoken
        response. On failure (no input, transcription error, no recognised
        speech, empty model reply) a human-readable message string.

    NOTE(review): the interface in this file routes the return value to a
    ``gr.Audio`` output, which expects an audio path, so the message strings
    are unlikely to be displayed as text. Raising ``gr.Error`` would surface
    them properly; string returns are kept here to preserve the existing
    contract.
    """
    if audio is None:
        return "No audio file received."

    # A string is already a path on disk; anything else is assumed to be a
    # binary file-like object and is copied to a temporary file first.
    owns_input_copy = False
    if isinstance(audio, str):
        audio_file_path = audio
    else:
        # NamedTemporaryFile creates the file atomically, unlike the
        # deprecated tempfile.mktemp, which only returns a name and is
        # open to races. The ".mp3" suffix matches the original behaviour.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as upload_copy:
            upload_copy.write(audio.read())
            audio_file_path = upload_copy.name
        owns_input_copy = True

    # Upload the audio to AssemblyAI for transcription.
    try:
        transcript = aai.Transcriber().transcribe(audio_file_path)
    finally:
        if owns_input_copy:
            # Best-effort cleanup of the copy made above; a failure to
            # delete it must not mask the transcription outcome.
            try:
                os.remove(audio_file_path)
            except OSError:
                pass

    if transcript.status == aai.TranscriptStatus.error:
        return f"Error transcribing audio: {transcript.error}"

    transcript_text = transcript.text
    print(f"Transcription: {transcript_text}")

    # Nothing intelligible was said: do not send an empty prompt to the LLM.
    if not transcript_text or not transcript_text.strip():
        return "No speech was detected in the audio."

    # Generate the response with Cerebras-hosted Llama 3.3 (streamed).
    stream = client.chat.completions.create(
        messages=[
            {
                "role": "system",
                "content": "Conversation will be started in this chat. Try as much as possible to provide concise and informed responses to the prompt.",
            },
            {"role": "user", "content": transcript_text},
        ],
        model="llama-3.3-70b",
        stream=True,
        max_completion_tokens=1024,
        temperature=0.4,
        top_p=1,
    )

    # Concatenate the streamed deltas; a delta's content may be None.
    response_text = "".join(
        chunk.choices[0].delta.content or "" for chunk in stream
    )
    print(f"Response from LLM: {response_text}")

    # Presumably gTTS cannot synthesise empty text, so stop before calling it.
    if not response_text.strip():
        return "The language model returned an empty response."

    # Synthesize speech with gTTS (Google Text-to-Speech).
    tts = gTTS(text=response_text, lang='en', slow=False)

    # Reserve a temporary file name, close the handle, then let gTTS write
    # to it. delete=False keeps the file on disk so the caller can read it
    # after this function returns.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        audio_path = tmp_file.name
    tts.save(audio_path)

    return audio_path
68
+
69
# Gradio interface: microphone recording in, synthesized reply audio out.

# Input component: record from the microphone and hand the handler a file
# path (type="filepath").
_microphone_input = gr.Audio(sources=["microphone"], type="filepath")

# Waveform appearance for the response player.
_response_waveform = gr.WaveformOptions(
    waveform_color="#01C6FF",
    waveform_progress_color="#0066B4",
    skip_length=2,
    show_controls=False,
)

# Output component: plays the audio file whose path the handler returns.
_response_player = gr.Audio(
    type="filepath",
    label="Generated Response Audio",
    show_download_button=True,
    waveform_options=_response_waveform,
)

interface = gr.Interface(
    fn=process_audio,
    inputs=_microphone_input,
    outputs=_response_player,
    title="Xplayn: Voice-to-Audio AI",
    description="Record your voice, and the system will transcribe it, generate a response using Llama 3.3, and return the response as audio.",
)

# Start the web app.
interface.launch()