AamerAkhter committed on
Commit
b58d57e
·
verified ·
1 Parent(s): 57ac48f

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -0
app.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Streamlit voice Q&A app.

Flow: the user uploads a spoken question (WAV or M4A), the audio is
transcribed with Whisper through ``speech_recognition``, the transcript is
sent to a Groq chat model, and the answer is shown and read aloud with
Coqui TTS.
"""

import os
import tempfile

import speech_recognition as sr
import streamlit as st
import torch
from groq import Groq
from pydub import AudioSegment
from TTS.api import TTS

# NOTE(review): confirm this model ID against Groq's published model list.
# It is kept as the default for backward compatibility and can be overridden
# without a code change by setting the GROQ_MODEL environment variable.
GROQ_MODEL = os.environ.get("GROQ_MODEL", "llama-3-70b-8192")
WHISPER_MODEL = "base"
TTS_MODEL = "tts_models/en/ljspeech/tacotron2-DDC"


def _new_temp_path(suffix):
    """Create an empty temporary file and return its path.

    The file is created atomically (unlike the deprecated ``tempfile.mktemp``)
    and closed immediately so other libraries can reopen it by name. The
    caller is responsible for deleting it.
    """
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as handle:
        return handle.name


def _remove_quietly(path):
    """Delete ``path`` on a best-effort basis."""
    try:
        os.remove(path)
    except OSError:
        # Cleanup must never mask the real result or error of the request.
        pass


@st.cache_resource
def load_tts():
    """Return the TTS model, loaded once and reused across script reruns."""
    return TTS(
        model_name=TTS_MODEL,
        progress_bar=False,
        gpu=torch.cuda.is_available(),
    )


def transcribe_upload(uploaded):
    """Transcribe an uploaded audio file and return the recognized text.

    Args:
        uploaded: The object returned by ``st.file_uploader``; its ``name``
            supplies the extension and ``getvalue()`` the raw bytes.

    Returns:
        The text produced by ``Recognizer.recognize_whisper``.

    Raises:
        Exception: Whatever pydub or speech_recognition raise for an
            unreadable or unsupported file; the caller reports it in the UI.
    """
    # Lower-case the extension so "question.M4A" is converted like ".m4a".
    extension = os.path.splitext(uploaded.name)[1].lower()
    temp_paths = []
    try:
        audio_path = _new_temp_path(extension)
        temp_paths.append(audio_path)
        with open(audio_path, "wb") as out:
            # getvalue() does not depend on the stream position, so a rerun
            # with the same upload still sees the full payload.
            out.write(uploaded.getvalue())

        if extension == ".m4a":
            # sr.AudioFile is given WAV input here, so convert M4A first.
            wav_path = _new_temp_path(".wav")
            temp_paths.append(wav_path)
            AudioSegment.from_file(audio_path).export(wav_path, format="wav")
            audio_path = wav_path

        recognizer = sr.Recognizer()
        with sr.AudioFile(audio_path) as source:
            audio_data = recognizer.record(source)
        return recognizer.recognize_whisper(audio_data, model=WHISPER_MODEL)
    finally:
        for path in temp_paths:
            _remove_quietly(path)


st.set_page_config(page_title="Voice Q&A App", layout="centered")
st.title("🎤 Voice Q&A with Groq + TTS")

api_key = os.environ.get("GROQ_API_KEY")
if not api_key:
    # Report the missing key in the page instead of failing with a traceback
    # when the client is constructed.
    st.error("GROQ_API_KEY is not set. Configure it in the environment and reload.")
    st.stop()

client = Groq(api_key=api_key)

uploaded_audio = st.file_uploader(
    "Upload your question (WAV or M4A format)",
    type=["wav", "m4a"],
    key="uploader",
)
user_text = None

if uploaded_audio:
    try:
        user_text = transcribe_upload(uploaded_audio)
    except Exception as e:  # UI boundary: show any decode/recognition failure.
        st.error(f"Speech recognition failed: {e}")
    else:
        st.success(f"Recognized Text: {user_text}")

if user_text:
    st.header("Response from Groq AI")
    answer = None
    with st.spinner("Generating response..."):
        try:
            chat_completion = client.chat.completions.create(
                messages=[{"role": "user", "content": user_text}],
                model=GROQ_MODEL,
            )
            answer = chat_completion.choices[0].message.content
        except Exception as e:  # UI boundary: network, auth and model errors.
            st.error(f"Groq API Error: {e}")
        else:
            if not answer:
                st.warning("Groq returned an empty answer.")

    if answer:
        st.success("Answer:")
        st.write(answer)

        st.header("Listen to the Answer")
        tts_file_path = _new_temp_path(".wav")
        try:
            with st.spinner("Synthesizing speech..."):
                load_tts().tts_to_file(text=answer, file_path=tts_file_path)
            with open(tts_file_path, "rb") as audio_file:
                st.audio(audio_file.read(), format="audio/wav")
        except Exception as e:
            # Reported separately so a synthesis failure is not mislabelled
            # as a Groq API error.
            st.error(f"Text-to-speech failed: {e}")
        finally:
            _remove_quietly(tts_file_path)