Update app.py
app.py CHANGED
@@ -10,6 +10,8 @@ from langchain.memory import ConversationBufferMemory
 from langchain.chains import ConversationalRetrievalChain
 from PyPDF2 import PdfReader
 from groq import Groq
+from streamlit_webrtc import webrtc_streamer, AudioProcessorBase, WebRtcMode
+import av
 
 # Clear ChromaDB cache to fix tenant issue
 chromadb.api.client.SharedSystemClient.clear_system_cache()
@@ -51,7 +53,7 @@ def chat_chain(vectorstore):
     )
     return chain
 
-#
+# Transcribe audio using Groq Whisper
 def transcribe_audio(file_path):
     """Transcribe audio using Groq's Whisper model."""
     with open(file_path, "rb") as file:
@@ -61,9 +63,13 @@ def transcribe_audio(file_path):
             response_format="json",
             language="en"
         )
-    # Access the text attribute
     return transcription.text
 
+# Audio Processor Class for Recording
+class AudioProcessor(AudioProcessorBase):
+    def recv(self, frame: av.AudioFrame) -> av.AudioFrame:
+        return frame
+
 # Streamlit UI
 st.title("Chat with PDFs via Audio 🎙️📄")
 
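Note: the hunks above elide the middle of `transcribe_audio` (file lines 59–62, the actual Groq call). For orientation, a minimal sketch of what the elided call plausibly looks like; the `client` setup and the `whisper-large-v3` model name are assumptions, not shown in this diff:

```python
# Sketch of the elided call (assumed, not part of this commit): the Groq SDK
# exposes an OpenAI-style audio.transcriptions endpoint.
from groq import Groq

client = Groq()  # assumes GROQ_API_KEY is set in the environment

def transcribe_audio(file_path):
    """Transcribe audio using Groq's Whisper model."""
    with open(file_path, "rb") as file:
        transcription = client.audio.transcriptions.create(
            file=(file_path, file.read()),
            model="whisper-large-v3",  # assumed model name
            response_format="json",
            language="en",
        )
    return transcription.text
```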
@@ -74,7 +80,7 @@ if uploaded_files:
     chain = chat_chain(vectorstore)
     st.success("PDFs processed! Ready to chat.")
 
-    input_method = st.radio("Choose Input Method", ["Text Input", "Audio File"])
+    input_method = st.radio("Choose Input Method", ["Text Input", "Record Audio", "Upload Audio File"])
 
     # Text Input Mode
     if input_method == "Text Input":
@@ -84,8 +90,44 @@ if uploaded_files:
             response = chain({"question": query})["answer"]
             st.write(f"**Response:** {response}")
 
-    # Audio
-    elif input_method == "Audio File":
+    # Record Audio
+    elif input_method == "Record Audio":
+        st.write("Record your audio query:")
+        webrtc_ctx = webrtc_streamer(
+            key="record",
+            mode=WebRtcMode.SENDONLY,
+            audio_receiver_size=1024,
+            audio_processor_factory=AudioProcessor,
+            media_stream_constraints={"audio": True, "video": False},
+        )
+
+        if webrtc_ctx.audio_receiver:
+            st.write("Recording...")
+            audio_frames = []
+            while True:
+                frame = webrtc_ctx.audio_receiver.recv()
+                audio_frames.append(frame)
+                if len(audio_frames) > 5:  # Stop recording after a few frames
+                    break
+
+            # Save the recorded audio
+            audio_file_path = "recorded_audio.wav"
+            with av.open(audio_file_path, "w") as f:
+                for frame in audio_frames:
+                    f.write(frame)
+            st.success("Recording complete!")
+
+            # Transcribe and Generate Response
+            st.write("Transcribing audio...")
+            transcription = transcribe_audio(audio_file_path)
+            st.write(f"**You said:** {transcription}")
+
+            with st.spinner("Generating response..."):
+                response = chain({"question": transcription})["answer"]
+                st.write(f"**Response:** {response}")
+
+    # Upload Audio File Mode
+    elif input_method == "Upload Audio File":
         uploaded_audio = st.file_uploader("Upload an audio file (.wav, .mp3)", type=["wav", "mp3"])
         if uploaded_audio:
             audio_file_path = "uploaded_audio.wav"
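Note: in the new "Record Audio" branch, the `while True` loop breaks after six frames; at typical WebRTC frame durations of 10 to 20 ms that captures well under a second of audio. A duration-based sketch, assuming `audio_receiver.recv()` blocks until a frame arrives (`collect_audio_frames` and its `seconds` parameter are hypothetical, not part of the commit):

```python
# Hypothetical variant: stop after a target amount of recorded audio rather
# than after a fixed number of frames.
def collect_audio_frames(webrtc_ctx, seconds=5.0):
    frames, recorded = [], 0.0
    while recorded < seconds:
        frame = webrtc_ctx.audio_receiver.recv()  # an av.AudioFrame
        frames.append(frame)
        recorded += frame.samples / frame.sample_rate  # frame duration in seconds
    return frames
```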
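Note: `av.open(audio_file_path, "w")` followed by `f.write(frame)` is not PyAV's writing API, so the saving step as committed likely fails or produces an invalid file. PyAV writes containers by encoding frames into a stream and muxing the resulting packets. A sketch of that conventional pattern; the `pcm_s16le` codec, the 48 kHz rate, and the helper name are assumptions, and incoming frames may also need resampling to match the stream format:

```python
# Sketch (assumed, not part of this commit): encode received av.AudioFrame
# objects to 16-bit PCM and mux them into a WAV container.
import av

def save_frames_to_wav(audio_frames, path="recorded_audio.wav", rate=48000):
    with av.open(path, mode="w") as container:
        stream = container.add_stream("pcm_s16le", rate=rate)
        for frame in audio_frames:
            for packet in stream.encode(frame):
                container.mux(packet)
        for packet in stream.encode(None):  # flush any buffered samples
            container.mux(packet)
```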