Update app.py
app.py CHANGED
@@ -14,11 +14,15 @@ import edge_tts
 import pandas as pd
 import tempfile
 import traceback
-from streamlit_webrtc import webrtc_streamer, WebRtcMode
+from streamlit_webrtc import webrtc_streamer, WebRtcMode, AudioProcessorBase
 from twilio.rest import Client
 import logging
 import whisper
 model = whisper.load_model("base")
+import numpy as np
+import pydub
+import io
+import wave


 # ✅ MUST be the first Streamlit command
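One caveat on this hunk: `model = whisper.load_model("base")` executes at the top of the script, and Streamlit re-runs the whole script on every interaction, so the model weights are reloaded on each rerun. A minimal sketch of the usual caching pattern (not part of this commit; assumes a Streamlit version that provides `st.cache_resource`):

    import streamlit as st
    import whisper

    @st.cache_resource
    def load_whisper_model(name: str = "base"):
        # Load once per server process and reuse across reruns instead
        # of reloading the weights on every script execution.
        return whisper.load_model(name)

    model = load_whisper_model()

Also worth noting: `numpy` and `wave` are imported here but not used by the hunks shown below; `pydub` and `io` are what `AudioRecorder.get_wav_bytes` relies on.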
@@ -52,6 +56,42 @@ for key, default in {
     if key not in st.session_state:
         st.session_state[key] = default

+########################################///////////////////////////////////////////////////#########################################
+
+class AudioRecorder(AudioProcessorBase):
+    def __init__(self) -> None:
+        self.audio_frames = []
+
+    def recv(self, frame):
+        # We are only interested in the audio frames
+        if frame.kind == "audio":
+            self.audio_frames.append(frame)
+        return frame
+
+    def get_wav_bytes(self):
+        if not self.audio_frames:
+            return None
+
+        # Assuming mono audio with a sample width of 2 bytes (16-bit)
+        sample_width = 2
+        sample_rate = self.audio_frames[0].sample_rate
+
+        # Combine all audio frames
+        sound = pydub.AudioSegment.empty()
+        for frame in self.audio_frames:
+            sound += pydub.AudioSegment(
+                data=frame.to_ndarray().tobytes(),
+                sample_width=sample_width,
+                frame_rate=sample_rate,
+                channels=1,  # Assuming mono
+            )
+
+        # Export to WAV in-memory
+        wav_buffer = io.BytesIO()
+        sound.export(wav_buffer, format="wav")
+        return wav_buffer.getvalue()
+
+########################################///////////////////////////////////////////////////#########################################
 # Utility functions
 def extract_pdf_text(uploaded_file):
     pdf_reader = PyPDF2.PdfReader(uploaded_file)
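The `AudioRecorder` buffers raw audio frames in `recv` and only serializes them in `get_wav_bytes`, under the hard-coded assumption of mono, 16-bit samples. A small offline sketch of that serialization path (not part of the commit), synthesizing one frame with PyAV and checking the header with the otherwise-unused `wave` import; the frame is appended to the buffer directly, since in the app `recv` is fed by the WebRTC track:

    import io
    import wave

    import av
    import numpy as np

    recorder = AudioRecorder()

    # One second of a 440 Hz tone as packed 16-bit mono at 48 kHz;
    # av.AudioFrame.from_ndarray expects shape (1, samples) for s16/mono.
    t = np.linspace(0, 1, 48000, endpoint=False)
    samples = (np.sin(2 * np.pi * 440 * t) * 32767).astype(np.int16)[np.newaxis, :]
    frame = av.AudioFrame.from_ndarray(samples, format="s16", layout="mono")
    frame.sample_rate = 48000
    recorder.audio_frames.append(frame)

    # The exported WAV header should match the hard-coded assumptions.
    with wave.open(io.BytesIO(recorder.get_wav_bytes())) as wav:
        assert wav.getnchannels() == 1
        assert wav.getsampwidth() == 2
        assert wav.getframerate() == 48000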
@@ -872,61 +912,55 @@ if st.session_state["generated_questions"]:
         remaining = 15 - int(now - st.session_state.get("timer_start", 0))
         if remaining > 0:
             st.markdown(f"<h4 class='timer-text'>🎙️ {remaining} seconds to answer...</h4>", unsafe_allow_html=True)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+            webrtc_ctx = webrtc_streamer(
+                key=f"audio-recorder-{idx}",
+                mode=WebRtcMode.SENDONLY,
+                audio_processor_factory=AudioRecorder,
+                media_stream_constraints={"video": False, "audio": True},
+            )
+
+            if st.button("⏹️ Stop Recording & Confirm"):
+                if webrtc_ctx.audio_processor:
+                    wav_bytes = webrtc_ctx.audio_processor.get_wav_bytes()
+                    if wav_bytes:
+                        st.session_state["response_audio_bytes"] = wav_bytes
+                        st.audio(st.session_state["response_audio_bytes"], format="audio/wav")
+
+                        # Transcribe the audio
+                        try:
+                            with st.spinner("🧠 Transcribing your answer..."):
+                                # To use whisper, you need to save the bytes to a temporary file
+                                with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_audio_file:
+                                    tmp_audio_file.write(wav_bytes)
+                                    tmp_audio_path = tmp_audio_file.name
+
+                                result = model.transcribe(tmp_audio_path)
+                                transcript = result["text"].strip()
+                                if not transcript:
+                                    transcript = "[Transcription failed or empty]"
+                        except Exception as e:
                             st.error(f"❌ Transcription error: {e}")
                             transcript = "[Transcription error]"
-
-                        st.session_state["answers"].append({
-                            "question": question,
-                            "response_file": st.session_state["response_file"],
-                            "response_text": transcript
-                        })
-
-                        if st.session_state["current_question_index"] == len(st.session_state["generated_questions"]):
-                            evaluate_answers()
-                            st.session_state["show_summary"] = True
-                            st.rerun()
-
-
-
-        if elapsed > 15 and "response_file" not in st.session_state:
-            st.warning("⚠️ No audio captured. Moving to next question.")
-            st.session_state["answers"].append({
-                "question": question,
-                "response": "[No response]"
-            })
-
-            st.session_state.update({
-                "record_phase": "idle",
-                "question_played": False,
-                "current_question_index": idx + 1
-            })

-
-
-
-
-
+                        st.session_state["answers"].append({
+                            "question": question,
+                            "response_file": tmp_audio_path,  # You can store the path if needed
+                            "response_text": transcript
+                        })
+
+                        # Move to the next question
+                        st.session_state.update({
+                            "record_phase": "idle",
+                            "question_played": False,
+                            "current_question_index": idx + 1
+                        })
+                        if st.session_state["current_question_index"] == len(st.session_state["generated_questions"]):
+                            evaluate_answers()
+                            st.session_state["show_summary"] = True
+                            st.rerun()
+                    else:
+                        st.warning("No audio was recorded.")



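Because the temporary file is created with `delete=False` and its path is stored in `answers` as `response_file`, each recorded answer leaves a WAV file behind in the temp directory for the lifetime of the Space. (Also note that if the exception fires before the temporary file is created, `tmp_audio_path` is unbound when the answer is appended.) A sketch of a cleanup helper that could run after scoring; `cleanup_response_files` is a hypothetical name, not something this commit adds:

    import os

    def cleanup_response_files(answers):
        # Unlink the per-answer WAV files once they are no longer
        # needed, e.g. after evaluate_answers() has produced the summary.
        for answer in answers:
            path = answer.get("response_file")
            if path and os.path.exists(path):
                os.unlink(path)
                answer["response_file"] = None

Called with `st.session_state["answers"]` once the summary screen has rendered, this keeps the Space from accumulating one temp file per question.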
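A last note on the recorder: streamlit-webrtc invokes `recv` on a worker thread while the Streamlit script thread calls `get_wav_bytes`, so the shared `audio_frames` list is read and written concurrently. If that ever becomes an issue, a lock-guarded variant is straightforward (hypothetical, not in the commit):

    import threading

    class LockedAudioRecorder(AudioRecorder):
        # recv() runs on the WebRTC worker thread while get_wav_bytes()
        # runs on the script thread; serialize access to audio_frames.
        def __init__(self) -> None:
            super().__init__()
            self._lock = threading.Lock()

        def recv(self, frame):
            with self._lock:
                return super().recv(frame)

        def get_wav_bytes(self):
            with self._lock:
                return super().get_wav_bytes()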