Update app.py #4
opened by AbhishekShrimali

app.py CHANGED
@@ -20,6 +20,54 @@ import logging
 import whisper
 model = whisper.load_model("base")
 
+################################################///////////////////////////////////////////////////////////////////////#############################
+
+#
+
+# This is a thread-safe class to hold our audio data
+class AudioFrameBuffer:
+    def __init__(self):
+        self.frames = []
+        self.lock = threading.Lock()
+        self.wav_bytes = None
+
+    def add_frame(self, frame):
+        with self.lock:
+            self.frames.append(frame)
+
+    def get_wav_bytes(self):
+        with self.lock:
+            if self.wav_bytes:
+                return self.wav_bytes
+
+            if not self.frames:
+                return None
+
+            sound = pydub.AudioSegment.empty()
+            for frame in self.frames:
+                sound += pydub.AudioSegment(
+                    data=frame.to_ndarray().tobytes(),
+                    sample_width=frame.format.bytes,
+                    frame_rate=frame.sample_rate,
+                    channels=len(frame.layout.channels),
+                )
+            buffer = io.BytesIO()
+            sound.export(buffer, format="wav")
+            self.wav_bytes = buffer.getvalue()
+            return self.wav_bytes
+
+# The audio processor that uses the buffer
+class AudioRecorder(AudioProcessorBase):
+    def __init__(self, buffer: AudioFrameBuffer):
+        self.buffer = buffer
+
+    async def recv_queued(self, frames):
+        for frame in frames:
+            self.buffer.add_frame(frame)
+
+################################################///////////////////////////////////////////////////////////////////////#############################
+
+
 
 # ✅ MUST be the first Streamlit command
 st.set_page_config(page_title="GrillMaster", layout="wide")
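Review note: the added classes reference `threading`, `io`, and `pydub`, and the second hunk below additionally uses `time`, `tempfile`, `webrtc_streamer`, `WebRtcMode`, and `AudioProcessorBase`, none of which are imported in the visible context. If app.py does not already import them near the top, the diff presumably assumes an import block along these lines (a sketch only; `streamlit-webrtc` and `pydub` would also need to be listed in requirements.txt, and pydub's WAV export needs ffmpeg on the Space):

```python
# Imports the new code appears to assume (verify against the actual
# top of app.py before merging).
import io
import tempfile
import threading
import time

import pydub
from streamlit_webrtc import AudioProcessorBase, WebRtcMode, webrtc_streamer
```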
@@ -868,65 +916,99 @@ if st.session_state["generated_questions"]:
         st.session_state["show_summary"] = True
         st.rerun()
 
-    elif st.session_state["record_phase"] == "recording":
-        remaining = 15 - int(now - st.session_state.get("timer_start", 0))
-        if remaining > 0:
-            st.markdown(f"<h4 class='timer-text'>🎙️ {remaining} seconds to answer...</h4>", unsafe_allow_html=True)
-
-            audio_value = st.audio_input("🎤 Tap to record – then stop when done", key=f"audio_{idx}")
-            if audio_value and "response_file" not in st.session_state:
-                wav_path = f"response_{idx}.wav"
-                with open(wav_path, "wb") as f:
-                    f.write(audio_value.getbuffer())
-                #st.audio(wav_path, format="audio/wav")
-                st.session_state["response_file"] = wav_path
-                st.session_state["record_phase"] = "listening"
-                st.success("✅ Audio uploaded. You may now confirm your answer.")
-                st.audio(wav_path, format="audio/wav")
-
-            if st.button("⏹️ Confirm & Next"):
-                try:
-                    with st.spinner("🧠 Transcribing your answer..."):
-                        result = model.transcribe(st.session_state["response_file"])
-                        transcript = result["text"].strip()
-                        if not transcript:
-                            transcript = "[Transcription failed or empty]"
-                except Exception as e:
-                    st.error(f"❌ Transcription error: {e}")
-                    transcript = "[Transcription error]"
-
-                st.session_state["answers"].append({
-                    "question": question,
-                    "response_file": st.session_state["response_file"],
-                    "response_text": transcript
-                })
+    elif st.session_state["record_phase"] == "recording":
+        now = time.time()
+        # Increased timer to 60 seconds for more flexibility
+        remaining = 60 - int(now - st.session_state.get("timer_start", 0))
+
+        if remaining > 0:
+            st.markdown(f"<h4 class='timer-text'>🔴 Recording... Answer the question, then click STOP below. You have {remaining} seconds.</h4>", unsafe_allow_html=True)
+
+            # Initialize the thread-safe buffer in session state
+            if f"audio_buffer_{idx}" not in st.session_state:
+                st.session_state[f"audio_buffer_{idx}"] = AudioFrameBuffer()
+
+            webrtc_ctx = webrtc_streamer(
+                key=f"audio-recorder-{idx}",
+                mode=WebRtcMode.SENDONLY,
+                audio_processor_factory=lambda: AudioRecorder(buffer=st.session_state[f"audio_buffer_{idx}"]),
+                media_stream_constraints={"video": False, "audio": True},
+            )
+
+            # This part of the code runs AFTER the user clicks "STOP" on the component
+            if not webrtc_ctx.state.playing:
+                st.info("Recording stopped. Processing your answer...")
+
+                audio_buffer = st.session_state[f"audio_buffer_{idx}"]
+                wav_bytes = audio_buffer.get_wav_bytes()
+
+                if wav_bytes:
+                    st.success("Audio captured! Now transcribing...")
+                    st.audio(wav_bytes, format="audio/wav")
+
+                    transcript = "[Could not understand audio]"
+                    try:
+                        with st.spinner("🧠 Transcribing your answer with Whisper..."):
+                            # Whisper needs a file path, so we write the bytes to a temporary file
+                            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_audio_file:
+                                tmp_audio_file.write(wav_bytes)
+                                tmp_audio_path = tmp_audio_file.name
+
+                            result = model.transcribe(tmp_audio_path)
+                            transcript = result["text"].strip() or "[No speech was detected]"
+                    except Exception as e:
+                        st.error(f"❌ Transcription error: {e}")
+                        transcript = "[Transcription error]"
+
+                    # Store the result and move to the next step
                     st.session_state["answers"].append({
+                        "question": question,
+                        "response_file": tmp_audio_path,
+                        "response": transcript  # Using 'response' key as expected by evaluate_answers
                     })
 
                     st.session_state.update({
+                        "record_phase": "idle",
+                        "question_played": False,
+                        "current_question_index": idx + 1,
+                    })
 
+                    # Check if the interview is over and evaluate
+                    if st.session_state["current_question_index"] >= len(st.session_state["generated_questions"]):
+                        with st.spinner("All questions answered! Generating final summary..."):
+                            evaluate_answers()
+                        st.session_state["show_summary"] = True
 
+                    # Rerun to show the next question or the summary page
+                    st.rerun()
+
+                else:
+                    st.warning("No audio was captured. Moving to the next question.")
+                    st.session_state["answers"].append({"question": question, "response": "[No audio was captured]"})
+                    st.session_state.update({
+                        "record_phase": "idle",
+                        "question_played": False,
+                        "current_question_index": idx + 1,
+                    })
+                    if st.session_state["current_question_index"] >= len(st.session_state["generated_questions"]):
                         evaluate_answers()
                         st.session_state["show_summary"] = True
                     st.rerun()
+
+        else:  # This block handles the timer running out
+            st.warning("⚠️ Time is up! Moving to the next question.")
+            st.session_state["answers"].append({"question": question, "response": "[No response - timed out]"})
+            st.session_state.update({
+                "record_phase": "idle",
+                "question_played": False,
+                "current_question_index": idx + 1,
+            })
+            if st.session_state["current_question_index"] >= len(st.session_state["generated_questions"]):
+                evaluate_answers()
+                st.session_state["show_summary"] = True
+            st.rerun()
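One thing to double-check in this hunk: `webrtc_ctx.state.playing` is also `False` on the very first script run, before the user has clicked START, so the `if not webrtc_ctx.state.playing:` branch (and its "No audio was captured" path) can fire immediately. A common workaround is to remember that the stream has been live at least once before treating a stopped state as a finished recording. A minimal sketch, reusing the hunk's `idx` and buffer keys (the `started_{idx}` flag is hypothetical, not part of this PR):

```python
# Hypothetical guard: only process the buffer once the stream has
# actually been live for this question and is now stopped.
if webrtc_ctx.state.playing:
    st.session_state[f"started_{idx}"] = True
elif st.session_state.get(f"started_{idx}"):
    wav_bytes = st.session_state[f"audio_buffer_{idx}"].get_wav_bytes()
    # ... transcription and state updates as in the hunk above ...
```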
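A smaller follow-up: `tempfile.NamedTemporaryFile(delete=False, ...)` leaves one .wav file per answer in /tmp, which accumulates on a long-running Space. Since the path is stored as `response_file`, cleanup has to wait until the audio is no longer needed; if playback of past answers is not required, the file could be removed right after transcription. A sketch under that assumption:

```python
import os

result = model.transcribe(tmp_audio_path)
transcript = result["text"].strip() or "[No speech was detected]"
os.remove(tmp_audio_path)  # safe only if the .wav is never replayed later
```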