Update app.py #4
opened by AbhishekShrimali

app.py CHANGED
@@ -20,6 +20,54 @@ import logging
 import whisper
 model = whisper.load_model("base")
 
+################################################///////////////////////////////////////////////////////////////////////#############################
+
+#
+
+# This is a thread-safe class to hold our audio data
+class AudioFrameBuffer:
+    def __init__(self):
+        self.frames = []
+        self.lock = threading.Lock()
+        self.wav_bytes = None
+
+    def add_frame(self, frame):
+        with self.lock:
+            self.frames.append(frame)
+
+    def get_wav_bytes(self):
+        with self.lock:
+            if self.wav_bytes:
+                return self.wav_bytes
+
+            if not self.frames:
+                return None
+
+            sound = pydub.AudioSegment.empty()
+            for frame in self.frames:
+                sound += pydub.AudioSegment(
+                    data=frame.to_ndarray().tobytes(),
+                    sample_width=frame.format.bytes,
+                    frame_rate=frame.sample_rate,
+                    channels=len(frame.layout.channels),
+                )
+            buffer = io.BytesIO()
+            sound.export(buffer, format="wav")
+            self.wav_bytes = buffer.getvalue()
+            return self.wav_bytes
+
+# The audio processor that uses the buffer
+class AudioRecorder(AudioProcessorBase):
+    def __init__(self, buffer: AudioFrameBuffer):
+        self.buffer = buffer
+
+    async def recv_queued(self, frames):
+        for frame in frames:
+            self.buffer.add_frame(frame)
+
+################################################///////////////////////////////////////////////////////////////////////#############################
+
+
 
 # ✅ MUST be the first Streamlit command
 st.set_page_config(page_title="GrillMaster", layout="wide")
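Review note: the added classes reference `threading`, `io`, and `pydub`, and the second hunk below additionally uses `time`, `tempfile`, `webrtc_streamer`, `WebRtcMode`, and `AudioProcessorBase`, none of which are imported in the visible context. If app.py does not already import them near the top, the diff presumably assumes an import block along these lines (a sketch only; `streamlit-webrtc` and `pydub` would also need to be listed in requirements.txt, and pydub's WAV export needs ffmpeg on the Space):

```python
# Imports the new code appears to assume (verify against the actual
# top of app.py before merging).
import io
import tempfile
import threading
import time

import pydub
from streamlit_webrtc import AudioProcessorBase, WebRtcMode, webrtc_streamer
```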
@@ -868,65 +916,99 @@ if st.session_state["generated_questions"]:
         st.session_state["show_summary"] = True
         st.rerun()
 
-    elif st.session_state["record_phase"] == "recording":
-        remaining = 15 - int(now - st.session_state.get("timer_start", 0))
-        if remaining > 0:
-            st.markdown(f"<h4 class='timer-text'>🎙️ {remaining} seconds to answer...</h4>", unsafe_allow_html=True)
-
-            audio_value = st.audio_input("🎤 Tap to record – then stop when done", key=f"audio_{idx}")
-            if audio_value and "response_file" not in st.session_state:
-                wav_path = f"response_{idx}.wav"
-                with open(wav_path, "wb") as f:
-                    f.write(audio_value.getbuffer())
-                #st.audio(wav_path, format="audio/wav")
-                st.session_state["response_file"] = wav_path
-                st.session_state["record_phase"] = "listening"
-                st.success("✅ Audio uploaded. You may now confirm your answer.")
-                st.audio(wav_path, format="audio/wav")
-
-            if st.button("⏹️ Confirm & Next"):
-                try:
-                    with st.spinner("🧠 Transcribing your answer..."):
-                        result = model.transcribe(st.session_state["response_file"])
-                        transcript = result["text"].strip()
-                        if not transcript:
-                            transcript = "[Transcription failed or empty]"
-                except Exception as e:
-                    st.error(f"❌ Transcription error: {e}")
-                    transcript = "[Transcription error]"
-
-                st.session_state["answers"].append({
-                    "question": question,
-                    "response_file": st.session_state["response_file"],
-                    "response_text": transcript
-                })
+    elif st.session_state["record_phase"] == "recording":
+        now = time.time()
+        # Increased timer to 60 seconds for more flexibility
+        remaining = 60 - int(now - st.session_state.get("timer_start", 0))
+
+        if remaining > 0:
+            st.markdown(f"<h4 class='timer-text'>🔴 Recording... Answer the question, then click STOP below. You have {remaining} seconds.</h4>", unsafe_allow_html=True)
+
+            # Initialize the thread-safe buffer in session state
+            if f"audio_buffer_{idx}" not in st.session_state:
+                st.session_state[f"audio_buffer_{idx}"] = AudioFrameBuffer()
+
+            webrtc_ctx = webrtc_streamer(
+                key=f"audio-recorder-{idx}",
+                mode=WebRtcMode.SENDONLY,
+                audio_processor_factory=lambda: AudioRecorder(buffer=st.session_state[f"audio_buffer_{idx}"]),
+                media_stream_constraints={"video": False, "audio": True},
+            )
+
+            # This part of the code runs AFTER the user clicks "STOP" on the component
+            if not webrtc_ctx.state.playing:
+                st.info("Recording stopped. Processing your answer...")
+
+                audio_buffer = st.session_state[f"audio_buffer_{idx}"]
+                wav_bytes = audio_buffer.get_wav_bytes()
+
+                if wav_bytes:
+                    st.success("Audio captured! Now transcribing...")
+                    st.audio(wav_bytes, format="audio/wav")
+
+                    transcript = "[Could not understand audio]"
+                    try:
+                        with st.spinner("🧠 Transcribing your answer with Whisper..."):
+                            # Whisper needs a file path, so we write the bytes to a temporary file
+                            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_audio_file:
+                                tmp_audio_file.write(wav_bytes)
+                                tmp_audio_path = tmp_audio_file.name
+
+                            result = model.transcribe(tmp_audio_path)
+                            transcript = result["text"].strip() or "[No speech was detected]"
+                    except Exception as e:
+                        st.error(f"❌ Transcription error: {e}")
+                        transcript = "[Transcription error]"
+
+                    # Store the result and move to the next step
                     st.session_state["answers"].append({
+                        "question": question,
+                        "response_file": tmp_audio_path,
+                        "response": transcript  # Using 'response' key as expected by evaluate_answers
                     })
 
                     st.session_state.update({
+                        "record_phase": "idle",
+                        "question_played": False,
+                        "current_question_index": idx + 1,
+                    })
 
+                    # Check if the interview is over and evaluate
+                    if st.session_state["current_question_index"] >= len(st.session_state["generated_questions"]):
+                        with st.spinner("All questions answered! Generating final summary..."):
+                            evaluate_answers()
+                        st.session_state["show_summary"] = True
 
+                    # Rerun to show the next question or the summary page
+                    st.rerun()
+
+                else:
+                    st.warning("No audio was captured. Moving to the next question.")
+                    st.session_state["answers"].append({"question": question, "response": "[No audio was captured]"})
+                    st.session_state.update({
+                        "record_phase": "idle",
+                        "question_played": False,
+                        "current_question_index": idx + 1,
+                    })
+                    if st.session_state["current_question_index"] >= len(st.session_state["generated_questions"]):
                         evaluate_answers()
                         st.session_state["show_summary"] = True
                     st.rerun()
+
+        else:  # This block handles the timer running out
+            st.warning("⚠️ Time is up! Moving to the next question.")
+            st.session_state["answers"].append({"question": question, "response": "[No response - timed out]"})
+            st.session_state.update({
+                "record_phase": "idle",
+                "question_played": False,
+                "current_question_index": idx + 1,
+            })
+            if st.session_state["current_question_index"] >= len(st.session_state["generated_questions"]):
+                evaluate_answers()
+                st.session_state["show_summary"] = True
+            st.rerun()
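One thing to double-check in this hunk: `webrtc_ctx.state.playing` is also `False` on the very first script run, before the user has clicked START, so the `if not webrtc_ctx.state.playing:` branch (and its "No audio was captured" path) can fire immediately. A common workaround is to remember that the stream has been live at least once before treating a stopped state as a finished recording. A minimal sketch, reusing the hunk's `idx` and buffer keys (the `started_{idx}` flag is hypothetical, not part of this PR):

```python
# Hypothetical guard: only process the buffer once the stream has
# actually been live for this question and is now stopped.
if webrtc_ctx.state.playing:
    st.session_state[f"started_{idx}"] = True
elif st.session_state.get(f"started_{idx}"):
    wav_bytes = st.session_state[f"audio_buffer_{idx}"].get_wav_bytes()
    # ... transcription and state updates as in the hunk above ...
```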
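A smaller follow-up: `tempfile.NamedTemporaryFile(delete=False, ...)` leaves one .wav file per answer in /tmp, which accumulates on a long-running Space. Since the path is stored as `response_file`, cleanup has to wait until the audio is no longer needed; if playback of past answers is not required, the file could be removed right after transcription. A sketch under that assumption:

```python
import os

result = model.transcribe(tmp_audio_path)
transcript = result["text"].strip() or "[No speech was detected]"
os.remove(tmp_audio_path)  # safe only if the .wav is never replayed later
```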