Update app.py
app.py CHANGED
@@ -14,11 +14,15 @@ import edge_tts
 import pandas as pd
 import tempfile
 import traceback
-from streamlit_webrtc import webrtc_streamer, WebRtcMode
+from streamlit_webrtc import webrtc_streamer, WebRtcMode, AudioProcessorBase
 from twilio.rest import Client
 import logging
 import whisper
 model = whisper.load_model("base")
+import numpy as np
+import pydub
+import io
+import wave


 # ✅ MUST be the first Streamlit command
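One caveat on this hunk: `model = whisper.load_model("base")` executes at the top of the script, and Streamlit re-runs the whole script on every interaction, so the model weights are reloaded on each rerun. A minimal sketch of the usual caching pattern (not part of this commit; assumes a Streamlit version that provides `st.cache_resource`):

    import streamlit as st
    import whisper

    @st.cache_resource
    def load_whisper_model(name: str = "base"):
        # Load once per server process and reuse across reruns instead
        # of reloading the weights on every script execution.
        return whisper.load_model(name)

    model = load_whisper_model()

Also worth noting: `numpy` and `wave` are imported here but not used by the hunks shown below; `pydub` and `io` are what `AudioRecorder.get_wav_bytes` relies on.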
@@ -52,6 +56,42 @@ for key, default in {
     if key not in st.session_state:
         st.session_state[key] = default

+########################################///////////////////////////////////////////////////#########################################
+
+class AudioRecorder(AudioProcessorBase):
+    def __init__(self) -> None:
+        self.audio_frames = []
+
+    def recv(self, frame):
+        # We are only interested in the audio frames
+        if frame.kind == "audio":
+            self.audio_frames.append(frame)
+        return frame
+
+    def get_wav_bytes(self):
+        if not self.audio_frames:
+            return None
+
+        # Assuming mono audio with a sample width of 2 bytes (16-bit)
+        sample_width = 2
+        sample_rate = self.audio_frames[0].sample_rate
+
+        # Combine all audio frames
+        sound = pydub.AudioSegment.empty()
+        for frame in self.audio_frames:
+            sound += pydub.AudioSegment(
+                data=frame.to_ndarray().tobytes(),
+                sample_width=sample_width,
+                frame_rate=sample_rate,
+                channels=1,  # Assuming mono
+            )
+
+        # Export to WAV in-memory
+        wav_buffer = io.BytesIO()
+        sound.export(wav_buffer, format="wav")
+        return wav_buffer.getvalue()
+
+########################################///////////////////////////////////////////////////#########################################
 # Utility functions
 def extract_pdf_text(uploaded_file):
     pdf_reader = PyPDF2.PdfReader(uploaded_file)
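The `AudioRecorder` buffers raw audio frames in `recv` and only serializes them in `get_wav_bytes`, under the hard-coded assumption of mono, 16-bit samples. A small offline sketch of that serialization path (not part of the commit), synthesizing one frame with PyAV and checking the header with the otherwise-unused `wave` import; the frame is appended to the buffer directly, since in the app `recv` is fed by the WebRTC track:

    import io
    import wave

    import av
    import numpy as np

    recorder = AudioRecorder()

    # One second of a 440 Hz tone as packed 16-bit mono at 48 kHz;
    # av.AudioFrame.from_ndarray expects shape (1, samples) for s16/mono.
    t = np.linspace(0, 1, 48000, endpoint=False)
    samples = (np.sin(2 * np.pi * 440 * t) * 32767).astype(np.int16)[np.newaxis, :]
    frame = av.AudioFrame.from_ndarray(samples, format="s16", layout="mono")
    frame.sample_rate = 48000
    recorder.audio_frames.append(frame)

    # The exported WAV header should match the hard-coded assumptions.
    with wave.open(io.BytesIO(recorder.get_wav_bytes())) as wav:
        assert wav.getnchannels() == 1
        assert wav.getsampwidth() == 2
        assert wav.getframerate() == 48000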
@@ -872,61 +912,55 @@ if st.session_state["generated_questions"]:
         remaining = 15 - int(now - st.session_state.get("timer_start", 0))
         if remaining > 0:
             st.markdown(f"<h4 class='timer-text'>🎙️ {remaining} seconds to answer...</h4>", unsafe_allow_html=True)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+            webrtc_ctx = webrtc_streamer(
+                key=f"audio-recorder-{idx}",
+                mode=WebRtcMode.SENDONLY,
+                audio_processor_factory=AudioRecorder,
+                media_stream_constraints={"video": False, "audio": True},
+            )
+
+            if st.button("⏹️ Stop Recording & Confirm"):
+                if webrtc_ctx.audio_processor:
+                    wav_bytes = webrtc_ctx.audio_processor.get_wav_bytes()
+                    if wav_bytes:
+                        st.session_state["response_audio_bytes"] = wav_bytes
+                        st.audio(st.session_state["response_audio_bytes"], format="audio/wav")
+
+                        # Transcribe the audio
+                        try:
+                            with st.spinner("🧠 Transcribing your answer..."):
+                                # To use whisper, you need to save the bytes to a temporary file
+                                with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_audio_file:
+                                    tmp_audio_file.write(wav_bytes)
+                                    tmp_audio_path = tmp_audio_file.name
+
+                                result = model.transcribe(tmp_audio_path)
+                                transcript = result["text"].strip()
+                                if not transcript:
+                                    transcript = "[Transcription failed or empty]"
+                        except Exception as e:
                             st.error(f"❌ Transcription error: {e}")
                             transcript = "[Transcription error]"
-
-                        st.session_state["answers"].append({
-                            "question": question,
-                            "response_file": st.session_state["response_file"],
-                            "response_text": transcript
-                        })
-
-                        if st.session_state["current_question_index"] == len(st.session_state["generated_questions"]):
-                            evaluate_answers()
-                            st.session_state["show_summary"] = True
-                            st.rerun()
-
-
-
-        if elapsed > 15 and "response_file" not in st.session_state:
-            st.warning("⚠️ No audio captured. Moving to next question.")
-            st.session_state["answers"].append({
-                "question": question,
-                "response": "[No response]"
-            })
-
-            st.session_state.update({
-                "record_phase": "idle",
-                "question_played": False,
-                "current_question_index": idx + 1
-            })

-
-
-
-
-
+                        st.session_state["answers"].append({
+                            "question": question,
+                            "response_file": tmp_audio_path,  # You can store the path if needed
+                            "response_text": transcript
+                        })
+
+                        # Move to the next question
+                        st.session_state.update({
+                            "record_phase": "idle",
+                            "question_played": False,
+                            "current_question_index": idx + 1
+                        })
+                        if st.session_state["current_question_index"] == len(st.session_state["generated_questions"]):
+                            evaluate_answers()
+                            st.session_state["show_summary"] = True
+                            st.rerun()
+                    else:
+                        st.warning("No audio was recorded.")



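Because the temporary file is created with `delete=False` and its path is stored in `answers` as `response_file`, each recorded answer leaves a WAV file behind in the temp directory for the lifetime of the Space. (Also note that if the exception fires before the temporary file is created, `tmp_audio_path` is unbound when the answer is appended.) A sketch of a cleanup helper that could run after scoring; `cleanup_response_files` is a hypothetical name, not something this commit adds:

    import os

    def cleanup_response_files(answers):
        # Unlink the per-answer WAV files once they are no longer
        # needed, e.g. after evaluate_answers() has produced the summary.
        for answer in answers:
            path = answer.get("response_file")
            if path and os.path.exists(path):
                os.unlink(path)
                answer["response_file"] = None

Called with `st.session_state["answers"]` once the summary screen has rendered, this keeps the Space from accumulating one temp file per question.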
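A last note on the recorder: streamlit-webrtc invokes `recv` on a worker thread while the Streamlit script thread calls `get_wav_bytes`, so the shared `audio_frames` list is read and written concurrently. If that ever becomes an issue, a lock-guarded variant is straightforward (hypothetical, not in the commit):

    import threading

    class LockedAudioRecorder(AudioRecorder):
        # recv() runs on the WebRTC worker thread while get_wav_bytes()
        # runs on the script thread; serialize access to audio_frames.
        def __init__(self) -> None:
            super().__init__()
            self._lock = threading.Lock()

        def recv(self, frame):
            with self._lock:
                return super().recv(frame)

        def get_wav_bytes(self):
            with self._lock:
                return super().get_wav_bytes()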