AbhishekShrimali committed
Commit 814e777 · verified · 1 parent: fdee8be

Update app.py

Files changed (1): app.py (+87, -53)
app.py CHANGED
@@ -14,11 +14,15 @@ import edge_tts
 import pandas as pd
 import tempfile
 import traceback
-from streamlit_webrtc import webrtc_streamer, WebRtcMode
+from streamlit_webrtc import webrtc_streamer, WebRtcMode, AudioProcessorBase
 from twilio.rest import Client
 import logging
 import whisper
 model = whisper.load_model("base")
+import numpy as np
+import pydub
+import io
+import wave
 
 
 # ✅ MUST be the first Streamlit command
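Note: of the four newly imported modules, only pydub and io appear in the code this commit adds; numpy and wave go unused. Since wave is imported anyway, a minimal sketch of doing the same WAV packaging with the stdlib instead of pydub is shown below; it assumes packed 16-bit PCM (what aiortc typically hands over), and pcm_to_wav_bytes is a hypothetical helper, not part of the commit:

import io
import wave

def pcm_to_wav_bytes(pcm: bytes, sample_rate: int, channels: int) -> bytes:
    # Wrap raw 16-bit PCM in a WAV container entirely in memory.
    buf = io.BytesIO()
    with wave.open(buf, "wb") as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(2)            # 2 bytes per sample = 16-bit
        wf.setframerate(sample_rate)
        wf.writeframes(pcm)
    return buf.getvalue()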
@@ -52,6 +56,42 @@ for key, default in {
     if key not in st.session_state:
         st.session_state[key] = default
 
+########################################///////////////////////////////////////////////////#########################################
+
+class AudioRecorder(AudioProcessorBase):
+    def __init__(self) -> None:
+        self.audio_frames = []
+
+    def recv(self, frame):
+        # We are only interested in the audio frames
+        if frame.kind == "audio":
+            self.audio_frames.append(frame)
+        return frame
+
+    def get_wav_bytes(self):
+        if not self.audio_frames:
+            return None
+
+        # Assuming mono audio with a sample width of 2 bytes (16-bit)
+        sample_width = 2
+        sample_rate = self.audio_frames[0].sample_rate
+
+        # Combine all audio frames
+        sound = pydub.AudioSegment.empty()
+        for frame in self.audio_frames:
+            sound += pydub.AudioSegment(
+                data=frame.to_ndarray().tobytes(),
+                sample_width=sample_width,
+                frame_rate=sample_rate,
+                channels=1,  # Assuming mono
+            )
+
+        # Export to WAV in-memory
+        wav_buffer = io.BytesIO()
+        sound.export(wav_buffer, format="wav")
+        return wav_buffer.getvalue()
+
+########################################///////////////////////////////////////////////////#########################################
 # Utility functions
 def extract_pdf_text(uploaded_file):
     pdf_reader = PyPDF2.PdfReader(uploaded_file)
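A caveat on AudioRecorder.get_wav_bytes above: it hard-codes channels=1, but browsers typically deliver stereo 48 kHz audio through aiortc, so the exported WAV can play back at the wrong speed and pitch. A minimal defensive variant is sketched below; it assumes PyAV-style av.AudioFrame objects (which is what streamlit_webrtc passes to recv) in a packed 16-bit format, and frames_to_wav_bytes is a hypothetical helper, not part of the commit:

import io
import pydub

def frames_to_wav_bytes(frames):
    if not frames:
        return None
    sound = pydub.AudioSegment.empty()
    for frame in frames:
        arr = frame.to_ndarray()                  # packed s16: shape (1, samples * channels)
        channels = arr.shape[1] // frame.samples  # frame.samples = samples per channel
        sound += pydub.AudioSegment(
            data=arr.tobytes(),
            sample_width=2,                       # 16-bit PCM
            frame_rate=frame.sample_rate,
            channels=channels,
        )
    buf = io.BytesIO()
    sound.export(buf, format="wav")
    return buf.getvalue()

Deriving the channel count from each frame rather than assuming mono keeps the duration and pitch of the recording intact whatever the browser negotiates.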
@@ -872,61 +912,55 @@ if st.session_state["generated_questions"]:
     remaining = 15 - int(now - st.session_state.get("timer_start", 0))
     if remaining > 0:
         st.markdown(f"<h4 class='timer-text'>🎙️ {remaining} seconds to answer...</h4>", unsafe_allow_html=True)
-
-        audio_value = st.audio_input("🎤 Tap to record — then stop when done", key=f"audio_{idx}")
-        if audio_value and "response_file" not in st.session_state:
-            wav_path = f"response_{idx}.wav"
-            with open(wav_path, "wb") as f:
-                f.write(audio_value.getbuffer())
-            #st.audio(wav_path, format="audio/wav")
-            st.session_state["response_file"] = wav_path
-            st.session_state["record_phase"] = "listening"
-            st.success("✅ Audio uploaded. You may now confirm your answer.")
-            st.audio(wav_path, format="audio/wav")
-
-        if st.button("⏹️ Confirm & Next"):
-            try:
-                with st.spinner("🧠 Transcribing your answer..."):
-                    result = model.transcribe(st.session_state["response_file"])
-                    transcript = result["text"].strip()
-                    if not transcript:
-                        transcript = "[Transcription failed or empty]"
-
-            except Exception as e:
+
+        webrtc_ctx = webrtc_streamer(
+            key=f"audio-recorder-{idx}",
+            mode=WebRtcMode.SENDONLY,
+            audio_processor_factory=AudioRecorder,
+            media_stream_constraints={"video": False, "audio": True},
+        )
+
+        if st.button("⏹️ Stop Recording & Confirm"):
+            if webrtc_ctx.audio_processor:
+                wav_bytes = webrtc_ctx.audio_processor.get_wav_bytes()
+                if wav_bytes:
+                    st.session_state["response_audio_bytes"] = wav_bytes
+                    st.audio(st.session_state["response_audio_bytes"], format="audio/wav")
+
+                    # Transcribe the audio
+                    try:
+                        with st.spinner("🧠 Transcribing your answer..."):
+                            # To use whisper, you need to save the bytes to a temporary file
+                            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_audio_file:
+                                tmp_audio_file.write(wav_bytes)
+                                tmp_audio_path = tmp_audio_file.name
+
+                            result = model.transcribe(tmp_audio_path)
+                            transcript = result["text"].strip()
+                            if not transcript:
+                                transcript = "[Transcription failed or empty]"
+                    except Exception as e:
                         st.error(f"❌ Transcription error: {e}")
                         transcript = "[Transcription error]"
-
-        st.session_state["answers"].append({
-            "question": question,
-            "response_file": st.session_state["response_file"],
-            "response_text": transcript
-        })
-
-        if st.session_state["current_question_index"] == len(st.session_state["generated_questions"]):
-            evaluate_answers()
-            st.session_state["show_summary"] = True
-            st.rerun()
-
-
-
-        if elapsed > 15 and "response_file" not in st.session_state:
-            st.warning("⚠️ No audio captured. Moving to next question.")
-            st.session_state["answers"].append({
-                "question": question,
-                "response": "[No response]"
-            })
-
-            st.session_state.update({
-                "record_phase": "idle",
-                "question_played": False,
-                "current_question_index": idx + 1
-            })
 
-
-        if st.session_state["current_question_index"] == len(st.session_state["generated_questions"]):
-            evaluate_answers()
-            st.session_state["show_summary"] = True
-            st.rerun()
+                    st.session_state["answers"].append({
+                        "question": question,
+                        "response_file": tmp_audio_path, # You can store the path if needed
+                        "response_text": transcript
+                    })
+
+                    # Move to the next question
+                    st.session_state.update({
+                        "record_phase": "idle",
+                        "question_played": False,
+                        "current_question_index": idx + 1
+                    })
+                    if st.session_state["current_question_index"] == len(st.session_state["generated_questions"]):
+                        evaluate_answers()
+                        st.session_state["show_summary"] = True
+                        st.rerun()
+                else:
+                    st.warning("No audio was recorded.")
 
 
 
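One more caveat on the final hunk: tmp_audio_path is only bound inside the try block, so the answers entry written after the except clause can raise NameError if temp-file creation itself fails, and the temp file is never deleted on any path. A minimal sketch of the same Whisper step with both issues addressed; transcribe_wav_bytes is a hypothetical helper, not part of the commit, and it assumes the module-level model loaded at the top of app.py:

import os
import tempfile

def transcribe_wav_bytes(wav_bytes: bytes) -> str:
    # Whisper decodes its input via ffmpeg, so the bytes must land in a real
    # file; always clean that file up, even when transcription fails.
    tmp_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
            tmp.write(wav_bytes)
            tmp_path = tmp.name
        result = model.transcribe(tmp_path)
        return result["text"].strip() or "[Transcription failed or empty]"
    except Exception:
        return "[Transcription error]"
    finally:
        if tmp_path and os.path.exists(tmp_path):
            os.unlink(tmp_path)

With a helper like this, the answers record no longer needs to carry response_file at all; storing response_text alone is enough.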