Update app.py

app.py CHANGED
@@ -19,8 +19,6 @@ from twilio.rest import Client
 import logging
 import whisper
 import speech_recognition as sr
-from audiorecorder import audiorecorder
-import numpy as np
 #model = whisper.load_model("base")
 
 
@@ -817,20 +815,7 @@ def get_ice_servers():
 
     return token.ice_servers
 
-# Function to generate question audio and save to a valid path
-async def generate_question_audio(question, voice="en-IE-EmilyNeural"):
-    clean_question = question.strip().replace("\n", " ")
-    tts = edge_tts.Communicate(text=clean_question, voice=voice)
-
-
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3", dir=os.getcwd()) as tmp_file:
-        output_path = tmp_file.name
-    await tts.save(output_path)
-    return output_path
 
-@st.cache_data(show_spinner=False)
-def get_cached_question_audio(question_text):
-    return asyncio.run(generate_question_audio(question_text))
 
 # === Main QA Interface ===
 if st.session_state["generated_questions"]:
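For context, the deleted generate_question_audio helper follows the standard edge-tts save pattern. A minimal standalone sketch of that pattern, assuming only that the edge-tts package is installed (the voice name is taken from the deleted code):

import asyncio
import tempfile

import edge_tts

async def tts_to_mp3(text: str, voice: str = "en-IE-EmilyNeural") -> str:
    # Collapse newlines so the TTS engine reads one continuous passage.
    communicate = edge_tts.Communicate(text=text.strip().replace("\n", " "), voice=voice)
    # delete=False keeps the temp file on disk after the handle closes,
    # leaving a stable path for edge-tts to write into.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
        output_path = tmp.name
    await communicate.save(output_path)
    return output_path

if __name__ == "__main__":
    print(asyncio.run(tts_to_mp3("Hello and welcome to the interview.")))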
@@ -839,36 +824,17 @@ if st.session_state["generated_questions"]:
     question = st.session_state["generated_questions"][idx].lstrip("1234567890. ").strip()
 
     # Phase 0: Play audio first and wait 5s before countdown
-    """
     if not st.session_state.get("question_played"):
         st.session_state["question_audio_file"] = asyncio.run(generate_question_audio(question))
         st.session_state.update({
             "question_played": True,
             "question_start_time": time.time(),
             "record_phase": "audio_playing",
-
+            "recorded_text": ""
         })
-    """
-    if not st.session_state.get("question_played"):
-        st.session_state["question_audio_file"] = asyncio.run(generate_question_audio(question))
-        st.session_state.update({
-            "question_played": True,
-            "question_start_time": time.time(),
-            "record_phase": "audio_playing",
-        })
-
-
-    #st.markdown(f"**Q{idx + 1}:** {question}")
-    #st.audio(st.session_state["question_audio_file"], format="audio/mp3")
 
     st.markdown(f"**Q{idx + 1}:** {question}")
-
-    if os.path.exists(audio_file) and os.path.getsize(audio_file) > 0:
-        st.audio(audio_file, format="audio/mp3")
-    else:
-        st.warning("⚠️ Could not load question audio. Please retry.")
-
-
+    st.audio(st.session_state["question_audio_file"], format="audio/mp3")
 
     now = time.time()
     elapsed = now - st.session_state.get("question_start_time", 0)
@@ -915,78 +881,7 @@ if st.session_state["generated_questions"]:
         if remaining > 0:
             st.markdown(f"<h4 class='timer-text'>🎙️ {remaining} seconds to answer...</h4>", unsafe_allow_html=True)
 
-
-            audio = audiorecorder("🎙️ Start Recording", "⏹️ Stop Recording")
-            if len(audio) > 0 and "response_file" not in st.session_state:
-                if np.max(audio) > 0.01:
-                    wav_path = f"response_{idx}.wav"
-                    with open(wav_path, "wb") as f:
-                        f.write(audio.tobytes())
-                    st.session_state["response_file"] = wav_path
-                    st.session_state["record_phase"] = "listening"
-                    st.success("✅ Audio recorded. Please confirm to proceed.")
-                    st.audio(wav_path, format="audio/wav")
-                    st.rerun()
-                else:
-                    st.warning("⚠️ Silence detected. Please try again.")
-                    st.rerun()
-
-            else:
-                time.sleep(1)
-                st.rerun()
-
-        else:
-            if not st.session_state.get("response_file"):
-                st.warning("⚠️ No audio captured. Moving to next question.")
-                st.session_state["answers"].append({"question": question, "response": "[No response]"})
-                st.session_state.update({
-                    "record_phase": "idle",
-                    "question_played": False,
-                    "current_question_index": idx + 1
-                })
-                if st.session_state["current_question_index"] == len(st.session_state["generated_questions"]):
-                    evaluate_answers()
-                st.session_state["show_summary"] = True
-                st.rerun()
-        elif st.session_state["record_phase"] == "listening":
-            st.success("🎧 Review your recorded response below:")
-            st.audio(st.session_state["response_file"], format="audio/wav")
-
-            if st.button("⏹️ Confirm & Next"):
-                recognizer = sr.Recognizer()
-                try:
-                    with sr.AudioFile(st.session_state["response_file"]) as source:
-                        audio = recognizer.record(source)
-                    transcript = recognizer.recognize_google(audio)
-                except sr.UnknownValueError:
-                    transcript = "[Could not understand audio]"
-                except sr.RequestError:
-                    transcript = "[Google API error]"
-                except Exception as e:
-                    transcript = f"[Transcription failed: {e}]"
-
-                st.session_state["answers"].append({
-                    "question": question,
-                    "response_file": st.session_state["response_file"],
-                    "response": transcript
-                })
-
-                st.session_state.update({
-                    "record_phase": "idle",
-                    "recording_started": False,
-                    "question_played": False,
-                    "question_start_time": 0.0,
-                    "current_question_index": idx + 1,
-                    "response_file": None,
-                    "audio_waiting": True
-                })
-
-                if st.session_state["current_question_index"] == len(st.session_state["generated_questions"]):
-                    evaluate_answers()
-                    st.session_state["show_summary"] = True
-                    st.rerun()
-
-        """
+            audio_value = st.audio_input("🎤 Tap to record — then stop when done", key=f"audio_{idx}")
             if audio_value and "response_file" not in st.session_state:
                 wav_path = f"response_{idx}.wav"
                 with open(wav_path, "wb") as f:
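This hunk swaps the third-party audiorecorder widget (and its numpy silence check) for Streamlit's built-in st.audio_input. A minimal sketch of how that widget behaves, assuming Streamlit 1.40 or later where it is stable: it returns None until the user records, then a file-like UploadedFile containing WAV bytes.

import streamlit as st

audio_value = st.audio_input("🎤 Tap to record — then stop when done")
if audio_value is not None:
    wav_bytes = audio_value.getvalue()   # raw WAV bytes from the browser
    with open("response.wav", "wb") as f:  # persist, as the diff does per question
        f.write(wav_bytes)
    st.audio(wav_bytes, format="audio/wav")  # play the recording back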
@@ -1009,7 +904,7 @@ if st.session_state["generated_questions"]:
                     st.session_state["record_phase"] = "listening"
                     st.success("✅ Audio uploaded. You may now confirm your answer.")
                     #st.audio(wav_path, format="audio/wav")
-            ""
+            """
             if st.button("⏹️ Confirm & Next"):
                 try:
                     with st.spinner("🧠 Transcribing your answer..."):
@@ -1021,7 +916,7 @@ if st.session_state["generated_questions"]:
                 except Exception as e:
                     st.error(f"❌ Transcription error: {e}")
                     transcript = "[Transcription error]"
-            ""
+            """
 
             st.session_state["answers"].append({
                 "question": question,
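This hunk and the previous one appear to fence the Confirm & Next transcription off inside a triple-quoted string, disabling it. For reference, the SpeechRecognition flow that the removed recorder path used can be sketched standalone; note that recognize_google calls Google's free web endpoint and needs network access, and "response.wav" is a placeholder path:

import speech_recognition as sr

def transcribe_wav(path: str) -> str:
    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(path) as source:
            audio = recognizer.record(source)  # read the entire file
        return recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        return "[Could not understand audio]"
    except sr.RequestError:
        return "[Google API error]"

print(transcribe_wav("response.wav"))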
@@ -1096,7 +991,7 @@ if st.session_state["generated_questions"]:
             evaluate_answers()
             st.session_state["show_summary"] = True
             st.rerun()
-            
+
 
 # === Summary Display ===
 