iyosha committed on
Commit
3bc4341
·
verified ·
1 Parent(s): afa89f5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -3
app.py CHANGED
@@ -8,6 +8,21 @@ from backend.helpers import get_random_session_samples
8
 
9
  dataset = load_dataset("iyosha-huji/stressBench", token=configs.HF_API_TOKEN)["test"]
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  def human_eval_tab():
13
  with gr.Tab(label="Evaluation"):
@@ -85,7 +100,7 @@ def human_eval_tab():
85
  question_md = gr.Markdown()
86
  radio = gr.Radio(label="Answer:", interactive=True)
87
  with gr.Column(variant="compact"):
88
- audio_output = gr.Audio(interactive=False, type="numpy", label="Audio:")
89
 
90
  with gr.Group(
91
  visible=False, elem_id="final_page"
@@ -116,13 +131,17 @@ def human_eval_tab():
116
  # show the question
117
  true_index = session_sample_indices[i]
118
  sample = dataset[true_index]
119
- audio_data = (sample["audio"]["sampling_rate"], sample["audio"]["array"])
 
 
 
 
120
  previous_answer = answers.get(i, None)
121
  return (
122
  gr.update(visible=True),
123
  f"<div align='center'>Sample <b>{i+1}</b> out of <b>{len(session_sample_indices)}</b></div>",
124
  "Out of the following answers, according to the speaker's stressed words, what is most likely the underlying intention of the speaker?",
125
- gr.update(value=audio_data),
126
  gr.update(
127
  choices=sample["possible_answers"],
128
  value=previous_answer,
 
8
 
9
  dataset = load_dataset("iyosha-huji/stressBench", token=configs.HF_API_TOKEN)["test"]
10
 
11
+ import os
12
+ import tempfile
13
+ import soundfile as sf
14
+
15
+ TEMP_AUDIO_DIR = os.path.join(tempfile.gettempdir(), "stress_eval_audio")
16
+ os.makedirs(TEMP_AUDIO_DIR, exist_ok=True)
17
+
18
def write_temp_wav(audio_array, sampling_rate, interpretation_id):
    """Write a sample's audio to a cached WAV file and return the file path.

    The file is named ``<interpretation_id>.wav`` inside ``TEMP_AUDIO_DIR``.
    When that file already exists it is reused and nothing is written.

    Args:
        audio_array: Audio samples, handed unchanged to ``sf.write``.
        sampling_rate: Sample rate, handed unchanged to ``sf.write``.
        interpretation_id: Identifier used as the stem of the file name.

    Returns:
        The path of the WAV file for ``interpretation_id``.
    """
    filepath = os.path.join(TEMP_AUDIO_DIR, f"{interpretation_id}.wav")
    if os.path.exists(filepath):
        return filepath

    # The system temp directory may have been cleaned since import time.
    os.makedirs(TEMP_AUDIO_DIR, exist_ok=True)

    # Write to a scratch file first and rename it into place, so a concurrent
    # caller (or a crash mid-write) can never observe a truncated WAV under
    # the final name. The ".wav" suffix lets soundfile infer the format.
    fd, scratch_path = tempfile.mkstemp(suffix=".wav", dir=TEMP_AUDIO_DIR)
    os.close(fd)
    try:
        sf.write(scratch_path, audio_array, sampling_rate)
        os.replace(scratch_path, filepath)
    finally:
        # After a successful replace the scratch file no longer exists; this
        # only removes leftovers when the write or rename failed.
        if os.path.exists(scratch_path):
            os.remove(scratch_path)

    return filepath
26
 
27
  def human_eval_tab():
28
  with gr.Tab(label="Evaluation"):
 
100
  question_md = gr.Markdown()
101
  radio = gr.Radio(label="Answer:", interactive=True)
102
  with gr.Column(variant="compact"):
103
+ audio_output = gr.Audio(interactive=False, type="filepath", label="Audio:")
104
 
105
  with gr.Group(
106
  visible=False, elem_id="final_page"
 
131
  # show the question
132
  true_index = session_sample_indices[i]
133
  sample = dataset[true_index]
134
+ # audio_data = (sample["audio"]["sampling_rate"], sample["audio"]["array"])
135
+ # 🛠️ Save temp wav by interpretation_id
136
+ interp_id = sample["interpretation_id"]
137
+ wav_path = write_temp_wav(sample["audio"]["array"], sample["audio"]["sampling_rate"], interp_id)
138
+
139
  previous_answer = answers.get(i, None)
140
  return (
141
  gr.update(visible=True),
142
  f"<div align='center'>Sample <b>{i+1}</b> out of <b>{len(session_sample_indices)}</b></div>",
143
  "Out of the following answers, according to the speaker's stressed words, what is most likely the underlying intention of the speaker?",
144
+ gr.update(value=wav_path),
145
  gr.update(
146
  choices=sample["possible_answers"],
147
  value=previous_answer,