YanmHa committed on
Commit
072e2ee
·
verified ·
1 Parent(s): deabe21

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +90 -127
app.py CHANGED
@@ -34,14 +34,16 @@ else: print(f"警告:基础目录 '{BASE_IMAGE_DIR}' 不存在。将无法加
34
  SUBJECTS = ["subj01", "subj02", "subj05", "subj07"]
35
  SENTINEL_TRIAL_INTERVAL = 20
36
  NUM_TRIALS_PER_RUN = 100
 
37
 
38
  DATASET_REPO_ID = "YanmHa/image-aligned-experiment-data"
39
- INDIVIDUAL_LOGS_FOLDER = "individual_choice_logs" # 单个日志文件夹(如果将来恢复单条保存,此名称仍可用)
40
- BATCH_LOG_FOLDER = "run_logs_batch" # 用于批量保存的文件夹
41
  CSS = ".gr-block {margin-top: 4px !important; margin-bottom: 4px !important;} .compact_button { padding: 4px 8px; min-width: auto; }"
42
 
43
  # ==== 加载所有可用的目标图片 ====
44
  master_image_list = []
 
45
  if os.path.exists(TARGET_DIR):
46
  try:
47
  master_image_list = sorted(
@@ -55,9 +57,10 @@ if os.path.exists(TARGET_DIR):
55
  elif not os.path.exists(TARGET_DIR) and os.path.exists(BASE_IMAGE_DIR): print(f"错误:目标目录 '{TARGET_DIR}' 未找到。")
56
  if not master_image_list: print(f"关键错误:由于 '{TARGET_DIR}' 问题,无目标图片,实验无法进行。")
57
 
 
58
  # ==== 辅助函数 ====
59
  def get_next_trial_info(current_trial_idx_in_run, current_run_image_list_for_trial, num_trials_in_this_run_for_trial):
60
- # ... (此函数与您上一版代码完全一致, 返回匿名显示标签和内部详细标签) ...
61
  global TARGET_DIR, METHOD_ROOTS, SUBJECTS, SENTINEL_TRIAL_INTERVAL
62
  if not current_run_image_list_for_trial or current_trial_idx_in_run >= num_trials_in_this_run_for_trial: return None, current_trial_idx_in_run
63
  img_filename_original = current_run_image_list_for_trial[current_trial_idx_in_run]
@@ -100,71 +103,53 @@ def get_next_trial_info(current_trial_idx_in_run, current_run_image_list_for_tri
100
  })
101
  return trial_info, current_trial_idx_in_run + 1
102
 
103
- # --- ( save_single_log_to_hf_dataset 函数可以删除或注释掉,因为不再每次选择都调用) ---
104
- # def save_single_log_to_hf_dataset(log_entry, user_identifier_str):
105
- # ...
106
 
107
- # --- 新增/修改:批量保存累积日志的函数 ---
108
- def save_collected_logs_batch(list_of_log_entries, user_identifier_str, current_run_no_for_filename):
109
- global DATASET_REPO_ID, BATCH_LOG_FOLDER # 使用 BATCH_LOG_FOLDER
110
  if not list_of_log_entries:
111
- print("没有累积的日志可供批量保存。")
112
- return True # 没有东西要保存,也算“成功”
113
 
114
  identifier_safe = str(user_identifier_str if user_identifier_str else "unknown_user_session").replace('.', '_').replace(':', '_').replace('/', '_').replace(' ', '_')
115
- print(f"用户 {identifier_safe} - 准备批量保存截至轮次 {current_run_no_for_filename} 的 {len(list_of_log_entries)} 条日志...")
116
 
117
  try:
118
  token = os.getenv("HF_TOKEN")
119
- if not token:
120
- print("错误:HF_TOKEN 未设置。无法批量保存日志。")
121
- return False # 指示保存失败
122
- if not DATASET_REPO_ID:
123
- print("错误:DATASET_REPO_ID 未配置。无法批量保存日志。")
124
- return False # 指示保存失败
125
 
126
  api = HfApi(token=token)
127
-
128
  timestamp_str = datetime.now().strftime('%Y%m%d_%H%M%S_%f')
129
- # 文件名可以指明这是哪一“批”的日志,例如基于当前的轮次号
130
- batch_filename = f"batch_user-{identifier_safe}_upto-run{current_run_no_for_filename}_{timestamp_str}.jsonl"
131
- # 路径结构: BATCH_LOG_FOLDER / USER_SESSION_ID_SAFE / FILENAME.jsonl
132
  path_in_repo = f"{BATCH_LOG_FOLDER}/{identifier_safe}/{batch_filename}"
133
 
134
  jsonl_content = ""
135
  for log_entry in list_of_log_entries:
136
  try:
137
- if isinstance(log_entry, dict):
138
- jsonl_content += json.dumps(log_entry, ensure_ascii=False) + "\n"
139
- else:
140
- print(f"警告:批量保存时,日志条目不是字典格式,跳过:{log_entry}")
141
  except Exception as json_err:
142
- print(f"错误:批量保存时,序列化单条日志时出错: {log_entry}. 错误: {json_err}")
143
- jsonl_content += json.dumps({"error": "serialization_failed_in_batch",
144
- "original_data_preview": str(log_entry)[:100],
145
- "timestamp": datetime.now().isoformat()}, ensure_ascii=False) + "\n"
146
 
147
- if not jsonl_content.strip():
148
- print(f"用户 {identifier_safe} 截至轮次 {current_run_no_for_filename} 没有可序列化的日志内容。")
149
- return True # 没有东西要上传
150
 
151
  log_bytes = jsonl_content.encode('utf-8')
152
  file_like_object = io.BytesIO(log_bytes)
153
 
154
  print(f"准备批量上传日志文件: {path_in_repo} ({len(log_bytes)} bytes)")
155
  api.upload_file(
156
- path_or_fileobj=file_like_object,
157
- path_in_repo=path_in_repo,
158
- repo_id=DATASET_REPO_ID,
159
- repo_type="dataset", # 确保与您的DATASET_REPO_ID类型匹配
160
- commit_message=f"Batch logs for user {identifier_safe} up to run {current_run_no_for_filename}"
161
  )
162
  print(f"批量日志已成功保存到 HF Dataset: {DATASET_REPO_ID}/{path_in_repo}")
163
- return True # 指示保存成功
164
  except Exception as e:
165
- print(f"批量保存日志 (user {identifier_safe}, up to run {current_run_no_for_filename}) Hugging Face Dataset 时发生严重错误: {e}")
166
- traceback.print_exc()
167
- return False # 指示保存失败
168
 
169
  # ==== 主要的 Gradio 事件处理函数 ====
170
  def process_experiment_step(
@@ -172,7 +157,8 @@ def process_experiment_step(
172
  s_current_run_image_list_val, s_num_trials_this_run_val,
173
  action_type=None, choice_value=None, request: gr.Request = None
174
  ):
175
- global master_image_list, NUM_TRIALS_PER_RUN, outputs_ui_components_definition
 
176
  output_s_trial_idx = s_trial_idx_val; output_s_run_no = s_run_no_val
177
  output_s_user_logs = list(s_user_logs_val); output_s_current_trial_data = dict(s_current_trial_data_val) if s_current_trial_data_val else {}
178
  output_s_user_session_id = s_user_session_id_val; output_s_current_run_image_list = list(s_current_run_image_list_val)
@@ -187,6 +173,7 @@ def process_experiment_step(
187
  if action_type == "record_choice":
188
  if output_s_current_trial_data.get("data") and output_s_current_trial_data["data"].get("left_internal_label"):
189
  chosen_internal_label = (output_s_current_trial_data["data"]["left_internal_label"] if choice_value == "left" else output_s_current_trial_data["data"]["right_internal_label"])
 
190
  parsed_chosen_method, parsed_chosen_subject, parsed_chosen_filename = "N/A", "N/A", "N/A"
191
  if chosen_internal_label == "目标图像": parsed_chosen_method, parsed_chosen_subject, parsed_chosen_filename = "TARGET", "GT", output_s_current_trial_data["data"]["image_id"]
192
  else:
@@ -194,7 +181,7 @@ def process_experiment_step(
194
  if len(parts) == 3: parsed_chosen_method, parsed_chosen_subject, parsed_chosen_filename = parts[0].strip(), parts[1].strip(), parts[2].strip()
195
  elif len(parts) == 2: parsed_chosen_method, parsed_chosen_subject = parts[0].strip(), parts[1].strip()
196
  elif len(parts) == 1: parsed_chosen_method = parts[0].strip()
197
- log_entry = { # ... (log_entry 创建与之前一致) ...
198
  "timestamp": datetime.now().isoformat(), "user_identifier": user_identifier_for_logging, "run_no": output_s_run_no,
199
  "image_id": output_s_current_trial_data["data"]["image_id"],
200
  "left_internal_label": output_s_current_trial_data["data"]["left_internal_label"],
@@ -204,81 +191,67 @@ def process_experiment_step(
204
  "trial_sequence_in_run": output_s_current_trial_data["data"]["cur_no"],
205
  "is_sentinel": output_s_current_trial_data["data"]["is_sentinel"]
206
  }
207
- output_s_user_logs.append(log_entry) # <--- 累积日志到列表
208
  print(f"用户 {user_identifier_for_logging} 记录选择 (img: {log_entry['image_id']})。当前批次日志数: {len(output_s_user_logs)}")
209
- # !!! 移除 save_single_log_to_hf_dataset 的调用 !!!
 
 
 
 
 
 
 
 
 
 
 
210
  else:
211
  # ... (错误处理) ...
212
- print(f"用户 {user_identifier_for_logging} 错误:记录选择时数据为空!")
213
  error_ui_updates = create_ui_error_tuple("记录选择时内部错误。", f"用户ID: {user_id_display_text} | 进度:{output_s_trial_idx}/{output_s_num_trials_this_run}")
214
  return output_s_trial_idx, output_s_run_no, output_s_user_logs, output_s_current_trial_data, output_s_user_session_id, output_s_current_run_image_list, output_s_num_trials_this_run, *error_ui_updates
215
 
 
216
  if action_type == "start_experiment":
217
  is_first = (output_s_num_trials_this_run == 0 and output_s_trial_idx == 0 and output_s_run_no == 1)
218
  is_completed_for_restart = (output_s_num_trials_this_run > 0 and output_s_trial_idx >= output_s_num_trials_this_run)
219
-
220
  if is_first or is_completed_for_restart:
221
- # ... (开始新轮次的逻辑,与之前代码一致,例如:随机选择图片,重置trial_idx等) ...
222
- # ... (注意:当 is_completed_for_restart 时,output_s_run_no 应该已经增加了) ...
223
- # output_s_user_logs 不在这里重置,它会累积
224
- if not master_image_list: error_ui = create_ui_error_tuple("错误: 无可用目标图片!", f"用户ID: {user_id_display_text} | 进度: 0/0"); return 0, output_s_run_no, output_s_user_logs, {}, output_s_user_session_id, [], 0, *error_ui # output_s_user_logs is passed as is
225
 
226
- # 这段逻辑用于确定是否是真正的“下一轮”并增加轮次号
227
- # 如果是点击“开始实验”按钮且上一轮刚结束,则轮次号增加
228
  if is_completed_for_restart:
229
- # 此时 output_s_trial_idx 指向的是刚完成轮次的总数,output_s_run_no 是刚完成的轮次号
230
- # 所以,下一轮的轮次号应该是 output_s_run_no + 1
231
- # 但 process_experiment_step 的状态管理是,它返回的值会成为下一次调用的输入值
232
- # 所以,如果这里要开始新轮次,应该在这里增加 output_s_run_no
233
- output_s_run_no += 1 # 增加轮次号为新的一轮
234
-
235
  num_avail = len(master_image_list); run_size = min(num_avail, NUM_TRIALS_PER_RUN)
236
  if run_size == 0: error_ui = create_ui_error_tuple("错误: 采样图片数为0!", f"用户ID: {user_id_display_text} | 进度: 0/0"); return 0, output_s_run_no, output_s_user_logs, {}, output_s_user_session_id, [], 0, *error_ui
237
 
238
  output_s_current_run_image_list = random.sample(master_image_list, run_size)
239
  output_s_num_trials_this_run = run_size
240
- output_s_trial_idx = 0 # 新轮次从0开始
241
- # output_s_user_logs 不重置
242
  output_s_current_trial_data = {}
243
 
244
- # 只有在会话ID为空(即应用刚启动,用户第一次点击“同意”后,再点击“开始实验”)时才生成新的基于时间戳的ID
245
- # 或者,如果策略是每轮实验都用新的用户ID,则可以在这里(is_first or is_completed_for_restart)重新生成
246
- # 当前代码中,user_session_id 是在欢迎页的 handle_agree_and_start 中设置的,这里不再修改它
247
- # user_identifier_for_logging 会使用这个ID
248
  print(f"开始/继续轮次 {output_s_run_no} (用户ID: {output_s_user_session_id}). 随机选择 {output_s_num_trials_this_run} 张图片.")
249
-
250
- else: # 在轮次中途点击了开始按钮
251
  print(f"用户 {user_identifier_for_logging} 在第 {output_s_run_no} 轮,试验 {output_s_trial_idx} 点击开始,但轮次未完成。忽略。")
252
  no_change_ui = create_no_change_tuple()
253
  return output_s_trial_idx, output_s_run_no, output_s_user_logs, output_s_current_trial_data, output_s_user_session_id, output_s_current_run_image_list, output_s_num_trials_this_run, *no_change_ui
254
-
255
- # --- 轮次结束处理 ---
256
  if output_s_trial_idx >= output_s_num_trials_this_run and output_s_num_trials_this_run > 0:
257
- logs_for_this_run_count = sum(1 for log in output_s_user_logs if log.get("run_no") == output_s_run_no) # 粗略计算本轮日志数
258
- if logs_for_this_run_count > 0 or not output_s_user_logs: # 如果本轮有日志,或总日志为空(可能刚开始)
259
- print(f"用户 {output_s_user_session_id} 完成第 {output_s_run_no} 轮。累积日志数: {len(output_s_user_logs)}")
260
-
261
- # 检查是否达到批量保存的条件
262
- if output_s_run_no > 0 and output_s_run_no % 10 == 0:
263
- if output_s_user_logs: # 只有当有日志时才尝试保存
264
- print(f"达到第 {output_s_run_no} 轮,准备批量保存累积的日志...")
265
- save_success = save_collected_logs_batch(list(output_s_user_logs), user_identifier_for_logging, output_s_run_no)
266
- if save_success:
267
- print("批量日志已成功(或尝试)保存,将清空累积日志列表。")
268
- output_s_user_logs = [] # 清空已保存的日志
269
- else:
270
- print("警告:批量日志保存失败。日志将继续累积。")
271
- else:
272
- print(f"第 {output_s_run_no} 轮结束,但累积日志为空,无需批量保存。")
273
-
274
  prog_text = f"用户ID: {output_s_user_session_id} | 进度:{output_s_num_trials_this_run}/{output_s_num_trials_this_run} | 第 {output_s_run_no} 轮 🎉"
275
- ui_updates = list(create_ui_error_tuple(f"🎉 第 {output_s_run_no} 轮完成!请点击“开始试验 / 下一轮”开始新的轮次。", prog_text))
 
276
  ui_updates[7]=gr.update(interactive=True); ui_updates[8]=gr.update(interactive=False); ui_updates[9]=gr.update(interactive=False)
277
  ui_updates[0]=gr.update(value=None,visible=False); ui_updates[1]=gr.update(value=None,visible=False); ui_updates[2]=gr.update(value=None,visible=False)
278
  yield output_s_trial_idx, output_s_run_no, output_s_user_logs, output_s_current_trial_data, output_s_user_session_id, output_s_current_run_image_list, output_s_num_trials_this_run, *ui_updates; return
279
-
280
- # --- 获取并显示下一个试验 ---
281
- # ... (与上一版代码相同,确保 prog_text 使用了新的 user_id_display_text 或 output_s_user_session_id) ...
282
  if not output_s_current_run_image_list or output_s_num_trials_this_run == 0:
283
  error_ui = create_ui_error_tuple("错误: 无法加载试验图片 (列表为空)", f"用户ID: {user_id_display_text} | 进度: N/A")
284
  return output_s_trial_idx, output_s_run_no, output_s_user_logs, {"data": None}, output_s_user_session_id, [], 0, *error_ui
@@ -302,12 +275,14 @@ def process_experiment_step(
302
  ui_show_candidates_updates[7]=gr.update(interactive=False); ui_show_candidates_updates[8]=gr.update(interactive=True); ui_show_candidates_updates[9]=gr.update(interactive=True)
303
  yield next_s_trial_idx_for_state, output_s_run_no, output_s_user_logs, output_s_current_trial_data, output_s_user_session_id, output_s_current_run_image_list, output_s_num_trials_this_run, *ui_show_candidates_updates
304
 
305
-
306
  welcome_page_markdown = """
307
  ## 欢迎加入实验!
308
  您好!非常感谢您抽出宝贵时间参与我们的视觉偏好评估实验。您的选择将帮助我们改进重建算法,让机器生成的图像更贴近人类视觉体验!
 
309
  1. **实验目的**
310
  通过比较两幅 重建图像 与原始 目标图像 的相似度。
 
311
  2. **操作流程**
312
  * 点击下方的「我已阅读并同意开始实验」按钮。
313
  * 然后点击主实验界面的「开始试验 / 下一轮」按钮。
@@ -315,17 +290,20 @@ welcome_page_markdown = """
315
  * 随后自动切换到 **两张重建图像**。
316
  * 根据刚才的观察记忆,选出您认为与目标图像最相似的一张。
317
  * 选择后系统会自动进入下一轮比较。
 
318
  3. **温馨提示**
319
  * 请勿刷新或关闭页面,以免中断实验。
320
  * 若图片加载稍有延迟,请耐心等待;持续异常可联系邮箱 yangminghan@bupt.edu.cn。
321
  * 本实验将保护您的任何个人隐私信息,所有数据仅用于学术研究,请您认真选择和填写。
 
322
  4. **奖励说明**
323
  * 完成全部轮次后,请截图记录您所完成的实验总数(可累积,页面左下角将显示进度,请保证截取到为您分配的ID,轮次)。
324
  * 将截图发送至邮箱 yangminghan@bupt.edu.cn,我们将在核验后发放奖励。
 
325
  再次感谢您的参与与支持!您每一次认真选择都对我们的研究意义重大。祝您一切顺利,实验愉快!
326
  """
327
-
328
  def handle_agree_and_start(name, gender, age, education, request: gr.Request):
 
329
  error_messages_list = []
330
  if not name or str(name).strip() == "": error_messages_list.append("姓名 不能为空。")
331
  if gender is None or str(gender).strip() == "": error_messages_list.append("性别 必须选择。")
@@ -335,16 +313,11 @@ def handle_agree_and_start(name, gender, age, education, request: gr.Request):
335
  except (ValueError, TypeError): error_messages_list.append("年龄必须是一个有效的数字。")
336
  else:
337
  if not (1 <= num_age <= 120): error_messages_list.append("年龄必须在 1 到 120 之间。")
338
- if education is None or str(education).strip() == "" or str(education).strip() == "其他" and not name: # 稍微修改逻辑,如果选了“其他”但没填姓名,也可能需要提示
339
- # 更简单的逻辑:如果education是None或空字符串
340
- if education is None or str(education).strip() == "":
341
- error_messages_list.append("学历 必须选择。")
342
-
343
  if error_messages_list:
344
  full_error_message = "请修正以下错误:\n" + "\n".join([f"- {msg}" for msg in error_messages_list])
345
  print(f"用户输入验证失败: {full_error_message}")
346
  return gr.update(), False, gr.update(visible=True), gr.update(visible=False), full_error_message
347
-
348
  s_name = str(name).strip().replace(" ","_").replace("/","_").replace("\\","_")
349
  s_gender = str(gender).strip().replace(" ","_").replace("/","_").replace("\\","_")
350
  s_age = str(int(float(age)))
@@ -354,37 +327,28 @@ def handle_agree_and_start(name, gender, age, education, request: gr.Request):
354
  return user_id_str, True, gr.update(visible=False), gr.update(visible=True), ""
355
 
356
  with gr.Blocks(css=CSS, title="图像重建主观评估") as demo:
357
- s_show_experiment_ui = gr.State(False)
358
- s_trial_index = gr.State(0); s_run_no = gr.State(1); s_user_logs = gr.State([])
359
- s_current_trial_data = gr.State({}); s_user_session_id = gr.State(None)
360
  s_current_run_image_list = gr.State([]); s_num_trials_this_run = gr.State(0)
361
-
362
  welcome_container = gr.Column(visible=True)
363
  experiment_container = gr.Column(visible=False)
364
 
365
  with welcome_container:
 
366
  gr.Markdown(welcome_page_markdown)
367
- with gr.Row():
368
- user_name_input = gr.Textbox(label="请输入您的姓名或代号 (例如 ZS User001)", placeholder="例如:张三 -> ZS") # 修改了label
369
- user_gender_input = gr.Radio(label="性别", choices=["男", "女", "其他", "不愿透露"], value="不愿透露")
370
- with gr.Row():
371
- user_age_input = gr.Number(label="年龄 (请输入1-120的整数)", minimum=1, maximum=120, step=1, value=25) # 修改了label和value
372
- user_education_input = gr.Dropdown(label="学历", choices=["其他","初中及以下","高中(含中专)", "大专", "本科", "硕士", "博士(含在读)"], value="本科") # 修改了选项和value
373
  welcome_error_msg = gr.Markdown(value="")
374
  btn_agree_and_start = gr.Button("我已阅读上述说明并同意参与实验")
375
 
376
  with experiment_container:
377
- gr.Markdown("## 🧠 图像重建主观评估实验")
378
- gr.Markdown(f"每轮实验大约有 {NUM_TRIALS_PER_RUN} 次比较。")
379
  with gr.Row():
380
- with gr.Column(scale=1, min_width=300):
381
- left_img = gr.Image(label="左候选图", visible=False, height=400, interactive=False)
382
- left_lbl = gr.Textbox(label="左图信息", visible=False, interactive=False, max_lines=1)
383
- btn_left = gr.Button("选择左图 (更相似)", interactive=False, elem_classes="compact_button")
384
- with gr.Column(scale=1, min_width=300):
385
- right_img = gr.Image(label="右候选图", visible=False, height=400, interactive=False)
386
- right_lbl = gr.Textbox(label="右图信息", visible=False, interactive=False, max_lines=1)
387
- btn_right = gr.Button("选择右图 (更相似)", interactive=False, elem_classes="compact_button")
388
  with gr.Row(): target_img = gr.Image(label="目标图像 (观察3秒后消失)", visible=False, height=400, interactive=False)
389
  with gr.Row(): status_text = gr.Markdown(value="请点击“开始试验 / 下一轮”按钮。")
390
  with gr.Row(): progress_text = gr.Markdown()
@@ -404,16 +368,14 @@ with gr.Blocks(css=CSS, title="图像重建主观评估") as demo:
404
  s_current_run_image_list, s_num_trials_this_run, *outputs_ui_components_definition
405
  ]
406
 
407
- btn_agree_and_start.click(
408
- fn=handle_agree_and_start,
409
- inputs=[user_name_input, user_gender_input, user_age_input, user_education_input],
410
- outputs=[s_user_session_id, s_show_experiment_ui, welcome_container, experiment_container, welcome_error_msg]
411
- )
412
  btn_start.click(fn=partial(process_experiment_step, action_type="start_experiment"), inputs=click_inputs_base, outputs=event_outputs, queue=True)
413
  btn_left.click(fn=partial(process_experiment_step, action_type="record_choice", choice_value="left"), inputs=click_inputs_base, outputs=event_outputs, queue=True)
414
  btn_right.click(fn=partial(process_experiment_step, action_type="record_choice", choice_value="right"), inputs=click_inputs_base, outputs=event_outputs, queue=True)
415
 
 
416
  if __name__ == "__main__":
 
417
  if not master_image_list: print("\n关键错误:程序无法启动,因无目标图片。"); exit()
418
  else:
419
  print(f"从 '{TARGET_DIR}' 加载 {len(master_image_list)} 张目标图片。每轮选 {NUM_TRIALS_PER_RUN} 张。")
@@ -422,8 +384,9 @@ if __name__ == "__main__":
422
  if not SUBJECTS: print("警告: SUBJECTS 列表为空。")
423
  else: print(f"Subjects: {SUBJECTS}")
424
  print(f"日志保存到 Dataset: '{DATASET_REPO_ID}'")
425
- if BATCH_LOG_FOLDER: print(f" - 批量日志文件夹: '{BATCH_LOG_FOLDER}/'") # 使用 BATCH_LOG_FOLDER
426
- if INDIVIDUAL_LOGS_FOLDER: print(f" - 单个选择日志文件夹: '{INDIVIDUAL_LOGS_FOLDER}/'")
 
427
  if not os.getenv("HF_TOKEN"): print("警告: HF_TOKEN 未设置。日志无法保存。\n 请在 Space Secrets 中设置 HF_TOKEN。")
428
  else: print("HF_TOKEN 已找到。")
429
  path_to_allow_serving_from = BASE_IMAGE_DIR
@@ -431,7 +394,7 @@ if __name__ == "__main__":
431
  if os.path.exists(path_to_allow_serving_from) and os.path.isdir(path_to_allow_serving_from):
432
  allowed_paths_list.append(os.path.abspath(path_to_allow_serving_from))
433
  print(f"Gradio `demo.launch()` 配置 allowed_paths: {allowed_paths_list}")
434
- else: print(f"关键警告:图片基础目录 '{path_to_allow_serving_from}' 不存在或非目录。")
435
  print("启动 Gradio 应用...")
436
  if allowed_paths_list: demo.launch(allowed_paths=allowed_paths_list)
437
  else: demo.launch()
 
34
  SUBJECTS = ["subj01", "subj02", "subj05", "subj07"]
35
  SENTINEL_TRIAL_INTERVAL = 20
36
  NUM_TRIALS_PER_RUN = 100
37
+ LOG_BATCH_SIZE = 5 # <--- 新增:每多少条选择日志进行一次批量上传
38
 
39
  DATASET_REPO_ID = "YanmHa/image-aligned-experiment-data"
40
+ INDIVIDUAL_LOGS_FOLDER = "individual_choice_logs" # 此文件夹可能不再主要使用,但变量保留
41
+ BATCH_LOG_FOLDER = "run_logs_batch"
42
  CSS = ".gr-block {margin-top: 4px !important; margin-bottom: 4px !important;} .compact_button { padding: 4px 8px; min-width: auto; }"
43
 
44
  # ==== 加载所有可用的目标图片 ====
45
  master_image_list = []
46
+ # ... (master_image_list 加载逻辑与您上一版代码相同) ...
47
  if os.path.exists(TARGET_DIR):
48
  try:
49
  master_image_list = sorted(
 
57
  elif not os.path.exists(TARGET_DIR) and os.path.exists(BASE_IMAGE_DIR): print(f"错误:目标目录 '{TARGET_DIR}' 未找到。")
58
  if not master_image_list: print(f"关键错误:由于 '{TARGET_DIR}' 问题,无目标图片,实验无法进行。")
59
 
60
+
61
  # ==== 辅助函数 ====
62
  def get_next_trial_info(current_trial_idx_in_run, current_run_image_list_for_trial, num_trials_in_this_run_for_trial):
63
+ # ... (此函数与您上一版代码完全一致) ...
64
  global TARGET_DIR, METHOD_ROOTS, SUBJECTS, SENTINEL_TRIAL_INTERVAL
65
  if not current_run_image_list_for_trial or current_trial_idx_in_run >= num_trials_in_this_run_for_trial: return None, current_trial_idx_in_run
66
  img_filename_original = current_run_image_list_for_trial[current_trial_idx_in_run]
 
103
  })
104
  return trial_info, current_trial_idx_in_run + 1
105
 
106
+ # def save_single_log_to_hf_dataset(log_entry, user_identifier_str): # <--- 不再需要此函数,注释或删除
107
+ # pass
 
108
 
109
+ # --- 批量保存累积日志的函数 ---
110
+ def save_collected_logs_batch(list_of_log_entries, user_identifier_str, batch_identifier): # batch_identifier 可以是轮次号或时间戳
111
+ global DATASET_REPO_ID, BATCH_LOG_FOLDER
112
  if not list_of_log_entries:
113
+ print("批量保存:没有累积的日志。")
114
+ return True
115
 
116
  identifier_safe = str(user_identifier_str if user_identifier_str else "unknown_user_session").replace('.', '_').replace(':', '_').replace('/', '_').replace(' ', '_')
117
+ print(f"用户 {identifier_safe} - 准备批量保存 {len(list_of_log_entries)} 条日志 (批次标识: {batch_identifier})...")
118
 
119
  try:
120
  token = os.getenv("HF_TOKEN")
121
+ if not token: print("错误:HF_TOKEN 未设置。无法批量保存日志。"); return False
122
+ if not DATASET_REPO_ID: print("错误:DATASET_REPO_ID 未配置。无法批量保存日志。"); return False
 
 
 
 
123
 
124
  api = HfApi(token=token)
 
125
  timestamp_str = datetime.now().strftime('%Y%m%d_%H%M%S_%f')
126
+ batch_filename = f"batch_user-{identifier_safe}_id-{batch_identifier}_{timestamp_str}_logs-{len(list_of_log_entries)}.jsonl"
 
 
127
  path_in_repo = f"{BATCH_LOG_FOLDER}/{identifier_safe}/{batch_filename}"
128
 
129
  jsonl_content = ""
130
  for log_entry in list_of_log_entries:
131
  try:
132
+ if isinstance(log_entry, dict): jsonl_content += json.dumps(log_entry, ensure_ascii=False) + "\n"
133
+ else: print(f"警告:批量保存时,日志条目非字典:{log_entry}")
 
 
134
  except Exception as json_err:
135
+ print(f"错误:批量保存序列化单条日志时出错: {log_entry}. 错误: {json_err}")
136
+ jsonl_content += json.dumps({"error": "serialization_failed_in_batch", "original_data_preview": str(log_entry)[:100],"timestamp": datetime.now().isoformat()}, ensure_ascii=False) + "\n"
 
 
137
 
138
+ if not jsonl_content.strip(): print(f"用户 {identifier_safe} (批次 {batch_identifier}) 无可序列化日志。"); return True
 
 
139
 
140
  log_bytes = jsonl_content.encode('utf-8')
141
  file_like_object = io.BytesIO(log_bytes)
142
 
143
  print(f"准备批量上传日志文件: {path_in_repo} ({len(log_bytes)} bytes)")
144
  api.upload_file(
145
+ path_or_fileobj=file_like_object, path_in_repo=path_in_repo, repo_id=DATASET_REPO_ID, repo_type="dataset",
146
+ commit_message=f"Batch logs for user {identifier_safe}, batch_id {batch_identifier} ({len(list_of_log_entries)} entries)"
 
 
 
147
  )
148
  print(f"批量日志已成功保存到 HF Dataset: {DATASET_REPO_ID}/{path_in_repo}")
149
+ return True
150
  except Exception as e:
151
+ print(f"批量保存日志 (user {identifier_safe}, batch_id {batch_identifier}) 失败: {e}"); traceback.print_exc()
152
+ return False
 
153
 
154
  # ==== 主要的 Gradio 事件处理函数 ====
155
  def process_experiment_step(
 
157
  s_current_run_image_list_val, s_num_trials_this_run_val,
158
  action_type=None, choice_value=None, request: gr.Request = None
159
  ):
160
+ global master_image_list, NUM_TRIALS_PER_RUN, outputs_ui_components_definition, LOG_BATCH_SIZE
161
+
162
  output_s_trial_idx = s_trial_idx_val; output_s_run_no = s_run_no_val
163
  output_s_user_logs = list(s_user_logs_val); output_s_current_trial_data = dict(s_current_trial_data_val) if s_current_trial_data_val else {}
164
  output_s_user_session_id = s_user_session_id_val; output_s_current_run_image_list = list(s_current_run_image_list_val)
 
173
  if action_type == "record_choice":
174
  if output_s_current_trial_data.get("data") and output_s_current_trial_data["data"].get("left_internal_label"):
175
  chosen_internal_label = (output_s_current_trial_data["data"]["left_internal_label"] if choice_value == "left" else output_s_current_trial_data["data"]["right_internal_label"])
176
+ # ... (log_entry 创建逻辑,与上一版一致,包含 chosen_method, chosen_subject, chosen_filename) ...
177
  parsed_chosen_method, parsed_chosen_subject, parsed_chosen_filename = "N/A", "N/A", "N/A"
178
  if chosen_internal_label == "目标图像": parsed_chosen_method, parsed_chosen_subject, parsed_chosen_filename = "TARGET", "GT", output_s_current_trial_data["data"]["image_id"]
179
  else:
 
181
  if len(parts) == 3: parsed_chosen_method, parsed_chosen_subject, parsed_chosen_filename = parts[0].strip(), parts[1].strip(), parts[2].strip()
182
  elif len(parts) == 2: parsed_chosen_method, parsed_chosen_subject = parts[0].strip(), parts[1].strip()
183
  elif len(parts) == 1: parsed_chosen_method = parts[0].strip()
184
+ log_entry = {
185
  "timestamp": datetime.now().isoformat(), "user_identifier": user_identifier_for_logging, "run_no": output_s_run_no,
186
  "image_id": output_s_current_trial_data["data"]["image_id"],
187
  "left_internal_label": output_s_current_trial_data["data"]["left_internal_label"],
 
191
  "trial_sequence_in_run": output_s_current_trial_data["data"]["cur_no"],
192
  "is_sentinel": output_s_current_trial_data["data"]["is_sentinel"]
193
  }
194
+ output_s_user_logs.append(log_entry)
195
  print(f"用户 {user_identifier_for_logging} 记录选择 (img: {log_entry['image_id']})。当前批次日志数: {len(output_s_user_logs)}")
196
+
197
+ # !!! 修改:每 LOG_BATCH_SIZE 条选择后尝试批量保存 !!!
198
+ if len(output_s_user_logs) >= LOG_BATCH_SIZE:
199
+ print(f"累积日志达到 {LOG_BATCH_SIZE} 条,准备批量保存...")
200
+ # 使用当前轮次号和时间戳组合作为批次标识符,或者仅用时间戳
201
+ batch_id_for_filename = f"run{output_s_run_no}_trial{output_s_trial_idx}_count{len(output_s_user_logs)}"
202
+ save_success = save_collected_logs_batch(list(output_s_user_logs), user_identifier_for_logging, batch_id_for_filename)
203
+ if save_success:
204
+ print("批量日志已成功(或尝试)保存,将清空累积日志列表。")
205
+ output_s_user_logs = [] # 清空已保存的日志
206
+ else:
207
+ print("警告:批量日志保存失败。日志将继续累积,下次达到阈值时重试。")
208
  else:
209
  # ... (错误处理) ...
210
+ print(f"用户 {user_identifier_for_logging} 错误:记录选择时数据为空!") # ... (rest of error handling) ...
211
  error_ui_updates = create_ui_error_tuple("记录选择时内部错误。", f"用户ID: {user_id_display_text} | 进度:{output_s_trial_idx}/{output_s_num_trials_this_run}")
212
  return output_s_trial_idx, output_s_run_no, output_s_user_logs, output_s_current_trial_data, output_s_user_session_id, output_s_current_run_image_list, output_s_num_trials_this_run, *error_ui_updates
213
 
214
+ # --- "start_experiment" 逻辑 ---
215
  if action_type == "start_experiment":
216
  is_first = (output_s_num_trials_this_run == 0 and output_s_trial_idx == 0 and output_s_run_no == 1)
217
  is_completed_for_restart = (output_s_num_trials_this_run > 0 and output_s_trial_idx >= output_s_num_trials_this_run)
 
218
  if is_first or is_completed_for_restart:
219
+ if not master_image_list: error_ui = create_ui_error_tuple("错误: 无可用目标图片!", f"用户ID: {user_id_display_text} | 进度: 0/0"); return 0, output_s_run_no, output_s_user_logs, {}, output_s_user_session_id, [], 0, *error_ui
 
 
 
220
 
221
+ # 如果是完成一轮后重启,并且此时累积的日志未达到 LOG_BATCH_SIZE,它们会保留到下一批
222
+ # 轮次号的增加在这里是正确的,用于显示
223
  if is_completed_for_restart:
224
+ output_s_run_no += 1
225
+
 
 
 
 
226
  num_avail = len(master_image_list); run_size = min(num_avail, NUM_TRIALS_PER_RUN)
227
  if run_size == 0: error_ui = create_ui_error_tuple("错误: 采样图片数为0!", f"用户ID: {user_id_display_text} | 进度: 0/0"); return 0, output_s_run_no, output_s_user_logs, {}, output_s_user_session_id, [], 0, *error_ui
228
 
229
  output_s_current_run_image_list = random.sample(master_image_list, run_size)
230
  output_s_num_trials_this_run = run_size
231
+ output_s_trial_idx = 0
232
+ # output_s_user_logs 不在这里重置,它会持续累积直到达到 LOG_BATCH_SIZE
233
  output_s_current_trial_data = {}
234
 
235
+ # 用户会话ID在欢迎页已设置,这里不再修改,除非有特殊需求
 
 
 
236
  print(f"开始/继续轮次 {output_s_run_no} (用户ID: {output_s_user_session_id}). 随机选择 {output_s_num_trials_this_run} 张图片.")
237
+ else: # 在轮次中途点击开始
 
238
  print(f"用户 {user_identifier_for_logging} 在第 {output_s_run_no} 轮,试验 {output_s_trial_idx} 点击开始,但轮次未完成。忽略。")
239
  no_change_ui = create_no_change_tuple()
240
  return output_s_trial_idx, output_s_run_no, output_s_user_logs, output_s_current_trial_data, output_s_user_session_id, output_s_current_run_image_list, output_s_num_trials_this_run, *no_change_ui
241
+
242
+ # --- 轮次结束的UI处理 (不再包含批量保存逻辑) ---
243
  if output_s_trial_idx >= output_s_num_trials_this_run and output_s_num_trials_this_run > 0:
244
+ # 仅打印完成信息,实际的批量保存由 record_choice 中的计数器触发
245
+ print(f"用户 {output_s_user_session_id} 已完成第 {output_s_run_no} 轮。等待下一批或下一轮开始。")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
246
  prog_text = f"用户ID: {output_s_user_session_id} | 进度:{output_s_num_trials_this_run}/{output_s_num_trials_this_run} | 第 {output_s_run_no} 轮 🎉"
247
+ ui_updates = list(create_ui_error_tuple(f"🎉 第 {output_s_run_no} 轮完成!请点击“开始试验 / 下一轮”继续或开始新批次。", prog_text))
248
+ # ... (UI 更新与之前一致) ...
249
  ui_updates[7]=gr.update(interactive=True); ui_updates[8]=gr.update(interactive=False); ui_updates[9]=gr.update(interactive=False)
250
  ui_updates[0]=gr.update(value=None,visible=False); ui_updates[1]=gr.update(value=None,visible=False); ui_updates[2]=gr.update(value=None,visible=False)
251
  yield output_s_trial_idx, output_s_run_no, output_s_user_logs, output_s_current_trial_data, output_s_user_session_id, output_s_current_run_image_list, output_s_num_trials_this_run, *ui_updates; return
252
+
253
+ # --- 获取并显示下一个试验 (与之前一致) ---
254
+ # ... (省略这部分代码,与您上一版相同,确保 prog_text 使用了更新后的用户ID格式) ...
255
  if not output_s_current_run_image_list or output_s_num_trials_this_run == 0:
256
  error_ui = create_ui_error_tuple("错误: 无法加载试验图片 (列表为空)", f"用户ID: {user_id_display_text} | 进度: N/A")
257
  return output_s_trial_idx, output_s_run_no, output_s_user_logs, {"data": None}, output_s_user_session_id, [], 0, *error_ui
 
275
  ui_show_candidates_updates[7]=gr.update(interactive=False); ui_show_candidates_updates[8]=gr.update(interactive=True); ui_show_candidates_updates[9]=gr.update(interactive=True)
276
  yield next_s_trial_idx_for_state, output_s_run_no, output_s_user_logs, output_s_current_trial_data, output_s_user_session_id, output_s_current_run_image_list, output_s_num_trials_this_run, *ui_show_candidates_updates
277
 
278
+ # ==== Gradio UI 定义 ====
279
  welcome_page_markdown = """
280
  ## 欢迎加入实验!
281
  您好!非常感谢您抽出宝贵时间参与我们的视觉偏好评估实验。您的选择将帮助我们改进重建算法,让机器生成的图像更贴近人类视觉体验!
282
+
283
  1. **实验目的**
284
  通过比较两幅 重建图像 与原始 目标图像 的相似度。
285
+
286
  2. **操作流程**
287
  * 点击下方的「我已阅读并同意开始实验」按钮。
288
  * 然后点击主实验界面的「开始试验 / 下一轮」按钮。
 
290
  * 随后自动切换到 **两张重建图像**。
291
  * 根据刚才的观察记忆,选出您认为与目标图像最相似的一张。
292
  * 选择后系统会自动进入下一轮比较。
293
+
294
  3. **温馨提示**
295
  * 请勿刷新或关闭页面,以免中断实验。
296
  * 若图片加载稍有延迟,请耐心等待;持续异常可联系邮箱 yangminghan@bupt.edu.cn。
297
  * 本实验将保护您的任何个人隐私信息,所有数据仅用于学术研究,请您认真选择和填写。
298
+
299
  4. **奖励说明**
300
  * 完成全部轮次后,请截图记录您所完成的实验总数(可累积,页面左下角将显示进度,请保证截取到为您分配的ID,轮次)。
301
  * 将截图发送至邮箱 yangminghan@bupt.edu.cn,我们将在核验后发放奖励。
302
+
303
  再次感谢您的参与与支持!您每一次认真选择都对我们的研究意义重大。祝您一切顺利,实验愉快!
304
  """
 
305
  def handle_agree_and_start(name, gender, age, education, request: gr.Request):
306
+ # ... (此函数与您上一版代码完全一致) ...
307
  error_messages_list = []
308
  if not name or str(name).strip() == "": error_messages_list.append("姓名 不能为空。")
309
  if gender is None or str(gender).strip() == "": error_messages_list.append("性别 必须选择。")
 
313
  except (ValueError, TypeError): error_messages_list.append("年龄必须是一个有效的数字。")
314
  else:
315
  if not (1 <= num_age <= 120): error_messages_list.append("年龄必须在 1 到 120 之间。")
316
+ if education is None or str(education).strip() == "": error_messages_list.append("学历 必须选择。") # 修正:如果选“其他”但希望是有效输入,这里的逻辑可能需要调整
 
 
 
 
317
  if error_messages_list:
318
  full_error_message = "请修正以下错误:\n" + "\n".join([f"- {msg}" for msg in error_messages_list])
319
  print(f"用户输入验证失败: {full_error_message}")
320
  return gr.update(), False, gr.update(visible=True), gr.update(visible=False), full_error_message
 
321
  s_name = str(name).strip().replace(" ","_").replace("/","_").replace("\\","_")
322
  s_gender = str(gender).strip().replace(" ","_").replace("/","_").replace("\\","_")
323
  s_age = str(int(float(age)))
 
327
  return user_id_str, True, gr.update(visible=False), gr.update(visible=True), ""
328
 
329
  with gr.Blocks(css=CSS, title="图像重建主观评估") as demo:
330
+ # ... (所有 State 变量定义,与您上一版代码相同) ...
331
+ s_show_experiment_ui = gr.State(False); s_trial_index = gr.State(0); s_run_no = gr.State(1)
332
+ s_user_logs = gr.State([]); s_current_trial_data = gr.State({}); s_user_session_id = gr.State(None)
333
  s_current_run_image_list = gr.State([]); s_num_trials_this_run = gr.State(0)
334
+
335
  welcome_container = gr.Column(visible=True)
336
  experiment_container = gr.Column(visible=False)
337
 
338
  with welcome_container:
339
+ # ... (欢迎页UI,与您上一版代码相同) ...
340
  gr.Markdown(welcome_page_markdown)
341
+ with gr.Row(): user_name_input = gr.Textbox(label="请输入您的姓名或代号 (例如 ZS 或 User001)", placeholder="例如:张三 -> ZS"); user_gender_input = gr.Radio(label="性别", choices=["男", "女", "其他", "不愿透露"], value="不愿透露")
342
+ with gr.Row(): user_age_input = gr.Number(label="年龄 (请输入1-120的整数)", minimum=1, maximum=120, step=1, value=25); user_education_input = gr.Dropdown(label="学历", choices=["其他","初中及以下","高中(含中专)", "大专", "本科", "硕士", "博士(含在读)"], value="本科")
 
 
 
 
343
  welcome_error_msg = gr.Markdown(value="")
344
  btn_agree_and_start = gr.Button("我已阅读上述说明并同意参与实验")
345
 
346
  with experiment_container:
347
+ # ... (实验主界面UI,与您上一版代码相同,包括新的按钮布局和隐藏的标签) ...
348
+ gr.Markdown("## 🧠 图像重建主观评估实验"); gr.Markdown(f"每轮实验大约有 {NUM_TRIALS_PER_RUN} 次比较。")
349
  with gr.Row():
350
+ with gr.Column(scale=1, min_width=300): left_img = gr.Image(label="左候选图", visible=False, height=400, interactive=False); left_lbl = gr.Textbox(label="左图信息", visible=False, interactive=False, max_lines=1); btn_left = gr.Button("选择左图 (更相似)", interactive=False, elem_classes="compact_button")
351
+ with gr.Column(scale=1, min_width=300): right_img = gr.Image(label="右候选图", visible=False, height=400, interactive=False); right_lbl = gr.Textbox(label="右图信息", visible=False, interactive=False, max_lines=1); btn_right = gr.Button("选择右图 (更相似)", interactive=False, elem_classes="compact_button")
 
 
 
 
 
 
352
  with gr.Row(): target_img = gr.Image(label="目标图像 (观察3秒后消失)", visible=False, height=400, interactive=False)
353
  with gr.Row(): status_text = gr.Markdown(value="请点击“开始试验 / 下一轮”按钮。")
354
  with gr.Row(): progress_text = gr.Markdown()
 
368
  s_current_run_image_list, s_num_trials_this_run, *outputs_ui_components_definition
369
  ]
370
 
371
+ btn_agree_and_start.click(fn=handle_agree_and_start, inputs=[user_name_input, user_gender_input, user_age_input, user_education_input], outputs=[s_user_session_id, s_show_experiment_ui, welcome_container, experiment_container, welcome_error_msg])
 
 
 
 
372
  btn_start.click(fn=partial(process_experiment_step, action_type="start_experiment"), inputs=click_inputs_base, outputs=event_outputs, queue=True)
373
  btn_left.click(fn=partial(process_experiment_step, action_type="record_choice", choice_value="left"), inputs=click_inputs_base, outputs=event_outputs, queue=True)
374
  btn_right.click(fn=partial(process_experiment_step, action_type="record_choice", choice_value="right"), inputs=click_inputs_base, outputs=event_outputs, queue=True)
375
 
376
+ # ==== 程序入口 ====
377
  if __name__ == "__main__":
378
+ # ... (与您上一版相同的启动检查和打印逻辑, 确保 allowed_paths 正确) ...
379
  if not master_image_list: print("\n关键错误:程序无法启动,因无目标图片。"); exit()
380
  else:
381
  print(f"从 '{TARGET_DIR}' 加载 {len(master_image_list)} 张目标图片。每轮选 {NUM_TRIALS_PER_RUN} 张。")
 
384
  if not SUBJECTS: print("警告: SUBJECTS 列表为空。")
385
  else: print(f"Subjects: {SUBJECTS}")
386
  print(f"日志保存到 Dataset: '{DATASET_REPO_ID}'")
387
+ if BATCH_LOG_FOLDER: print(f" - 批量日志文件夹: '{BATCH_LOG_FOLDER}/'")
388
+ # INDIVIDUAL_LOGS_FOLDER 的打印可以保留或移除,因为现在主要用批量
389
+ # if INDIVIDUAL_LOGS_FOLDER: print(f" - 单个选择日志文件夹 (可能未使用): '{INDIVIDUAL_LOGS_FOLDER}/'")
390
  if not os.getenv("HF_TOKEN"): print("警告: HF_TOKEN 未设置。日志无法保存。\n 请在 Space Secrets 中设置 HF_TOKEN。")
391
  else: print("HF_TOKEN 已找到。")
392
  path_to_allow_serving_from = BASE_IMAGE_DIR
 
394
  if os.path.exists(path_to_allow_serving_from) and os.path.isdir(path_to_allow_serving_from):
395
  allowed_paths_list.append(os.path.abspath(path_to_allow_serving_from))
396
  print(f"Gradio `demo.launch()` 配置 allowed_paths: {allowed_paths_list}")
397
+ else: print(f"关键警告:图片基础目录 '{path_to_allow_serving_from}' ({os.path.abspath(path_to_allow_serving_from) if path_to_allow_serving_from else 'N/A'}) 不存在或非目录。")
398
  print("启动 Gradio 应用...")
399
  if allowed_paths_list: demo.launch(allowed_paths=allowed_paths_list)
400
  else: demo.launch()