WHU1psh committed on
Commit
74e3caf
·
verified ·
1 Parent(s): bde3bc7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +82 -2
app.py CHANGED
@@ -7,13 +7,14 @@ import json
7
  import os
8
  import threading
9
  import html
 
10
  from collections import defaultdict
11
  from datetime import datetime
12
  from pathlib import Path
13
  from typing import Any, Dict, List, Optional, Tuple
14
 
15
  import gradio as gr
16
- from huggingface_hub import CommitScheduler, snapshot_download
17
 
18
  # 路径配置(按用户要求)
19
  # Spaces 推荐优先读取当前 Space 仓库内文件(app.py 同级)
@@ -35,6 +36,7 @@ VIDEO_DIR = INPUT_DIR / "video"
35
 
36
  Path(OUTPUT_DIR).mkdir(parents=True, exist_ok=True)
37
  scheduler: Optional[CommitScheduler] = None
 
38
 
39
 
40
  def _set_paths(input_dir: Path, output_dir: Path) -> None:
@@ -95,7 +97,8 @@ def _try_download_from_hub() -> bool:
95
  else:
96
  return False
97
 
98
- hub_output = hub_root / "user_study_results"
 
99
  _set_paths(hub_input, hub_output)
100
  return True
101
 
@@ -391,8 +394,47 @@ def load_evaluated_method_story_pairs() -> set:
391
  return evaluated
392
 
393
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
394
  def build_pending_samples() -> List[Dict[str, Any]]:
395
  """构建待评估样本池,并分配匿名ID。"""
 
396
  all_samples = load_dataset_index()
397
  evaluated_pairs = load_evaluated_method_story_pairs()
398
  pending = [
@@ -502,6 +544,39 @@ def recompute_method_aggregates() -> Path:
502
  return out_path
503
 
504
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
505
  def build_sample_brief_html(sample: Dict[str, Any], index: int, total: int) -> str:
506
  story = sample.get("story_text") or "(未找到对应 story 文本,请检查 clip_movie_story 下是否有同名 txt)"
507
  safe_story = html.escape(story)
@@ -620,6 +695,7 @@ def create_app():
620
  evaluator_id = (evaluator_id or "anonymous").strip() or "anonymous"
621
 
622
  # 防重复:方法-故事只允许评估一次
 
623
  evaluated_pairs = load_evaluated_method_story_pairs()
624
  if (sample["method"], sample["story_name"]) in evaluated_pairs:
625
  return "⚠️ 该方法-故事已经被评估过一次,请选择其他匿名样本。"
@@ -636,6 +712,10 @@ def create_app():
636
  with SAVE_LOCK:
637
  single_path = save_single_result(sample, evaluator_id, scores, reasons, summary or "")
638
  agg_path = recompute_method_aggregates()
 
 
 
 
639
 
640
  _ = (single_path, agg_path)
641
  return ""
 
7
  import os
8
  import threading
9
  import html
10
+ import shutil
11
  from collections import defaultdict
12
  from datetime import datetime
13
  from pathlib import Path
14
  from typing import Any, Dict, List, Optional, Tuple
15
 
16
  import gradio as gr
17
+ from huggingface_hub import CommitScheduler, HfApi, snapshot_download
18
 
19
  # 路径配置(按用户要求)
20
  # Spaces 推荐优先读取当前 Space 仓库内文件(app.py 同级)
 
36
 
37
  Path(OUTPUT_DIR).mkdir(parents=True, exist_ok=True)
38
  scheduler: Optional[CommitScheduler] = None
39
+ hf_api = HfApi()
40
 
41
 
42
  def _set_paths(input_dir: Path, output_dir: Path) -> None:
 
97
  else:
98
  return False
99
 
100
+ # 结果统一写到 Space 仓库内目录,避免写到缓存目录后用户难以定位
101
+ hub_output = LOCAL_OUTPUT_DIR
102
  _set_paths(hub_input, hub_output)
103
  return True
104
 
 
394
  return evaluated
395
 
396
 
397
def sync_results_from_hub_to_local() -> None:
    """
    Pull the latest results from the remote results repo into the local OUTPUT_DIR.

    Used only to decide which samples have already been evaluated, so that the
    display logic follows the remote state. Does nothing when RESULTS_REPO_ID
    is not configured, when the download fails (the error is printed), or when
    the downloaded snapshot has no ``user_study_results`` directory.

    NOTE: the local ``raw_results`` directory is removed before the remote copy
    is put in its place, so any file that exists only locally is discarded.
    """
    if not RESULTS_REPO_ID:
        return

    # The snapshot is downloaded into a dedicated directory next to the app.
    cache_dir = APP_DIR / ".hf_results_sync_cache"
    try:
        snapshot_download(
            repo_id=RESULTS_REPO_ID,
            repo_type="dataset",
            local_dir=str(cache_dir),
            token=HF_TOKEN,
            allow_patterns=["user_study_results/**"],
        )
    except Exception as exc:
        # Best effort: a failed pull leaves the local results untouched.
        print(f"[SYNC] pull results repo failed: {exc}")
        return

    pulled_root = cache_dir / "user_study_results"
    if not pulled_root.exists():
        return

    # Replace the local raw results wholesale with the remote ones.
    src_raw = pulled_root / "raw_results"
    dst_raw = OUTPUT_DIR / "raw_results"
    if dst_raw.exists():
        shutil.rmtree(dst_raw)
    if src_raw.exists():
        shutil.copytree(src_raw, dst_raw)

    # The aggregates file is only overwritten when the remote provides one.
    src_agg = pulled_root / "method_aggregates.json"
    if src_agg.exists():
        dst_agg = OUTPUT_DIR / "method_aggregates.json"
        dst_agg.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(src_agg, dst_agg)
433
+
434
+
435
  def build_pending_samples() -> List[Dict[str, Any]]:
436
  """构建待评估样本池,并分配匿名ID。"""
437
+ sync_results_from_hub_to_local()
438
  all_samples = load_dataset_index()
439
  evaluated_pairs = load_evaluated_method_story_pairs()
440
  pending = [
 
544
  return out_path
545
 
546
 
547
def push_result_files_to_hub(single_path: Path, agg_path: Path) -> Optional[str]:
    """
    Upload the result files to RESULTS_REPO_ID immediately after a submission,
    instead of relying only on the periodic CommitScheduler.

    Args:
        single_path: Result file of the single submission, located under OUTPUT_DIR.
        agg_path: Aggregates file, located under OUTPUT_DIR.

    Returns:
        None on success. On failure, a non-empty string describing the reason
        (missing configuration, a path outside OUTPUT_DIR, or an upload error).
    """
    if not RESULTS_REPO_ID:
        return "未配置 RESULTS_REPO_ID。"
    if not HF_TOKEN:
        return "未配置 HF_TOKEN,无法写入 Hugging Face 远程仓库。"

    try:
        # Upload the single result first, then the aggregates, each mirrored
        # under user_study_results/ at its path relative to OUTPUT_DIR.
        for local_path in (single_path, agg_path):
            rel_path = local_path.relative_to(OUTPUT_DIR).as_posix()
            hf_api.upload_file(
                path_or_fileobj=str(local_path),
                path_in_repo=f"user_study_results/{rel_path}",
                repo_id=RESULTS_REPO_ID,
                repo_type="dataset",
                token=HF_TOKEN,
            )
    except Exception as e:
        # Callers test the return value for truthiness, so a failure must never
        # be an empty string: exceptions without a message fall back to repr().
        return str(e) or repr(e)
    return None
578
+
579
+
580
  def build_sample_brief_html(sample: Dict[str, Any], index: int, total: int) -> str:
581
  story = sample.get("story_text") or "(未找到对应 story 文本,请检查 clip_movie_story 下是否有同名 txt)"
582
  safe_story = html.escape(story)
 
695
  evaluator_id = (evaluator_id or "anonymous").strip() or "anonymous"
696
 
697
  # 防重复:方法-故事只允许评估一次
698
+ sync_results_from_hub_to_local()
699
  evaluated_pairs = load_evaluated_method_story_pairs()
700
  if (sample["method"], sample["story_name"]) in evaluated_pairs:
701
  return "⚠️ 该方法-故事已经被评估过一次,请选择其他匿名样本。"
 
712
  with SAVE_LOCK:
713
  single_path = save_single_result(sample, evaluator_id, scores, reasons, summary or "")
714
  agg_path = recompute_method_aggregates()
715
+ push_err = push_result_files_to_hub(single_path, agg_path)
716
+
717
+ if push_err:
718
+ return f"❌ 结果已本地保存,但写入远程 `{RESULTS_REPO_ID}` 失败:{push_err}"
719
 
720
  _ = (single_path, agg_path)
721
  return ""