Spaces:

MemDirector
/

VideoEval_user

Sleeping

App Files Files Community

WHU1psh committed on Apr 21

Commit

74e3caf

verified ·

1 Parent(s): bde3bc7

Update app.py

Browse files

Files changed (1) hide show

app.py +82 -2

app.py CHANGED Viewed

@@ -7,13 +7,14 @@ import json
 import os
 import threading
 import html
 from collections import defaultdict
 from datetime import datetime
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Tuple
 import gradio as gr
-from huggingface_hub import CommitScheduler, snapshot_download
 # 路径配置（按用户要求）
 # Spaces 推荐优先读取当前 Space 仓库内文件（app.py 同级）
@@ -35,6 +36,7 @@ VIDEO_DIR = INPUT_DIR / "video"
 Path(OUTPUT_DIR).mkdir(parents=True, exist_ok=True)
 scheduler: Optional[CommitScheduler] = None
 def _set_paths(input_dir: Path, output_dir: Path) -> None:
@@ -95,7 +97,8 @@ def _try_download_from_hub() -> bool:
     else:
         return False
-    hub_output = hub_root / "user_study_results"
     _set_paths(hub_input, hub_output)
     return True
@@ -391,8 +394,47 @@ def load_evaluated_method_story_pairs() -> set:
     return evaluated
 def build_pending_samples() -> List[Dict[str, Any]]:
     """构建待评估样本池，并分配匿名ID。"""
     all_samples = load_dataset_index()
     evaluated_pairs = load_evaluated_method_story_pairs()
     pending = [
@@ -502,6 +544,39 @@ def recompute_method_aggregates() -> Path:
     return out_path
 def build_sample_brief_html(sample: Dict[str, Any], index: int, total: int) -> str:
     story = sample.get("story_text") or "(未找到对应 story 文本，请检查 clip_movie_story 下是否有同名 txt)"
     safe_story = html.escape(story)
@@ -620,6 +695,7 @@ def create_app():
             evaluator_id = (evaluator_id or "anonymous").strip() or "anonymous"
             # 防重复：方法-故事只允许评估一次
             evaluated_pairs = load_evaluated_method_story_pairs()
             if (sample["method"], sample["story_name"]) in evaluated_pairs:
                 return "⚠️ 该方法-故事已经被评估过一次，请选择其他匿名样本。"
@@ -636,6 +712,10 @@ def create_app():
             with SAVE_LOCK:
                 single_path = save_single_result(sample, evaluator_id, scores, reasons, summary or "")
                 agg_path = recompute_method_aggregates()
             _ = (single_path, agg_path)
             return ""

 import os
 import threading
 import html
+import shutil
 from collections import defaultdict
 from datetime import datetime
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Tuple
 import gradio as gr
+from huggingface_hub import CommitScheduler, HfApi, snapshot_download
 # 路径配置（按用户要求）
 # Spaces 推荐优先读取当前 Space 仓库内文件（app.py 同级）
 Path(OUTPUT_DIR).mkdir(parents=True, exist_ok=True)
 scheduler: Optional[CommitScheduler] = None
+hf_api = HfApi()
 def _set_paths(input_dir: Path, output_dir: Path) -> None:
     else:
         return False
+    # 结果统一写到 Space 仓库内目录，避免写到缓存目录后用户难以定位
+    hub_output = LOCAL_OUTPUT_DIR
     _set_paths(hub_input, hub_output)
     return True
     return evaluated
def sync_results_from_hub_to_local() -> None:
    """Mirror the remote results repository into the local ``OUTPUT_DIR``.

    Downloads ``user_study_results/**`` from the dataset repo
    ``RESULTS_REPO_ID`` into ``APP_DIR / ".hf_results_sync_cache"``, then
    replaces the local ``raw_results`` directory and
    ``method_aggregates.json`` with the downloaded copies. It is only used to
    decide which samples have already been evaluated, so that this decision
    follows the remote repository.

    The sync is best-effort and returns without touching local files when
    ``RESULTS_REPO_ID`` is not configured, when the download fails (the error
    is printed), or when the snapshot has no ``user_study_results`` directory.

    NOTE(review): the local ``raw_results`` directory is discarded before the
    remote copy is brought in, so results that were saved locally but never
    reached the remote repository are dropped here -- confirm this is intended.
    """
    if not RESULTS_REPO_ID:
        return

    sync_root = APP_DIR / ".hf_results_sync_cache"
    try:
        snapshot_download(
            repo_id=RESULTS_REPO_ID,
            repo_type="dataset",
            local_dir=str(sync_root),
            token=HF_TOKEN,
            allow_patterns=["user_study_results/**"],
        )
    except Exception as e:
        # A failed pull must not break the UI; keep whatever is on disk.
        print(f"[SYNC] pull results repo failed: {e}")
        return

    remote_results_root = sync_root / "user_study_results"
    if not remote_results_root.exists():
        return

    remote_raw = remote_results_root / "raw_results"
    local_raw = OUTPUT_DIR / "raw_results"
    # Remove-then-copy without an exists() pre-check: another request handler
    # may delete or recreate the directory between a check and the action.
    try:
        shutil.rmtree(local_raw)
    except FileNotFoundError:
        pass
    if remote_raw.exists():
        # dirs_exist_ok tolerates a concurrent sync having recreated local_raw.
        shutil.copytree(remote_raw, local_raw, dirs_exist_ok=True)

    remote_agg = remote_results_root / "method_aggregates.json"
    local_agg = OUTPUT_DIR / "method_aggregates.json"
    if remote_agg.exists():
        local_agg.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(remote_agg, local_agg)
 def build_pending_samples() -> List[Dict[str, Any]]:
     """构建待评估样本池，并分配匿名ID。"""
+    sync_results_from_hub_to_local()
     all_samples = load_dataset_index()
     evaluated_pairs = load_evaluated_method_story_pairs()
     pending = [
     return out_path
def push_result_files_to_hub(single_path: Path, agg_path: Path) -> Optional[str]:
    """Upload freshly written result files to ``RESULTS_REPO_ID`` right away.

    This pushes immediately after a submission instead of relying only on the
    periodic ``CommitScheduler``.

    Args:
        single_path: Path of the individual result file; must be located
            under ``OUTPUT_DIR``.
        agg_path: Path of the aggregate file; must be located under
            ``OUTPUT_DIR``.

    Returns:
        ``None`` on success, otherwise a non-empty string describing why the
        upload failed. Callers test the result for truthiness, so a failure
        is never reported as an empty string.
    """
    if not RESULTS_REPO_ID:
        return "未配置 RESULTS_REPO_ID。"
    if not HF_TOKEN:
        return "未配置 HF_TOKEN，无法写入 Hugging Face 远程仓库。"

    try:
        # Upload the single result first, then the aggregate, each mirrored
        # under user_study_results/ at its path relative to OUTPUT_DIR.
        for local_path in (single_path, agg_path):
            rel_path = local_path.relative_to(OUTPUT_DIR).as_posix()
            hf_api.upload_file(
                path_or_fileobj=str(local_path),
                path_in_repo=f"user_study_results/{rel_path}",
                repo_id=RESULTS_REPO_ID,
                repo_type="dataset",
                token=HF_TOKEN,
            )
    except Exception as e:
        # str(e) can be empty for exceptions raised without a message, which
        # the caller would mistake for success; always include the type name.
        return f"{type(e).__name__}: {e}"
    return None
 def build_sample_brief_html(sample: Dict[str, Any], index: int, total: int) -> str:
     story = sample.get("story_text") or "(未找到对应 story 文本，请检查 clip_movie_story 下是否有同名 txt)"
     safe_story = html.escape(story)
             evaluator_id = (evaluator_id or "anonymous").strip() or "anonymous"
             # 防重复：方法-故事只允许评估一次
+            sync_results_from_hub_to_local()
             evaluated_pairs = load_evaluated_method_story_pairs()
             if (sample["method"], sample["story_name"]) in evaluated_pairs:
                 return "⚠️ 该方法-故事已经被评估过一次，请选择其他匿名样本。"
             with SAVE_LOCK:
                 single_path = save_single_result(sample, evaluator_id, scores, reasons, summary or "")
                 agg_path = recompute_method_aggregates()
+                push_err = push_result_files_to_hub(single_path, agg_path)
+            if push_err:
+                return f"❌ 结果已本地保存，但写入远程 `{RESULTS_REPO_ID}` 失败：{push_err}"
             _ = (single_path, agg_path)
             return ""