Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,13 +7,14 @@ import json
|
|
| 7 |
import os
|
| 8 |
import threading
|
| 9 |
import html
|
|
|
|
| 10 |
from collections import defaultdict
|
| 11 |
from datetime import datetime
|
| 12 |
from pathlib import Path
|
| 13 |
from typing import Any, Dict, List, Optional, Tuple
|
| 14 |
|
| 15 |
import gradio as gr
|
| 16 |
-
from huggingface_hub import CommitScheduler, snapshot_download
|
| 17 |
|
| 18 |
# 路径配置(按用户要求)
|
| 19 |
# Spaces 推荐优先读取当前 Space 仓库内文件(app.py 同级)
|
|
@@ -35,6 +36,7 @@ VIDEO_DIR = INPUT_DIR / "video"
|
|
| 35 |
|
| 36 |
Path(OUTPUT_DIR).mkdir(parents=True, exist_ok=True)
|
| 37 |
scheduler: Optional[CommitScheduler] = None
|
|
|
|
| 38 |
|
| 39 |
|
| 40 |
def _set_paths(input_dir: Path, output_dir: Path) -> None:
|
|
@@ -95,7 +97,8 @@ def _try_download_from_hub() -> bool:
|
|
| 95 |
else:
|
| 96 |
return False
|
| 97 |
|
| 98 |
-
|
|
|
|
| 99 |
_set_paths(hub_input, hub_output)
|
| 100 |
return True
|
| 101 |
|
|
@@ -391,8 +394,47 @@ def load_evaluated_method_story_pairs() -> set:
|
|
| 391 |
return evaluated
|
| 392 |
|
| 393 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 394 |
def build_pending_samples() -> List[Dict[str, Any]]:
|
| 395 |
"""构建待评估样本池,并分配匿名ID。"""
|
|
|
|
| 396 |
all_samples = load_dataset_index()
|
| 397 |
evaluated_pairs = load_evaluated_method_story_pairs()
|
| 398 |
pending = [
|
|
@@ -502,6 +544,39 @@ def recompute_method_aggregates() -> Path:
|
|
| 502 |
return out_path
|
| 503 |
|
| 504 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 505 |
def build_sample_brief_html(sample: Dict[str, Any], index: int, total: int) -> str:
|
| 506 |
story = sample.get("story_text") or "(未找到对应 story 文本,请检查 clip_movie_story 下是否有同名 txt)"
|
| 507 |
safe_story = html.escape(story)
|
|
@@ -620,6 +695,7 @@ def create_app():
|
|
| 620 |
evaluator_id = (evaluator_id or "anonymous").strip() or "anonymous"
|
| 621 |
|
| 622 |
# 防重复:方法-故事只允许评估一次
|
|
|
|
| 623 |
evaluated_pairs = load_evaluated_method_story_pairs()
|
| 624 |
if (sample["method"], sample["story_name"]) in evaluated_pairs:
|
| 625 |
return "⚠️ 该方法-故事已经被评估过一次,请选择其他匿名样本。"
|
|
@@ -636,6 +712,10 @@ def create_app():
|
|
| 636 |
with SAVE_LOCK:
|
| 637 |
single_path = save_single_result(sample, evaluator_id, scores, reasons, summary or "")
|
| 638 |
agg_path = recompute_method_aggregates()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 639 |
|
| 640 |
_ = (single_path, agg_path)
|
| 641 |
return ""
|
|
|
|
| 7 |
import os
|
| 8 |
import threading
|
| 9 |
import html
|
| 10 |
+
import shutil
|
| 11 |
from collections import defaultdict
|
| 12 |
from datetime import datetime
|
| 13 |
from pathlib import Path
|
| 14 |
from typing import Any, Dict, List, Optional, Tuple
|
| 15 |
|
| 16 |
import gradio as gr
|
| 17 |
+
from huggingface_hub import CommitScheduler, HfApi, snapshot_download
|
| 18 |
|
| 19 |
# 路径配置(按用户要求)
|
| 20 |
# Spaces 推荐优先读取当前 Space 仓库内文件(app.py 同级)
|
|
|
|
| 36 |
|
| 37 |
Path(OUTPUT_DIR).mkdir(parents=True, exist_ok=True)
|
| 38 |
scheduler: Optional[CommitScheduler] = None
|
| 39 |
+
hf_api = HfApi()
|
| 40 |
|
| 41 |
|
| 42 |
def _set_paths(input_dir: Path, output_dir: Path) -> None:
|
|
|
|
| 97 |
else:
|
| 98 |
return False
|
| 99 |
|
| 100 |
+
# 结果统一写到 Space 仓库内目录,避免写到缓存目录后用户难以定位
|
| 101 |
+
hub_output = LOCAL_OUTPUT_DIR
|
| 102 |
_set_paths(hub_input, hub_output)
|
| 103 |
return True
|
| 104 |
|
|
|
|
| 394 |
return evaluated
|
| 395 |
|
| 396 |
|
| 397 |
+
def sync_results_from_hub_to_local() -> None:
    """Pull the latest results from the remote results repo into OUTPUT_DIR.

    The local copy is only used to decide which samples have already been
    evaluated, so that the display logic follows the remote state.

    Behavior:
        * Does nothing when RESULTS_REPO_ID is not configured.
        * A failed download is logged and the local files are left untouched.
        * Does nothing when the snapshot has no ``user_study_results`` folder.
        * Otherwise ``OUTPUT_DIR / "raw_results"`` is replaced by the
          downloaded ``raw_results`` folder (or removed when the snapshot has
          none), and ``method_aggregates.json`` is overwritten when the
          snapshot contains it.

    The replacement of ``raw_results`` is staged: the downloaded folder is
    first copied next to the destination and only swapped in once the copy
    has completed, so a copy error leaves the existing local results intact
    instead of wiped or half-populated.
    """
    # Function-scope import: the module's import block is not modified here.
    import tempfile

    if not RESULTS_REPO_ID:
        return

    sync_root = APP_DIR / ".hf_results_sync_cache"
    try:
        snapshot_download(
            repo_id=RESULTS_REPO_ID,
            repo_type="dataset",
            local_dir=str(sync_root),
            token=HF_TOKEN,
            allow_patterns=["user_study_results/**"],
        )
    except Exception as e:
        print(f"[SYNC] pull results repo failed: {e}")
        return

    remote_results_root = sync_root / "user_study_results"
    if not remote_results_root.exists():
        return

    remote_raw = remote_results_root / "raw_results"
    local_raw = OUTPUT_DIR / "raw_results"
    if remote_raw.exists():
        local_raw.parent.mkdir(parents=True, exist_ok=True)
        # Stage inside the destination's parent so the final os.replace stays
        # on one filesystem.
        # NOTE(review): if a CommitScheduler watches OUTPUT_DIR, this
        # short-lived dot-directory could be seen by it - confirm.
        staging_parent = Path(
            tempfile.mkdtemp(prefix=".raw_results_sync_", dir=str(local_raw.parent))
        )
        try:
            staged_raw = staging_parent / "raw_results"
            shutil.copytree(remote_raw, staged_raw)
            # Only now touch the live directory: the copy is complete.
            if local_raw.exists():
                shutil.rmtree(local_raw)
            os.replace(staged_raw, local_raw)
        finally:
            # Empty after a successful swap; holds a partial copy on failure.
            shutil.rmtree(staging_parent, ignore_errors=True)
    elif local_raw.exists():
        # The snapshot has no raw results: mirror that locally.
        shutil.rmtree(local_raw)

    remote_agg = remote_results_root / "method_aggregates.json"
    local_agg = OUTPUT_DIR / "method_aggregates.json"
    if remote_agg.exists():
        local_agg.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(remote_agg, local_agg)
|
| 433 |
+
|
| 434 |
+
|
| 435 |
def build_pending_samples() -> List[Dict[str, Any]]:
|
| 436 |
"""构建待评估样本池,并分配匿名ID。"""
|
| 437 |
+
sync_results_from_hub_to_local()
|
| 438 |
all_samples = load_dataset_index()
|
| 439 |
evaluated_pairs = load_evaluated_method_story_pairs()
|
| 440 |
pending = [
|
|
|
|
| 544 |
return out_path
|
| 545 |
|
| 546 |
|
| 547 |
+
def push_result_files_to_hub(single_path: Path, agg_path: Path) -> Optional[str]:
    """Upload the result files to RESULTS_REPO_ID right after a submission.

    This avoids relying only on the periodic CommitScheduler.

    Args:
        single_path: Result file of the current submission.
        agg_path: Aggregates file.

    Both paths are uploaded under ``user_study_results/`` using their path
    relative to OUTPUT_DIR. A path outside OUTPUT_DIR makes ``relative_to``
    raise, which is reported as a failure like any upload error.

    Returns:
        None on success; otherwise a non-empty string with the failure reason.
    """
    if not RESULTS_REPO_ID:
        return "未配置 RESULTS_REPO_ID。"
    if not HF_TOKEN:
        return "未配置 HF_TOKEN,无法写入 Hugging Face 远程仓库。"

    try:
        # Same order as before: the single result first, then the aggregates.
        for local_path in (single_path, agg_path):
            rel_path = local_path.relative_to(OUTPUT_DIR).as_posix()
            hf_api.upload_file(
                path_or_fileobj=str(local_path),
                path_in_repo=f"user_study_results/{rel_path}",
                repo_id=RESULTS_REPO_ID,
                repo_type="dataset",
                token=HF_TOKEN,
            )
    except Exception as e:
        # str(e) is "" for an exception raised without a message, and callers
        # test the return value for truthiness; fall back to repr so that a
        # failure can never be mistaken for success.
        return str(e) or repr(e)
    return None
|
| 578 |
+
|
| 579 |
+
|
| 580 |
def build_sample_brief_html(sample: Dict[str, Any], index: int, total: int) -> str:
|
| 581 |
story = sample.get("story_text") or "(未找到对应 story 文本,请检查 clip_movie_story 下是否有同名 txt)"
|
| 582 |
safe_story = html.escape(story)
|
|
|
|
| 695 |
evaluator_id = (evaluator_id or "anonymous").strip() or "anonymous"
|
| 696 |
|
| 697 |
# 防重复:方法-故事只允许评估一次
|
| 698 |
+
sync_results_from_hub_to_local()
|
| 699 |
evaluated_pairs = load_evaluated_method_story_pairs()
|
| 700 |
if (sample["method"], sample["story_name"]) in evaluated_pairs:
|
| 701 |
return "⚠️ 该方法-故事已经被评估过一次,请选择其他匿名样本。"
|
|
|
|
| 712 |
with SAVE_LOCK:
|
| 713 |
single_path = save_single_result(sample, evaluator_id, scores, reasons, summary or "")
|
| 714 |
agg_path = recompute_method_aggregates()
|
| 715 |
+
push_err = push_result_files_to_hub(single_path, agg_path)
|
| 716 |
+
|
| 717 |
+
if push_err:
|
| 718 |
+
return f"❌ 结果已本地保存,但写入远程 `{RESULTS_REPO_ID}` 失败:{push_err}"
|
| 719 |
|
| 720 |
_ = (single_path, agg_path)
|
| 721 |
return ""
|