WHU1psh committed on
Commit
e58ba0f
·
verified ·
1 Parent(s): 1786f76

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +290 -4
app.py CHANGED
@@ -1,7 +1,293 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- demo.launch()
 
 
 
 
 
 
 
 
1
+ """
2
+ VideoEval Movie-Level 问卷应用(Hugging Face Spaces)
3
+ 仅保留 Movie-Level 评测,并支持方法级别统计输出。
4
+ """
5
+
6
+ import json
7
+ import os
8
+ import threading
9
+ from collections import defaultdict
10
+ from datetime import datetime
11
+ from pathlib import Path
12
+ from typing import Any, Dict, List, Tuple
13
+
14
  import gradio as gr
15
 
16
# Path configuration. The data root defaults to "MemDirector" and can be
# overridden with the VIDEOEVAL_ROOT environment variable.
ROOT_DIR = Path(os.environ.get("VIDEOEVAL_ROOT", "MemDirector"))
INPUT_DIR = ROOT_DIR / "user_study_input"
OUTPUT_DIR = ROOT_DIR / "user_study_results"
STORY_DIR = INPUT_DIR / "clip_movie_story"
VIDEO_DIR = INPUT_DIR / "video"

# Create the results directory up front (runs at import time).
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Movie-level criteria as (key, display name, description) triples.
MOVIE_CRITERIA: List[Tuple[str, str, str]] = [
    ("SF", "Script Faithfulness (剧本忠实度)", "生成的视觉内容与原始剧本描述的吻合程度。"),
    ("NC", "Narrative Coherence (叙事连贯性)", "镜头间情节发展的逻辑性,确保故事表达清晰、不破碎。"),
    ("VQ", "Visual Quality (视觉质量)", "画面的清晰度、噪点控制、光影效果等基础图像质量。"),
    ("CC", "Character Consistency (角色一致性)", "同一角色在不同镜头、不同角度下的外貌、服装及特征的稳定性。"),
    ("PLC", "Physical Law Compliance (物理规律符合度)", "运动、重力、碰撞等是否符合现实物理逻辑,是否存在严重 AI 幻觉。"),
    ("V_AQ", "Voice/Audio Quality (语音/音频质量)", "配音、背景音乐和音效的清晰度、自然度及技术品质。"),
    ("CT", "Cinematic Techniques (电影技巧)", "镜头运动、景深控制及构图的专业性。"),
    ("AVR", "Audio-Visual Richness (视听丰富度)", "画面细节精细度以及音频层次(音效、氛围音)的丰富程度。"),
    ("NP", "Narrative Pacing (叙事节奏)", "镜头剪辑长短切换是否契合故事情节张力需求。"),
    ("VAC", "Video-Audio Coordination (视听协调性)", "画面动作与音效、音乐卡点的同步率。"),
    ("CD", "Compelling Degree (引人入胜程度)", "吸引注意力并引发情感共鸣或沉浸感的能力。"),
    ("OQ", "Overall Quality (整体质量)", "对生成视频作为“电影作品”的综合观感评分。"),
]

# Metric keys in display order; the first field of every criterion.
BASE_METRIC_KEYS = [criterion[0] for criterion in MOVIE_CRITERIA]
# Held while a submission is written and the aggregates are recomputed.
SAVE_LOCK = threading.Lock()
43
+
44
+
45
+ def _safe_read_text(path: Path) -> str:
46
+ if not path.exists():
47
+ return ""
48
+ return path.read_text(encoding="utf-8-sig").strip()
49
+
50
+
51
def load_dataset_index() -> List[Dict[str, Any]]:
    """Scan the input directories and list every sample that can be rated.

    Story texts are read from ``STORY_DIR/*.txt`` and keyed by file stem.
    Videos are expected under ``VIDEO_DIR/<method>/<story_name>/*.mp4``.

    Returns:
        One dict per video with the keys ``sample_id``, ``method``,
        ``story_name``, ``video_name``, ``video_path`` and ``story_text``
        (empty when no story file shares the story folder's name). The list
        is empty when ``VIDEO_DIR`` does not exist.
    """
    story_texts: Dict[str, str] = {}
    for txt_path in sorted(STORY_DIR.glob("*.txt")):
        story_texts[txt_path.stem] = _safe_read_text(txt_path)

    index: List[Dict[str, Any]] = []
    if not VIDEO_DIR.exists():
        return index

    method_dirs = sorted(entry for entry in VIDEO_DIR.iterdir() if entry.is_dir())
    for method_dir in method_dirs:
        method_name = method_dir.name
        story_dirs = sorted(entry for entry in method_dir.iterdir() if entry.is_dir())
        for story_dir in story_dirs:
            story_key = story_dir.name
            # A story folder may hold several mp4 files; each is its own sample.
            for mp4_path in sorted(story_dir.glob("*.mp4")):
                record: Dict[str, Any] = {
                    "sample_id": f"{method_name}__{story_key}__{mp4_path.stem}",
                    "method": method_name,
                    "story_name": story_key,
                    "video_name": mp4_path.name,
                    "video_path": str(mp4_path.resolve()),
                    "story_text": story_texts.get(story_key, ""),
                }
                index.append(record)
    return index
77
+
78
+
79
def compute_derived(scores: Dict[str, float]) -> Dict[str, float]:
    """Compute the derived CL, CRh and AVG values from the base scores.

    * ``CL``  = mean(SF, NC, VQ, CC, PLC) + 0.5 * mean(CT, AVR)
    * ``CRh`` = mean(V_AQ, NP, VAC, CD, OQ) + 0.5 * mean(CT, AVR)
    * ``AVG`` = mean over every key in ``BASE_METRIC_KEYS``

    Args:
        scores: Mapping that contains a value for every base metric key.

    Returns:
        A dict with the keys ``CL``, ``CRh`` and ``AVG``.

    Raises:
        KeyError: If a required metric key is missing from ``scores``.
    """

    def _mean(keys):
        return sum(scores[name] for name in keys) / len(keys)

    cl_core = _mean(("SF", "NC", "VQ", "CC", "PLC"))
    # Both composites add the same half-weighted CT/AVR term.
    shared_term = 0.5 * ((scores["CT"] + scores["AVR"]) / 2.0)
    crh_core = _mean(("V_AQ", "NP", "VAC", "CD", "OQ"))

    return {
        "CL": cl_core + shared_term,
        "CRh": crh_core + shared_term,
        "AVG": _mean(BASE_METRIC_KEYS),
    }
91
+
92
+
93
def save_single_result(sample: Dict[str, Any], evaluator_id: str, scores: Dict[str, int], reasons: Dict[str, str], summary: str) -> Path:
    """Write one questionnaire submission to its own JSON file.

    The file goes to ``OUTPUT_DIR/raw_results/<method>/<story_name>/`` and is
    named ``<video stem>_<evaluator>_<YYYYmmdd_HHMMSS>.json``. If that name
    is already taken, a numeric suffix is appended so the earlier submission
    is not overwritten.

    Args:
        sample: Sample record; ``method``, ``story_name`` and ``video_name``
            are read here and the whole record is embedded in the payload.
        evaluator_id: Free-text rater identifier. Stored verbatim in the
            payload; only a filename-safe form is used in the file name.
        scores: Base metric key -> score.
        reasons: Base metric key -> free-text reason.
        summary: Overall free-text summary.

    Returns:
        Path of the JSON file that was written.

    Raises:
        KeyError: If ``scores`` lacks a key needed by ``compute_derived``.
    """
    # One clock reading for both the file name and the payload timestamp.
    now = datetime.now()
    ts = now.strftime("%Y%m%d_%H%M%S")
    result_dir = OUTPUT_DIR / "raw_results" / sample["method"] / sample["story_name"]
    result_dir.mkdir(parents=True, exist_ok=True)

    # The evaluator id is typed by the user, so path separators and other
    # special characters must not reach the file name.
    safe_id = "".join(
        ch if ch.isalnum() or ch in "-_." else "_" for ch in str(evaluator_id)
    ) or "anonymous"
    # Path.stem removes only the final extension; str.replace would also
    # strip ".mp4" occurring in the middle of the name.
    video_stem = Path(sample["video_name"]).stem
    base_name = f"{video_stem}_{safe_id}_{ts}"
    out_path = result_dir / f"{base_name}.json"
    # The timestamp has one-second resolution: keep repeat submissions apart.
    suffix = 1
    while out_path.exists():
        out_path = result_dir / f"{base_name}_{suffix}.json"
        suffix += 1

    score_float = {k: float(v) for k, v in scores.items()}
    derived = compute_derived(score_float)

    payload = {
        "timestamp": now.isoformat(),
        "evaluator_id": evaluator_id,
        "sample": sample,
        "scores": scores,
        "reasons": reasons,
        "summary": summary,
        "derived": derived,
    }
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(payload, f, ensure_ascii=False, indent=2)
    return out_path
115
+
116
+
117
def recompute_method_aggregates() -> Path:
    """Recompute per-method mean scores and write ``method_aggregates.json``.

    Every ``*.json`` file under ``OUTPUT_DIR/raw_results`` is read. A file
    contributes only if it holds a JSON object whose ``scores`` mapping has a
    numeric value for every key in ``BASE_METRIC_KEYS``; unreadable or
    malformed files are skipped so that one bad file cannot block the
    aggregation. The derived CL/CRh/AVG values are recomputed from the base
    scores and averaged alongside them.

    Returns:
        Path of the aggregate file, ``OUTPUT_DIR/method_aggregates.json``.
    """
    raw_root = OUTPUT_DIR / "raw_results"
    method_scores: Dict[str, Dict[str, List[float]]] = defaultdict(lambda: defaultdict(list))
    method_count: Dict[str, int] = defaultdict(int)

    if raw_root.exists():
        # Sorted so the traversal (and float summation) order is reproducible.
        for fp in sorted(raw_root.rglob("*.json")):
            try:
                with open(fp, "r", encoding="utf-8-sig") as f:
                    data = json.load(f)
            except (OSError, ValueError):
                # ValueError covers both invalid JSON and undecodable bytes.
                continue
            if not isinstance(data, dict):
                continue

            sample = data.get("sample")
            method = sample.get("method", "UNKNOWN") if isinstance(sample, dict) else "UNKNOWN"
            if not isinstance(method, str):
                # Keep keys sortable and JSON-serialisable.
                method = "UNKNOWN"

            scores = data.get("scores")
            if not isinstance(scores, dict):
                continue
            if not all(k in scores for k in BASE_METRIC_KEYS):
                continue
            try:
                base = {k: float(scores[k]) for k in BASE_METRIC_KEYS}
            except (TypeError, ValueError):
                # A score that is not numeric invalidates the submission.
                continue

            method_count[method] += 1
            for k, v in base.items():
                method_scores[method][k].append(v)

            # Derived metrics take part in the per-method means as well.
            for d_key, d_val in compute_derived(base).items():
                method_scores[method][d_key].append(float(d_val))

    agg = {
        "updated_at": datetime.now().isoformat(),
        "metric_keys": BASE_METRIC_KEYS,
        "derived_keys": ["CL", "CRh", "AVG"],
        "methods": {},
    }
    for method in sorted(method_scores):
        metric_avg = {
            key: (round(sum(vals) / len(vals), 4) if vals else None)
            for key, vals in method_scores[method].items()
        }
        agg["methods"][method] = {
            "num_submissions": method_count[method],
            "avg_scores": metric_avg,
        }

    out_path = OUTPUT_DIR / "method_aggregates.json"
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(agg, f, ensure_ascii=False, indent=2)
    return out_path
162
+
163
+
164
def build_sample_brief(sample: Dict[str, Any], index: int, total: int) -> str:
    """Render the Markdown summary shown above the video for one sample.

    Args:
        sample: Sample record; reads ``method``, ``story_name``,
            ``video_name`` and the optional ``story_text``.
        index: Zero-based position of the sample (displayed one-based).
        total: Total number of samples.

    Returns:
        Markdown with the sample position, its method/story/video names and
        the story text, or a fallback notice when the story text is missing
        or empty.
    """
    story_text = sample.get("story_text")
    if not story_text:
        story_text = "(未找到对应 story 文本,请检查 clip_movie_story 下是否有同名 txt)"

    parts = [
        f"### 当前样本 {index + 1}/{total}",
        f"- **Method**: `{sample['method']}`",
        f"- **Story**: `{sample['story_name']}`",
        f"- **Video**: `{sample['video_name']}`",
        "",  # blank line between the bullet list and the story heading
        f"### Story Description\n{story_text}",
    ]
    return "\n".join(parts)
173
+
174
+
175
def create_app():
    """Build the Gradio Blocks UI for the movie-level questionnaire.

    The sample list is loaded once, when the app is built. The page shows an
    evaluator-ID box, a sample selector, the sample description and video,
    one score radio (1-5) plus an optional reason per criterion in
    ``MOVIE_CRITERIA``, a free-text summary, and previous/next/submit buttons.
    Submitting stores the answers via ``save_single_result`` and refreshes the
    per-method statistics via ``recompute_method_aggregates``.

    Returns:
        The constructed ``gr.Blocks`` application (not launched).
    """
    # NOTE(review): the widget nesting below (which components sit inside
    # each Row/Group) was reconstructed from a source whose indentation had
    # been flattened -- confirm against the deployed layout.
    samples = load_dataset_index()
    total = len(samples)
    sample_map = {s["sample_id"]: s for s in samples}
    # First position of each sample id, so a dropdown change is a dict lookup
    # instead of a scan over the whole sample list.
    index_by_id: Dict[str, int] = {}
    for position, item in enumerate(samples):
        index_by_id.setdefault(item["sample_id"], position)

    with gr.Blocks(title="VideoEval Movie-Level Evaluation") as app:
        gr.Markdown("# VideoEval - Movie-Level Evaluation")
        gr.Markdown(
            f"- 输入目录: `{INPUT_DIR}` \n"
            f"- 输出目录: `{OUTPUT_DIR}` \n"
            "- 指标: SF/NC/VQ/CC/PLC/V_AQ/CT/AVR/NP/VAC/CD/OQ + CL/CRh/AVG"
        )

        current_idx = gr.State(0)
        # NOTE(review): this state is written on page load but never read.
        evaluator_state = gr.State("anonymous")

        with gr.Row():
            evaluator_input = gr.Textbox(label="Evaluator ID", value="anonymous")
            sample_dropdown = gr.Dropdown(
                label="选择评测样本",
                choices=[s["sample_id"] for s in samples],
                value=samples[0]["sample_id"] if samples else None,
                interactive=True,
            )

        sample_info = gr.Markdown("无可用样本" if not samples else build_sample_brief(samples[0], 0, total))
        movie_video = gr.Video(label="Movie Video", value=samples[0]["video_path"] if samples else None, height=420)

        gr.Markdown("## Movie-Level 评分(1-5)")
        score_widgets: Dict[str, gr.Radio] = {}
        reason_widgets: Dict[str, gr.Textbox] = {}
        for key, name, desc in MOVIE_CRITERIA:
            with gr.Group():
                gr.Markdown(f"**{key} - {name}**\n\n{desc}")
                score_widgets[key] = gr.Radio(choices=[1, 2, 3, 4, 5], label=f"{key} Score")
                reason_widgets[key] = gr.Textbox(label=f"{key} Reason", lines=2, placeholder="可选:补充评分理由")

        final_summary = gr.Textbox(label="Final Summary", lines=4, placeholder="可选:整体评价总结")

        with gr.Row():
            prev_btn = gr.Button("← Previous")
            next_btn = gr.Button("Next →")
            submit_btn = gr.Button("提交当前评分并统计", variant="primary")
        status = gr.Markdown("")

        def _render(idx: int) -> Tuple[Any, str, Any, int]:
            """Return (video path, brief, sample id, index) for position ``idx``."""
            sample = samples[idx]
            return sample["video_path"], build_sample_brief(sample, idx, total), sample["sample_id"], idx

        def _sync_sample_from_dropdown(sample_id: str) -> Tuple[Any, str, int]:
            """Show the sample picked in the dropdown and record its index."""
            idx = index_by_id.get(sample_id) if sample_id else None
            if idx is None:
                return None, "未找到样本", 0
            video_path, brief, _, idx = _render(idx)
            return video_path, brief, idx

        def _step(idx: int, delta: int) -> Tuple[Any, str, Any, int]:
            """Move ``delta`` positions from ``idx``, clamped to the list bounds."""
            if not samples:
                return None, "无可用样本", None, 0
            return _render(min(total - 1, max(0, idx + delta)))

        def _go_prev(idx: int) -> Tuple[Any, str, Any, int]:
            return _step(idx, -1)

        def _go_next(idx: int) -> Tuple[Any, str, Any, int]:
            return _step(idx, 1)

        def _submit(evaluator_id: str, sample_id: str, summary: str, *score_reason_vals):
            """Validate the form, store the submission and refresh the statistics.

            ``score_reason_vals`` holds (score, reason) pairs in
            ``BASE_METRIC_KEYS`` order, matching ``submit_inputs`` below.
            Returns the Markdown status message.
            """
            if not samples:
                return "❌ 没有可提交样本。"
            if not sample_id or sample_id not in sample_map:
                return "❌ 请先选择样本。"
            sample = sample_map[sample_id]
            evaluator_id = (evaluator_id or "anonymous").strip() or "anonymous"

            scores: Dict[str, int] = {}
            reasons: Dict[str, str] = {}
            for i, key in enumerate(BASE_METRIC_KEYS):
                score = score_reason_vals[i * 2]
                reason = score_reason_vals[i * 2 + 1]
                if score is None:
                    return f"❌ 请为 `{key}` 打分。"
                scores[key] = int(score)
                reasons[key] = (reason or "").strip()

            # Write and re-aggregate under one lock so concurrent submissions
            # do not interleave.
            with SAVE_LOCK:
                single_path = save_single_result(sample, evaluator_id, scores, reasons, summary or "")
                agg_path = recompute_method_aggregates()

            return f"✅ 已保存: `{single_path}`\n\n✅ 已更新方法统计: `{agg_path}`"

        sample_dropdown.change(
            _sync_sample_from_dropdown,
            inputs=[sample_dropdown],
            outputs=[movie_video, sample_info, current_idx],
        )
        nav_outputs = [movie_video, sample_info, sample_dropdown, current_idx]
        prev_btn.click(_go_prev, inputs=[current_idx], outputs=nav_outputs)
        next_btn.click(_go_next, inputs=[current_idx], outputs=nav_outputs)

        # Interleave score/reason widgets in the order _submit unpacks them.
        submit_inputs = [evaluator_input, sample_dropdown, final_summary]
        for key in BASE_METRIC_KEYS:
            submit_inputs.append(score_widgets[key])
            submit_inputs.append(reason_widgets[key])
        submit_btn.click(_submit, inputs=submit_inputs, outputs=[status])

        app.load(lambda x: x, inputs=[evaluator_input], outputs=[evaluator_state])

    return app
281
+
282
+
283
# Module-level app object (built at import time).
demo = create_app()

if __name__ == "__main__":
    # Pass the input directory as an allowed path only when it exists.
    if INPUT_DIR.exists():
        allowed_paths = [str(INPUT_DIR.resolve())]
    else:
        allowed_paths = None

    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "show_error": True,
        "allowed_paths": allowed_paths,
    }
    demo.launch(**launch_options)