Spaces:
Sleeping
Sleeping
Youngsun Lim
committed on
Commit
·
eebf759
1
Parent(s):
6c10661
1014_3pm
Browse files
app.py
CHANGED
|
@@ -25,6 +25,43 @@ INSTRUCTION_MD = """
|
|
| 25 |
"""
|
| 26 |
|
| 27 |
# -------------------- Helper funcs --------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
def _get_video_id(v: dict) -> str:
|
| 29 |
if "id" in v and v["id"]:
|
| 30 |
return v["id"]
|
|
@@ -41,15 +78,6 @@ def _read_csv_bytes():
|
|
| 41 |
except Exception:
|
| 42 |
return None
|
| 43 |
|
| 44 |
-
# def _append(old_bytes, row):
|
| 45 |
-
# s = io.StringIO()
|
| 46 |
-
# w = csv.writer(s)
|
| 47 |
-
# if not old_bytes:
|
| 48 |
-
# w.writerow(["ts_iso", "participant_id", "action", "score_0_10", "notes"])
|
| 49 |
-
# else:
|
| 50 |
-
# s.write(old_bytes.decode("utf-8", errors="ignore"))
|
| 51 |
-
# w.writerow(row)
|
| 52 |
-
# return s.getvalue().encode("utf-8")
|
| 53 |
|
| 54 |
def _append(old_bytes, row):
|
| 55 |
s = io.StringIO()
|
|
@@ -413,37 +441,142 @@ with gr.Blocks(fill_height=True, css=GLOBAL_CSS) as demo:
|
|
| 413 |
pid.change(_toggle_by_pid, inputs=pid, outputs=save_next)
|
| 414 |
|
| 415 |
# -------- 페이지 전환 & 첫 로드 --------
|
| 416 |
-
ANCHOR_IDX = 0
|
| 417 |
-
ANCHOR_REPEATS = 5
|
| 418 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 419 |
def _start_and_load_first():
|
| 420 |
total = TOTAL_PER_PARTICIPANT
|
| 421 |
-
order =
|
| 422 |
total=total,
|
| 423 |
anchor_idx=ANCHOR_IDX,
|
| 424 |
repeats=ANCHOR_REPEATS,
|
| 425 |
-
|
| 426 |
-
min_gap=1 # 인접 금지
|
| 427 |
)
|
| 428 |
-
|
| 429 |
first_idx = order[0]
|
| 430 |
v0 = V[first_idx]
|
| 431 |
-
url0 = v0["url"]
|
| 432 |
-
action0 = _extract_action(v0)
|
| 433 |
-
vid0 = _get_video_id(v0) # ✅ 여기서 원본 id
|
| 434 |
-
|
| 435 |
return (
|
| 436 |
-
gr.update(visible=False),
|
| 437 |
-
gr.update(visible=True),
|
| 438 |
-
|
| 439 |
-
|
| 440 |
-
5.0,
|
| 441 |
gr.update(visible=False, value=""),
|
| 442 |
-
0,
|
| 443 |
_progress_html(0, TOTAL_PER_PARTICIPANT),
|
| 444 |
-
order,
|
| 445 |
-
1,
|
| 446 |
-
|
| 447 |
)
|
| 448 |
|
| 449 |
|
|
|
|
| 25 |
"""
|
| 26 |
|
| 27 |
# -------------------- Helper funcs --------------------
|
| 28 |
+
def _load_eval_counts():
    """Count existing evaluations per video id.

    Reads the results CSV through ``_read_csv_bytes()`` and returns a dict
    that maps every video id found in ``V`` to the number of CSV rows that
    reference it. Ids without any rows stay at 0, and when no CSV data is
    available every id is 0.
    """
    # Start every known id at zero so unseen videos are still present.
    tally = {_get_video_id(video): 0 for video in V}

    raw = _read_csv_bytes()
    if not raw:
        return tally

    text = raw.decode("utf-8", errors="ignore")
    records = list(csv.reader(io.StringIO(text)))
    if not records:
        return tally

    first = records[0]
    # The first row counts as a header only when it names a known column.
    if "video_id" in first or "overall" in first:
        data_rows = records[1:]
    else:
        data_rows = records

    # Without a "video_id" header column, fall back to the default layout:
    # ts, pid, video_id, overall, notes.
    id_col = first.index("video_id") if "video_id" in first else 2

    for record in data_rows:
        if len(record) <= id_col:
            continue  # short / malformed row: skip it
        video_id = record[id_col]
        if video_id in tally:
            tally[video_id] += 1
    return tally
|
| 64 |
+
|
| 65 |
def _get_video_id(v: dict) -> str:
|
| 66 |
if "id" in v and v["id"]:
|
| 67 |
return v["id"]
|
|
|
|
| 78 |
except Exception:
|
| 79 |
return None
|
| 80 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
|
| 82 |
def _append(old_bytes, row):
|
| 83 |
s = io.StringIO()
|
|
|
|
| 441 |
pid.change(_toggle_by_pid, inputs=pid, outputs=save_next)
|
| 442 |
|
| 443 |
# -------- 페이지 전환 & 첫 로드 --------
|
| 444 |
+
ANCHOR_IDX = 0 # the anchor is the very first video in videos.json
|
| 445 |
+
ANCHOR_REPEATS = 5 # the anchor is shown 5 times
|
| 446 |
+
MIN_GAP = 1 # anchors must not be consecutive (no adjacent anchors)
|
| 447 |
+
|
| 448 |
+
def _build_order_least_first_with_anchor(total: int, anchor_idx: int, repeats: int, min_gap: int = 1):
    """Build one presentation order as a list of indices into ``V``.

    - Loads the per-video evaluation counts via ``_load_eval_counts()``.
    - Places the anchor video ``repeats`` times so that any two anchor slots
      are more than ``min_gap`` positions apart (``min_gap=1`` forbids
      adjacent anchors).
    - Fills the remaining slots with distinct non-anchor videos, preferring
      the least-evaluated ones; ties are broken randomly.

    Args:
        total: Length of the returned sequence.
        anchor_idx: Index into ``V`` of the anchor video.
        repeats: How many times the anchor appears in the sequence.
        min_gap: Minimum number of non-anchor slots between two anchors.

    Returns:
        A list of ``total`` indices into ``V``.

    Raises:
        ValueError: If the arguments are inconsistent, ``V`` is empty, or
            there are not enough distinct non-anchor videos.
        RuntimeError: If an anchor cannot be placed under the gap constraint.
    """
    # Validate with real exceptions: ``assert`` is stripped under ``python -O``.
    if not 0 <= repeats <= total:
        raise ValueError("repeats must satisfy 0 <= repeats <= total.")
    if min_gap < 0:
        raise ValueError("min_gap must be non-negative.")
    pool_size = len(V)
    if pool_size < 1:
        raise ValueError("No videos available to build an order.")

    # 1) Current cumulative evaluation counts, keyed by video id.
    counts = _load_eval_counts()

    # 2) Non-anchor candidates ordered by ascending count. Shuffling first and
    #    then using a stable sort randomizes the order among equal counts.
    candidates = [i for i in range(pool_size) if i != anchor_idx]
    random.shuffle(candidates)
    candidates.sort(key=lambda i: counts.get(_get_video_id(V[i]), 0))

    others_needed = total - repeats
    if len(candidates) < others_needed:
        raise ValueError("Not enough unique non-anchor videos to fill the schedule without duplication.")

    # 3) Take the least-evaluated videos (no duplicates) and shuffle them so
    #    the presentation order does not follow the count order.
    others = candidates[:others_needed]
    random.shuffle(others)

    # 4) Place one anchor per segment while honoring the gap constraint.
    anchor_positions = []

    def is_free(pos):
        return all(abs(pos - taken) >= (min_gap + 1) for taken in anchor_positions)

    segment = total // repeats if repeats > 0 else total
    for k in range(repeats):
        lo = k * segment
        # The last segment absorbs the remainder of the sequence.
        hi = (k + 1) * segment if k < repeats - 1 else total
        start = random.randrange(lo, hi)

        # Search outward from the random start first ...
        nearby = (
            start + sign * dist
            for dist in range(max(1, segment))
            for sign in (1, -1)
        )
        spot = next((p for p in nearby if 0 <= p < total and is_free(p)), None)
        if spot is None:
            # ... then, as a last resort, scan the whole sequence.
            spot = next((p for p in range(total) if is_free(p)), None)
        if spot is None:
            raise RuntimeError("Failed to place anchor without adjacency.")
        anchor_positions.append(spot)

    # 5) Anchors go to their slots; every other slot takes the next filler.
    anchor_slots = set(anchor_positions)
    filler = iter(others)
    seq = [anchor_idx if pos in anchor_slots else next(filler) for pos in range(total)]

    # 6) Internal sanity checks: anchor count and the requested spacing.
    assert sum(1 for x in seq if x == anchor_idx) == repeats
    placed = sorted(anchor_positions)
    assert all(b - a > min_gap for a, b in zip(placed, placed[1:])), "Adjacent anchors found."

    return seq
|
| 527 |
+
|
| 528 |
+
|
| 529 |
+
# def _start_and_load_first():
|
| 530 |
+
# total = TOTAL_PER_PARTICIPANT
|
| 531 |
+
# order = _build_order_with_anchor(
|
| 532 |
+
# total=total,
|
| 533 |
+
# anchor_idx=ANCHOR_IDX,
|
| 534 |
+
# repeats=ANCHOR_REPEATS,
|
| 535 |
+
# pool_size=len(V),
|
| 536 |
+
# min_gap=1 # 인접 금지
|
| 537 |
+
# )
|
| 538 |
+
|
| 539 |
+
# first_idx = order[0]
|
| 540 |
+
# v0 = V[first_idx]
|
| 541 |
+
# url0 = v0["url"]
|
| 542 |
+
# action0 = _extract_action(v0)
|
| 543 |
+
# vid0 = _get_video_id(v0) # ✅ 여기서 원본 id
|
| 544 |
+
|
| 545 |
+
# return (
|
| 546 |
+
# gr.update(visible=False), # page_intro off
|
| 547 |
+
# gr.update(visible=True), # page_eval on
|
| 548 |
+
# url0, # video
|
| 549 |
+
# action0, # action_tb (표시용)
|
| 550 |
+
# 5.0, # score 초기값
|
| 551 |
+
# gr.update(visible=False, value=""),
|
| 552 |
+
# 0, # done_state
|
| 553 |
+
# _progress_html(0, TOTAL_PER_PARTICIPANT),
|
| 554 |
+
# order, # order_state
|
| 555 |
+
# 1, # ptr_state
|
| 556 |
+
# vid0 # ✅ cur_video_id
|
| 557 |
+
# )
|
| 558 |
def _start_and_load_first():
    """Build the presentation order and return the values for the first item.

    Returns an 11-tuple: two visibility updates (first hidden, second shown),
    the first video's URL, its extracted action, the initial score 5.0, a
    hidden update with an empty value, the done counter 0, the progress HTML
    for 0 of ``TOTAL_PER_PARTICIPANT``, the full order, the next pointer 1,
    and the first video's id.
    """
    schedule = _build_order_least_first_with_anchor(
        total=TOTAL_PER_PARTICIPANT,
        anchor_idx=ANCHOR_IDX,
        repeats=ANCHOR_REPEATS,
        min_gap=MIN_GAP,
    )
    opening = V[schedule[0]]

    # Evaluate the pieces in the same order the outputs are listed.
    hide_first = gr.update(visible=False)
    show_second = gr.update(visible=True)
    video_url = opening["url"]
    action_text = _extract_action(opening)
    cleared_hidden = gr.update(visible=False, value="")
    progress = _progress_html(0, TOTAL_PER_PARTICIPANT)
    current_video_id = _get_video_id(opening)  # cur_video_id

    return (
        hide_first,
        show_second,
        video_url,
        action_text,
        5.0,
        cleared_hidden,
        0,
        progress,
        schedule,
        1,
        current_video_id,
    )
|
| 581 |
|
| 582 |
|