Youngsun Lim committed on
Commit
eebf759
·
1 Parent(s): 6c10661
Files changed (1) hide show
  1. app.py +162 -29
app.py CHANGED
@@ -25,6 +25,43 @@ INSTRUCTION_MD = """
25
  """
26
 
27
  # -------------------- Helper funcs --------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  def _get_video_id(v: dict) -> str:
29
  if "id" in v and v["id"]:
30
  return v["id"]
@@ -41,15 +78,6 @@ def _read_csv_bytes():
41
  except Exception:
42
  return None
43
 
44
- # def _append(old_bytes, row):
45
- # s = io.StringIO()
46
- # w = csv.writer(s)
47
- # if not old_bytes:
48
- # w.writerow(["ts_iso", "participant_id", "action", "score_0_10", "notes"])
49
- # else:
50
- # s.write(old_bytes.decode("utf-8", errors="ignore"))
51
- # w.writerow(row)
52
- # return s.getvalue().encode("utf-8")
53
 
54
  def _append(old_bytes, row):
55
  s = io.StringIO()
@@ -413,37 +441,142 @@ with gr.Blocks(fill_height=True, css=GLOBAL_CSS) as demo:
413
  pid.change(_toggle_by_pid, inputs=pid, outputs=save_next)
414
 
415
  # -------- 페이지 전환 & 첫 로드 --------
416
- ANCHOR_IDX = 0 # the very first video in videos.json
417
- ANCHOR_REPEATS = 5 # the anchor is shown 5 times
418
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
419
  # Builds the presentation order with _build_order_with_anchor and returns
  # the 11 initial UI values for the first video in that order.
  def _start_and_load_first():
420
  total = TOTAL_PER_PARTICIPANT
421
- order = _build_order_with_anchor(
422
  total=total,
423
  anchor_idx=ANCHOR_IDX,
424
  repeats=ANCHOR_REPEATS,
425
- pool_size=len(V),
426
- min_gap=1 # adjacency forbidden
427
  )
428
-
429
  first_idx = order[0]
430
  v0 = V[first_idx]
431
- url0 = v0["url"]
432
- action0 = _extract_action(v0)
433
- vid0 = _get_video_id(v0) # ✅ original id is taken here
434
-
435
  return (
436
- gr.update(visible=False), # page_intro off
437
- gr.update(visible=True), # page_eval on
438
- url0, # video
439
- action0, # action_tb (for display)
440
- 5.0, # initial score value
441
  gr.update(visible=False, value=""),
442
- 0, # done_state
443
  _progress_html(0, TOTAL_PER_PARTICIPANT),
444
- order, # order_state
445
- 1, # ptr_state
446
- vid0 # cur_video_id
447
  )
448
 
449
 
 
25
  """
26
 
27
  # -------------------- Helper funcs --------------------
28
def _load_eval_counts():
    """Return how many evaluation rows each known video already has.

    Reads the results CSV through ``_read_csv_bytes()`` and tallies rows per
    video id. Every id obtained from ``V`` starts at 0, so videos without any
    recorded row are still present in the result. Rows whose id does not
    belong to ``V`` are ignored.

    Returns:
        dict: video id -> number of matching rows. All zeros when the CSV is
        missing or empty.
    """
    counts = {_get_video_id(v): 0 for v in V}

    raw = _read_csv_bytes()
    if not raw:
        return counts

    rows = list(csv.reader(io.StringIO(raw.decode("utf-8", errors="ignore"))))
    if not rows:
        return counts

    # The first row is treated as a header only if it names a known column;
    # otherwise it is counted as data like every other row.
    header = rows[0]
    has_header = "video_id" in header or "overall" in header
    body = rows[1:] if has_header else rows

    # Without a "video_id" header cell, fall back to the default layout
    # (ts, pid, video_id, overall, notes), where the id is the third column.
    vid_col = header.index("video_id") if "video_id" in header else 2

    for row in body:
        try:
            vid = row[vid_col]
        except IndexError:
            # Blank or truncated line: nothing to count.
            continue
        if vid in counts:
            counts[vid] += 1
    return counts
64
+
65
  def _get_video_id(v: dict) -> str:
66
  if "id" in v and v["id"]:
67
  return v["id"]
 
78
  except Exception:
79
  return None
80
 
 
 
 
 
 
 
 
 
 
81
 
82
  def _append(old_bytes, row):
83
  s = io.StringIO()
 
441
  pid.change(_toggle_by_pid, inputs=pid, outputs=save_next)
442
 
443
  # -------- 페이지 전환 & 첫 로드 --------
444
+ ANCHOR_IDX = 0 # the very first video in videos.json
445
+ ANCHOR_REPEATS = 5 # the anchor appears 5 times
446
+ MIN_GAP = 1 # no consecutive anchors (adjacency forbidden)
447
+
448
def _place_anchor_slots(total, repeats, min_gap):
    """Pick `repeats` positions in ``range(total)`` for the anchor video.

    The range is cut into `repeats` equal segments (the last one absorbs the
    remainder). A random start is drawn inside each segment and the search
    widens outwards from it; if nothing within one segment width fits, the
    whole range is scanned from left to right.

    Returns:
        list: chosen positions in placement order; any two of them differ by
        at least ``min_gap + 1``.

    Raises:
        RuntimeError: if an anchor cannot be placed under the spacing rule.
    """
    if repeats <= 0:
        return []

    segment = total // repeats
    placed = []

    def is_free(pos):
        return all(abs(pos - taken) >= min_gap + 1 for taken in placed)

    for k in range(repeats):
        lo = k * segment
        hi = (k + 1) * segment if k < repeats - 1 else total
        start = random.randrange(lo, hi)

        # Try start, then start+1, start-1, start+2, ... within one segment width.
        nearby = (
            pos
            for dist in range(max(1, segment))
            for pos in (start + dist, start - dist)
        )
        slot = next((p for p in nearby if 0 <= p < total and is_free(p)), None)
        if slot is None:
            # Last resort: the first position anywhere that respects the spacing.
            slot = next((p for p in range(total) if is_free(p)), None)
        if slot is None:
            raise RuntimeError("Failed to place anchor without adjacency.")
        placed.append(slot)

    return placed


def _build_order_least_first_with_anchor(
    total: int, anchor_idx: int, repeats: int, min_gap: int = 1
):
    """Build one participant's presentation order as indices into ``V``.

    The anchor video (``V[anchor_idx]``) appears `repeats` times, at positions
    at least ``min_gap + 1`` apart. The remaining ``total - repeats`` slots
    are filled, without repetition, by the non-anchor videos that have the
    fewest evaluations according to ``_load_eval_counts()``. Ties are broken
    at random and the selected videos are shuffled before being laid out.

    Args:
        total: Length of the returned schedule.
        anchor_idx: Index into ``V`` of the anchor video.
        repeats: Number of anchor appearances (``0 <= repeats <= total``).
        min_gap: Minimum number of other videos between two anchors.

    Returns:
        list: `total` indices into ``V``.

    Raises:
        ValueError: on an empty pool, an invalid `repeats`, or too few
            distinct non-anchor videos to fill the schedule.
        IndexError: if `anchor_idx` does not address an entry of ``V``.
        RuntimeError: if the anchors cannot be spaced as requested.
    """
    pool_size = len(V)

    # Explicit checks instead of `assert`, which is skipped under `python -O`.
    if pool_size < 1:
        raise ValueError("The video pool is empty.")
    if not 0 <= repeats <= total:
        raise ValueError("repeats must be between 0 and total.")
    # Same range that indexing V[anchor_idx] accepts.
    if not -pool_size <= anchor_idx < pool_size:
        raise IndexError("anchor_idx is out of range for the video pool.")

    counts = _load_eval_counts()

    # Shuffle first, then rely on the stable sort: videos with equal counts
    # end up in random relative order.
    candidates = [i for i in range(pool_size) if i != anchor_idx]
    random.shuffle(candidates)
    candidates.sort(key=lambda i: counts.get(_get_video_id(V[i]), 0))

    others_needed = total - repeats
    if len(candidates) < others_needed:
        raise ValueError("Not enough unique non-anchor videos to fill the schedule without duplication.")

    # Least-evaluated videos, each used once, in random presentation order.
    others = candidates[:others_needed]
    random.shuffle(others)

    anchor_slots = set(_place_anchor_slots(total, repeats, min_gap))

    # Anchor goes into its reserved slots; every other slot takes the next filler.
    filler = iter(others)
    seq = [anchor_idx if pos in anchor_slots else next(filler) for pos in range(total)]

    # Internal invariants: right number of anchors, spaced according to min_gap.
    assert seq.count(anchor_idx) == repeats
    ordered = sorted(anchor_slots)
    assert all(
        later - earlier >= min_gap + 1
        for earlier, later in zip(ordered, ordered[1:])
    ), "Anchors placed closer than min_gap."

    return seq
527
+
528
+
529
+ # def _start_and_load_first():
530
+ # total = TOTAL_PER_PARTICIPANT
531
+ # order = _build_order_with_anchor(
532
+ # total=total,
533
+ # anchor_idx=ANCHOR_IDX,
534
+ # repeats=ANCHOR_REPEATS,
535
+ # pool_size=len(V),
536
+ # min_gap=1 # 인접 금지
537
+ # )
538
+
539
+ # first_idx = order[0]
540
+ # v0 = V[first_idx]
541
+ # url0 = v0["url"]
542
+ # action0 = _extract_action(v0)
543
+ # vid0 = _get_video_id(v0) # ✅ 여기서 원본 id
544
+
545
+ # return (
546
+ # gr.update(visible=False), # page_intro off
547
+ # gr.update(visible=True), # page_eval on
548
+ # url0, # video
549
+ # action0, # action_tb (표시용)
550
+ # 5.0, # score 초기값
551
+ # gr.update(visible=False, value=""),
552
+ # 0, # done_state
553
+ # _progress_html(0, TOTAL_PER_PARTICIPANT),
554
+ # order, # order_state
555
+ # 1, # ptr_state
556
+ # vid0 # ✅ cur_video_id
557
+ # )
558
  def _start_and_load_first():
559
  total = TOTAL_PER_PARTICIPANT
560
+ order = _build_order_least_first_with_anchor(
561
  total=total,
562
  anchor_idx=ANCHOR_IDX,
563
  repeats=ANCHOR_REPEATS,
564
+ min_gap=MIN_GAP
 
565
  )
 
566
  first_idx = order[0]
567
  v0 = V[first_idx]
 
 
 
 
568
  return (
569
+ gr.update(visible=False),
570
+ gr.update(visible=True),
571
+ v0["url"],
572
+ _extract_action(v0),
573
+ 5.0,
574
  gr.update(visible=False, value=""),
575
+ 0,
576
  _progress_html(0, TOTAL_PER_PARTICIPANT),
577
+ order,
578
+ 1,
579
+ _get_video_id(v0) # cur_video_id
580
  )
581
 
582