Spaces:

SGTLIM
/

videoeval_humaneval

Sleeping

App Files Files Community

Youngsun Lim committed on Oct 14, 2025

Commit

eebf759

1 Parent(s): 6c10661

1014_3pm

Browse files

Files changed (1) hide show

app.py +162 -29

app.py CHANGED Viewed

@@ -25,6 +25,43 @@ INSTRUCTION_MD = """
 """
 # -------------------- Helper funcs --------------------
 def _get_video_id(v: dict) -> str:
     if "id" in v and v["id"]:
         return v["id"]
@@ -41,15 +78,6 @@ def _read_csv_bytes():
     except Exception:
         return None
-# def _append(old_bytes, row):
-#     s = io.StringIO()
-#     w = csv.writer(s)
-#     if not old_bytes:
-#         w.writerow(["ts_iso", "participant_id", "action", "score_0_10", "notes"])
-#     else:
-#         s.write(old_bytes.decode("utf-8", errors="ignore"))
-#     w.writerow(row)
-#     return s.getvalue().encode("utf-8")
 def _append(old_bytes, row):
     s = io.StringIO()
@@ -413,37 +441,142 @@ with gr.Blocks(fill_height=True, css=GLOBAL_CSS) as demo:
         pid.change(_toggle_by_pid, inputs=pid, outputs=save_next)
     # -------- 페이지 전환 & 첫 로드 --------
-    ANCHOR_IDX = 0         # videos.json의 맨 첫 번째 비디오
-    ANCHOR_REPEATS = 5     # 앵커 5회 노출
     def _start_and_load_first():
         total = TOTAL_PER_PARTICIPANT
-        order = _build_order_with_anchor(
             total=total,
             anchor_idx=ANCHOR_IDX,
             repeats=ANCHOR_REPEATS,
-            pool_size=len(V),
-            min_gap=1  # 인접 금지
         )
         first_idx = order[0]
         v0 = V[first_idx]
-        url0 = v0["url"]
-        action0 = _extract_action(v0)
-        vid0 = _get_video_id(v0)             # ✅ 여기서 원본 id
         return (
-            gr.update(visible=False),         # page_intro off
-            gr.update(visible=True),          # page_eval on
-            url0,                             # video
-            action0,                          # action_tb (표시용)
-            5.0,                              # score 초기값
             gr.update(visible=False, value=""),
-            0,                                # done_state
             _progress_html(0, TOTAL_PER_PARTICIPANT),
-            order,                            # order_state
-            1,                                # ptr_state
-            vid0                              # ✅ cur_video_id
         )

 """
 # -------------------- Helper funcs --------------------
+def _load_eval_counts():
+    """
+    Return a dict mapping each video_id to the number of evaluation rows recorded for it.
+
+    Reads results.csv (from the Hugging Face dataset, via `_read_csv_bytes()`).
+    Every id found in `V` starts at 0, so videos with no recorded rows are
+    still present in the result. If the CSV is missing or empty, all counts
+    stay at 0.
+
+    The first row is treated as a header only when it contains "video_id" or
+    "overall"; otherwise every row is counted as data. Without a "video_id"
+    header column, the id is read from the third column.
+
+    Rows whose id is not one of the ids in `V`, and rows that raise while
+    being read (e.g. too few columns), are skipped.
+    """
+    # Initialize every id to 0
+    counts = {}
+    for v in V:
+        vid = _get_video_id(v)
+        counts[vid] = 0
+    b = _read_csv_bytes()
+    if not b:
+        return counts
+    s = io.StringIO(b.decode("utf-8", errors="ignore"))
+    r = csv.reader(s)
+    rows = list(r)
+    if not rows:
+        return counts
+    # Detect the header
+    header = rows[0]
+    body = rows[1:] if header and ("video_id" in header or "overall" in header) else rows  # no recognizable header: count every row as data
+    vid_col = None
+    if header and "video_id" in header:
+        vid_col = header.index("video_id")
+    for row in body:
+        try:
+            vid = row[vid_col] if vid_col is not None else row[2]  # default format: ts, pid, video_id, overall, notes
+            if vid in counts:
+                counts[vid] += 1
+        except Exception:  # best effort: skip any row that cannot be read
+            continue
+    return counts
 def _get_video_id(v: dict) -> str:
     if "id" in v and v["id"]:
         return v["id"]
     except Exception:
         return None
 def _append(old_bytes, row):
     s = io.StringIO()
         pid.change(_toggle_by_pid, inputs=pid, outputs=save_next)
     # -------- Page switch & first load --------
+    ANCHOR_IDX = 0       # the very first video in videos.json
+    ANCHOR_REPEATS = 5   # show the anchor 5 times
+    MIN_GAP = 1          # no consecutive anchors (adjacency forbidden)
+    def _build_order_least_first_with_anchor(total:int, anchor_idx:int, repeats:int, min_gap:int=1):
+        """
+        Build a presentation order of `total` indices into `V`.
+
+        - Reads results.csv (via `_load_eval_counts()`) to get the count per video_id
+        - Includes the anchor video `repeats` times, never in consecutive slots
+        - Fills the remaining slots with the least-evaluated videos, without duplicates
+
+        Args:
+            total: length of the returned sequence.
+            anchor_idx: index into `V` of the anchor video.
+            repeats: how many times the anchor appears; must be <= total.
+            min_gap: minimum number of non-anchor slots between two anchors.
+
+        Returns:
+            A list of `total` indices into `V`.
+
+        Raises:
+            AssertionError: if repeats > total, `V` is empty, or a final safety check fails.
+            ValueError: if there are fewer than `total - repeats` non-anchor videos.
+            RuntimeError: if an anchor cannot be placed while respecting `min_gap`.
+        """
+        assert repeats <= total
+        N = len(V)
+        assert N >= 1
+        # 0) id mapping
+        def vid_of(i): return _get_video_id(V[i])
+        # 1) Load the current cumulative counts
+        counts = _load_eval_counts()
+        # 2) Sort non-anchor candidates (no duplicates): ascending by count, ties in random order
+        anchor_vid = vid_of(anchor_idx)  # NOTE(review): not used anywhere below
+        candidates = [i for i in range(N) if i != anchor_idx]
+        # Shuffle first so that ties end up randomized (list.sort is stable)
+        random.shuffle(candidates)
+        candidates.sort(key=lambda i: counts.get(vid_of(i), 0))
+        others_needed = total - repeats
+        if len(candidates) < others_needed:
+            raise ValueError("Not enough unique non-anchor videos to fill the schedule without duplication.")
+        others = candidates[:others_needed]  # picked without duplicates
+        # 3) Use others as the base sequence (shuffled so the least-first ranking does not fix the order)
+        random.shuffle(others)
+        # 4) Place one anchor per segment (no consecutive anchors)
+        seq = [None] * total
+        segment = total // repeats if repeats > 0 else total
+        anchor_positions = []
+        for k in range(repeats):
+            lo = k * segment
+            hi = (k + 1) * segment if k < repeats - 1 else total  # last segment absorbs the remainder
+            cand = random.randrange(lo, hi)
+            def ok(pos):
+                return all(abs(pos - p) >= (min_gap + 1) for p in anchor_positions)  # far enough from every anchor placed so far
+            found = None
+            for d in range(0, max(1, segment)):  # search outward from cand, nearest offsets first
+                for sgn in (+1, -1):
+                    pos = cand + sgn * d
+                    if 0 <= pos < total and ok(pos):
+                        found = pos
+                        break
+                if found is not None:
+                    break
+            if found is None:
+                # Last resort: scan every position
+                for pos in range(total):
+                    if ok(pos):
+                        found = pos
+                        break
+            if found is None:
+                raise RuntimeError("Failed to place anchor without adjacency.")
+            anchor_positions.append(found)
+        for pos in anchor_positions:
+            seq[pos] = anchor_idx
+        # 5) Fill the empty slots with others
+        j = 0
+        for i in range(total):
+            if seq[i] is None:
+                seq[i] = others[j]
+                j += 1
+        # 6) Safety checks
+        assert sum(1 for x in seq if x == anchor_idx) == repeats
+        for i in range(1, total):  # NOTE(review): checks direct adjacency only, not the full min_gap
+            assert not (seq[i] == anchor_idx and seq[i-1] == anchor_idx), "Adjacent anchors found."
+        return seq
+    # def _start_and_load_first():
+    #     total = TOTAL_PER_PARTICIPANT
+    #     order = _build_order_with_anchor(
+    #         total=total,
+    #         anchor_idx=ANCHOR_IDX,
+    #         repeats=ANCHOR_REPEATS,
+    #         pool_size=len(V),
+    #         min_gap=1  # no adjacent anchors
+    #     )
+    #     first_idx = order[0]
+    #     v0 = V[first_idx]
+    #     url0 = v0["url"]
+    #     action0 = _extract_action(v0)
+    #     vid0 = _get_video_id(v0)             # ✅ original id is taken here
+    #     return (
+    #         gr.update(visible=False),         # page_intro off
+    #         gr.update(visible=True),          # page_eval on
+    #         url0,                             # video
+    #         action0,                          # action_tb (display only)
+    #         5.0,                              # initial score value
+    #         gr.update(visible=False, value=""),
+    #         0,                                # done_state
+    #         _progress_html(0, TOTAL_PER_PARTICIPANT),
+    #         order,                            # order_state
+    #         1,                                # ptr_state
+    #         vid0                              # ✅ cur_video_id
+    #     )
     def _start_and_load_first():  # build a fresh order and return the initial UI values for its first video
         total = TOTAL_PER_PARTICIPANT
+        order = _build_order_least_first_with_anchor(
             total=total,
             anchor_idx=ANCHOR_IDX,
             repeats=ANCHOR_REPEATS,
+            min_gap=MIN_GAP
         )
         first_idx = order[0]
         v0 = V[first_idx]
         return (
+            gr.update(visible=False),  # page_intro off
+            gr.update(visible=True),  # page_eval on
+            v0["url"],  # video
+            _extract_action(v0),  # action_tb (display only)
+            5.0,  # initial score value
             gr.update(visible=False, value=""),
+            0,  # done_state
             _progress_html(0, TOTAL_PER_PARTICIPANT),
+            order,  # order_state
+            1,  # ptr_state: order[0] is being shown, so the next index is 1
+            _get_video_id(v0)  # cur_video_id
         )