vidhimudaliar committed on
Commit
adec429
·
verified ·
1 Parent(s): ad5a303

Delete combine_raters.py

Browse files
Files changed (1) hide show
  1. combine_raters.py +0 -91
combine_raters.py DELETED
@@ -1,91 +0,0 @@
1
- import pandas as pd
2
- import json
3
- import re
4
-
5
# -----------------------------------------------------
# CONFIG: which annotation CSV belongs to which rater
# -----------------------------------------------------
# Maps rater name -> path of that rater's CSV export. Insertion order is the
# order in which the raters are processed.
RATER_FILES = {
    rater: f"data/{rater}_50.csv"
    for rater in ("Vidhi", "Grant", "Amanuel")
}

# Path of the combined CSV written by this script.
OUTPUT_CSV = "combined_rater_with_embed.csv"
15
-
16
-
17
def extract_video_id(filename):
    """Return the leading run of digits in *filename*, e.g. '0111.mp4' → '0111'.

    Args:
        filename: Video file name; expected to be a string.

    Returns:
        The digits at the very start of ``filename`` as a string (leading
        zeros preserved), or ``None`` when the name does not start with a
        digit or is not a string at all.
    """
    # A non-string value (None, or the float NaN pandas produces for an empty
    # cell) would make re.match raise TypeError; treat it as "no ID" instead.
    if not isinstance(filename, str):
        return None

    m = re.match(r"(\d+)", filename)
    return m.group(1) if m else None
21
-
22
-
23
def parse_video_labels(field):
    """Parse a JSON-encoded ``videoLabels`` field into flat label spans.

    The decoded value is expected to be a list of objects, each carrying a
    list of labels under ``"timelinelabels"`` (or ``"timelineLabels"``) and a
    list of range objects under ``"ranges"``. Only the first label of each
    entry is used; it is repeated for every range of that entry.

    Args:
        field: The raw field value, normally a JSON string.

    Returns:
        A list of ``{"label", "start", "end"}`` dicts, one per range.
        ``start``/``end`` are ``None`` when a range lacks that key. An empty
        list is returned when the field cannot be decoded, is not a list, or
        contains no entry with both labels and ranges.
    """
    try:
        items = json.loads(field)
    except (TypeError, ValueError):
        # TypeError: the value is not str/bytes (e.g. None or a float NaN).
        # ValueError: malformed JSON (json.JSONDecodeError is a subclass).
        return []

    # Valid JSON that is not a list (object, number, string, null) carries no
    # entries to walk.
    if not isinstance(items, list):
        return []

    out = []
    for entry in items:
        if not isinstance(entry, dict):
            continue

        labels = entry.get("timelinelabels") or entry.get("timelineLabels") or []
        ranges = entry.get("ranges") or []
        if not labels or not ranges:
            continue

        label = labels[0]
        out.extend(
            {"label": label, "start": r.get("start"), "end": r.get("end")}
            for r in ranges
            if isinstance(r, dict)
        )
    return out
46
-
47
-
48
# Flatten every rater's annotations into one row per labelled time range.
records = []

# Stand-in span so a video without any usable label still yields one row
# (with label/start/end left empty).
unlabelled = [{"label": None, "start": None, "end": None}]

for rater_name, csv_path in RATER_FILES.items():
    frame = pd.read_csv(csv_path)

    for _, row in frame.iterrows():
        filename = row["filename"]
        # Fields shared by every row emitted for this video.
        shared = {
            "filename": filename,
            "video_id": extract_video_id(filename),
            "video_path": f"videos/{filename}",  # local video file path
        }

        spans = parse_video_labels(row["videoLabels"]) or unlabelled
        records.extend(
            {
                **shared,
                "label": span["label"],
                "rater": rater_name,
                "start": span["start"],
                "end": span["end"],
            }
            for span in spans
        )

# Explicit column order so the web app can rely on it; reindex also creates
# any column that is missing from the records (filled with NaN).
columns = ["filename", "video_id", "video_path", "label", "rater", "start", "end"]
combined = pd.DataFrame(records).reindex(columns=columns)
combined.to_csv(OUTPUT_CSV, index=False)

print(f"✅ CSV created → {OUTPUT_CSV} (columns: {', '.join(columns)})")