# traj-eval / app.py
# Author: KaushikSid — commit 2577a76: "Add debug logging to track issue_type saving"
import gradio as gr
import pandas as pd
import os
from pathlib import Path
from datasets import load_dataset, get_dataset_config_names
# HF Dataset sync — best-effort: if the helper module is missing or its
# initialization fails, the app falls back to local-CSV-only persistence.
dataset_sync_enabled = False
try:
    from hf_dataset_sync import init_dataset_sync
    dataset_sync_enabled = init_dataset_sync()
except Exception as e:
    print(f"⚠️ Dataset sync disabled: {e}")
# Predefined datasets (configs fetched dynamically)
# The UI dropdown also allows custom values, so any HF dataset repo id works.
PREDEFINED_DATASETS = [
    "abraranwar/agibotworld_alpha_rfm",
    "abraranwar/libero_rfm",
    "abraranwar/usc_koch_rewind_rfm",
    "aliangdw/metaworld",
    "anqil/rh20t_rfm",
    "anqil/rh20t_subset_rfm",
    "jesbu1/auto_eval_rfm",
    "jesbu1/egodex_rfm",
    "jesbu1/epic_rfm",
    "jesbu1/fino_net_rfm",
    "jesbu1/failsafe_rfm",
    "jesbu1/hand_paired_rfm",
    "jesbu1/galaxea_rfm",
    "jesbu1/h2r_rfm",
    "jesbu1/humanoid_everyday_rfm",
    "jesbu1/molmoact_rfm",
    "jesbu1/motif_rfm",
    "jesbu1/oxe_rfm",
    "jesbu1/oxe_rfm_eval",
    "jesbu1/ph2d_rfm",
    "jesbu1/racer_rfm",
    "jesbu1/roboarena_0825_rfm",
    "jesbu1/soar_rfm",
    "ykorkmaz/libero_failure_rfm",
    "aliangdw/usc_xarm_policy_ranking",
    "aliangdw/usc_franka_policy_ranking",
    "aliangdw/utd_so101_policy_ranking",
    "aliangdw/utd_so101_human",
]
# Global state
current_trajectories = []  # samples loaded for the active labeling session
current_idx = 0  # position within current_trajectories
# One row per saved label; this schema mirrors the CSV / shared-dataset file.
evaluations_df = pd.DataFrame(columns=[
    "dataset_repo", "config_name", "trajectory_id", "task",
    "decision", "issue_type", "notes", "timestamp"
])
def load_evaluations():
    """Refresh the in-memory evaluations table.

    Prefers the shared HF dataset (when sync is enabled), falling back to the
    local CSV. Mutates the module-level ``evaluations_df`` in place.
    """
    global evaluations_df

    def _read_clean(path):
        # keep_default_na=False + na_values=[''] → only truly empty cells
        # become NaN; strings like "None" are kept as text, then scrubbed.
        frame = pd.read_csv(path, keep_default_na=False, na_values=[''])
        # Clean up any legacy "nan"-like strings left by older exports.
        return frame.replace(['nan', 'NaN', 'None'], '')

    if dataset_sync_enabled:
        try:
            from huggingface_hub import hf_hub_download
            csv_file = hf_hub_download(
                os.getenv("EVAL_DATASET_REPO"),
                "traj_evaluations.csv",
                repo_type="dataset",
                token=os.getenv("HF_TOKEN"),
                force_download=True  # Always get fresh data
            )
            evaluations_df = _read_clean(csv_file)
            # Debug: check issue_type values
            unique_issues = evaluations_df['issue_type'].unique()
            print(f"📊 Loaded {len(evaluations_df)} evaluations from shared dataset")
            print(f"🔍 Unique issue_type values: {unique_issues}")
            return
        except Exception as e:
            print(f"⚠️ Could not load from shared dataset: {e}")

    # Fallback to local CSV (persistent data/ dir when running on a HF Space).
    csv_path = Path("data/evaluations.csv") if os.getenv("SPACE_ID") else Path("evaluations.csv")
    if csv_path.exists():
        evaluations_df = _read_clean(csv_path)
        print(f"📊 Loaded {len(evaluations_df)} evaluations from local CSV")
def save_evaluations():
    """Persist the current evaluations to CSV, writing NaN as empty strings."""
    # On a HF Space, write to the persistent data/ directory.
    target = Path("data/evaluations.csv") if os.getenv("SPACE_ID") else Path("evaluations.csv")
    target.parent.mkdir(parents=True, exist_ok=True)
    evaluations_df.fillna("").to_csv(target, index=False)
def get_stats():
    """Return a one-line summary of label counts across all evaluations."""
    total = len(evaluations_df)
    if not total:
        return "No labels yet"
    counts = evaluations_df['decision'].value_counts()
    keeps = int(counts.get('keep', 0))
    removes = int(counts.get('remove', 0))
    reviews = int(counts.get('review', 0))
    return f"Total: {total} | ✅ {keeps} | ❌ {removes} | 🔍 {reviews}"
def fetch_configs(dataset_repo):
    """Fetch configs dynamically (fast API call).

    Returns (config dropdown update, analysis text, start idx, end idx);
    the last three reset the range inputs to their defaults.
    """
    defaults = ("", 0, 20)
    if not dataset_repo:
        return (gr.update(choices=[], value=""),) + defaults
    try:
        configs = get_dataset_config_names(dataset_repo)
    except Exception as e:
        print(f"Config fetch error: {e}")
        configs = None
    if configs:
        return (gr.update(choices=configs, value=configs[0]),) + defaults
    # No named configs (or the lookup failed): offer a "default" placeholder.
    return (gr.update(choices=["default"], value="default"),) + defaults
def analyze_dataset_progress(dataset_repo, config_name):
    """Analyze labeling progress for selected dataset and suggest range.

    Returns a tuple of (markdown summary, suggested start index, suggested
    end index) used to pre-fill the range inputs in the setup view.
    """
    if not dataset_repo:
        return "", 0, 20
    # "default" is a UI placeholder meaning "no named config".
    config = config_name if config_name and config_name != "default" else None
    # Filter evaluations for this dataset
    # NOTE(review): rows saved without a config store '' in config_name —
    # the equality below relies on that; verify against save_label().
    dataset_evals = evaluations_df[
        (evaluations_df['dataset_repo'] == dataset_repo) &
        (evaluations_df['config_name'] == (config if config else ''))
    ]
    # Try to get dataset size
    try:
        # First try to get from dataset info (fast)
        from datasets import get_dataset_infos
        try:
            infos = get_dataset_infos(dataset_repo)
            # Fall back to the first listed config when none was selected.
            config_key = config if config else list(infos.keys())[0]
            if config_key in infos and infos[config_key].splits.get('train'):
                dataset_size = infos[config_key].splits['train'].num_examples
            else:
                raise Exception("No info available")
        except:  # any failure (bad key, no splits) → count by streaming
            # Fallback: iterate through dataset
            ds = load_dataset(dataset_repo, config, split="train", streaming=True)
            dataset_size = 0
            for i, _ in enumerate(ds):
                dataset_size = i + 1
                if i >= 9999:  # Cap at 10,000 for performance
                    dataset_size = f"~{dataset_size}"
                    break
    except Exception as e:
        dataset_size = "Unknown"
    if len(dataset_evals) == 0:
        return f"📊 **No trajectories labeled yet for this dataset**\n\n**Dataset size:** {dataset_size} trajectories", 0, 20
    # Get labeled trajectory IDs
    labeled_ids = set(dataset_evals['trajectory_id'].unique())
    # Count by decision
    keeps = len(dataset_evals[dataset_evals['decision'] == 'keep'])
    removes = len(dataset_evals[dataset_evals['decision'] == 'remove'])
    reviews = len(dataset_evals[dataset_evals['decision'] == 'review'])
    # Try to get total dataset size first (fast); None means "unknown so far"
    try:
        from datasets import get_dataset_infos
        try:
            infos = get_dataset_infos(dataset_repo)
            config_key = config if config else list(infos.keys())[0]
            if config_key in infos and infos[config_key].splits.get('train'):
                dataset_size = infos[config_key].splits['train'].num_examples
            else:
                dataset_size = None
        except:
            dataset_size = None
    except:
        dataset_size = None
    # Try to estimate gaps and count trajectories
    try:
        ds = load_dataset(dataset_repo, config, split="train", streaming=True)
        # Sample to find gaps in labeled trajectories
        checked_count = 0
        unlabeled_ranges = []  # list of inclusive (start, end) index pairs
        current_unlabeled_start = None  # open gap start, None when inside a labeled run
        for i, sample in enumerate(ds):
            traj_id = sample.get("id")
            if traj_id not in labeled_ids:
                if current_unlabeled_start is None:
                    current_unlabeled_start = i
            else:
                if current_unlabeled_start is not None:
                    unlabeled_ranges.append((current_unlabeled_start, i-1))
                    current_unlabeled_start = None
            checked_count = i + 1
            # NOTE(review): this only fires on the first iteration (i == 0), so
            # when the info API gave no size the reported size stays 1 even if
            # the stream ends later — looks like it was meant to update every
            # iteration; confirm intent before relying on the displayed size.
            if dataset_size is None:
                dataset_size = i + 1
            if checked_count >= 1000:  # Check first 1000 for gaps
                if dataset_size is None or dataset_size == checked_count:
                    dataset_size = f"~{checked_count}"
                break
        # Add final range if still unlabeled
        if current_unlabeled_start is not None:
            unlabeled_ranges.append((current_unlabeled_start, checked_count-1))
        # Find best range to suggest
        if unlabeled_ranges:
            # Suggest first significant gap (at least 10 trajectories)
            for start, end in unlabeled_ranges:
                if end - start >= 10:
                    suggested_start = start
                    suggested_end = min(start+20, end)
                    break
            else:
                # No big gaps, suggest first unlabeled range
                start, end = unlabeled_ranges[0]
                suggested_start = start
                suggested_end = min(start+20, end)
        else:
            # All checked trajectories are labeled, suggest next batch
            suggested_start = checked_count
            suggested_end = checked_count+20
        analysis = f"""📊 **Dataset Progress: {dataset_repo}** {'(' + config + ')' if config else ''}
**Labeled:** {len(labeled_ids)} trajectories
- ✅ Keep: {keeps} ({keeps/len(labeled_ids)*100:.1f}%)
- ❌ Remove: {removes} ({removes/len(labeled_ids)*100:.1f}%)
- 🔍 Review: {reviews} ({reviews/len(labeled_ids)*100:.1f}%)
**Dataset size:** {dataset_size} trajectories (checked: {checked_count})
"""
        if unlabeled_ranges[:3]:  # Show first 3 gaps
            gaps = ", ".join([f"{s}-{e}" for s, e in unlabeled_ranges[:3]])
            analysis += f"\n🎯 **Unlabeled gaps:** {gaps}"
        return analysis, suggested_start, suggested_end
    except Exception as e:
        # If can't analyze structure, suggest continuing after last labeled
        suggested_start = len(labeled_ids)
        suggested_end = len(labeled_ids) + 20
        # Still try to get dataset size from info API
        try:
            from datasets import get_dataset_infos
            infos = get_dataset_infos(dataset_repo)
            config_key = config if config else list(infos.keys())[0]
            if config_key in infos and infos[config_key].splits.get('train'):
                ds_size = infos[config_key].splits['train'].num_examples
                size_info = f"**Dataset size:** {ds_size} trajectories\n\n"
            else:
                size_info = ""
        except:
            size_info = ""
        return f"""📊 **Dataset Progress: {dataset_repo}**
**Labeled:** {len(labeled_ids)} trajectories
- ✅ Keep: {keeps} ({keeps/len(labeled_ids)*100:.1f}%)
- ❌ Remove: {removes} ({removes/len(labeled_ids)*100:.1f}%)
- 🔍 Review: {reviews} ({reviews/len(labeled_ids)*100:.1f}%)
{size_info}⚠️ Could not analyze dataset structure: {str(e)[:50]}
""", suggested_start, suggested_end
def get_video_url(dataset_repo, video_path):
    """Get direct HuggingFace URL for video (no download needed).

    Args:
        dataset_repo: HF dataset repo id, e.g. "user/name".
        video_path: Path of the video file inside the repo.

    Returns:
        A ``resolve/main`` URL suitable for streaming the video directly.
    """
    from urllib.parse import quote
    # Percent-encode the in-repo path so spaces/unicode don't produce an
    # invalid URL; '/' is kept (quote's default safe set) so subdirectories
    # still resolve. Plain ASCII paths are returned unchanged.
    return f"https://huggingface.co/datasets/{dataset_repo}/resolve/main/{quote(video_path)}"
def load_trajectories(dataset_repo, config_name, start_idx, end_idx, traj_id):
    """Load trajectories by range or specific ID.

    Streams the dataset and fills the module-level ``current_trajectories``.
    Returns the 11-tuple of component updates expected by the load_btn
    handler (setup/labeling visibility, video, task, controls, status, table).
    """
    global current_trajectories, current_idx
    # Refresh evaluations from shared dataset to get latest labels
    load_evaluations()
    if not dataset_repo:
        return (gr.update(visible=True), gr.update(visible=False),
                None, "Select a dataset",
                gr.update(value=None), gr.update(visible=False), gr.update(value=None), gr.update(value=""), "",
                evaluations_df.tail(10), "⚠️ Select a dataset")
    config = config_name if config_name and config_name != "default" else None
    start = int(start_idx) if start_idx else 0
    end = int(end_idx) if end_idx else start + 20
    # A non-empty trajectory id switches to single-trajectory lookup mode.
    target_id = traj_id.strip() if traj_id else None
    try:
        ds = load_dataset(dataset_repo, config, split="train", streaming=True)
        current_trajectories = []
        for i, sample in enumerate(ds):
            # If looking for specific ID
            if target_id:
                if sample.get("id") == target_id:
                    video_path = sample.get("frames")
                    if video_path:
                        sample["video_url"] = get_video_url(dataset_repo, video_path)
                        sample["dataset_repo"] = dataset_repo
                        sample["config_name"] = config
                        current_trajectories.append(sample)
                    break
                continue
            # Range-based loading
            if i < start:
                continue
            # NOTE(review): the end index is inclusive (break only when
            # i > end), so a 0..20 range loads 21 trajectories — confirm
            # that matches what the UI labels promise.
            if i > end:
                break
            video_path = sample.get("frames")
            if video_path:
                sample["video_url"] = get_video_url(dataset_repo, video_path)
                sample["dataset_repo"] = dataset_repo
                sample["config_name"] = config
                current_trajectories.append(sample)
        current_idx = 0
        if not current_trajectories:
            return (gr.update(visible=True), gr.update(visible=False),
                    None, "❌ No trajectories found",
                    gr.update(value=None), gr.update(visible=False), gr.update(value=None), gr.update(value=""), "",
                    evaluations_df.tail(10), "❌ No trajectories found")
        return show_labeling_view()
    except Exception as e:
        return (gr.update(visible=True), gr.update(visible=False),
                None, f"❌ {str(e)[:50]}",
                gr.update(value=None), gr.update(visible=False), gr.update(value=None), gr.update(value=""), "",
                evaluations_df.tail(10), f"❌ Error: {str(e)[:50]}")
def get_trajectory_metadata(traj):
    """Summarize a trajectory's metadata as a ' | '-joined string of badges."""
    badges = []

    # Outcome: prefer an explicit quality label, then success flags.
    if 'quality_label' in traj and traj['quality_label']:
        raw_label = traj['quality_label']
        lowered = str(raw_label).lower()
        if 'success' in lowered:
            badges.append("✅ Success")
        elif 'fail' in lowered:
            badges.append("❌ Failure")
        elif 'suboptimal' in lowered:
            badges.append("⚠️ Suboptimal")
        else:
            badges.append(f"Quality: {raw_label}")
    elif 'success' in traj:
        outcome = traj['success']
        if outcome == 1:  # covers True, 1, 1.0
            badges.append("✅ Success")
        elif outcome == 0:  # covers False, 0, 0.0
            badges.append("❌ Failure")
        else:
            badges.append(f"Status: {outcome}")
    elif 'is_success' in traj:
        badges.append("✅ Success" if traj['is_success'] else "❌ Failure")

    # Suboptimal flag (kept distinct from quality_label); avoid duplicates.
    if (traj.get('suboptimal') or traj.get('is_suboptimal')) and "⚠️ Suboptimal" not in badges:
        badges.append("⚠️ Suboptimal")

    # Source: robot vs human.
    if 'is_robot' in traj:
        badges.append("🤖 Robot" if traj['is_robot'] else "👤 Human")
    elif 'source' in traj:
        src = str(traj['source']).lower()
        if 'human' in src:
            badges.append("👤 Human")
        elif 'robot' in src or 'policy' in src:
            badges.append("🤖 Robot")
        else:
            badges.append(f"Source: {traj['source']}")

    return " | ".join(badges) if badges else ""
def show_labeling_view():
    """Switch from setup to the labeling view, rendering the first trajectory."""
    traj = current_trajectories[current_idx]
    traj_id = traj.get("id", "Unknown")
    task = traj.get("task", "No task")
    progress = f"Progress: {current_idx + 1}/{len(current_trajectories)} | ID: {traj_id[:8]}..."

    # Pre-fill the controls from the most recent existing label, if any.
    prev_decision, prev_issue, prev_notes = None, None, ""
    matching = evaluations_df[evaluations_df['trajectory_id'] == traj_id]
    if len(matching):
        prev_row = matching.iloc[-1]
        prev_decision = prev_row['decision']
        # Only restore the issue if it is still one of the radio choices.
        valid_choices = ['too_short', 'too_long', 'wrong_description', 'task_already_completed', 'mislabeled_success', 'mislabeled_failure', 'mislabeled_suboptimal', 'other']
        issue_val = prev_row['issue_type']
        if issue_val and str(issue_val).strip() and str(issue_val) in valid_choices:
            prev_issue = issue_val
        prev_notes = prev_row['notes'] if pd.notna(prev_row['notes']) else ""
        progress += f" (prev: {prev_decision})"

    # Combine progress, metadata, and task into the task panel text.
    metadata = get_trajectory_metadata(traj)
    header = f"{progress}\n{metadata}" if metadata else progress
    task_with_progress = f"{header}\n\n{task}"

    return (
        gr.update(visible=False),                        # hide setup view
        gr.update(visible=True),                         # show labeling view
        traj.get("video_url"),
        task_with_progress,
        gr.update(value=prev_decision),                  # decision_radio
        gr.update(visible=(prev_decision == "review")),  # review_options visibility
        gr.update(value=prev_issue),                     # review_reason
        gr.update(value=prev_notes if prev_notes else ""),  # notes_input
        "",                                              # save_status (empty)
        evaluations_df.tail(10),
        f"✅ Loaded {len(current_trajectories)} trajectories"
    )
def show_current():
    """Render the currently-selected trajectory (video, task, pre-filled controls)."""
    if not current_trajectories or current_idx >= len(current_trajectories):
        return (
            None,
            "No data",
            gr.update(value=None),
            gr.update(visible=False),
            gr.update(value=None),
            gr.update(value="")
        )

    traj = current_trajectories[current_idx]
    traj_id = traj.get("id", "Unknown")
    task = traj.get("task", "No task")
    progress = f"Progress: {current_idx + 1}/{len(current_trajectories)} | ID: {traj_id[:8]}..."

    # Pre-fill the controls from the most recent existing label, if any.
    prev_decision, prev_issue, prev_notes = None, None, ""
    matching = evaluations_df[evaluations_df['trajectory_id'] == traj_id]
    if len(matching):
        prev_row = matching.iloc[-1]
        prev_decision = prev_row['decision']
        # Only restore the issue if it is still one of the radio choices.
        valid_choices = ['too_short', 'too_long', 'wrong_description', 'task_already_completed', 'mislabeled_success', 'mislabeled_failure', 'mislabeled_suboptimal', 'other']
        issue_val = prev_row['issue_type']
        if issue_val and str(issue_val).strip() and str(issue_val) in valid_choices:
            prev_issue = issue_val
        prev_notes = prev_row['notes'] if pd.notna(prev_row['notes']) else ""
        progress += f" (prev: {prev_decision})"

    # Combine progress, metadata, and task into the task panel text.
    metadata = get_trajectory_metadata(traj)
    header = f"{progress}\n{metadata}" if metadata else progress

    return (
        traj.get("video_url"),
        f"{header}\n\n{task}",
        gr.update(value=prev_decision),                  # decision_radio
        gr.update(visible=(prev_decision == "review")),  # review_options
        gr.update(value=prev_issue),                     # review_reason
        gr.update(value=prev_notes if prev_notes else "")  # notes_input
    )
def navigate(direction):
    """Step to the next/previous trajectory (clamped) and re-render the view."""
    global current_idx
    last = len(current_trajectories) - 1
    current_idx = min(current_idx + 1, last) if direction == "next" else max(current_idx - 1, 0)
    return show_current()
def save_label(decision, issue_type="", notes=""):
    """Save label and advance. Updates existing if trajectory already labeled.

    Persists to CSV, mirrors the row to the shared HF dataset when sync is
    enabled, then advances to the next trajectory and returns the 8-tuple of
    component updates the save button expects.
    """
    global evaluations_df, current_idx
    if not current_trajectories or current_idx >= len(current_trajectories):
        # Returns: video, task, decision, review_vis, review_reason, notes, status, table
        return show_current() + ("", evaluations_df.tail(10))
    traj = current_trajectories[current_idx]
    traj_id = traj.get("id", "")
    # Debug logging (added to track issue_type being dropped on save)
    print(f"💾 save_label called:")
    print(f" decision: {decision}")
    print(f" issue_type: '{issue_type}' (type: {type(issue_type)}, len: {len(str(issue_type))})")
    print(f" notes: {notes}")
    print(f" traj_id: {traj_id[:20]}...")
    row_data = {
        "dataset_repo": traj.get("dataset_repo", ""),
        "config_name": traj.get("config_name", ""),
        "trajectory_id": traj_id,
        "task": traj.get("task", ""),
        "decision": decision,
        "issue_type": issue_type,
        "notes": notes,
        "timestamp": pd.Timestamp.now().isoformat()
    }
    print(f" 📋 row_data issue_type: '{row_data['issue_type']}'")
    # Upsert: update if exists
    # NOTE(review): the match keys on trajectory_id alone — if two datasets
    # ever share an id, a label saved in one overwrites the other's row.
    # Verify ids are globally unique, or match on (dataset_repo, config,
    # trajectory_id) as analyze_dataset_progress does for filtering.
    existing_mask = evaluations_df['trajectory_id'] == traj_id
    is_update = existing_mask.any()
    if is_update:
        # Update the most recent matching row in place, column by column.
        idx = evaluations_df[existing_mask].index[-1]
        print(f" 🔄 Updating existing row at index {idx}")
        for col, val in row_data.items():
            evaluations_df.at[idx, col] = val
        print(f" ✅ After update, issue_type = '{evaluations_df.at[idx, 'issue_type']}'")
        status_msg = f"✅ Updated: {decision}"
    else:
        print(f" ➕ Adding new row")
        evaluations_df = pd.concat([evaluations_df, pd.DataFrame([row_data])], ignore_index=True)
        new_idx = evaluations_df.index[-1]
        print(f" ✅ After add, issue_type = '{evaluations_df.at[new_idx, 'issue_type']}'")
        status_msg = f"✅ Added: {decision}"
    save_evaluations()
    print(f" 💾 Saved to CSV")
    # Mirror the new/updated row to the shared HF dataset.
    if dataset_sync_enabled:
        from hf_dataset_sync import append_to_dataset
        append_to_dataset(row_data)
    # Advance, clamped at the last loaded trajectory.
    current_idx = min(current_idx + 1, len(current_trajectories) - 1)
    # Returns: video, task, decision, review_vis, review_reason, notes, status, table
    return show_current() + (status_msg, evaluations_df.tail(10))
def save_with_decision(decision, review_reason, notes):
    """Resolve the issue_type from the UI state, then delegate to save_label.

    The issue is forwarded only when the decision is "review" AND the chosen
    reason is one of the known radio choices; otherwise it is saved as "".
    """
    valid_choices = ('too_short', 'too_long', 'wrong_description', 'task_already_completed', 'mislabeled_success', 'mislabeled_failure', 'mislabeled_suboptimal', 'other')
    # Debug logging
    print(f"🔍 save_with_decision called:")
    print(f" decision: {decision} (type: {type(decision)})")
    print(f" review_reason: {review_reason} (type: {type(review_reason)})")
    print(f" notes: {notes}")
    issue = ""
    reason_is_valid = decision == "review" and review_reason and str(review_reason) in valid_choices
    if reason_is_valid:
        issue = str(review_reason)
        print(f" ✅ Setting issue_type to: {issue}")
    else:
        print(f" ❌ Issue NOT set. Checks:")
        print(f" decision == 'review': {decision == 'review'}")
        print(f" review_reason truthy: {bool(review_reason)}")
        if review_reason:
            print(f" review_reason in valid_choices: {str(review_reason) in valid_choices}")
    return save_label(decision, issue, notes)
def back_to_setup():
    """Return to the setup view: show setup_view, hide labeling_view."""
    return gr.update(visible=True), gr.update(visible=False)
def update_review_visibility(decision):
    """Show the review-reason options only when the decision is "review"."""
    return gr.update(visible=(decision == "review"))
# Load existing labels at import time so the UI starts with current data.
load_evaluations()
# CSS for the Gradio UI (status colors, speed-button highlight, layout tweaks)
css = """
.container { max-width: 1000px; margin: 0 auto; }
.decision-btn { min-height: 50px !important; font-size: 16px !important; }
.task-box { background: #f8f9fa; padding: 12px; border-radius: 6px; border-left: 4px solid #667eea; }
.thin-back-btn button {
min-height: 35px !important;
font-size: 13px !important;
margin-bottom: 8px !important;
}
#dataset_analysis {
background: #f0f9ff;
padding: 16px;
border-radius: 8px;
border-left: 4px solid #3b82f6;
margin: 12px 0;
}
#save_status, #load_status {
font-weight: 600;
padding: 8px;
border-radius: 6px;
text-align: center;
margin-top: 8px;
}
#save_status {
color: #10b981;
background: #d1fae5;
}
#load_status {
color: #667eea;
background: #e0e7ff;
}
#speed_1x, #speed_2x, #speed_4x {
border: 2px solid #e5e7eb !important;
transition: all 0.2s;
}
#speed_1x.speed-active, #speed_2x.speed-active, #speed_4x.speed-active {
background: #667eea !important;
color: white !important;
border-color: #667eea !important;
}
"""
with gr.Blocks(title="Trajectory Reviewer", css=css) as demo:
    gr.Markdown("# 🎯 Trajectory Reviewer")
    # Stage 1: Setup — pick a dataset/config plus an index range or a specific id.
    with gr.Column(visible=True) as setup_view:
        gr.Markdown("### Dataset")
        with gr.Row():
            dataset_dropdown = gr.Dropdown(
                choices=PREDEFINED_DATASETS,
                value="jesbu1/epic_rfm",
                label="Dataset",
                allow_custom_value=True,
                scale=3
            )
            refresh_btn = gr.Button("🔄", scale=0)
        config_dropdown = gr.Dropdown(
            choices=[],
            value="",
            label="Config",
            allow_custom_value=True
        )
        dataset_analysis = gr.Markdown("", elem_id="dataset_analysis")
        gr.Markdown("### Selection")
        with gr.Row():
            with gr.Column():
                start_idx = gr.Number(label="Start Index", value=0, precision=0)
                end_idx = gr.Number(label="End Index", value=20, precision=0)
            with gr.Column():
                traj_id_input = gr.Textbox(label="Or Specific ID", placeholder="Leave empty for range")
        load_btn = gr.Button("🚀 Load & Start", variant="primary", size="lg")
        load_status = gr.Markdown("", elem_id="load_status")
    # Stage 2: Labeling — video on the left, decision controls on the right.
    with gr.Column(visible=False) as labeling_view:
        with gr.Row():
            with gr.Column(scale=3):
                back_btn = gr.Button("← Back to Setup", variant="secondary", size="sm", elem_classes=["thin-back-btn"])
                video_player = gr.Video(label="Video", elem_id="traj_video", autoplay=True)
                # Speed controls
                gr.Markdown("**Playback Speed**")
                with gr.Row():
                    speed_1x = gr.Button("1x", size="sm", elem_id="speed_1x")
                    speed_2x = gr.Button("2x", size="sm", elem_id="speed_2x")
                    speed_4x = gr.Button("4x", size="sm", elem_id="speed_4x")
            with gr.Column(scale=2):
                task_display = gr.Textbox(label="📋 Task", interactive=False, lines=3, elem_classes=["task-box"])
                gr.Markdown("### Decision")
                decision_radio = gr.Radio(
                    choices=["keep", "remove", "review"],
                    label="Select",
                    value=None
                )
                # Extra options revealed only when the decision is "review".
                with gr.Column(visible=False) as review_options:
                    review_reason = gr.Radio(
                        choices=[
                            "too_short",
                            "too_long",
                            "wrong_description",
                            "task_already_completed",
                            "mislabeled_success",
                            "mislabeled_failure",
                            "mislabeled_suboptimal",
                            "other"
                        ],
                        label="Review Reason",
                        value=None
                    )
                notes_input = gr.Textbox(label="Notes", placeholder="Optional...", lines=2)
                save_btn = gr.Button("💾 Save & Next", variant="primary", size="lg", elem_classes=["decision-btn"])
                save_status = gr.Markdown("", elem_id="save_status")
                with gr.Row():
                    prev_btn = gr.Button("← Prev", size="sm")
                    next_btn = gr.Button("Next →", size="sm")
        gr.Markdown("### Recent Labels")
        evals_table = gr.Dataframe(
            value=evaluations_df.tail(10),
            max_height=150
        )
    # Speed control JS
    def set_speed_js(rate, btn_id):
        """Return a JS snippet that sets the video playbackRate to *rate* and
        highlights the speed button with css selector *btn_id*."""
        return f"""
() => {{
const setSpeed = () => {{
const video = document.querySelector('#traj_video video');
if (video) {{
video.playbackRate = {rate};
// Highlight active button
['#speed_1x', '#speed_2x', '#speed_4x'].forEach(id => {{
const btn = document.querySelector(id);
if (btn) btn.classList.remove('speed-active');
}});
document.querySelector('{btn_id}')?.classList.add('speed-active');
}}
}};
setSpeed();
// Also set on video load events
setTimeout(setSpeed, 100);
setTimeout(setSpeed, 500);
}}
"""
    speed_1x.click(None, None, None, js=set_speed_js(1.0, '#speed_1x'))
    speed_2x.click(None, None, None, js=set_speed_js(2.0, '#speed_2x'))
    speed_4x.click(None, None, None, js=set_speed_js(4.0, '#speed_4x'))
    # Set 2x on video load
    video_player.change(
        None, None, None,
        js="() => { setTimeout(() => { const v = document.querySelector('#traj_video video'); if (v) v.playbackRate = 2.0; }, 500); }"
    )
    # Events
    dataset_dropdown.change(fetch_configs, [dataset_dropdown], [config_dropdown, dataset_analysis, start_idx, end_idx])
    refresh_btn.click(fetch_configs, [dataset_dropdown], [config_dropdown, dataset_analysis, start_idx, end_idx])
    config_dropdown.change(analyze_dataset_progress, [dataset_dropdown, config_dropdown], [dataset_analysis, start_idx, end_idx])
    # Show a loading message first, then stream in the trajectories.
    load_btn.click(
        lambda: "⏳ Loading trajectories...",
        None,
        load_status
    ).then(
        load_trajectories,
        [dataset_dropdown, config_dropdown, start_idx, end_idx, traj_id_input],
        [setup_view, labeling_view, video_player, task_display,
         decision_radio, review_options, review_reason, notes_input, save_status,
         evals_table, load_status]
    )
    back_btn.click(back_to_setup, outputs=[setup_view, labeling_view])
    decision_radio.change(update_review_visibility, [decision_radio], [review_options])
    # Save, then clear the status text client-side after 3 seconds.
    save_btn.click(
        save_with_decision,
        [decision_radio, review_reason, notes_input],
        [video_player, task_display, decision_radio, review_options,
         review_reason, notes_input, save_status, evals_table]
    ).then(
        None, None, save_status,
        js="() => { setTimeout(() => document.querySelector('#save_status').textContent = '', 3000); }"
    )
    prev_btn.click(
        lambda: navigate("prev"),
        outputs=[video_player, task_display, decision_radio,
                 review_options, review_reason, notes_input]
    )
    next_btn.click(
        lambda: navigate("next"),
        outputs=[video_player, task_display, decision_radio,
                 review_options, review_reason, notes_input]
    )
    # Load configs and analysis on startup
    demo.load(fetch_configs, [dataset_dropdown], [config_dropdown, dataset_analysis, start_idx, end_idx])
if __name__ == "__main__":
    demo.launch()