Spaces:

robometer
/

rewardeval_ui

Running

App Files Files Community

Anthony Liang committed on Mar 2

Commit

88e2e89

1 Parent(s): 468a132

updates

Browse files

Files changed (10) hide show

README.md +3 -3
app.py +16 -25
eval_viz_utils.py +1 -1
requirements.txt +2 -2
samplers/README.md +0 -182
samplers/__init__.py +9 -13
samplers/base.py +37 -8
samplers/pref.py +9 -6
samplers/progress.py +9 -6
samplers/sim.py +0 -420

README.md CHANGED Viewed

@@ -1,13 +1,13 @@
 ---
-title: Rewardfm Eval Ui
 emoji: 🔥
 colorFrom: gray
-colorTo: red
 sdk: gradio
 sdk_version: 6.0.0
 app_file: app.py
 pinned: false
-short_description: UI for rfm evals
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Robometer
 emoji: 🔥
 colorFrom: gray
+colorTo: blue
 sdk: gradio
 sdk_version: 6.0.0
 app_file: app.py
 pinned: false
+short_description: Robometer Reward Eval UI
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 """
-Gradio app for RBM (Reward Foundation Model) inference visualization.
 Supports single video (progress/success) and dual video (preference/progress) predictions.
 Uses eval server for inference instead of loading models locally.
 """
@@ -701,34 +701,25 @@ with demo:
         gr.Markdown("### Links")
         gr.HTML(
             """
             <div style="display: flex; flex-wrap: wrap; gap: 0.5rem;">
-                <a href="https://robometer.github.io/" target="_blank" rel="noopener"
-                   title="Project page"
-                   style="display: inline-flex; align-items: center; gap: 0.35rem; padding: 0.25rem 0.65rem;
-                          border-radius: 999px; border: 1px solid #e0e0e0; font-size: 0.9rem;
-                          text-decoration: none;">
-                    <span>🌐</span><span>Project</span>
                 </a>
-                <a href="https://github.com/robometer" target="_blank" rel="noopener"
-                   title="GitHub"
-                   style="display: inline-flex; align-items: center; gap: 0.35rem; padding: 0.25rem 0.65rem;
-                          border-radius: 999px; border: 1px solid #e0e0e0; font-size: 0.9rem;
-                          text-decoration: none;">
-                    <span>📂</span><span>Code</span>
                 </a>
-                <a href="https://huggingface.co/datasets/rewardfm/rbm-1m" target="_blank" rel="noopener"
-                   title="RBM-1M Dataset"
-                   style="display: inline-flex; align-items: center; gap: 0.35rem; padding: 0.25rem 0.65rem;
-                          border-radius: 999px; border: 1px solid #e0e0e0; font-size: 0.9rem;
-                          text-decoration: none;">
-                    <span>📊</span><span>Dataset</span>
                 </a>
-                <a href="https://huggingface.co/aliangdw/Robometer-4B" target="_blank" rel="noopener"
-                   title="Model weights on Hugging Face"
-                   style="display: inline-flex; align-items: center; gap: 0.35rem; padding: 0.25rem 0.65rem;
-                          border-radius: 999px; border: 1px solid #e0e0e0; font-size: 0.9rem;
-                          text-decoration: none;">
-                    <span>💾</span><span>Weights</span>
                 </a>
             </div>
             """

 #!/usr/bin/env python3
 """
+Gradio app for Robometer (RBM) inference visualization.
 Supports single video (progress/success) and dual video (preference/progress) predictions.
 Uses eval server for inference instead of loading models locally.
 """
         gr.Markdown("### Links")
         gr.HTML(
             """
+            <style>
+            .sidebar-pill { display: inline-flex; align-items: center; gap: 0.35rem; padding: 0.25rem 0.65rem;
+                border-radius: 999px; border: 1px solid #e4e4e7; font-size: 0.9rem; text-decoration: none;
+                color: inherit; }
+            .sidebar-pill:hover { background: rgba(0,0,0,0.04); }
+            .sidebar-pill .arrow { opacity: 0.6; font-size: 0.75em; }
+            </style>
             <div style="display: flex; flex-wrap: wrap; gap: 0.5rem;">
+                <a href="https://robometer.github.io/" target="_blank" rel="noopener" class="sidebar-pill" title="Project page">
+                    <span>🌐</span><span>Project</span><span class="arrow">↗</span>
                 </a>
+                <a href="https://github.com/robometer" target="_blank" rel="noopener" class="sidebar-pill" title="GitHub">
+                    <span>📂</span><span>Code</span><span class="arrow">↗</span>
                 </a>
+                <a href="https://huggingface.co/datasets/rewardfm/rbm-1m" target="_blank" rel="noopener" class="sidebar-pill" title="RBM-1M Dataset">
+                    <span>📊</span><span>Dataset</span><span class="arrow">↗</span>
                 </a>
+                <a href="https://huggingface.co/robometer/Robometer-4B" target="_blank" rel="noopener" class="sidebar-pill" title="Model weights">
+                    <span>💾</span><span>Weights</span><span class="arrow">↗</span>
                 </a>
             </div>
             """

eval_viz_utils.py CHANGED Viewed

@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 """
-Utility functions for visualization in RFM evaluations.
 """
 from typing import Optional

 #!/usr/bin/env python3
 """
+Utility functions for visualization in Robometer (RBM) evaluations.
 """
 from typing import Optional

requirements.txt CHANGED Viewed

@@ -1,4 +1,4 @@
-# Requirements for RFM Eval UI Gradio App
 # Core dependencies
 matplotlib>=3.5.0
@@ -24,7 +24,7 @@ decord>=0.6.0  # For video frame extraction (same as preprocess_datasets.py)
 # Development tools (optional, for auto-reload)
 watchfiles  # For file watching during development
-# RFM package (installed from git repository)
 # For local development, you can also install with: pip install -e ../ (from parent directory)
 # git+https://github.com/aliang8/reward_fm.git@anthony_working

+# Requirements for Robometer (RBM) Eval UI Gradio App
 # Core dependencies
 matplotlib>=3.5.0
 # Development tools (optional, for auto-reload)
 watchfiles  # For file watching during development
+# RBM package (installed from git repository)
 # For local development, you can also install with: pip install -e ../ (from parent directory)
 # git+https://github.com/aliang8/reward_fm.git@anthony_working

samplers/README.md DELETED Viewed

@@ -1,182 +0,0 @@
-# Sampler Strategies Documentation
-This document summarizes the data generation strategies used by each sampler type in the RFM data pipeline.
-## Overview
-The codebase contains three main sampler types:
-- **SimSampler**: Generates similarity scoring samples
-- **PrefSampler**: Generates preference prediction samples
-- **ProgressSampler**: Generates progress prediction samples
-Each sampler implements multiple strategies for generating training data, with automatic retry logic and strategy rebalancing on failure.
----
-## SimSampler (Similarity Scoring)
-The `SimSampler` creates similarity scoring samples where two trajectories (`o^1` and `o^2`) are ranked against a reference trajectory (`o^ref`). The goal is to learn that `o^1` should be ranked higher than `o^2`.
-### Strategies
-#### 1. **REWIND**
-- **Description**: Creates a similarity sample where `o^1` is an optimal trajectory from the same task, and `o^2` is a rewound (subsampled) version of the reference trajectory.
-- **Purpose**: Learn to distinguish an optimal same-task trajectory from a rewound (and therefore suboptimal) version of the reference trajectory.
-- **Implementation**:
-  - `traj_sim`: Optimal trajectory from same task (via `_get_same_task_optimal`)
-  - `traj_diff`: Rewound trajectory from reference (via `subsample_rewind`)
-#### 2. **SUBOPTIMAL**
-- **Description**: Creates a similarity sample where `o^1` is an optimal trajectory from the same task, and `o^2` is a suboptimal/failure trajectory from the same task.
-- **Purpose**: Learn to distinguish between optimal and suboptimal trajectories from the same task.
-- **Conditions**: Only available when:
-  - Data source is in the failure category (`is_failure_ds`)
-  - Probability is boosted by 2x for failure category data sources
-- **Implementation**:
-  - `traj_sim`: Optimal trajectory from same task (via `_get_same_task_optimal`)
-  - `traj_diff`: Suboptimal trajectory from same task (via `_get_same_task_suboptimal`)
-#### 3. **PAIRED_HUMAN_ROBOT**
-- **Description**: Creates a similarity sample where `o^1` is a paired human/robot trajectory (opposite type from reference, same task), and `o^2` is from a different task.
-- **Purpose**: Learn to distinguish between same-task and different-task trajectories, leveraging paired human/robot demonstrations.
-- **Conditions**: Only available when:
-  - Data source is in the paired category (`is_paired_ds`)
-  - Paired human/robot data exists for the task
-  - Probability is boosted by 2x for paired category data sources
-- **Implementation**:
-  - `traj_sim`: Paired human/robot trajectory (via `_get_paired_human_robot_traj`)
-  - `traj_diff`: Trajectory from different task (via `_get_different_video_traj`)
-### Strategy Selection
-- Strategies are selected probabilistically based on `similarity_strategy_ratio` configuration
-- Probabilities are rebalanced when strategies fail
-- Strategies are removed after 4 consecutive failures
-- Maximum 10 total attempts per sample generation
-### Reference Trajectory Requirements
-- For non-RoboArena: Must have `quality_label == "successful"`
-- For RoboArena: Must have `partial_success` field present
----
-## PrefSampler (Preference Prediction)
-The `PrefSampler` creates preference prediction samples with a chosen (preferred) trajectory and a rejected (suboptimal) trajectory.
-### Strategies
-#### 1. **REWIND**
-- **Description**: Uses the same optimal trajectory for both chosen and rejected, but applies rewind subsampling to the rejected trajectory.
-- **Purpose**: Learn that full trajectories are preferred over truncated/rewound versions.
-- **Implementation**:
-  - `chosen_trajectory`: Original optimal trajectory (forward subsampling)
-  - `rejected_trajectory`: Same trajectory with `subsample_rewind` strategy
-#### 2. **SUBOPTIMAL**
-- **Description**: Uses an optimal trajectory as chosen and a suboptimal/failure trajectory from the same task as rejected.
-- **Purpose**: Learn to prefer optimal trajectories over suboptimal ones from the same task.
-- **Conditions**: Only available when suboptimal trajectories exist for the task
-- **Implementation**:
-  - `chosen_trajectory`: Optimal trajectory
-  - `rejected_trajectory`: Suboptimal trajectory from same task (via `_get_same_task_suboptimal`)
-#### 3. **DIFFERENT_TASK**
-- **Description**: Uses an optimal trajectory as chosen and a trajectory from a completely different task as rejected.
-- **Purpose**: Learn that trajectories from the same task are preferred over trajectories from different tasks.
-- **Implementation**:
-  - `chosen_trajectory`: Optimal trajectory
-  - `rejected_trajectory`: Trajectory from different task (via `_get_different_video_traj`)
-  - **Note**: Rejected trajectory's `target_progress` is set to `[0.0]` for all timesteps
-#### 4. **REVERSE_PROGRESS**
-- **Description**: Uses the same optimal trajectory for both chosen and rejected, but applies reverse uniform sampling to the rejected trajectory.
-- **Purpose**: Learn that forward progress is preferred over reverse progress.
-- **Implementation**:
-  - `chosen_trajectory`: Original optimal trajectory (forward subsampling)
-  - `rejected_trajectory`: Same trajectory with `subsample_reverse` strategy
-#### 5. **ROBOARENA_PARTIAL_SUCCESS**
-- **Description**: Uses two trajectories from the same task with different `partial_success` values. The trajectory with higher `partial_success` becomes chosen, and the one with lower `partial_success` becomes rejected.
-- **Purpose**: Learn to prefer trajectories with higher partial success scores (RoboArena-specific).
-- **Conditions**: Only available for RoboArena trajectories (has `partial_success` field and data_source contains "roboarena")
-- **Implementation**:
-  - Finds a different trajectory from same task (via `_get_different_partial_success_traj`)
-  - Swaps trajectories if found trajectory has higher `partial_success`
-  - `chosen_trajectory`: Trajectory with higher `partial_success`
-  - `rejected_trajectory`: Trajectory with lower `partial_success`
-### Special Handling
-- **Non-successful trajectories**: If a trajectory has `quality_label != "successful"` (and is not RoboArena), it is automatically used as the rejected trajectory, with an optimal trajectory from the same task as the chosen trajectory.
-### Strategy Selection
-- Strategies are selected probabilistically based on `preference_strategy_ratio` configuration
-- Probabilities are rebalanced when strategies fail
-- Strategies are removed after 3 consecutive failures
-- Maximum 10 total attempts per sample generation
----
-## ProgressSampler (Progress Prediction)
-The `ProgressSampler` creates progress prediction samples from a single trajectory, applying different subsampling strategies to create training data.
-### Strategies
-#### 1. **DIFFERENT_TASK_INSTRUCTION**
-- **Description**: Uses a trajectory from a different task, but keeps the original task's embeddings and instruction.
-- **Purpose**: Learn that progress should be 0.0 when the trajectory doesn't match the task instruction.
-- **Implementation**:
-  - Gets trajectory from different task (via `_get_different_task_instruction`)
-  - Replaces embeddings with original task's embeddings
-  - Sets `target_progress = [0.0]` for all timesteps
-  - Uses forward subsampling
-#### 2. **FORWARD_PROGRESS**
-- **Description**: Samples the same trajectory with forward direction (start < middle < end).
-- **Purpose**: Learn normal forward progress patterns.
-- **Implementation**:
-  - Uses same trajectory with `subsample_forward` strategy
-  - Progress increases from start to end
-#### 3. **REVERSE_PROGRESS**
-- **Description**: Samples the same trajectory with reverse direction (end < middle < start).
-- **Purpose**: Learn to handle reverse progress scenarios.
-- **Implementation**:
-  - Uses same trajectory with `subsample_reverse` strategy
-  - Progress decreases from start to end
-#### 4. **REWIND**
-- **Description**: Samples the same trajectory with rewind direction (start < end < middle).
-- **Purpose**: Learn to handle non-monotonic progress patterns.
-- **Implementation**:
-  - Uses same trajectory with `subsample_rewind` strategy
-  - Progress pattern: increases, then decreases
-### Strategy Selection
-- Strategies are selected probabilistically based on `progress_strategy_ratio` configuration
-- Probabilities are rebalanced when strategies fail
-- Failed strategies are immediately removed (no retry count threshold)
-- Maximum 10 total attempts per sample generation
----
-## Common Features
-### Retry Logic
-All samplers implement retry logic with:
-- Maximum attempt limits (typically 10 attempts)
-- Strategy-specific retry counts (3-4 attempts per strategy)
-- Automatic strategy removal after consecutive failures
-- Probability rebalancing when strategies are removed
-### Subsample Strategies
-Common subsampling strategies used across samplers:
-- `subsample_forward`: Normal forward sampling (start → end)
-- `subsample_reverse`: Reverse sampling (end → start)
-- `subsample_rewind`: Rewind sampling (start → end → start)
-### Data Source Filtering
-- Strategies may be filtered or boosted based on data source categories:
-  - **Failure category**: Boosts SUBOPTIMAL strategy probability by 2x
-  - **Paired category**: Boosts PAIRED_HUMAN_ROBOT strategy probability by 2x
-  - **RoboArena**: Special handling for `partial_success` field

samplers/__init__.py CHANGED Viewed

@@ -1,23 +1,19 @@
-from rfm.data.samplers.base import RFMBaseSampler
-from rfm.data.samplers.pref import PrefSampler
-from rfm.data.samplers.sim import SimSampler
-from rfm.data.samplers.progress import ProgressSampler
-from rfm.data.samplers.eval.confusion_matrix import ConfusionMatrixSampler
-from rfm.data.samplers.eval.progress_policy_ranking import ProgressPolicyRankingSampler
-from rfm.data.samplers.eval.reward_alignment import RewardAlignmentSampler
-from rfm.data.samplers.eval.quality_preference import QualityPreferenceSampler
-from rfm.data.samplers.eval.roboarena_quality_preference import RoboArenaQualityPreferenceSampler
-from rfm.data.samplers.eval.similarity_score import SimilarityScoreSampler
 __all__ = [
-    "RFMBaseSampler",
     "PrefSampler",
-    "SimSampler",
     "ProgressSampler",
     "ConfusionMatrixSampler",
     "ProgressPolicyRankingSampler",
     "RewardAlignmentSampler",
     "QualityPreferenceSampler",
     "RoboArenaQualityPreferenceSampler",
-    "SimilarityScoreSampler",
 ]

+from robometer.data.samplers.base import RBMBaseSampler
+from robometer.data.samplers.pref import PrefSampler
+from robometer.data.samplers.progress import ProgressSampler
+from robometer.data.samplers.eval.confusion_matrix import ConfusionMatrixSampler
+from robometer.data.samplers.eval.progress_policy_ranking import ProgressPolicyRankingSampler
+from robometer.data.samplers.eval.reward_alignment import RewardAlignmentSampler
+from robometer.data.samplers.eval.quality_preference import QualityPreferenceSampler
+from robometer.data.samplers.eval.roboarena_quality_preference import RoboArenaQualityPreferenceSampler
 __all__ = [
+    "RBMBaseSampler",
     "PrefSampler",
     "ProgressSampler",
     "ConfusionMatrixSampler",
     "ProgressPolicyRankingSampler",
     "RewardAlignmentSampler",
     "QualityPreferenceSampler",
     "RoboArenaQualityPreferenceSampler",
 ]

samplers/base.py CHANGED Viewed

@@ -7,8 +7,8 @@ import torch
 from random import Random
 from datasets import Dataset
-from rfm.configs.experiment_configs import DataConfig
-from rfm.data.datasets.helpers import (
     load_frames_from_npz,
     get_segment_indices_with_middle,
     compute_progress_from_segment,
@@ -20,13 +20,14 @@ from rfm.data.datasets.helpers import (
     linspace_subsample_frames,
     convert_continuous_to_discrete_bins,
 )
-from rfm.data.dataset_types import Trajectory
-from rfm.utils.logger import get_logger
 logger = get_logger()
-class RFMBaseSampler:
     """Base sampler class that provides trajectory retrieval functions for generating samples."""
     def __init__(
@@ -37,6 +38,7 @@ class RFMBaseSampler:
         dataset_success_cutoff_map: Optional[Dict[str, float]] = None,
         verbose: bool = True,
         random_seed: int = 42,
     ):
         """Initialize sampler with dataset and indices.
@@ -53,7 +55,7 @@ class RFMBaseSampler:
         self.verbose = verbose
         self.dataset_success_cutoff_map = dataset_success_cutoff_map or {}
         self._local_random = Random(random_seed)
         self._cached_ids = self.dataset["id"]
         self._cached_is_robot = self.dataset["is_robot"]
@@ -108,7 +110,7 @@ class RFMBaseSampler:
             item: An item from the dataset (typically a trajectory dict)
         Returns:
-            A sample object (e.g., PreferenceSample, SimilaritySample, ProgressSample)
         """
         raise NotImplementedError("Subclasses must implement _generate_sample")
@@ -577,6 +579,7 @@ class RFMBaseSampler:
         subsample_strategy: str | None = None,
         frame_indices: List[int] | None = None,
         metadata: Dict[str, Any] | None = None,
     ) -> Trajectory:
         """Load, subsample, and optionally pad trajectory data and create a Trajectory object.
@@ -585,6 +588,7 @@ class RFMBaseSampler:
             subsample_strategy: Optional strategy for subsampling ("subsample_forward", "subsample_reverse", "subsample_rewind", or None for default/bidirectional). Ignored if frame_indices is provided.
             frame_indices: Optional list of specific frame indices to use. If provided, subsample_strategy is ignored.
             metadata: Optional metadata dict to merge into trajectory metadata.
         Returns:
             Trajectory object with loaded and subsampled data (padded)
@@ -701,7 +705,7 @@ class RFMBaseSampler:
             indices = [indices[idx] for idx in frame_indices_subsample] if isinstance(indices, list) else indices
         # Pad if needed
-        if target_progress:
             if self.config.load_embeddings:
                 subsampled, target_progress = pad_trajectory_to_max_frames_torch(
                     subsampled, target_progress, self.config.max_frames
@@ -711,6 +715,30 @@ class RFMBaseSampler:
                     subsampled, target_progress, self.config.max_frames
                 )
         # Update frames_shape
         frames_shape = subsampled.shape if hasattr(subsampled, "shape") else tuple()
@@ -747,6 +775,7 @@ class RFMBaseSampler:
                 "target_progress": target_progress,
                 "success_label": success_label,
                 "partial_success": partial_success,
                 "metadata": metadata,
             },
         )

 from random import Random
 from datasets import Dataset
+from robometer.configs.experiment_configs import DataConfig
+from robometer.data.datasets.helpers import (
     load_frames_from_npz,
     get_segment_indices_with_middle,
     compute_progress_from_segment,
     linspace_subsample_frames,
     convert_continuous_to_discrete_bins,
 )
+from robometer.data.dataset_types import Trajectory
+from robometer.utils.logger import get_logger
+from robometer.data.dataset_category import is_preference_only_ds
 logger = get_logger()
+class RBMBaseSampler:
     """Base sampler class that provides trajectory retrieval functions for generating samples."""
     def __init__(
         dataset_success_cutoff_map: Optional[Dict[str, float]] = None,
         verbose: bool = True,
         random_seed: int = 42,
+        pad_frames: bool = True,
     ):
         """Initialize sampler with dataset and indices.
         self.verbose = verbose
         self.dataset_success_cutoff_map = dataset_success_cutoff_map or {}
         self._local_random = Random(random_seed)
+        self.pad_frames = pad_frames
         self._cached_ids = self.dataset["id"]
         self._cached_is_robot = self.dataset["is_robot"]
             item: An item from the dataset (typically a trajectory dict)
         Returns:
+            A sample object (e.g., PreferenceSample, ProgressSample)
         """
         raise NotImplementedError("Subclasses must implement _generate_sample")
         subsample_strategy: str | None = None,
         frame_indices: List[int] | None = None,
         metadata: Dict[str, Any] | None = None,
+        pad_frames: bool = True,
     ) -> Trajectory:
         """Load, subsample, and optionally pad trajectory data and create a Trajectory object.
             subsample_strategy: Optional strategy for subsampling ("subsample_forward", "subsample_reverse", "subsample_rewind", or None for default/bidirectional). Ignored if frame_indices is provided.
             frame_indices: Optional list of specific frame indices to use. If provided, subsample_strategy is ignored.
             metadata: Optional metadata dict to merge into trajectory metadata.
+            pad_frames: Whether to pad the trajectory data to max_frames.
         Returns:
             Trajectory object with loaded and subsampled data (padded to max_frames only when pad_frames is True and target_progress is non-empty)
             indices = [indices[idx] for idx in frame_indices_subsample] if isinstance(indices, list) else indices
         # Pad if needed
+        if target_progress and pad_frames:
             if self.config.load_embeddings:
                 subsampled, target_progress = pad_trajectory_to_max_frames_torch(
                     subsampled, target_progress, self.config.max_frames
                     subsampled, target_progress, self.config.max_frames
                 )
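+        # Build predict_last_frame_mask: one float per subsampled frame. Default: all 1.0s (no masking).
+        # The default is kept when predict_last_frame_partial_progress is off, when partial_success is None,
+        # or when partial_success == 1.0 and the data source is not a preference-only dataset.
+        # Otherwise, positions holding the last original frame are marked 1.0 and all others 0.0;
+        # if indices is not a list or does not contain the last original frame, the whole mask is 0.0.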
+        final_frame_count = len(subsampled)
+        predict_last_frame_mask = [1.0] * final_frame_count  # Default: all 1.0s (no masking)
+        if self.config.predict_last_frame_partial_progress and partial_success is not None:
+            if partial_success == 1.0 and not is_preference_only_ds(traj["data_source"]):
+                pass
+            else:
+                last_original_frame_idx = num_frames_total - 1
+                if isinstance(indices, list) and last_original_frame_idx in indices:
+                    # Find all positions where the last frame index appears
+                    last_frame_positions = [
+                        i for i, idx in enumerate(indices) if idx == last_original_frame_idx and i < final_frame_count
+                    ]
+                    if last_frame_positions:
+                        # Mark all positions where the last frame appears with 1.0, all others 0.0
+                        predict_last_frame_mask = [0.0] * final_frame_count
+                        for pos in last_frame_positions:
+                            predict_last_frame_mask[pos] = 1.0
+                else:
+                    predict_last_frame_mask = [0.0] * final_frame_count
         # Update frames_shape
         frames_shape = subsampled.shape if hasattr(subsampled, "shape") else tuple()
                 "target_progress": target_progress,
                 "success_label": success_label,
                 "partial_success": partial_success,
+                "predict_last_frame_mask": predict_last_frame_mask,
                 "metadata": metadata,
             },
         )

samplers/pref.py CHANGED Viewed

@@ -7,19 +7,19 @@ from typing import Dict, List, Optional, Any
 import random
-from rfm.data.dataset_types import PreferenceSample, Trajectory
-from rfm.data.samplers.base import RFMBaseSampler
-from rfm.data.datasets.helpers import (
     DataGenStrat,
     convert_continuous_to_discrete_bins,
 )
-from rfm.utils.logger import get_logger, rank_0_info, trace
-from rfm.utils.timer import timer
 logger = get_logger()
-class PrefSampler(RFMBaseSampler):
     """Data generator for producing batches of preference prediction data."""
     def __init__(self, is_evaluation=False, **kwargs):
@@ -343,6 +343,9 @@ class PrefSampler(RFMBaseSampler):
         rejected_trajectory = self._get_traj_from_data(rejected_traj, subsample_strategy=rejected_subsample_strategy)
         # If our strategy is different task, make sure the rejected trajectory has 0 progress and 0 success labels
         if strategy_used in [
             DataGenStrat.DIFFERENT_TASK,

 import random
+from robometer.data.dataset_types import PreferenceSample, Trajectory
+from robometer.data.samplers.base import RBMBaseSampler
+from robometer.data.datasets.helpers import (
     DataGenStrat,
     convert_continuous_to_discrete_bins,
 )
+from robometer.utils.logger import get_logger, rank_0_info, trace
+from robometer.utils.timer import timer
 logger = get_logger()
+class PrefSampler(RBMBaseSampler):
     """Data generator for producing batches of preference prediction data."""
     def __init__(self, is_evaluation=False, **kwargs):
         rejected_trajectory = self._get_traj_from_data(rejected_traj, subsample_strategy=rejected_subsample_strategy)
+        if rejected_trajectory is None or chosen_trajectory is None:
+            return None
         # If our strategy is different task, make sure the rejected trajectory has 0 progress and 0 success labels
         if strategy_used in [
             DataGenStrat.DIFFERENT_TASK,

samplers/progress.py CHANGED Viewed

@@ -3,20 +3,20 @@ from typing import Dict, Any, Optional
 import random
 import torch
-from rfm.data.dataset_types import ProgressSample, Trajectory
-from rfm.data.samplers.base import RFMBaseSampler
-from rfm.data.datasets.helpers import (
     DataGenStrat,
     load_embeddings_from_path,
     convert_continuous_to_discrete_bins,
 )
-from rfm.utils.distributed import rank_0_print
-from rfm.utils.logger import get_logger
 logger = get_logger()
-class ProgressSampler(RFMBaseSampler):
     """Data generator for progress samples."""
     def __init__(self, is_evaluation=False, **kwargs):
@@ -147,6 +147,9 @@ class ProgressSampler(RFMBaseSampler):
         progress_traj = self._get_traj_from_data(processed_traj, subsample_strategy=subsample_strategy)
         # Handle special cases
         if strategy_used in [DataGenStrat.DIFFERENT_TASK, DataGenStrat.DIFFERENT_TASK_INSTRUCTION]:
             # We need to use the original task embeddings instead of the different task embeddings

 import random
 import torch
+from robometer.data.dataset_types import ProgressSample, Trajectory
+from robometer.data.samplers.base import RBMBaseSampler
+from robometer.data.datasets.helpers import (
     DataGenStrat,
     load_embeddings_from_path,
     convert_continuous_to_discrete_bins,
 )
+from robometer.utils.distributed import rank_0_print
+from robometer.utils.logger import get_logger
 logger = get_logger()
+class ProgressSampler(RBMBaseSampler):
     """Data generator for progress samples."""
     def __init__(self, is_evaluation=False, **kwargs):
         progress_traj = self._get_traj_from_data(processed_traj, subsample_strategy=subsample_strategy)
+        if progress_traj is None:
+            return None
         # Handle special cases
         if strategy_used in [DataGenStrat.DIFFERENT_TASK, DataGenStrat.DIFFERENT_TASK_INSTRUCTION]:
             # We need to use the original task embeddings instead of the different task embeddings

samplers/sim.py DELETED Viewed

@@ -1,420 +0,0 @@
-#!/usr/bin/env python3
-from typing import Dict, List, Tuple, Optional, Union, Any
-import torch
-from rfm.data.dataset_types import SimilaritySample, Trajectory
-from rfm.data.samplers.base import RFMBaseSampler
-from rfm.data.datasets.helpers import DataGenStrat, convert_continuous_to_discrete_bins
-from rfm.data.dataset_category import is_failure_ds, is_paired_ds
-from rfm.utils.logger import get_logger, rank_0_info
-logger = get_logger()
-class SimSampler(RFMBaseSampler):
-    """Data generator for producing batches for similarity scoring."""
-    def __init__(self, is_evaluation=False, **kwargs):  # NOTE(review): is_evaluation is accepted but neither read nor forwarded to super() in this body
-        super().__init__(**kwargs)
-        self.similarity_strategy_ratio: List[float] = self.config.similarity_strategy_ratio  # weights read by index in _create_similarity_sample: [0] REWIND, [1] SUBOPTIMAL, [2] PAIRED_HUMAN_ROBOT
-        self._has_paired_human_robot = (  # True only if some task entry has non-empty "robot" AND non-empty "human" lists
-            any(
-                len(entry.get("robot", [])) > 0 and len(entry.get("human", [])) > 0
-                for entry in self.paired_human_robot_by_task.values()
-            )
-            if self.paired_human_robot_by_task
-            else False
-        )
-        self._has_suboptimal = (  # True only if some task has a non-empty suboptimal index list; within this class it is only logged
-            any(len(indices) > 0 for indices in self.suboptimal_by_task.values()) if self.suboptimal_by_task else False
-        )
-        rank_0_info(
-            f"[SIM SAMPLER] Has paired human/robot: {self._has_paired_human_robot}, Has suboptimal: {self._has_suboptimal}"
-        )
-    def _generate_sample(self, item: dict, preferred_strategy: Optional[DataGenStrat] = None):  # thin wrapper: uses `item` as the reference trajectory
-        return self._create_similarity_sample(ref_traj=item, preferred_strategy=preferred_strategy)  # result may be None; _create_similarity_sample has several early `return None` paths
-    def _execute_strategy(
-        self, strategy: DataGenStrat, ref_traj: Dict[str, Any]
-    ) -> tuple[Dict[str, Any], Dict[str, Any]] | None:
-        """Execute a strategy to get trajectory pairs.
-        Args:
-            strategy: The strategy to execute
-            ref_traj: The reference trajectory
-        Returns:
-            Tuple of (traj_sim, traj_diff) or None if failed
-        """
-        if strategy == DataGenStrat.REWIND:  # each branch delegates to the matching _get_traj_dicts_for_* helper and passes its result through unchanged
-            return self._get_traj_dicts_for_rewind(ref_traj)
-        elif strategy == DataGenStrat.SUBOPTIMAL:
-            return self._get_traj_dicts_for_suboptimal(ref_traj)
-        elif strategy == DataGenStrat.PAIRED_HUMAN_ROBOT:
-            return self._get_traj_dicts_for_paired_human_robot(ref_traj)
-        else:  # any other value (including None) is treated as a failed strategy
-            return None
-    def _create_similarity_sample(
-        self, ref_traj: Optional[Dict[str, Any]] = None, preferred_strategy: Optional[DataGenStrat] = None
-    ) -> SimilaritySample:  # NOTE(review): several paths below return None, which this annotation does not admit
-        """Create a similarity scoring sample: o^1 and o^2 ranked against o^ref.
-        Two modes:
-        1. Rewind mode: o^1 is rewound from same task, o^2 is from different task
-            - here o^1 is preferred and should be ranked higher than o^2
-        2. Optimal/Suboptimal mode: o^1 is optimal/suboptimal from same task, o^2 varies
-            - here o^1 is preferred and should be ranked higher than o^2
-        Args:
-            ref_traj: Optional reference trajectory. If None, samples from optimal trajectories.
-        """
-        # Log when similarity sampler is called
-        traj_id = ref_traj.get("id", "unknown") if ref_traj is not None else "sampling_new"  # used only for the trace message below
-        logger.trace(f"[SIM SAMPLER] Creating similarity sample for trajectory ID: {traj_id}")
-        # Use provided reference trajectory if given; otherwise sample one
-        if ref_traj is None:
-            # Use preprocessed optimal trajectories from index maps
-            if not self.optimal_by_task:
-                return None
-            # Filter out tasks with empty optimal_indices to avoid infinite loop
-            valid_tasks = {
-                task: indices
-                for task, indices in self.optimal_by_task.items()
-                if indices  # Only include tasks with non-empty indices
-            }
-            if not valid_tasks:
-                # No valid tasks with optimal trajectories available
-                return None
-            # Get a random task and optimal trajectory from it
-            task_name = self._local_random.choice(list(valid_tasks.keys()))
-            optimal_indices = valid_tasks[task_name]
-            # Double-check that we have valid indices (should always be true now)
-            if not optimal_indices:  # NOTE(review): unreachable, valid_tasks only keeps truthy index lists
-                return None
-            optimal_idx = self._local_random.choice(optimal_indices)
-            ref_traj = self.dataset[optimal_idx]
-        # Check if ref_traj is successful - if not, return None to try a different trajectory
-        quality_label = ref_traj.get("quality_label")
-        partial_success = ref_traj.get("partial_success")
-        use_partial_success = partial_success is not None  # presence of the key's value decides which acceptance rule applies
-        if use_partial_success:
-            # For trajectories with partial_success, require partial_success to exist
-            if partial_success is None:  # NOTE(review): unreachable, this branch is entered only when partial_success is not None
-                logger.trace(
-                    f"[SIM SAMPLER] Ref trajectory {ref_traj.get('id', 'unknown')} missing partial_success, skipping"
-                )
-                return None
-        else:
-            # For trajectories without partial_success, require quality_label to be "successful"
-            if quality_label != "successful":
-                logger.trace(
-                    f"[SIM SAMPLER] Ref trajectory {ref_traj.get('id', 'unknown')} is not successful (quality_label: {quality_label}), skipping"
-                )
-                return None
-        traj_sim, traj_diff = None, None
-        strategy_used = None
-        data_source = ref_traj.get("data_source")
-        is_failure_source = is_failure_ds(data_source) if data_source else False  # a missing/empty data_source counts as "not in the category"
-        is_paired_source = is_paired_ds(data_source) if data_source else False
-        # Strategy selection: use preferred_strategy if provided, otherwise select based on ratios
-        if preferred_strategy is not None:
-            # Use the preferred strategy directly
-            logger.trace(f"[SIM SAMPLER] Using preferred strategy: {preferred_strategy.value}")
-            result = self._execute_strategy(preferred_strategy, ref_traj)  # single try: no retry loop and no fallback to other strategies on this path
-            if result is None:
-                logger.trace(f"[SIM SAMPLER] Preferred strategy {preferred_strategy.value} failed, returning None")
-                return None
-            traj_sim, traj_diff = result
-            strategy_used = preferred_strategy
-            attempt = 1  # Set attempt for preferred strategy path
-        else:
-            # Strategy selection with data_source-based filtering and boosting
-            strategies = []  # list of (strategy, unnormalized weight) pairs
-            # Always include REWIND if ratio > 0
-            if self.similarity_strategy_ratio[0] > 0:
-                strategies.append((DataGenStrat.REWIND, self.similarity_strategy_ratio[0]))  # NOTE(review): per _get_traj_dicts_for_rewind, REWIND yields sim=same-task optimal and diff=rewound ref, which differs from the "Rewind mode" wording in the docstring
-            # SUBOPTIMAL: include if data_source is in failure category
-            if len(self.similarity_strategy_ratio) > 1 and self.similarity_strategy_ratio[1] > 0 and is_failure_source:
-                # Boost probability by 2x if data_source is in failure category
-                boosted_prob = self.similarity_strategy_ratio[1] * 2.0  # always doubled here, since the branch is only entered for failure sources
-                strategies.append((DataGenStrat.SUBOPTIMAL, boosted_prob))
-            # PAIRED_HUMAN_ROBOT: only include if data_source is in paired category
-            if (
-                self._has_paired_human_robot
-                and len(self.similarity_strategy_ratio) > 2
-                and self.similarity_strategy_ratio[2] > 0
-                and is_paired_source
-            ):
-                # Boost probability by 2x if data_source is in paired category
-                boosted_prob = self.similarity_strategy_ratio[2] * 2.0
-                strategies.append((DataGenStrat.PAIRED_HUMAN_ROBOT, boosted_prob))
-            # Remove strategies with zero probability
-            strategies = [(strat, prob) for strat, prob in strategies if prob > 0]  # NOTE(review): no-op as written, every entry above was appended only for a ratio > 0
-            max_attempts = 10  # Limit retry attempts to prevent infinite loops
-            max_strategy_attempts = 4  # Maximum attempts per strategy before removing it
-            attempt = 0
-            strategies_tried = []  # appended to below but not read anywhere else in this method
-            # Track attempts per strategy
-            strategy_attempt_counts = {strat: 0 for strat, _ in strategies}
-            while traj_sim is None and attempt < max_attempts:
-                attempt += 1
-                # Check if we have any strategies left
-                if not strategies:
-                    return None
-                # Rebalance probabilities based on remaining strategies
-                total_prob = sum(prob for _, prob in strategies)
-                if total_prob == 0:
-                    return None
-                # Normalize probabilities
-                normalized_strategies = [(strat, prob / total_prob) for strat, prob in strategies]
-                # Select strategy based on rebalanced probabilities
-                prob = self._local_random.random()  # presumably uniform in [0, 1) like random.Random.random; verify against the base sampler
-                cumulative_prob = 0.0
-                selected_strategy = None
-                for strat, normalized_prob in normalized_strategies:  # roulette-wheel pick: first strategy whose cumulative weight reaches the draw
-                    cumulative_prob += normalized_prob
-                    if prob <= cumulative_prob:  # NOTE(review): if rounding ever left the final cumulative_prob below the draw, selected_strategy would stay None and `.value` in the failure branch would raise; confirm whether a fallback is needed
-                        selected_strategy = strat
-                        strategies_tried.append(selected_strategy)
-                        break
-                # Log strategy attempt
-                logger.trace(
-                    f"[SIM SAMPLER] Attempt {attempt}/{max_attempts}: Trying strategy {selected_strategy.value if selected_strategy else 'None'}"
-                )
-                # Execute selected strategy
-                result = self._execute_strategy(selected_strategy, ref_traj)
-                if result is not None:
-                    traj_sim, traj_diff = result  # a non-None traj_sim ends the while loop
-                    strategy_used = selected_strategy
-                    logger.trace(f"[SIM SAMPLER] Strategy {selected_strategy.value} succeeded on attempt {attempt}")
-                else:
-                    # Strategy failed - increment attempt count
-                    strategy_attempt_counts[selected_strategy] = strategy_attempt_counts.get(selected_strategy, 0) + 1  # per-strategy failure tally for this call; never reset
-                    failed_count = strategy_attempt_counts[selected_strategy]
-                    logger.trace(
-                        f"[SIM SAMPLER] Strategy {selected_strategy.value} failed (failure count: {failed_count}/{max_strategy_attempts})"
-                    )
-                    # Only remove strategy if it has failed max_strategy_attempts times
-                    if strategy_attempt_counts[selected_strategy] >= max_strategy_attempts:  # NOTE(review): the log below says "consecutive", but the tally counts all failures of this strategy, interleaved or not
-                        logger.trace(
-                            f"[SIM SAMPLER] Removing strategy {selected_strategy.value} after {max_strategy_attempts} consecutive failures"
-                        )
-                        strategies = [(strat, prob) for strat, prob in strategies if strat != selected_strategy]
-                        continue  # redundant: already the last statement of the loop body
-            # If we still don't have a sample after all attempts, return None
-            if traj_sim is None or traj_diff is None:  # reached when the attempt budget runs out; an emptied strategy list already returned inside the loop
-                logger.trace(
-                    f"[SIM SAMPLER] Failed to generate similarity sample after {max_attempts} attempts - all strategies exhausted"
-                )
-                return None
-        # Create trajectories
-        ref_trajectory = self._get_traj_from_data(ref_traj)  # NOTE(review): results are not None-checked here, although _get_traj_dicts_for_rewind treats a None result from this helper as possible
-        sim_trajectory = self._get_traj_from_data(traj_sim)
-        diff_trajectory = self._get_traj_from_data(traj_diff)  # for REWIND, traj_diff was itself produced by _get_traj_from_data; presumably the helper accepts that, verify
-        # Handle different task trajectories: set progress=0 and success=0
-        # For SUBOPTIMAL strategy, diff is suboptimal same task (progress masked, success=0 handled automatically)
-        # For PAIRED_HUMAN_ROBOT strategy, diff could be suboptimal same task or different task
-        if strategy_used == DataGenStrat.PAIRED_HUMAN_ROBOT:
-            # Check if diff is from different task (compare task names)
-            if diff_trajectory.task != ref_traj["task"]:
-                # Different task: set progress=0 and success=0
-                diff_trajectory.target_progress = [0.0] * len(diff_trajectory.target_progress)  # same length as before, all zeros
-                if self.config.progress_loss_type.lower() == "discrete":
-                    diff_trajectory.target_progress = convert_continuous_to_discrete_bins(
-                        diff_trajectory.target_progress, self.config.progress_discrete_bins
-                    )
-                if diff_trajectory.success_label is not None:
-                    diff_trajectory.success_label = [0.0] * len(diff_trajectory.success_label)
-            # If same task, it's suboptimal - progress will be masked by should_compute_progress, success=0 already set
-        sample = SimilaritySample(
-            ref_trajectory=ref_trajectory,
-            sim_trajectory=sim_trajectory,
-            diff_trajectory=diff_trajectory,
-            data_gen_strategy=strategy_used.value,
-        )
-        sample.resample_attempts = attempt  # 1 on the preferred-strategy path, otherwise the loop iteration that succeeded
-        return sample
-    def _get_traj_dicts_for_rewind(self, ref_traj: dict) -> tuple[dict | Trajectory, dict] | None:  # NOTE(review): element types may be swapped; traj_diff (second element) is the one produced by _get_traj_from_data, confirm
-        """Get traj_sim and traj_diff for rewind strategy.
-        Returns:
-            Tuple of (traj_sim, traj_diff) where:
-            - traj_sim = optimal trajectory from same task
-            - traj_diff = rewound trajectory
-            Returns None if either cannot be generated after retries.
-            The main strategy loop will handle retries with different strategies.
-        """
-        max_retries = 3  # Number of retry attempts for sampling
-        # Try to get optimal trajectory from same task for sim
-        traj_sim = None
-        for _ in range(max_retries):  # stop at the first non-None result
-            traj_sim = self._get_same_task_optimal(ref_traj)
-            if traj_sim is not None:
-                break
-        # Try to get rewound trajectory for diff
-        traj_diff = None
-        for _ in range(max_retries):  # runs even when traj_sim is still None; the pair is only validated at the end
-            traj_diff = self._get_traj_from_data(ref_traj, subsample_strategy="subsample_rewind")  # rewound view is built from ref_traj itself, not from another trajectory
-            if traj_diff is not None:
-                break
-        # Return both if successful, otherwise return None (main loop will handle retries)
-        if traj_sim is not None and traj_diff is not None:
-            return traj_sim, traj_diff
-        return None
-    def _get_robot_suboptimal_same_task(self, ref_traj: Dict[str, Any]) -> Optional[Dict[str, Any]]:
-        """Get robot suboptimal trajectory from same task.
-        Args:
-            ref_traj: Reference trajectory (should be human)
-        Returns:
-            Robot suboptimal trajectory dict from same task or None if not available
-        """
-        task_name = ref_traj["task"]  # raises KeyError if the reference has no "task" key
-        same_task_suboptimal_indices = self.suboptimal_by_task.get(task_name, [])
-        if not same_task_suboptimal_indices:
-            logger.trace(f"[SIM SAMPLER] _get_robot_suboptimal_same_task: No suboptimal indices for task '{task_name}'")
-            return None
-        # Filter to only robot trajectories
-        robot_suboptimal_indices = [idx for idx in same_task_suboptimal_indices if self._cached_is_robot[idx]]  # _cached_is_robot is indexed by dataset position
-        if not robot_suboptimal_indices:
-            logger.trace(
-                f"[SIM SAMPLER] _get_robot_suboptimal_same_task: No robot suboptimal indices for task '{task_name}'"
-            )
-            return None
-        chosen_id = ref_traj["id"]
-        # Filter out the reference trajectory if it somehow appears
-        filtered_indices = [idx for idx in robot_suboptimal_indices if self._cached_ids[idx] != chosen_id]  # compares cached ids, so the dataset rows are not loaded for filtering
-        if not filtered_indices:
-            logger.trace(
-                f"[SIM SAMPLER] _get_robot_suboptimal_same_task: All robot suboptimal trajectories have same ID '{chosen_id}' for task '{task_name}'"
-            )
-            return None
-        selected_idx = self._local_random.choice(filtered_indices)
-        result = self.dataset[selected_idx]  # only the chosen row is read from the dataset
-        logger.trace(
-            f"[SIM SAMPLER] _get_robot_suboptimal_same_task: Found robot suboptimal trajectory {result.get('id', 'unknown')} for task '{task_name}'"
-        )
-        return result
-    def _get_traj_dicts_for_paired_human_robot(
-        self, ref_traj: Dict[str, Any]
-    ) -> Optional[Tuple[Dict[str, Any], Union[Dict[str, Any], Trajectory]]]:
-        """Get traj_sim and traj_diff for paired human/robot strategy.
-        Args:
-            ref_traj: Reference trajectory (should be human successful)
-        Returns:
-            Tuple of (traj_sim, traj_diff) or None if not available. Both can be dict or Trajectory objects.
-            traj_sim is robot successful same task (progress: yes, success: yes)
-            traj_diff is robot suboptimal same task OR different task
-                - If suboptimal same task: progress masked, success=0
-                - If different task: progress=0, success=0
-        """
-        max_retries = 3  # Number of retry attempts for sampling
-        # Get robot successful trajectory from same task for sim
-        traj_sim = None
-        for _ in range(max_retries):  # stop at the first non-None result
-            traj_sim = self._get_paired_human_robot_traj(ref_traj)
-            if traj_sim is not None:
-                break
-        # 50/50 random choice between robot suboptimal same task and different task
-        traj_diff = None
-        for _ in range(max_retries):  # runs even when traj_sim is still None; the pair is only validated at the end
-            # Randomly choose between robot suboptimal same task and different task
-            if self._local_random.random() < 0.5:  # the coin is re-flipped on every retry, so a retry may switch source
-                # Try robot suboptimal same task
-                traj_diff = self._get_robot_suboptimal_same_task(ref_traj)
-            else:
-                # Try different task
-                traj_diff = self._get_different_video_traj(ref_traj)
-            if traj_diff is not None:
-                break
-        if traj_sim is not None and traj_diff is not None:  # both halves are required; a partial pair is reported as failure
-            return traj_sim, traj_diff
-        return None
-    def _get_traj_dicts_for_suboptimal(
-        self, ref_traj: Dict[str, Any]
-    ) -> Optional[Tuple[Dict[str, Any], Union[Dict[str, Any], Trajectory]]]:
-        """Get traj_sim and traj_diff for suboptimal strategy.
-        Args:
-            ref_traj: Reference trajectory (must be successful)
-        Returns:
-            Tuple of (traj_sim, traj_diff) or None if not available. Both can be dict or Trajectory objects.
-            traj_sim is an optimal trajectory from same task (progress: yes, success: yes)
-            traj_diff is a suboptimal trajectory from same task (progress masked, success=0)
-        """
-        max_retries = 3  # Number of retry attempts for sampling
-        # Get optimal trajectory from same task for sim
-        traj_sim = None
-        for _ in range(max_retries):  # stop at the first non-None result
-            traj_sim = self._get_same_task_optimal(ref_traj)
-            if traj_sim is not None:
-                break
-        # Get suboptimal trajectory from same task for diff
-        traj_diff = None
-        for _ in range(max_retries):  # runs even when traj_sim is still None; the pair is only validated at the end
-            traj_diff = self._get_same_task_suboptimal(ref_traj)
-            if traj_diff is not None:
-                break
-        if traj_sim is not None and traj_diff is not None:  # both halves are required; a partial pair is reported as failure
-            return traj_sim, traj_diff
-        return None