Anthony Liang committed on
Commit
88e2e89
·
1 Parent(s): 468a132
README.md CHANGED
@@ -1,13 +1,13 @@
1
  ---
2
- title: Rewardfm Eval Ui
3
  emoji: 🔥
4
  colorFrom: gray
5
- colorTo: red
6
  sdk: gradio
7
  sdk_version: 6.0.0
8
  app_file: app.py
9
  pinned: false
10
- short_description: UI for rfm evals
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Robometer
3
  emoji: 🔥
4
  colorFrom: gray
5
+ colorTo: blue
6
  sdk: gradio
7
  sdk_version: 6.0.0
8
  app_file: app.py
9
  pinned: false
10
+ short_description: Robometer Reward Eval UI
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -1,6 +1,6 @@
1
  #!/usr/bin/env python3
2
  """
3
- Gradio app for RBM (Reward Foundation Model) inference visualization.
4
  Supports single video (progress/success) and dual video (preference/progress) predictions.
5
  Uses eval server for inference instead of loading models locally.
6
  """
@@ -701,34 +701,25 @@ with demo:
701
  gr.Markdown("### Links")
702
  gr.HTML(
703
  """
 
 
 
 
 
 
 
704
  <div style="display: flex; flex-wrap: wrap; gap: 0.5rem;">
705
- <a href="https://robometer.github.io/" target="_blank" rel="noopener"
706
- title="Project page"
707
- style="display: inline-flex; align-items: center; gap: 0.35rem; padding: 0.25rem 0.65rem;
708
- border-radius: 999px; border: 1px solid #e0e0e0; font-size: 0.9rem;
709
- text-decoration: none;">
710
- <span>🌐</span><span>Project</span>
711
  </a>
712
- <a href="https://github.com/robometer" target="_blank" rel="noopener"
713
- title="GitHub"
714
- style="display: inline-flex; align-items: center; gap: 0.35rem; padding: 0.25rem 0.65rem;
715
- border-radius: 999px; border: 1px solid #e0e0e0; font-size: 0.9rem;
716
- text-decoration: none;">
717
- <span>📂</span><span>Code</span>
718
  </a>
719
- <a href="https://huggingface.co/datasets/rewardfm/rbm-1m" target="_blank" rel="noopener"
720
- title="RBM-1M Dataset"
721
- style="display: inline-flex; align-items: center; gap: 0.35rem; padding: 0.25rem 0.65rem;
722
- border-radius: 999px; border: 1px solid #e0e0e0; font-size: 0.9rem;
723
- text-decoration: none;">
724
- <span>📊</span><span>Dataset</span>
725
  </a>
726
- <a href="https://huggingface.co/aliangdw/Robometer-4B" target="_blank" rel="noopener"
727
- title="Model weights on Hugging Face"
728
- style="display: inline-flex; align-items: center; gap: 0.35rem; padding: 0.25rem 0.65rem;
729
- border-radius: 999px; border: 1px solid #e0e0e0; font-size: 0.9rem;
730
- text-decoration: none;">
731
- <span>💾</span><span>Weights</span>
732
  </a>
733
  </div>
734
  """
 
1
  #!/usr/bin/env python3
2
  """
3
+ Gradio app for Robometer (RBM) inference visualization.
4
  Supports single video (progress/success) and dual video (preference/progress) predictions.
5
  Uses eval server for inference instead of loading models locally.
6
  """
 
701
  gr.Markdown("### Links")
702
  gr.HTML(
703
  """
704
+ <style>
705
+ .sidebar-pill { display: inline-flex; align-items: center; gap: 0.35rem; padding: 0.25rem 0.65rem;
706
+ border-radius: 999px; border: 1px solid #e4e4e7; font-size: 0.9rem; text-decoration: none;
707
+ color: inherit; }
708
+ .sidebar-pill:hover { background: rgba(0,0,0,0.04); }
709
+ .sidebar-pill .arrow { opacity: 0.6; font-size: 0.75em; }
710
+ </style>
711
  <div style="display: flex; flex-wrap: wrap; gap: 0.5rem;">
712
+ <a href="https://robometer.github.io/" target="_blank" rel="noopener" class="sidebar-pill" title="Project page">
713
+ <span>🌐</span><span>Project</span><span class="arrow">↗</span>
 
 
 
 
714
  </a>
715
+ <a href="https://github.com/robometer" target="_blank" rel="noopener" class="sidebar-pill" title="GitHub">
716
+ <span>📂</span><span>Code</span><span class="arrow">↗</span>
 
 
 
 
717
  </a>
718
+ <a href="https://huggingface.co/datasets/rewardfm/rbm-1m" target="_blank" rel="noopener" class="sidebar-pill" title="RBM-1M Dataset">
719
+ <span>📊</span><span>Dataset</span><span class="arrow">↗</span>
 
 
 
 
720
  </a>
721
+ <a href="https://huggingface.co/robometer/Robometer-4B" target="_blank" rel="noopener" class="sidebar-pill" title="Model weights">
722
+ <span>💾</span><span>Weights</span><span class="arrow">↗</span>
 
 
 
 
723
  </a>
724
  </div>
725
  """
eval_viz_utils.py CHANGED
@@ -1,6 +1,6 @@
1
  #!/usr/bin/env python3
2
  """
3
- Utility functions for visualization in RFM evaluations.
4
  """
5
 
6
  from typing import Optional
 
1
  #!/usr/bin/env python3
2
  """
3
+ Utility functions for visualization in Robometer (RBM) evaluations.
4
  """
5
 
6
  from typing import Optional
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
- # Requirements for RFM Eval UI Gradio App
2
 
3
  # Core dependencies
4
  matplotlib>=3.5.0
@@ -24,7 +24,7 @@ decord>=0.6.0 # For video frame extraction (same as preprocess_datasets.py)
24
  # Development tools (optional, for auto-reload)
25
  watchfiles # For file watching during development
26
 
27
- # RFM package (installed from git repository)
28
  # For local development, you can also install with: pip install -e ../ (from parent directory)
29
  # git+https://github.com/aliang8/reward_fm.git@anthony_working
30
 
 
1
+ # Requirements for Robometer (RBM) Eval UI Gradio App
2
 
3
  # Core dependencies
4
  matplotlib>=3.5.0
 
24
  # Development tools (optional, for auto-reload)
25
  watchfiles # For file watching during development
26
 
27
+ # RBM package (installed from git repository)
28
  # For local development, you can also install with: pip install -e ../ (from parent directory)
29
  # git+https://github.com/aliang8/reward_fm.git@anthony_working
30
 
samplers/README.md DELETED
@@ -1,182 +0,0 @@
1
- # Sampler Strategies Documentation
2
-
3
- This document summarizes the data generation strategies used by each sampler type in the RFM data pipeline.
4
-
5
- ## Overview
6
-
7
- The codebase contains three main sampler types:
8
- - **SimSampler**: Generates similarity scoring samples
9
- - **PrefSampler**: Generates preference prediction samples
10
- - **ProgressSampler**: Generates progress prediction samples
11
-
12
- Each sampler implements multiple strategies for generating training data, with automatic retry logic and strategy rebalancing on failure.
13
-
14
- ---
15
-
16
- ## SimSampler (Similarity Scoring)
17
-
18
- The `SimSampler` creates similarity scoring samples where two trajectories (`o^1` and `o^2`) are ranked against a reference trajectory (`o^ref`). The goal is to learn that `o^1` should be ranked higher than `o^2`.
19
-
20
- ### Strategies
21
-
22
- #### 1. **REWIND**
23
- - **Description**: Creates a similarity sample where `o^1` is an optimal trajectory from the same task, and `o^2` is a rewound (subsampled) version of the reference trajectory.
24
- - **Purpose**: Learn to distinguish between optimal and suboptimal trajectories from the same task.
25
- - **Implementation**:
26
- - `traj_sim`: Optimal trajectory from same task (via `_get_same_task_optimal`)
27
- - `traj_diff`: Rewound trajectory from reference (via `subsample_rewind`)
28
-
29
- #### 2. **SUBOPTIMAL**
30
- - **Description**: Creates a similarity sample where `o^1` is an optimal trajectory from the same task, and `o^2` is a suboptimal/failure trajectory from the same task.
31
- - **Purpose**: Learn to distinguish between optimal and suboptimal trajectories from the same task.
32
- - **Conditions**: Only available when:
33
- - Data source is in the failure category (`is_failure_ds`)
34
- - Probability is boosted by 2x for failure category data sources
35
- - **Implementation**:
36
- - `traj_sim`: Optimal trajectory from same task (via `_get_same_task_optimal`)
37
- - `traj_diff`: Suboptimal trajectory from same task (via `_get_same_task_suboptimal`)
38
-
39
- #### 3. **PAIRED_HUMAN_ROBOT**
40
- - **Description**: Creates a similarity sample where `o^1` is a paired human/robot trajectory (opposite type from reference, same task), and `o^2` is from a different task.
41
- - **Purpose**: Learn to distinguish between same-task and different-task trajectories, leveraging paired human/robot demonstrations.
42
- - **Conditions**: Only available when:
43
- - Data source is in the paired category (`is_paired_ds`)
44
- - Paired human/robot data exists for the task
45
- - Probability is boosted by 2x for paired category data sources
46
- - **Implementation**:
47
- - `traj_sim`: Paired human/robot trajectory (via `_get_paired_human_robot_traj`)
48
- - `traj_diff`: Trajectory from different task (via `_get_different_video_traj`)
49
-
50
- ### Strategy Selection
51
- - Strategies are selected probabilistically based on `similarity_strategy_ratio` configuration
52
- - Probabilities are rebalanced when strategies fail
53
- - Strategies are removed after 4 consecutive failures
54
- - Maximum 10 total attempts per sample generation
55
-
56
- ### Reference Trajectory Requirements
57
- - For non-RoboArena: Must have `quality_label == "successful"`
58
- - For RoboArena: Must have `partial_success` field present
59
-
60
- ---
61
-
62
- ## PrefSampler (Preference Prediction)
63
-
64
- The `PrefSampler` creates preference prediction samples with a chosen (preferred) trajectory and a rejected (suboptimal) trajectory.
65
-
66
- ### Strategies
67
-
68
- #### 1. **REWIND**
69
- - **Description**: Uses the same optimal trajectory for both chosen and rejected, but applies rewind subsampling to the rejected trajectory.
70
- - **Purpose**: Learn that full trajectories are preferred over truncated/rewound versions.
71
- - **Implementation**:
72
- - `chosen_trajectory`: Original optimal trajectory (forward subsampling)
73
- - `rejected_trajectory`: Same trajectory with `subsample_rewind` strategy
74
-
75
- #### 2. **SUBOPTIMAL**
76
- - **Description**: Uses an optimal trajectory as chosen and a suboptimal/failure trajectory from the same task as rejected.
77
- - **Purpose**: Learn to prefer optimal trajectories over suboptimal ones from the same task.
78
- - **Conditions**: Only available when suboptimal trajectories exist for the task
79
- - **Implementation**:
80
- - `chosen_trajectory`: Optimal trajectory
81
- - `rejected_trajectory`: Suboptimal trajectory from same task (via `_get_same_task_suboptimal`)
82
-
83
- #### 3. **DIFFERENT_TASK**
84
- - **Description**: Uses an optimal trajectory as chosen and a trajectory from a completely different task as rejected.
85
- - **Purpose**: Learn that trajectories from the same task are preferred over trajectories from different tasks.
86
- - **Implementation**:
87
- - `chosen_trajectory`: Optimal trajectory
88
- - `rejected_trajectory`: Trajectory from different task (via `_get_different_video_traj`)
89
- - **Note**: Rejected trajectory's `target_progress` is set to `[0.0]` for all timesteps
90
-
91
- #### 4. **REVERSE_PROGRESS**
92
- - **Description**: Uses the same optimal trajectory for both chosen and rejected, but applies reverse uniform sampling to the rejected trajectory.
93
- - **Purpose**: Learn that forward progress is preferred over reverse progress.
94
- - **Implementation**:
95
- - `chosen_trajectory`: Original optimal trajectory (forward subsampling)
96
- - `rejected_trajectory`: Same trajectory with `subsample_reverse` strategy
97
-
98
- #### 5. **ROBOARENA_PARTIAL_SUCCESS**
99
- - **Description**: Uses two trajectories from the same task with different `partial_success` values. The trajectory with higher `partial_success` becomes chosen, and the one with lower `partial_success` becomes rejected.
100
- - **Purpose**: Learn to prefer trajectories with higher partial success scores (RoboArena-specific).
101
- - **Conditions**: Only available for RoboArena trajectories (has `partial_success` field and data_source contains "roboarena")
102
- - **Implementation**:
103
- - Finds a different trajectory from same task (via `_get_different_partial_success_traj`)
104
- - Swaps trajectories if found trajectory has higher `partial_success`
105
- - `chosen_trajectory`: Trajectory with higher `partial_success`
106
- - `rejected_trajectory`: Trajectory with lower `partial_success`
107
-
108
- ### Special Handling
109
- - **Non-successful trajectories**: If a trajectory has `quality_label != "successful"` (and is not RoboArena), it is automatically used as the rejected trajectory, with an optimal trajectory from the same task as the chosen trajectory.
110
-
111
- ### Strategy Selection
112
- - Strategies are selected probabilistically based on `preference_strategy_ratio` configuration
113
- - Probabilities are rebalanced when strategies fail
114
- - Strategies are removed after 3 consecutive failures
115
- - Maximum 10 total attempts per sample generation
116
-
117
- ---
118
-
119
- ## ProgressSampler (Progress Prediction)
120
-
121
- The `ProgressSampler` creates progress prediction samples from a single trajectory, applying different subsampling strategies to create training data.
122
-
123
- ### Strategies
124
-
125
- #### 1. **DIFFERENT_TASK_INSTRUCTION**
126
- - **Description**: Uses a trajectory from a different task, but keeps the original task's embeddings and instruction.
127
- - **Purpose**: Learn that progress should be 0.0 when the trajectory doesn't match the task instruction.
128
- - **Implementation**:
129
- - Gets trajectory from different task (via `_get_different_task_instruction`)
130
- - Replaces embeddings with original task's embeddings
131
- - Sets `target_progress = [0.0]` for all timesteps
132
- - Uses forward subsampling
133
-
134
- #### 2. **FORWARD_PROGRESS**
135
- - **Description**: Samples the same trajectory with forward direction (start < middle < end).
136
- - **Purpose**: Learn normal forward progress patterns.
137
- - **Implementation**:
138
- - Uses same trajectory with `subsample_forward` strategy
139
- - Progress increases from start to end
140
-
141
- #### 3. **REVERSE_PROGRESS**
142
- - **Description**: Samples the same trajectory with reverse direction (end < middle < start).
143
- - **Purpose**: Learn to handle reverse progress scenarios.
144
- - **Implementation**:
145
- - Uses same trajectory with `subsample_reverse` strategy
146
- - Progress decreases from start to end
147
-
148
- #### 4. **REWIND**
149
- - **Description**: Samples the same trajectory with rewind direction (start < end < middle).
150
- - **Purpose**: Learn to handle non-monotonic progress patterns.
151
- - **Implementation**:
152
- - Uses same trajectory with `subsample_rewind` strategy
153
- - Progress pattern: increases, then decreases
154
-
155
- ### Strategy Selection
156
- - Strategies are selected probabilistically based on `progress_strategy_ratio` configuration
157
- - Probabilities are rebalanced when strategies fail
158
- - Failed strategies are immediately removed (no retry count threshold)
159
- - Maximum 10 total attempts per sample generation
160
-
161
- ---
162
-
163
- ## Common Features
164
-
165
- ### Retry Logic
166
- All samplers implement retry logic with:
167
- - Maximum attempt limits (typically 10 attempts)
168
- - Strategy-specific retry counts (3-4 attempts per strategy)
169
- - Automatic strategy removal after consecutive failures
170
- - Probability rebalancing when strategies are removed
171
-
172
- ### Subsample Strategies
173
- Common subsampling strategies used across samplers:
174
- - `subsample_forward`: Normal forward sampling (start → end)
175
- - `subsample_reverse`: Reverse sampling (end → start)
176
- - `subsample_rewind`: Rewind sampling (start → end → start)
177
-
178
- ### Data Source Filtering
179
- - Strategies may be filtered or boosted based on data source categories:
180
- - **Failure category**: Boosts SUBOPTIMAL strategy probability by 2x
181
- - **Paired category**: Boosts PAIRED_HUMAN_ROBOT strategy probability by 2x
182
- - **RoboArena**: Special handling for `partial_success` field
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
samplers/__init__.py CHANGED
@@ -1,23 +1,19 @@
1
- from rfm.data.samplers.base import RFMBaseSampler
2
- from rfm.data.samplers.pref import PrefSampler
3
- from rfm.data.samplers.sim import SimSampler
4
- from rfm.data.samplers.progress import ProgressSampler
5
- from rfm.data.samplers.eval.confusion_matrix import ConfusionMatrixSampler
6
- from rfm.data.samplers.eval.progress_policy_ranking import ProgressPolicyRankingSampler
7
- from rfm.data.samplers.eval.reward_alignment import RewardAlignmentSampler
8
- from rfm.data.samplers.eval.quality_preference import QualityPreferenceSampler
9
- from rfm.data.samplers.eval.roboarena_quality_preference import RoboArenaQualityPreferenceSampler
10
- from rfm.data.samplers.eval.similarity_score import SimilarityScoreSampler
11
 
12
  __all__ = [
13
- "RFMBaseSampler",
14
  "PrefSampler",
15
- "SimSampler",
16
  "ProgressSampler",
17
  "ConfusionMatrixSampler",
18
  "ProgressPolicyRankingSampler",
19
  "RewardAlignmentSampler",
20
  "QualityPreferenceSampler",
21
  "RoboArenaQualityPreferenceSampler",
22
- "SimilarityScoreSampler",
23
  ]
 
1
+ from robometer.data.samplers.base import RBMBaseSampler
2
+ from robometer.data.samplers.pref import PrefSampler
3
+ from robometer.data.samplers.progress import ProgressSampler
4
+ from robometer.data.samplers.eval.confusion_matrix import ConfusionMatrixSampler
5
+ from robometer.data.samplers.eval.progress_policy_ranking import ProgressPolicyRankingSampler
6
+ from robometer.data.samplers.eval.reward_alignment import RewardAlignmentSampler
7
+ from robometer.data.samplers.eval.quality_preference import QualityPreferenceSampler
8
+ from robometer.data.samplers.eval.roboarena_quality_preference import RoboArenaQualityPreferenceSampler
 
 
9
 
10
  __all__ = [
11
+ "RBMBaseSampler",
12
  "PrefSampler",
 
13
  "ProgressSampler",
14
  "ConfusionMatrixSampler",
15
  "ProgressPolicyRankingSampler",
16
  "RewardAlignmentSampler",
17
  "QualityPreferenceSampler",
18
  "RoboArenaQualityPreferenceSampler",
 
19
  ]
samplers/base.py CHANGED
@@ -7,8 +7,8 @@ import torch
7
  from random import Random
8
  from datasets import Dataset
9
 
10
- from rfm.configs.experiment_configs import DataConfig
11
- from rfm.data.datasets.helpers import (
12
  load_frames_from_npz,
13
  get_segment_indices_with_middle,
14
  compute_progress_from_segment,
@@ -20,13 +20,14 @@ from rfm.data.datasets.helpers import (
20
  linspace_subsample_frames,
21
  convert_continuous_to_discrete_bins,
22
  )
23
- from rfm.data.dataset_types import Trajectory
24
- from rfm.utils.logger import get_logger
 
25
 
26
  logger = get_logger()
27
 
28
 
29
- class RFMBaseSampler:
30
  """Base sampler class that provides trajectory retrieval functions for generating samples."""
31
 
32
  def __init__(
@@ -37,6 +38,7 @@ class RFMBaseSampler:
37
  dataset_success_cutoff_map: Optional[Dict[str, float]] = None,
38
  verbose: bool = True,
39
  random_seed: int = 42,
 
40
  ):
41
  """Initialize sampler with dataset and indices.
42
 
@@ -53,7 +55,7 @@ class RFMBaseSampler:
53
  self.verbose = verbose
54
  self.dataset_success_cutoff_map = dataset_success_cutoff_map or {}
55
  self._local_random = Random(random_seed)
56
-
57
  self._cached_ids = self.dataset["id"]
58
  self._cached_is_robot = self.dataset["is_robot"]
59
 
@@ -108,7 +110,7 @@ class RFMBaseSampler:
108
  item: An item from the dataset (typically a trajectory dict)
109
 
110
  Returns:
111
- A sample object (e.g., PreferenceSample, SimilaritySample, ProgressSample)
112
  """
113
  raise NotImplementedError("Subclasses must implement _generate_sample")
114
 
@@ -577,6 +579,7 @@ class RFMBaseSampler:
577
  subsample_strategy: str | None = None,
578
  frame_indices: List[int] | None = None,
579
  metadata: Dict[str, Any] | None = None,
 
580
  ) -> Trajectory:
581
  """Load, subsample, and optionally pad trajectory data and create a Trajectory object.
582
 
@@ -585,6 +588,7 @@ class RFMBaseSampler:
585
  subsample_strategy: Optional strategy for subsampling ("subsample_forward", "subsample_reverse", "subsample_rewind", or None for default/bidirectional). Ignored if frame_indices is provided.
586
  frame_indices: Optional list of specific frame indices to use. If provided, subsample_strategy is ignored.
587
  metadata: Optional metadata dict to merge into trajectory metadata.
 
588
 
589
  Returns:
590
  Trajectory object with loaded and subsampled data (padded)
@@ -701,7 +705,7 @@ class RFMBaseSampler:
701
  indices = [indices[idx] for idx in frame_indices_subsample] if isinstance(indices, list) else indices
702
 
703
  # Pad if needed
704
- if target_progress:
705
  if self.config.load_embeddings:
706
  subsampled, target_progress = pad_trajectory_to_max_frames_torch(
707
  subsampled, target_progress, self.config.max_frames
@@ -711,6 +715,30 @@ class RFMBaseSampler:
711
  subsampled, target_progress, self.config.max_frames
712
  )
713
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
714
  # Update frames_shape
715
  frames_shape = subsampled.shape if hasattr(subsampled, "shape") else tuple()
716
 
@@ -747,6 +775,7 @@ class RFMBaseSampler:
747
  "target_progress": target_progress,
748
  "success_label": success_label,
749
  "partial_success": partial_success,
 
750
  "metadata": metadata,
751
  },
752
  )
 
7
  from random import Random
8
  from datasets import Dataset
9
 
10
+ from robometer.configs.experiment_configs import DataConfig
11
+ from robometer.data.datasets.helpers import (
12
  load_frames_from_npz,
13
  get_segment_indices_with_middle,
14
  compute_progress_from_segment,
 
20
  linspace_subsample_frames,
21
  convert_continuous_to_discrete_bins,
22
  )
23
+ from robometer.data.dataset_types import Trajectory
24
+ from robometer.utils.logger import get_logger
25
+ from robometer.data.dataset_category import is_preference_only_ds
26
 
27
  logger = get_logger()
28
 
29
 
30
+ class RBMBaseSampler:
31
  """Base sampler class that provides trajectory retrieval functions for generating samples."""
32
 
33
  def __init__(
 
38
  dataset_success_cutoff_map: Optional[Dict[str, float]] = None,
39
  verbose: bool = True,
40
  random_seed: int = 42,
41
+ pad_frames: bool = True,
42
  ):
43
  """Initialize sampler with dataset and indices.
44
 
 
55
  self.verbose = verbose
56
  self.dataset_success_cutoff_map = dataset_success_cutoff_map or {}
57
  self._local_random = Random(random_seed)
58
+ self.pad_frames = pad_frames
59
  self._cached_ids = self.dataset["id"]
60
  self._cached_is_robot = self.dataset["is_robot"]
61
 
 
110
  item: An item from the dataset (typically a trajectory dict)
111
 
112
  Returns:
113
+ A sample object (e.g., PreferenceSample, ProgressSample)
114
  """
115
  raise NotImplementedError("Subclasses must implement _generate_sample")
116
 
 
579
  subsample_strategy: str | None = None,
580
  frame_indices: List[int] | None = None,
581
  metadata: Dict[str, Any] | None = None,
582
+ pad_frames: bool = True,
583
  ) -> Trajectory:
584
  """Load, subsample, and optionally pad trajectory data and create a Trajectory object.
585
 
 
588
  subsample_strategy: Optional strategy for subsampling ("subsample_forward", "subsample_reverse", "subsample_rewind", or None for default/bidirectional). Ignored if frame_indices is provided.
589
  frame_indices: Optional list of specific frame indices to use. If provided, subsample_strategy is ignored.
590
  metadata: Optional metadata dict to merge into trajectory metadata.
591
+ pad_frames: Whether to pad the trajectory data to max_frames.
592
 
593
  Returns:
594
  Trajectory object with loaded and subsampled data (padded to max_frames when pad_frames is True and target_progress is non-empty)
 
705
  indices = [indices[idx] for idx in frame_indices_subsample] if isinstance(indices, list) else indices
706
 
707
  # Pad if needed
708
+ if target_progress and pad_frames:
709
  if self.config.load_embeddings:
710
  subsampled, target_progress = pad_trajectory_to_max_frames_torch(
711
  subsampled, target_progress, self.config.max_frames
 
715
  subsampled, target_progress, self.config.max_frames
716
  )
717
 
718
+ # Create predict_last_frame_mask: mark the last frame if partial_success < 1.0
719
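+ # If predict_last_frame_partial_progress is True, partial_success is not None, and either partial_success != 1.0 or the data source is preference-only, then when the last original frame is in the subsampled indices,
720
+ # mark all positions where it appears with 1.0, all others 0.0. Otherwise the mask stays all 1.0s, except where the `else` branch below resets it to all 0.0s.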
721
+ final_frame_count = len(subsampled)
722
+ predict_last_frame_mask = [1.0] * final_frame_count # Default: all 1.0s (no masking)
723
+
724
+ if self.config.predict_last_frame_partial_progress and partial_success is not None:
725
+ if partial_success == 1.0 and not is_preference_only_ds(traj["data_source"]):
726
+ pass
727
+ else:
728
+ last_original_frame_idx = num_frames_total - 1
729
+ if isinstance(indices, list) and last_original_frame_idx in indices:
730
+ # Find all positions where the last frame index appears
731
+ last_frame_positions = [
732
+ i for i, idx in enumerate(indices) if idx == last_original_frame_idx and i < final_frame_count
733
+ ]
734
+ if last_frame_positions:
735
+ # Mark all positions where the last frame appears with 1.0, all others 0.0
736
+ predict_last_frame_mask = [0.0] * final_frame_count
737
+ for pos in last_frame_positions:
738
+ predict_last_frame_mask[pos] = 1.0
739
+ else:
740
+ predict_last_frame_mask = [0.0] * final_frame_count
741
+
742
  # Update frames_shape
743
  frames_shape = subsampled.shape if hasattr(subsampled, "shape") else tuple()
744
 
 
775
  "target_progress": target_progress,
776
  "success_label": success_label,
777
  "partial_success": partial_success,
778
+ "predict_last_frame_mask": predict_last_frame_mask,
779
  "metadata": metadata,
780
  },
781
  )
samplers/pref.py CHANGED
@@ -7,19 +7,19 @@ from typing import Dict, List, Optional, Any
7
 
8
  import random
9
 
10
- from rfm.data.dataset_types import PreferenceSample, Trajectory
11
- from rfm.data.samplers.base import RFMBaseSampler
12
- from rfm.data.datasets.helpers import (
13
  DataGenStrat,
14
  convert_continuous_to_discrete_bins,
15
  )
16
- from rfm.utils.logger import get_logger, rank_0_info, trace
17
- from rfm.utils.timer import timer
18
 
19
  logger = get_logger()
20
 
21
 
22
- class PrefSampler(RFMBaseSampler):
23
  """Data generator for producing batches of preference prediction data."""
24
 
25
  def __init__(self, is_evaluation=False, **kwargs):
@@ -343,6 +343,9 @@ class PrefSampler(RFMBaseSampler):
343
 
344
  rejected_trajectory = self._get_traj_from_data(rejected_traj, subsample_strategy=rejected_subsample_strategy)
345
 
 
 
 
346
  # If our strategy is different task, make sure the rejected trajectory has 0 progress and 0 success labels
347
  if strategy_used in [
348
  DataGenStrat.DIFFERENT_TASK,
 
7
 
8
  import random
9
 
10
+ from robometer.data.dataset_types import PreferenceSample, Trajectory
11
+ from robometer.data.samplers.base import RBMBaseSampler
12
+ from robometer.data.datasets.helpers import (
13
  DataGenStrat,
14
  convert_continuous_to_discrete_bins,
15
  )
16
+ from robometer.utils.logger import get_logger, rank_0_info, trace
17
+ from robometer.utils.timer import timer
18
 
19
  logger = get_logger()
20
 
21
 
22
+ class PrefSampler(RBMBaseSampler):
23
  """Data generator for producing batches of preference prediction data."""
24
 
25
  def __init__(self, is_evaluation=False, **kwargs):
 
343
 
344
  rejected_trajectory = self._get_traj_from_data(rejected_traj, subsample_strategy=rejected_subsample_strategy)
345
 
346
+ if rejected_trajectory is None or chosen_trajectory is None:
347
+ return None
348
+
349
  # If our strategy is different task, make sure the rejected trajectory has 0 progress and 0 success labels
350
  if strategy_used in [
351
  DataGenStrat.DIFFERENT_TASK,
samplers/progress.py CHANGED
@@ -3,20 +3,20 @@ from typing import Dict, Any, Optional
3
  import random
4
  import torch
5
 
6
- from rfm.data.dataset_types import ProgressSample, Trajectory
7
- from rfm.data.samplers.base import RFMBaseSampler
8
- from rfm.data.datasets.helpers import (
9
  DataGenStrat,
10
  load_embeddings_from_path,
11
  convert_continuous_to_discrete_bins,
12
  )
13
- from rfm.utils.distributed import rank_0_print
14
- from rfm.utils.logger import get_logger
15
 
16
  logger = get_logger()
17
 
18
 
19
- class ProgressSampler(RFMBaseSampler):
20
  """Data generator for progress samples."""
21
 
22
  def __init__(self, is_evaluation=False, **kwargs):
@@ -147,6 +147,9 @@ class ProgressSampler(RFMBaseSampler):
147
 
148
  progress_traj = self._get_traj_from_data(processed_traj, subsample_strategy=subsample_strategy)
149
 
 
 
 
150
  # Handle special cases
151
  if strategy_used in [DataGenStrat.DIFFERENT_TASK, DataGenStrat.DIFFERENT_TASK_INSTRUCTION]:
152
  # We need to use the original task embeddings instead of the different task embeddings
 
3
  import random
4
  import torch
5
 
6
+ from robometer.data.dataset_types import ProgressSample, Trajectory
7
+ from robometer.data.samplers.base import RBMBaseSampler
8
+ from robometer.data.datasets.helpers import (
9
  DataGenStrat,
10
  load_embeddings_from_path,
11
  convert_continuous_to_discrete_bins,
12
  )
13
+ from robometer.utils.distributed import rank_0_print
14
+ from robometer.utils.logger import get_logger
15
 
16
  logger = get_logger()
17
 
18
 
19
+ class ProgressSampler(RBMBaseSampler):
20
  """Data generator for progress samples."""
21
 
22
  def __init__(self, is_evaluation=False, **kwargs):
 
147
 
148
  progress_traj = self._get_traj_from_data(processed_traj, subsample_strategy=subsample_strategy)
149
 
150
+ if progress_traj is None:
151
+ return None
152
+
153
  # Handle special cases
154
  if strategy_used in [DataGenStrat.DIFFERENT_TASK, DataGenStrat.DIFFERENT_TASK_INSTRUCTION]:
155
  # We need to use the original task embeddings instead of the different task embeddings
samplers/sim.py DELETED
@@ -1,420 +0,0 @@
1
- #!/usr/bin/env python3
2
- from typing import Dict, List, Tuple, Optional, Union, Any
3
-
4
- import torch
5
-
6
- from rfm.data.dataset_types import SimilaritySample, Trajectory
7
- from rfm.data.samplers.base import RFMBaseSampler
8
- from rfm.data.datasets.helpers import DataGenStrat, convert_continuous_to_discrete_bins
9
- from rfm.data.dataset_category import is_failure_ds, is_paired_ds
10
- from rfm.utils.logger import get_logger, rank_0_info
11
-
12
- logger = get_logger()
13
-
14
-
15
class SimSampler(RFMBaseSampler):
    """Data generator for producing batches for similarity scoring.

    Each sample is a triple (reference, similar, different) built from a
    reference trajectory using one of the strategies REWIND, SUBOPTIMAL or
    PAIRED_HUMAN_ROBOT. Strategy weights come from
    ``config.similarity_strategy_ratio`` in that order.
    """

    def __init__(self, is_evaluation=False, **kwargs):
        """Initialise the sampler and cache which strategies have usable data.

        Args:
            is_evaluation: Accepted for signature compatibility.
                NOTE(review): it is neither stored nor forwarded to the base
                class here - confirm this is intended.
            **kwargs: Forwarded unchanged to the base sampler.
        """
        super().__init__(**kwargs)
        self.similarity_strategy_ratio: List[float] = self.config.similarity_strategy_ratio
        # True only if at least one task has both robot and human entries.
        self._has_paired_human_robot = (
            any(
                len(entry.get("robot", [])) > 0 and len(entry.get("human", [])) > 0
                for entry in self.paired_human_robot_by_task.values()
            )
            if self.paired_human_robot_by_task
            else False
        )
        # True only if at least one task has a non-empty suboptimal index list.
        self._has_suboptimal = (
            any(len(indices) > 0 for indices in self.suboptimal_by_task.values()) if self.suboptimal_by_task else False
        )
        rank_0_info(
            f"[SIM SAMPLER] Has paired human/robot: {self._has_paired_human_robot}, Has suboptimal: {self._has_suboptimal}"
        )

    def _generate_sample(self, item: dict, preferred_strategy: Optional[DataGenStrat] = None):
        """Create a similarity sample using ``item`` as the reference trajectory."""
        return self._create_similarity_sample(ref_traj=item, preferred_strategy=preferred_strategy)

    def _execute_strategy(
        self, strategy: DataGenStrat, ref_traj: Dict[str, Any]
    ) -> Optional[Tuple[Dict[str, Any], Union[Dict[str, Any], Trajectory]]]:
        """Execute a strategy to get trajectory pairs.

        Args:
            strategy: The strategy to execute
            ref_traj: The reference trajectory

        Returns:
            Tuple of (traj_sim, traj_diff), or None if the strategy failed or
            is not one of REWIND / SUBOPTIMAL / PAIRED_HUMAN_ROBOT.
        """
        if strategy == DataGenStrat.REWIND:
            return self._get_traj_dicts_for_rewind(ref_traj)
        if strategy == DataGenStrat.SUBOPTIMAL:
            return self._get_traj_dicts_for_suboptimal(ref_traj)
        if strategy == DataGenStrat.PAIRED_HUMAN_ROBOT:
            return self._get_traj_dicts_for_paired_human_robot(ref_traj)
        return None

    @staticmethod
    def _sim_retry(sample_fn, max_retries: int = 3):
        """Call ``sample_fn`` up to ``max_retries`` times; return the first non-None result, else None."""
        for _ in range(max_retries):
            candidate = sample_fn()
            if candidate is not None:
                return candidate
        return None

    def _sim_sample_reference(self) -> Optional[Dict[str, Any]]:
        """Pick a random reference trajectory from ``optimal_by_task``; None if no task has indices."""
        if not self.optimal_by_task:
            return None

        # Drop tasks whose index list is empty so the random choice below cannot fail.
        valid_tasks = {task: indices for task, indices in self.optimal_by_task.items() if indices}
        if not valid_tasks:
            return None

        task_name = self._local_random.choice(list(valid_tasks.keys()))
        optimal_idx = self._local_random.choice(valid_tasks[task_name])
        return self.dataset[optimal_idx]

    def _sim_build_strategies(self, ref_traj: Dict[str, Any]) -> List[Tuple[DataGenStrat, float]]:
        """Build the (strategy, weight) candidates that apply to ``ref_traj``'s data source."""
        ratios = self.similarity_strategy_ratio
        data_source = ref_traj.get("data_source")
        is_failure_source = is_failure_ds(data_source) if data_source else False
        is_paired_source = is_paired_ds(data_source) if data_source else False

        strategies = []

        # REWIND is always a candidate when its ratio is positive.
        if ratios[0] > 0:
            strategies.append((DataGenStrat.REWIND, ratios[0]))

        # SUBOPTIMAL only for failure-category data sources, with its weight doubled.
        if len(ratios) > 1 and ratios[1] > 0 and is_failure_source:
            strategies.append((DataGenStrat.SUBOPTIMAL, ratios[1] * 2.0))

        # PAIRED_HUMAN_ROBOT only for paired-category data sources, with its weight doubled.
        if self._has_paired_human_robot and len(ratios) > 2 and ratios[2] > 0 and is_paired_source:
            strategies.append((DataGenStrat.PAIRED_HUMAN_ROBOT, ratios[2] * 2.0))

        return [(strat, weight) for strat, weight in strategies if weight > 0]

    def _sim_pick_strategy(self, strategies: List[Tuple[DataGenStrat, float]], total_weight: float) -> DataGenStrat:
        """Draw one strategy with probability proportional to its weight.

        ``strategies`` must be non-empty and ``total_weight`` positive.
        """
        draw = self._local_random.random()
        cumulative = 0.0
        for strat, weight in strategies:
            cumulative += weight / total_weight
            if draw <= cumulative:
                return strat
        # Floating-point rounding can leave the cumulative sum marginally below
        # the draw; fall back to the last strategy instead of selecting nothing.
        return strategies[-1][0]

    def _create_similarity_sample(
        self, ref_traj: Optional[Dict[str, Any]] = None, preferred_strategy: Optional[DataGenStrat] = None
    ) -> Optional[SimilaritySample]:
        """Create a similarity scoring sample: o^sim and o^diff ranked against o^ref.

        o^sim is the preferred trajectory and should be ranked higher than
        o^diff. How the pair is built depends on the strategy:

        - REWIND: sim is an optimal trajectory from the same task, diff is the
          reference trajectory subsampled with ``subsample_rewind``.
        - SUBOPTIMAL: sim is an optimal trajectory from the same task, diff is
          a suboptimal trajectory from the same task.
        - PAIRED_HUMAN_ROBOT: sim is the paired human/robot trajectory, diff is
          either a robot suboptimal trajectory from the same task or a
          trajectory returned by ``_get_different_video_traj``.

        Args:
            ref_traj: Optional reference trajectory. If None, one is sampled
                from ``optimal_by_task``.
            preferred_strategy: If given, only this strategy is tried (once);
                otherwise strategies are drawn by weight with retries.

        Returns:
            The sample, or None if no reference / pair / trajectory could be
            produced.
        """
        max_attempts = 10  # Upper bound on weighted-strategy retries
        max_strategy_attempts = 4  # Failures allowed per strategy before it is dropped

        traj_id = ref_traj.get("id", "unknown") if ref_traj is not None else "sampling_new"
        logger.trace(f"[SIM SAMPLER] Creating similarity sample for trajectory ID: {traj_id}")

        if ref_traj is None:
            ref_traj = self._sim_sample_reference()
            if ref_traj is None:
                return None

        # A reference is usable if it carries partial_success; otherwise it
        # must be labelled "successful".
        quality_label = ref_traj.get("quality_label")
        if ref_traj.get("partial_success") is None and quality_label != "successful":
            logger.trace(
                f"[SIM SAMPLER] Ref trajectory {ref_traj.get('id', 'unknown')} is not successful (quality_label: {quality_label}), skipping"
            )
            return None

        traj_sim, traj_diff = None, None
        strategy_used = None

        if preferred_strategy is not None:
            logger.trace(f"[SIM SAMPLER] Using preferred strategy: {preferred_strategy.value}")
            result = self._execute_strategy(preferred_strategy, ref_traj)
            if result is None:
                logger.trace(f"[SIM SAMPLER] Preferred strategy {preferred_strategy.value} failed, returning None")
                return None
            traj_sim, traj_diff = result
            strategy_used = preferred_strategy
            attempt = 1  # The preferred strategy is tried exactly once
        else:
            strategies = self._sim_build_strategies(ref_traj)
            failure_counts = {strat: 0 for strat, _ in strategies}
            attempt = 0

            while traj_sim is None and attempt < max_attempts:
                attempt += 1

                if not strategies:
                    return None

                total_weight = sum(weight for _, weight in strategies)
                if total_weight == 0:
                    return None

                selected_strategy = self._sim_pick_strategy(strategies, total_weight)
                logger.trace(
                    f"[SIM SAMPLER] Attempt {attempt}/{max_attempts}: Trying strategy {selected_strategy.value}"
                )

                result = self._execute_strategy(selected_strategy, ref_traj)
                if result is not None:
                    traj_sim, traj_diff = result
                    strategy_used = selected_strategy
                    logger.trace(f"[SIM SAMPLER] Strategy {selected_strategy.value} succeeded on attempt {attempt}")
                    continue

                failure_counts[selected_strategy] += 1
                failed_count = failure_counts[selected_strategy]
                logger.trace(
                    f"[SIM SAMPLER] Strategy {selected_strategy.value} failed (failure count: {failed_count}/{max_strategy_attempts})"
                )

                # Drop a strategy once it has failed max_strategy_attempts times in
                # total (the count is cumulative, not reset by other strategies).
                if failed_count >= max_strategy_attempts:
                    logger.trace(
                        f"[SIM SAMPLER] Removing strategy {selected_strategy.value} after {max_strategy_attempts} consecutive failures"
                    )
                    strategies = [(strat, weight) for strat, weight in strategies if strat != selected_strategy]

        if traj_sim is None or traj_diff is None:
            logger.trace(
                f"[SIM SAMPLER] Failed to generate similarity sample after {max_attempts} attempts - all strategies exhausted"
            )
            return None

        ref_trajectory = self._get_traj_from_data(ref_traj)
        sim_trajectory = self._get_traj_from_data(traj_sim)
        diff_trajectory = self._get_traj_from_data(traj_diff)

        # _get_traj_from_data can return None; bail out rather than build a
        # sample with a missing trajectory or dereference None below.
        if ref_trajectory is None or sim_trajectory is None or diff_trajectory is None:
            logger.trace("[SIM SAMPLER] Could not build ref/sim/diff trajectory, returning None")
            return None

        # For PAIRED_HUMAN_ROBOT the diff may come from a different task; in that
        # case its progress and success targets are zeroed. A same-task diff is
        # left untouched here.
        if strategy_used == DataGenStrat.PAIRED_HUMAN_ROBOT and diff_trajectory.task != ref_traj["task"]:
            diff_trajectory.target_progress = [0.0] * len(diff_trajectory.target_progress)
            if self.config.progress_loss_type.lower() == "discrete":
                diff_trajectory.target_progress = convert_continuous_to_discrete_bins(
                    diff_trajectory.target_progress, self.config.progress_discrete_bins
                )
            if diff_trajectory.success_label is not None:
                diff_trajectory.success_label = [0.0] * len(diff_trajectory.success_label)

        sample = SimilaritySample(
            ref_trajectory=ref_trajectory,
            sim_trajectory=sim_trajectory,
            diff_trajectory=diff_trajectory,
            data_gen_strategy=strategy_used.value,
        )
        sample.resample_attempts = attempt
        return sample

    def _get_traj_dicts_for_rewind(
        self, ref_traj: dict
    ) -> Optional[Tuple[Dict[str, Any], Union[Dict[str, Any], Trajectory]]]:
        """Get traj_sim and traj_diff for rewind strategy.

        Returns:
            Tuple of (traj_sim, traj_diff) where:
            - traj_sim = optimal trajectory from same task
            - traj_diff = reference trajectory subsampled with ``subsample_rewind``
            Returns None if either cannot be generated after retries; the
            caller decides whether to retry with another strategy.
        """
        # Both lookups are always attempted, in this order, even if the first fails.
        traj_sim = self._sim_retry(lambda: self._get_same_task_optimal(ref_traj))
        traj_diff = self._sim_retry(
            lambda: self._get_traj_from_data(ref_traj, subsample_strategy="subsample_rewind")
        )

        if traj_sim is None or traj_diff is None:
            return None
        return traj_sim, traj_diff

    def _get_robot_suboptimal_same_task(self, ref_traj: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        """Get robot suboptimal trajectory from same task.

        Args:
            ref_traj: Reference trajectory (should be human)

        Returns:
            Robot suboptimal trajectory dict from same task or None if not available
        """
        task_name = ref_traj["task"]
        same_task_suboptimal_indices = self.suboptimal_by_task.get(task_name, [])
        if not same_task_suboptimal_indices:
            logger.trace(f"[SIM SAMPLER] _get_robot_suboptimal_same_task: No suboptimal indices for task '{task_name}'")
            return None

        # Keep only indices flagged as robot trajectories.
        robot_suboptimal_indices = [idx for idx in same_task_suboptimal_indices if self._cached_is_robot[idx]]
        if not robot_suboptimal_indices:
            logger.trace(
                f"[SIM SAMPLER] _get_robot_suboptimal_same_task: No robot suboptimal indices for task '{task_name}'"
            )
            return None

        # Never return the reference trajectory itself.
        chosen_id = ref_traj["id"]
        filtered_indices = [idx for idx in robot_suboptimal_indices if self._cached_ids[idx] != chosen_id]
        if not filtered_indices:
            logger.trace(
                f"[SIM SAMPLER] _get_robot_suboptimal_same_task: All robot suboptimal trajectories have same ID '{chosen_id}' for task '{task_name}'"
            )
            return None

        selected_idx = self._local_random.choice(filtered_indices)
        result = self.dataset[selected_idx]
        logger.trace(
            f"[SIM SAMPLER] _get_robot_suboptimal_same_task: Found robot suboptimal trajectory {result.get('id', 'unknown')} for task '{task_name}'"
        )
        return result

    def _get_traj_dicts_for_paired_human_robot(
        self, ref_traj: Dict[str, Any]
    ) -> Optional[Tuple[Dict[str, Any], Union[Dict[str, Any], Trajectory]]]:
        """Get traj_sim and traj_diff for paired human/robot strategy.

        Args:
            ref_traj: Reference trajectory (should be human successful)

        Returns:
            Tuple of (traj_sim, traj_diff) or None if not available.
            traj_sim comes from ``_get_paired_human_robot_traj``.
            traj_diff is, with equal probability on each retry, either a robot
            suboptimal trajectory from the same task or the result of
            ``_get_different_video_traj``.
        """

        def _sample_diff():
            # 50/50 choice, redrawn on every retry.
            if self._local_random.random() < 0.5:
                return self._get_robot_suboptimal_same_task(ref_traj)
            return self._get_different_video_traj(ref_traj)

        traj_sim = self._sim_retry(lambda: self._get_paired_human_robot_traj(ref_traj))
        traj_diff = self._sim_retry(_sample_diff)

        if traj_sim is None or traj_diff is None:
            return None
        return traj_sim, traj_diff

    def _get_traj_dicts_for_suboptimal(
        self, ref_traj: Dict[str, Any]
    ) -> Optional[Tuple[Dict[str, Any], Union[Dict[str, Any], Trajectory]]]:
        """Get traj_sim and traj_diff for suboptimal strategy.

        Args:
            ref_traj: Reference trajectory (must be successful)

        Returns:
            Tuple of (traj_sim, traj_diff) or None if not available.
            traj_sim is an optimal trajectory from same task;
            traj_diff is a suboptimal trajectory from same task.
        """
        traj_sim = self._sim_retry(lambda: self._get_same_task_optimal(ref_traj))
        traj_diff = self._sim_retry(lambda: self._get_same_task_suboptimal(ref_traj))

        if traj_sim is None or traj_diff is None:
            return None
        return traj_sim, traj_diff