Spaces:

leggedrobotics
/

navitrace_leaderboard

Running

App Files Files Community

TimWindecker committed on Oct 13

Commit

0e5ff65

verified ·

1 Parent(s): 641159b

Update src/score_calculation/score.py

Browse files

Files changed (1) hide show

src/score_calculation/score.py +36 -10

src/score_calculation/score.py CHANGED Viewed

@@ -41,7 +41,6 @@ def create_penalty_lookup(embodiment: str) -> Dict[int, float]:
     return label_id_to_penalty
 def rasterize_gt_trace(
     gt_trace: List[List[float]], height: int, width: int
 ) -> np.ndarray:
@@ -68,7 +67,6 @@ def rasterize_gt_trace(
     return np.array(gt_line_pixels)
 def create_penalty_mask(
     segmentation_mask: np.ndarray,
     gt_trace: List[List[float]],
@@ -115,7 +113,6 @@ def create_penalty_mask(
     return penalty_mask
 def resample_to_match_length(
     trace_1: np.ndarray, trace_2: np.ndarray
 ) -> Tuple[np.ndarray, np.ndarray]:
@@ -149,7 +146,6 @@ def resample_to_match_length(
     else:
         return shorter, longer
 def calculate_semantic_penalty(
     prediction: np.ndarray, penalty_mask: np.ndarray
 ) -> List[float]:
@@ -169,12 +165,10 @@ def calculate_semantic_penalty(
     return np.mean(penalties)
 def calculate_fde(prediction: np.ndarray, ground_truth: np.ndarray):
     return np.linalg.norm(prediction[-1] - ground_truth[-1])
 def calculate_dtw(prediction: np.ndarray, ground_truth: np.ndarray):
     # Create cost matrix
@@ -197,7 +191,6 @@ def calculate_dtw(prediction: np.ndarray, ground_truth: np.ndarray):
     return cost_matrix[n, m]
 def score(
     prediction: List[List[float]],
     ground_truths: List[List[List[float]]],
@@ -227,7 +220,6 @@ def score(
     # Select the best score
     return min(scores)
 def _initialize_worker(results_path, dataset_id, split_name):
@@ -246,7 +238,6 @@ def _initialize_worker(results_path, dataset_id, split_name):
     _get_sample = get_sample
 def _score_chunk(indices: List[int]) -> List[Tuple[int, float]]:
     results = []
@@ -275,7 +266,6 @@ def _score_chunk(indices: List[int]) -> List[Tuple[int, float]]:
     return results
 def score_predictions_parallel(results_path, dataset_id, split_name, num_processes=4):
     # Load results file
@@ -309,3 +299,39 @@ def score_predictions_parallel(results_path, dataset_id, split_name, num_process
     return scored_df

     return label_id_to_penalty
 def rasterize_gt_trace(
     gt_trace: List[List[float]], height: int, width: int
 ) -> np.ndarray:
     return np.array(gt_line_pixels)
 def create_penalty_mask(
     segmentation_mask: np.ndarray,
     gt_trace: List[List[float]],
     return penalty_mask
 def resample_to_match_length(
     trace_1: np.ndarray, trace_2: np.ndarray
 ) -> Tuple[np.ndarray, np.ndarray]:
     else:
         return shorter, longer
 def calculate_semantic_penalty(
     prediction: np.ndarray, penalty_mask: np.ndarray
 ) -> List[float]:
     return np.mean(penalties)
 def calculate_fde(prediction: np.ndarray, ground_truth: np.ndarray):
     return np.linalg.norm(prediction[-1] - ground_truth[-1])
 def calculate_dtw(prediction: np.ndarray, ground_truth: np.ndarray):
     # Create cost matrix
     return cost_matrix[n, m]
 def score(
     prediction: List[List[float]],
     ground_truths: List[List[List[float]]],
     # Select the best score
     return min(scores)
 def _initialize_worker(results_path, dataset_id, split_name):
     _get_sample = get_sample
 def _score_chunk(indices: List[int]) -> List[Tuple[int, float]]:
     results = []
     return results
 def score_predictions_parallel(results_path, dataset_id, split_name, num_processes=4):
     # Load results file
     return scored_df
def score_predictions(results_df, dataset):
    """Score every prediction in ``results_df`` against its ground truth.

    Each row's ``sample_id`` is matched to a sample in ``dataset``. The
    JSON-encoded ``prediction`` is decoded and passed to ``score`` together
    with the sample's ground truths for the row's ``embodiment`` and the
    sample's segmentation mask.

    Args:
        results_df: DataFrame with ``sample_id``, ``embodiment`` and
            ``prediction`` columns; ``prediction`` holds a JSON string.
        dataset: Indexable dataset exposing a ``"sample_id"`` column and
            per-sample ``"ground_truth"`` and ``"segmentation_mask"`` fields.

    Returns:
        A copy of ``results_df`` with an added ``score`` column. Rows whose
        prediction is missing or empty receive ``np.nan``.

    Raises:
        KeyError: If a row's ``sample_id`` is not present in ``dataset``.
        ValueError: If the ground truth for the row's embodiment is ``None``.
    """
    # Build a lookup dictionary for efficient sample retrieval by ID
    id_to_index = {sample_id: i for i, sample_id in enumerate(dataset["sample_id"])}

    scores = []
    for _, row in tqdm(results_df.iterrows(), total=len(results_df), desc="Scoring predictions"):
        raw_prediction = row["prediction"]

        # Skip invalid predictions. Missing cells (None / NaN) have no len(),
        # so anything that is not a non-empty JSON payload counts as invalid.
        if not isinstance(raw_prediction, (str, bytes, bytearray)) or len(raw_prediction) == 0:
            scores.append(np.nan)
            continue

        # "[]" is non-empty as a string but decodes to an empty trace, which
        # cannot be scored; treat it like any other empty prediction.
        prediction = json.loads(raw_prediction)
        if not prediction:
            scores.append(np.nan)
            continue

        # Get the corresponding ground truth sample using the lookup
        sample_id = row["sample_id"]
        sample = dataset[id_to_index[sample_id]]

        embodiment = row["embodiment"]
        ground_truths = sample["ground_truth"][embodiment]
        if ground_truths is None:
            # Report only the id: formatting the whole sample would dump its
            # segmentation mask and ground truths into the error message.
            raise ValueError(f"The sample {sample_id} has hidden ground-truths")

        # Only materialize the mask once we know the sample can be scored
        segmentation_mask = np.array(sample["segmentation_mask"])

        scores.append(score(prediction, ground_truths, segmentation_mask, embodiment))

    # Return a copy so the caller's DataFrame is left untouched
    scored_df = results_df.copy()
    scored_df["score"] = scores
    return scored_df