Spaces:

leggedrobotics
/

navitrace_leaderboard

Running

App Files Files Community

TimWindecker committed on Nov 11

Commit

ed767be

verified ·

1 Parent(s): 9e399c9

Add score normalization

Browse files

Files changed (1) hide show

src/score_calculation/score.py +16 -7

src/score_calculation/score.py CHANGED Viewed

@@ -15,6 +15,7 @@ from tqdm import tqdm
 PENALTY_SCORES_PATH = "./category_penalty.tsv"
 M2F_CONFIG_PATH = "./mask2former_config.json"
 @functools.lru_cache(maxsize=4)
@@ -118,7 +119,7 @@ def resample_to_match_length(
 ) -> Tuple[np.ndarray, np.ndarray]:
     if len(trace_1) == 0 or len(trace_2) == 0:
-        raise ValueError("One of the trace is empty")
     if len(trace_1) == len(trace_2):
         return trace_1, trace_2
     elif len(trace_1) > len(trace_2):
@@ -193,6 +194,11 @@ def calculate_dtw(prediction: np.ndarray, ground_truth: np.ndarray):
     return cost_matrix[n, m]
 def score(
     prediction: List[List[float]],
     ground_truths: List[List[List[float]]],
@@ -221,7 +227,10 @@ def score(
         scores.append(dtw + fde + sem_penalty)
     # Select the best score
-    return min(scores)
 def _initialize_worker(results_path, dataset_id, split_name):
@@ -246,11 +255,6 @@ def _score_chunk(indices: List[int]) -> List[Tuple[int, float]]:
     for idx in indices:
         row = _results_df.loc[idx]
-        # Skip invalid predictions
-        if len(row["prediction"]) == 0:
-            results.append((idx, np.nan))
-            continue
         # Extract prediction and ground truth
         sample = _get_sample(row["sample_id"])
         embodiment = row["embodiment"]
@@ -261,6 +265,11 @@ def _score_chunk(indices: List[int]) -> List[Tuple[int, float]]:
         # Check that ground-truth is not hidden as it is for the test split
         if ground_truths is None:
             raise ValueError(f"The sample {sample} has hidden ground-truths")
         # Calculate score
         s = score(prediction, ground_truths, segmentation_mask, embodiment)

 PENALTY_SCORES_PATH = "./category_penalty.tsv"
 M2F_CONFIG_PATH = "./mask2former_config.json"
+BAD_SCORE_THRESHOLD = 3234.75
 @functools.lru_cache(maxsize=4)
 ) -> Tuple[np.ndarray, np.ndarray]:
     if len(trace_1) == 0 or len(trace_2) == 0:
+        raise ValueError("One of the traces is empty")
     if len(trace_1) == len(trace_2):
         return trace_1, trace_2
     elif len(trace_1) > len(trace_2):
     return cost_matrix[n, m]
+def normalize_score(score: float) -> float:
+    # Normalize the score so that a perfect score maps to 100 and any score worse than the average performance of predicting a vertical line through the center maps to < 0
+    return (BAD_SCORE_THRESHOLD - score) / BAD_SCORE_THRESHOLD * 100
 def score(
     prediction: List[List[float]],
     ground_truths: List[List[List[float]]],
         scores.append(dtw + fde + sem_penalty)
     # Select the best score
+    score = min(scores)
+    # Normalize
+    return normalize_score(score)
 def _initialize_worker(results_path, dataset_id, split_name):
     for idx in indices:
         row = _results_df.loc[idx]
         # Extract prediction and ground truth
         sample = _get_sample(row["sample_id"])
         embodiment = row["embodiment"]
         # Check that ground-truth is not hidden as it is for the test split
         if ground_truths is None:
             raise ValueError(f"The sample {sample} has hidden ground-truths")
+        # Skip invalid predictions
+        if len(prediction) == 0:
+            results.append((idx, np.nan))
+            continue
         # Calculate score
         s = score(prediction, ground_truths, segmentation_mask, embodiment)