AgPerry committed on
Commit
e77a483
·
verified ·
1 Parent(s): 78cfa0c

Switch sort key to corpus-interception (Stage 1) with corpus-reward as tiebreak

Browse files
Files changed (1) hide show
  1. app.py +10 -5
app.py CHANGED
@@ -107,18 +107,23 @@ def load_results() -> pd.DataFrame:
107
  df = pd.read_csv(io.BytesIO(raw))
108
  if "reward_rate" not in df.columns:
109
  df["reward_rate"] = pd.NA
110
- # Rank by corpus reward (passed / full_corpus_size) so partial batches don't
111
- # outrank complete ones with lower reward_rate. The displayed reward_rate is
112
- # still over attempted tasks; only the sort changes.
 
113
  df["_corpus_size"] = df["dataset"].map(CORPUS_SIZE).fillna(df["total"])
 
 
 
 
114
  df["_corpus_reward"] = df["passed"] / df["_corpus_size"]
115
  df = df.sort_values(
116
- ["dataset", "_corpus_reward", "pass_rate"],
117
  ascending=[True, False, False],
118
  na_position="last",
119
  ).reset_index(drop=True)
120
  df.insert(0, "rank", df.groupby("dataset").cumcount() + 1)
121
- df = df.drop(columns=["_corpus_size", "_corpus_reward"])
122
  df["pass_rate"] = df["pass_rate"].map(_format_pct)
123
  df["reward_rate"] = df["reward_rate"].map(_format_pct)
124
  df["wall_hours"] = df["wall_hours"].map(_format_wall)
 
107
  df = pd.read_csv(io.BytesIO(raw))
108
  if "reward_rate" not in df.columns:
109
  df["reward_rate"] = pd.NA
110
+ # Rank by corpus interception rate (intercepted_count / full_corpus_size) as
111
+ # the headline metric — Stage 1 is deterministic (URL/method match) and
112
+ # universally comparable. Tiebreak by corpus reward (passed / corpus_size)
113
+ # so partial batches don't outrank complete ones with lower rates.
114
  df["_corpus_size"] = df["dataset"].map(CORPUS_SIZE).fillna(df["total"])
115
+ # `pass_rate` in our CSV is the Stage-1 intercept rate (%) over attempted.
116
+ # Convert it to a fraction over the full corpus.
117
+ df["_intercepted_count"] = (df["pass_rate"].astype(float) / 100.0 * df["total"]).round().astype(int)
118
+ df["_corpus_intercepted"] = df["_intercepted_count"] / df["_corpus_size"]
119
  df["_corpus_reward"] = df["passed"] / df["_corpus_size"]
120
  df = df.sort_values(
121
+ ["dataset", "_corpus_intercepted", "_corpus_reward"],
122
  ascending=[True, False, False],
123
  na_position="last",
124
  ).reset_index(drop=True)
125
  df.insert(0, "rank", df.groupby("dataset").cumcount() + 1)
126
+ df = df.drop(columns=["_corpus_size", "_corpus_reward", "_intercepted_count", "_corpus_intercepted"])
127
  df["pass_rate"] = df["pass_rate"].map(_format_pct)
128
  df["reward_rate"] = df["reward_rate"].map(_format_pct)
129
  df["wall_hours"] = df["wall_hours"].map(_format_wall)