Switch sort key to corpus-interception (Stage 1) with corpus-reward as tiebreak
Browse files
app.py
CHANGED
|
@@ -107,18 +107,23 @@ def load_results() -> pd.DataFrame:
|
|
| 107 |
df = pd.read_csv(io.BytesIO(raw))
|
| 108 |
if "reward_rate" not in df.columns:
|
| 109 |
df["reward_rate"] = pd.NA
|
| 110 |
-
# Rank by corpus
|
| 111 |
-
#
|
| 112 |
-
#
|
|
|
|
| 113 |
df["_corpus_size"] = df["dataset"].map(CORPUS_SIZE).fillna(df["total"])
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
df["_corpus_reward"] = df["passed"] / df["_corpus_size"]
|
| 115 |
df = df.sort_values(
|
| 116 |
-
["dataset", "
|
| 117 |
ascending=[True, False, False],
|
| 118 |
na_position="last",
|
| 119 |
).reset_index(drop=True)
|
| 120 |
df.insert(0, "rank", df.groupby("dataset").cumcount() + 1)
|
| 121 |
-
df = df.drop(columns=["_corpus_size", "_corpus_reward"])
|
| 122 |
df["pass_rate"] = df["pass_rate"].map(_format_pct)
|
| 123 |
df["reward_rate"] = df["reward_rate"].map(_format_pct)
|
| 124 |
df["wall_hours"] = df["wall_hours"].map(_format_wall)
|
|
|
|
| 107 |
df = pd.read_csv(io.BytesIO(raw))
|
| 108 |
if "reward_rate" not in df.columns:
|
| 109 |
df["reward_rate"] = pd.NA
|
| 110 |
+
# Rank by corpus interception rate (intercepted_count / full_corpus_size) as
|
| 111 |
+
# the headline metric — Stage 1 is deterministic (URL/method match) and
|
| 112 |
+
# universally comparable. Tiebreak by corpus reward (passed / corpus_size)
|
| 113 |
+
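# so ties resolve on the same full-corpus basis; dividing by corpus size rather than the attempted count keeps partial batches from being favoured over complete ones.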
|
| 114 |
df["_corpus_size"] = df["dataset"].map(CORPUS_SIZE).fillna(df["total"])
|
| 115 |
+
# `pass_rate` in our CSV is the Stage-1 intercept rate (%) over the attempted count (`total`).
|
| 116 |
+
# Recover the intercepted count from it, then express that count as a fraction of the full corpus.
|
| 117 |
+
df["_intercepted_count"] = (df["pass_rate"].astype(float) / 100.0 * df["total"]).round().astype(int)
|
| 118 |
+
df["_corpus_intercepted"] = df["_intercepted_count"] / df["_corpus_size"]
|
| 119 |
df["_corpus_reward"] = df["passed"] / df["_corpus_size"]
|
| 120 |
df = df.sort_values(
|
| 121 |
+
["dataset", "_corpus_intercepted", "_corpus_reward"],
|
| 122 |
ascending=[True, False, False],
|
| 123 |
na_position="last",
|
| 124 |
).reset_index(drop=True)
|
| 125 |
df.insert(0, "rank", df.groupby("dataset").cumcount() + 1)
|
| 126 |
+
df = df.drop(columns=["_corpus_size", "_corpus_reward", "_intercepted_count", "_corpus_intercepted"])
|
| 127 |
df["pass_rate"] = df["pass_rate"].map(_format_pct)
|
| 128 |
df["reward_rate"] = df["reward_rate"].map(_format_pct)
|
| 129 |
df["wall_hours"] = df["wall_hours"].map(_format_wall)
|