Sort by corpus reward (passed/full_corpus_size) so partial batches don't outrank complete ones
Browse files
app.py
CHANGED
|
@@ -99,17 +99,26 @@ def _format_wall(v) -> str:
|
|
| 99 |
return "—" if pd.isna(v) else f"{v:.2f}"
|
| 100 |
|
| 101 |
|
|
|
|
|
|
|
|
|
|
| 102 |
def load_results() -> pd.DataFrame:
|
| 103 |
raw = urllib.request.urlopen(RESULTS_URL, timeout=30).read()
|
| 104 |
df = pd.read_csv(io.BytesIO(raw))
|
| 105 |
if "reward_rate" not in df.columns:
|
| 106 |
df["reward_rate"] = pd.NA
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
df = df.sort_values(
|
| 108 |
-
["dataset", "reward_rate", "pass_rate"],
|
| 109 |
ascending=[True, False, False],
|
| 110 |
na_position="last",
|
| 111 |
).reset_index(drop=True)
|
| 112 |
df.insert(0, "rank", df.groupby("dataset").cumcount() + 1)
|
|
|
|
| 113 |
df["pass_rate"] = df["pass_rate"].map(_format_pct)
|
| 114 |
df["reward_rate"] = df["reward_rate"].map(_format_pct)
|
| 115 |
df["wall_hours"] = df["wall_hours"].map(_format_wall)
|
|
|
|
| 99 |
return "—" if pd.isna(v) else f"{v:.2f}"
|
| 100 |
|
| 101 |
|
| 102 |
+
CORPUS_SIZE = {"v1": 153, "v2": 130}
|
| 103 |
+
|
| 104 |
+
|
| 105 |
def load_results() -> pd.DataFrame:
|
| 106 |
raw = urllib.request.urlopen(RESULTS_URL, timeout=30).read()
|
| 107 |
df = pd.read_csv(io.BytesIO(raw))
|
| 108 |
if "reward_rate" not in df.columns:
|
| 109 |
df["reward_rate"] = pd.NA
|
| 110 |
+
# Rank by corpus reward (passed / full_corpus_size) so partial batches don't
|
| 111 |
+
# outrank complete ones with lower reward_rate. The displayed reward_rate is
|
| 112 |
+
# still over attempted tasks; only the sort changes.
|
| 113 |
+
df["_corpus_size"] = df["dataset"].map(CORPUS_SIZE).fillna(df["total"])
|
| 114 |
+
df["_corpus_reward"] = df["passed"] / df["_corpus_size"]
|
| 115 |
df = df.sort_values(
|
| 116 |
+
["dataset", "_corpus_reward", "pass_rate"],
|
| 117 |
ascending=[True, False, False],
|
| 118 |
na_position="last",
|
| 119 |
).reset_index(drop=True)
|
| 120 |
df.insert(0, "rank", df.groupby("dataset").cumcount() + 1)
|
| 121 |
+
df = df.drop(columns=["_corpus_size", "_corpus_reward"])
|
| 122 |
df["pass_rate"] = df["pass_rate"].map(_format_pct)
|
| 123 |
df["reward_rate"] = df["reward_rate"].map(_format_pct)
|
| 124 |
df["wall_hours"] = df["wall_hours"].map(_format_wall)
|