Spaces:
Runtime error
Runtime error
Update.
Browse files
- app.py +2 -1
- src/about.py +2 -2
- src/display/css_html_js.py +38 -19
app.py
CHANGED
|
@@ -214,7 +214,7 @@ with blocks:
|
|
| 214 |
gr.HTML(WHAT_IS_F1_HTML_TOP)
|
| 215 |
|
| 216 |
# Examples (kept inside a centered, 800px container)
|
| 217 |
-
with gr.Group(elem_id="f1-examples"):
|
| 218 |
gr.HTML(
|
| 219 |
'<div class="f1-tabs-body"><h3 class="f1-examples-title">Examples of FormulaOne problems</h3></div>'
|
| 220 |
)
|
|
@@ -257,6 +257,7 @@ with blocks:
|
|
| 257 |
choices=["Warmup", "Tier 1", "Tier 2"],
|
| 258 |
value="Warmup",
|
| 259 |
label=None,
|
|
|
|
| 260 |
elem_id="f1-example-radio",
|
| 261 |
)
|
| 262 |
tab_radio.change(_select_example_tab, inputs=tab_radio, outputs=[md_warmup, md_tier1, md_tier2])
|
|
|
|
| 214 |
gr.HTML(WHAT_IS_F1_HTML_TOP)
|
| 215 |
|
| 216 |
# Examples (kept inside a centered, 800px container)
|
| 217 |
+
with gr.Group(elem_id="f1-examples", elem_classes=["f1-container"]):
|
| 218 |
gr.HTML(
|
| 219 |
'<div class="f1-tabs-body"><h3 class="f1-examples-title">Examples of FormulaOne problems</h3></div>'
|
| 220 |
)
|
|
|
|
| 257 |
choices=["Warmup", "Tier 1", "Tier 2"],
|
| 258 |
value="Warmup",
|
| 259 |
label=None,
|
| 260 |
+
show_label=False, # hide the "Radio" caption
|
| 261 |
elem_id="f1-example-radio",
|
| 262 |
)
|
| 263 |
tab_radio.change(_select_example_tab, inputs=tab_radio, outputs=[md_warmup, md_tier1, md_tier2])
|
src/about.py
CHANGED
|
@@ -72,7 +72,7 @@ WHAT_IS_F1_HTML_BOTTOM = """
|
|
| 72 |
</video>
|
| 73 |
<figcaption class="f1-figcaption">Animation showing the design of a compressed dynamic programming state-space.</figcaption>
|
| 74 |
</figure>
|
| 75 |
-
<p class="f1-p">The deceptive simplicity of the problem statements belies the <strong>extraordinary difficulty</strong> of discovering the correct dynamic programming solution. This process is riddled with subtle combinatorial and logical pitfalls, demanding a profound understanding of the problem’s underlying structure. For a detailed walkthrough of the fifteen interdependent reasoning steps required to solve a single hard problem
|
| 76 |
</section>
|
| 77 |
|
| 78 |
<section id="evaluation">
|
|
@@ -87,7 +87,7 @@ WHAT_IS_F1_HTML_BOTTOM = """
|
|
| 87 |
<p class="mb-4 f1-p">To support research and encourage community contributions, the <code>FormulaOne-Warmup</code> dataset is released as a public resource for training and fine-tuning models. The complete test suite for all 100 Warmup problems is available, alongside a standalone evaluation environment, in our <a href="https://github.com/double-ai/formulaone-dataset/tree/main" target="_blank" rel="noopener noreferrer" class="f1-a">GitHub repository</a>.</p>
|
| 88 |
<p class="f1-p">To maintain the integrity of the core benchmark, only a minimal subset of tests is released for the Tier 1 and Tier 2 problems.</p>
|
| 89 |
|
| 90 |
-
<!--
|
| 91 |
<h2 class="f1-h2">Model Accuracy</h2>
|
| 92 |
<p class="mb-4 f1-p">On the <strong>FormulaOne-Warmup</strong> problems, frontier models perform reasonably well. This confirms they have a foundational capability for these types of algorithmic tasks.</p>
|
| 93 |
<figure class="f1-figure">
|
|
|
|
| 72 |
</video>
|
| 73 |
<figcaption class="f1-figcaption">Animation showing the design of a compressed dynamic programming state-space.</figcaption>
|
| 74 |
</figure>
|
| 75 |
+
<p class="f1-p">The deceptive simplicity of the problem statements belies the <strong>extraordinary difficulty</strong> of discovering the correct dynamic programming solution. This process is riddled with subtle combinatorial and logical pitfalls, demanding a profound understanding of the problem’s underlying structure. For a detailed walkthrough of the fifteen interdependent reasoning steps required to solve a single hard problem --- <code>Maximal-Cluster-Graph</code> --- <a href="https://arxiv.org/pdf/2507.13337#appendix.A" target="_blank" rel="noopener noreferrer" class="f1-a">see the appendix of our paper</a>.</p>
|
| 76 |
</section>
|
| 77 |
|
| 78 |
<section id="evaluation">
|
|
|
|
| 87 |
<p class="mb-4 f1-p">To support research and encourage community contributions, the <code>FormulaOne-Warmup</code> dataset is released as a public resource for training and fine-tuning models. The complete test suite for all 100 Warmup problems is available, alongside a standalone evaluation environment, in our <a href="https://github.com/double-ai/formulaone-dataset/tree/main" target="_blank" rel="noopener noreferrer" class="f1-a">GitHub repository</a>.</p>
|
| 88 |
<p class="f1-p">To maintain the integrity of the core benchmark, only a minimal subset of tests is released for the Tier 1 and Tier 2 problems.</p>
|
| 89 |
|
| 90 |
+
<!-- Same level as Evaluation -->
|
| 91 |
<h2 class="f1-h2">Model Accuracy</h2>
|
| 92 |
<p class="mb-4 f1-p">On the <strong>FormulaOne-Warmup</strong> problems, frontier models perform reasonably well. This confirms they have a foundational capability for these types of algorithmic tasks.</p>
|
| 93 |
<figure class="f1-figure">
|
src/display/css_html_js.py
CHANGED
|
@@ -45,47 +45,66 @@ custom_css = """
|
|
| 45 |
.f1-blockquote { border-left: 4px solid #d1d5db; padding-left: 1rem; margin-left: 0; font-style: italic; color: #4b5563; }
|
| 46 |
.f1-problem-name { font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace; font-weight: 600; text-align: center; }
|
| 47 |
|
| 48 |
-
/* ===== Clean "table" using divs (centered, not full width,
|
| 49 |
.f1-grid-wrap { text-align: center; margin: 10px auto 8px auto; }
|
| 50 |
.f1-grid-table {
|
| 51 |
display: inline-block; /* center by shrink-to-fit */
|
| 52 |
border-top: 1px solid var(--f1-border);
|
| 53 |
border-left: 1px solid var(--f1-border);
|
|
|
|
|
|
|
| 54 |
background: var(--f1-bg);
|
|
|
|
|
|
|
| 55 |
}
|
| 56 |
.f1-grid-row { display: grid; grid-template-columns: auto auto 1fr; align-items: start; }
|
| 57 |
.f1-grid-row + .f1-grid-row { border-top: 1px solid var(--f1-border); } /* horizontal separators */
|
| 58 |
-
.f1-grid-cell { padding:
|
| 59 |
.f1-grid-head .f1-grid-cell { font-weight: 600; text-align: center; } /* centered headers */
|
|
|
|
| 60 |
|
| 61 |
-
/* ===== Examples card (look
|
| 62 |
-
#f1-examples {
|
| 63 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
|
| 65 |
-
/* Problem content: consistent
|
| 66 |
#f1-examples .f1-problem-markdown .markdown {
|
| 67 |
-
background: var(--f1-bg-muted);
|
| 68 |
border: 1px solid var(--f1-border);
|
| 69 |
border-radius: 8px;
|
| 70 |
-
padding:
|
| 71 |
-
margin: 0 14px
|
| 72 |
}
|
| 73 |
|
| 74 |
-
/* Bottom "tabs" using Radio
|
| 75 |
-
#f1-example-radio { border-top: 1px solid var(--f1-border); padding:
|
| 76 |
-
#f1-example-radio
|
| 77 |
-
#f1-example-radio
|
|
|
|
|
|
|
| 78 |
#f1-example-radio input[type="radio"]:checked + span {
|
| 79 |
-
background:
|
| 80 |
border: 1px solid var(--f1-border);
|
| 81 |
border-radius: 999px;
|
| 82 |
-
padding: 6px
|
| 83 |
}
|
| 84 |
|
| 85 |
-
/* Leaderboard: center the whole tab and
|
| 86 |
-
#formulaone-leaderboard-tab-table { max-width: 1200px; margin-left: auto; margin-right: auto; } /* center
|
| 87 |
-
#formulaone-leaderboard-tab-table .
|
| 88 |
-
#formulaone-leaderboard-tab-table .
|
| 89 |
#formulaone-leaderboard-tab-table [data-testid="dropdown"], #formulaone-leaderboard-tab-table input[type="text"] { width: 100% !important; }
|
| 90 |
|
| 91 |
/* Login button: force light */
|
|
|
|
| 45 |
.f1-blockquote { border-left: 4px solid #d1d5db; padding-left: 1rem; margin-left: 0; font-style: italic; color: #4b5563; }
|
| 46 |
.f1-problem-name { font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace; font-weight: 600; text-align: center; }
|
| 47 |
|
| 48 |
+
/* ===== Clean "table" using divs (centered, not full width, borders all around) ===== */
|
| 49 |
.f1-grid-wrap { text-align: center; margin: 10px auto 8px auto; }
|
| 50 |
.f1-grid-table {
|
| 51 |
display: inline-block; /* center by shrink-to-fit */
|
| 52 |
border-top: 1px solid var(--f1-border);
|
| 53 |
border-left: 1px solid var(--f1-border);
|
| 54 |
+
border-right: 1px solid var(--f1-border); /* add right border */
|
| 55 |
+
border-bottom: 1px solid var(--f1-border); /* add bottom border */
|
| 56 |
background: var(--f1-bg);
|
| 57 |
+
border-radius: 8px;
|
| 58 |
+
overflow: hidden;
|
| 59 |
}
|
| 60 |
.f1-grid-row { display: grid; grid-template-columns: auto auto 1fr; align-items: start; }
|
| 61 |
.f1-grid-row + .f1-grid-row { border-top: 1px solid var(--f1-border); } /* horizontal separators */
|
| 62 |
+
.f1-grid-cell { padding: 10px 14px; text-align: left; }
|
| 63 |
.f1-grid-head .f1-grid-cell { font-weight: 600; text-align: center; } /* centered headers */
|
| 64 |
+
.f1-grid-row .f1-grid-cell + .f1-grid-cell { border-left: 1px solid var(--f1-border); }
|
| 65 |
|
| 66 |
+
/* ===== Examples card (restore nice look, fix width, unify background, center title) ===== */
|
| 67 |
+
#f1-examples {
|
| 68 |
+
background: var(--f1-bg-muted); /* match problem box bg */
|
| 69 |
+
border: 1px solid var(--f1-border);
|
| 70 |
+
border-radius: 10px;
|
| 71 |
+
box-shadow: 0 1px 2px rgba(0,0,0,0.04);
|
| 72 |
+
margin-bottom: 12px;
|
| 73 |
+
}
|
| 74 |
+
#f1-examples .f1-examples-title {
|
| 75 |
+
font-weight: 700;
|
| 76 |
+
margin: 12px 14px 8px 14px;
|
| 77 |
+
color: var(--f1-text);
|
| 78 |
+
font-size: 1.1rem;
|
| 79 |
+
text-align: center; /* center heading */
|
| 80 |
+
}
|
| 81 |
|
| 82 |
+
/* Problem content: consistent background + padding */
|
| 83 |
#f1-examples .f1-problem-markdown .markdown {
|
| 84 |
+
background: var(--f1-bg-muted); /* same as container */
|
| 85 |
border: 1px solid var(--f1-border);
|
| 86 |
border-radius: 8px;
|
| 87 |
+
padding: 18px; /* ensure inner padding */
|
| 88 |
+
margin: 0 14px 8px 14px;
|
| 89 |
}
|
| 90 |
|
| 91 |
+
/* Bottom "tabs" using Radio -> show only pills (hide inputs), no "Radio" label */
|
| 92 |
+
#f1-example-radio { border-top: 1px solid var(--f1-border); padding: 8px 10px 10px 10px; margin: 0 8px 8px; }
|
| 93 |
+
#f1-example-radio input[type="radio"] { display: none; } /* hide the radio bullet */
|
| 94 |
+
#f1-example-radio .wrap { display: flex; gap: 8px; flex-wrap: wrap; justify-content: flex-start; }
|
| 95 |
+
#f1-example-radio label { border: 1px solid transparent; border-radius: 999px; padding: 6px 12px; cursor: pointer; background: #fff; }
|
| 96 |
+
#f1-example-radio label:hover { background: #f3f4f6; }
|
| 97 |
#f1-example-radio input[type="radio"]:checked + span {
|
| 98 |
+
background: #e5e7eb; /* subtle active pill */
|
| 99 |
border: 1px solid var(--f1-border);
|
| 100 |
border-radius: 999px;
|
| 101 |
+
padding: 6px 12px;
|
| 102 |
}
|
| 103 |
|
| 104 |
+
/* Leaderboard: center the whole tab and apply requested nesting/min-width rule with .column/.row */
|
| 105 |
+
#formulaone-leaderboard-tab-table { max-width: 1200px; margin-left: auto; margin-right: auto; } /* center */
|
| 106 |
+
#formulaone-leaderboard-tab-table .column .row .column { min-width: 80% !important; } /* exact chain rule */
|
| 107 |
+
#formulaone-leaderboard-tab-table .row, #formulaone-leaderboard-tab-table .column { width: 100% !important; max-width: 100% !important; }
|
| 108 |
#formulaone-leaderboard-tab-table [data-testid="dropdown"], #formulaone-leaderboard-tab-table input[type="text"] { width: 100% !important; }
|
| 109 |
|
| 110 |
/* Login button: force light */
|