Spaces:
Runtime error
Runtime error
Update plot.
Browse files
- app.py +1 -2
- assets/{deeper_tier_performance.png → perf_plot.png} +2 -2
- assets/shallow_tier_performance.png +0 -3
- src/about.py +0 -6
app.py
CHANGED
|
@@ -11,7 +11,6 @@ from huggingface_hub import whoami
|
|
| 11 |
|
| 12 |
# HTML is split so we can inject Gradio media (images/video) where needed.
|
| 13 |
from src.about import WHAT_IS_F1_HTML_AFTER_VIDEO # text immediately after the video
|
| 14 |
-
from src.about import WHAT_IS_F1_HTML_AFTER_WARMUPFIG # text between warmup/tier1 figs
|
| 15 |
from src.about import WHAT_IS_F1_HTML_BOTTOM_A_AFTER_TABS # text after the heading, before the first figure
|
| 16 |
from src.about import WHAT_IS_F1_HTML_BOTTOM_A_BEFORE_TABS # up to (and including) the "Infinite Well" heading
|
| 17 |
from src.about import WHAT_IS_F1_HTML_EVAL_BEFORE_WARMUPFIG # evaluation section up to before Warmup fig
|
|
@@ -492,7 +491,7 @@ with blocks:
|
|
| 492 |
# Evaluation: Warmup figure
|
| 493 |
gr.HTML(WHAT_IS_F1_HTML_EVAL_BEFORE_WARMUPFIG, padding=False)
|
| 494 |
gr.Image(
|
| 495 |
-
"assets/
|
| 496 |
width=600,
|
| 497 |
show_label=False,
|
| 498 |
elem_classes=["f1-image"],
|
|
|
|
| 11 |
|
| 12 |
# HTML is split so we can inject Gradio media (images/video) where needed.
|
| 13 |
from src.about import WHAT_IS_F1_HTML_AFTER_VIDEO # text immediately after the video
|
|
|
|
| 14 |
from src.about import WHAT_IS_F1_HTML_BOTTOM_A_AFTER_TABS # text after the heading, before the first figure
|
| 15 |
from src.about import WHAT_IS_F1_HTML_BOTTOM_A_BEFORE_TABS # up to (and including) the "Infinite Well" heading
|
| 16 |
from src.about import WHAT_IS_F1_HTML_EVAL_BEFORE_WARMUPFIG # evaluation section up to before Warmup fig
|
|
|
|
| 491 |
# Evaluation: Warmup figure
|
| 492 |
gr.HTML(WHAT_IS_F1_HTML_EVAL_BEFORE_WARMUPFIG, padding=False)
|
| 493 |
gr.Image(
|
| 494 |
+
"assets/perf_plot.png",
|
| 495 |
width=600,
|
| 496 |
show_label=False,
|
| 497 |
elem_classes=["f1-image"],
|
assets/{deeper_tier_performance.png → perf_plot.png}
RENAMED
|
File without changes
|
assets/shallow_tier_performance.png
DELETED
Git LFS Details
|
src/about.py
CHANGED
|
@@ -90,13 +90,7 @@ WHAT_IS_F1_HTML_AFTER_VIDEO = """
|
|
| 90 |
WHAT_IS_F1_HTML_EVAL_BEFORE_WARMUPFIG = """
|
| 91 |
<h2 class="f1-h2">Model Accuracy</h2>
|
| 92 |
<p class="mb-4 f1-p">On the <strong>FormulaOne-Shallow</strong> problems, frontier models perform reasonably well. This confirms they have a foundational capability for these types of algorithmic tasks, in other words, the tasks are squarely in-distribution.</p>
|
| 93 |
-
<!-- warmup_performance figure inserted via gr.Image in app.py -->
|
| 94 |
-
"""
|
| 95 |
-
|
| 96 |
-
# Between Shallow and Deeper figures
|
| 97 |
-
WHAT_IS_F1_HTML_AFTER_WARMUPFIG = """
|
| 98 |
<p class="mb-4 f1-p">However, as the reasoning depth increases in the <strong>Deeper</strong> tier, and solutions require the discovery and integration of novel and more complex state representations, model performance drops off sharply.</p>
|
| 99 |
-
<!-- tier1_performance figure inserted via gr.Image in app.py -->
|
| 100 |
"""
|
| 101 |
|
| 102 |
# Tail after Deeper figure (closes evaluation section + container)
|
|
|
|
| 90 |
WHAT_IS_F1_HTML_EVAL_BEFORE_WARMUPFIG = """
|
| 91 |
<h2 class="f1-h2">Model Accuracy</h2>
|
| 92 |
<p class="mb-4 f1-p">On the <strong>FormulaOne-Shallow</strong> problems, frontier models perform reasonably well. This confirms they have a foundational capability for these types of algorithmic tasks, in other words, the tasks are squarely in-distribution.</p>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
<p class="mb-4 f1-p">However, as the reasoning depth increases in the <strong>Deeper</strong> tier, and solutions require the discovery and integration of novel and more complex state representations, model performance drops off sharply.</p>
|
|
|
|
| 94 |
"""
|
| 95 |
|
| 96 |
# Tail after Deeper figure (closes evaluation section + container)
|