kaizuberbuehler's picture
Update capex numbers; Fix and revamp champs
0a86c6a
raw
history blame
8.1 kB
import json
from datetime import datetime, date
import gradio as gr
import plotly.graph_objects as go
def create_big_five_capex_plot() -> go.Figure:
    """Build a stacked bar chart of quarterly capex for five tech companies.

    Reads ``big_five_capex.jsonl`` from the current working directory. Each
    non-blank line must be a JSON object holding a ``"Quarter"`` label plus
    one value per company name listed in ``companies`` below.

    A dotted vertical marker labelled "AI arms race begins" is drawn midway
    between the "2023 Q1" and "2023 Q2" bars. When either quarter is missing
    from the data the marker is omitted, because there is no meaningful
    position to draw it at.

    Returns:
        The assembled Plotly figure.

    Raises:
        OSError: If the data file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks ``"Quarter"`` or one of the company keys.
    """
    # Read data from the JSON Lines file, skipping blank lines.
    with open("big_five_capex.jsonl", "r", encoding="utf-8") as file:
        data = [json.loads(line) for line in file if line.strip()]

    quarters: list[str] = [entry["Quarter"] for entry in data]
    companies = ['Microsoft', 'Google', 'Meta', 'Apple', 'Amazon']
    colors = ['#80bb00', '#ee161f', '#0065e3', '#000000', '#ff6200']

    # Bars are placed on integer positions and relabelled with the quarter
    # names, so that a shape can be positioned halfway between two bars.
    x_positions = list(range(len(quarters)))

    traces = [
        go.Bar(
            name=company,
            x=x_positions,
            y=[entry[company] for entry in data],
            marker_color=color,
        )
        for company, color in zip(companies, colors)
    ]

    fig = go.Figure(data=traces)
    fig.update_layout(
        barmode="stack",
        title="Capital Expenditures of the Big Five Tech Companies in Millions of USD per Quarter",
        xaxis_title="Quarter",
        yaxis_title="Capital Expenditures (Millions USD)",
        xaxis=dict(
            tickmode='array',
            tickvals=x_positions,
            ticktext=quarters,
        ),
        height=600,
    )

    # The marker belongs between "2023 Q1" and "2023 Q2". Without both
    # quarters there is nowhere correct to put it, so return the bare chart
    # instead of drawing a labelled line at an arbitrary position.
    try:
        idx_q1 = quarters.index("2023 Q1")
        idx_q2 = quarters.index("2023 Q2")
    except ValueError:
        return fig
    vline_x = (idx_q1 + idx_q2) / 2  # position midway between the two quarters

    # Vertical dotted line spanning the full height of the plotting area
    # (yref="paper" makes y0/y1 fractions of the plot height).
    fig.add_shape(
        type="line",
        xref="x",
        yref="paper",
        x0=vline_x,
        y0=0,
        x1=vline_x,
        y1=1,
        line=dict(
            color="black",
            dash="dot",
            width=2,
        ),
    )

    # Label placed just above the top of the plotting area.
    fig.add_annotation(
        x=vline_x,
        y=1.05,
        xref="x",
        yref="paper",
        text="AI arms race begins",
        showarrow=False,
        font=dict(
            color="black",
            size=12,
        ),
        align="center",
    )
    return fig
def create_simple_plot(data_path: str,
                       name: str,
                       start_date: date, end_date: date,
                       min_value: int = 0, max_value: int = 100) -> go.Figure:
    """Plot the best benchmark score achieved so far against release date.

    Leaderboard rows are read from ``data_path`` (JSON Lines, one object per
    line with ``"model"`` and ``"score"`` keys) and joined by model name with
    ``models.jsonl`` (objects with ``"Name"`` and ``"Release Date"`` in
    ``YYYY-MM-DD`` form). Rows whose model is not listed in ``models.jsonl``
    are skipped with a printed warning.

    The figure contains a step line of the running best score plus one
    labelled marker for every model that set a new best.

    Args:
        data_path: Path of the leaderboard JSON Lines file.
        name: Metric name, used for the title and the y-axis label.
        start_date: Left edge of the x-axis range (``date`` or ``datetime``).
        end_date: Right edge of the x-axis range (``date`` or ``datetime``).
        min_value: Lower bound of the y-axis range.
        max_value: Upper bound of the y-axis range.

    Returns:
        The assembled Plotly figure.

    Raises:
        OSError: If either data file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks one of the keys named above.
        ValueError: If a release date does not match ``%Y-%m-%d``.
    """
    # Blank lines (e.g. a trailing newline) are skipped rather than being
    # handed to json.loads, which would raise on them.
    with open(data_path, 'r', encoding="utf-8") as file:
        leaderboard = [json.loads(line) for line in file if line.strip()]
    with open("models.jsonl", 'r', encoding="utf-8") as file:
        models = [json.loads(line) for line in file if line.strip()]

    # Index models by name once instead of scanning the list for every
    # leaderboard row. setdefault keeps the first record for a duplicated
    # name, which is what a front-to-back search would have found.
    models_by_name: dict = {}
    for model in models:
        models_by_name.setdefault(model['Name'], model)

    data = []
    for entry in leaderboard:
        model_name = entry['model']
        score = entry['score']
        model_info = models_by_name.get(model_name)
        if model_info:
            release_date = datetime.strptime(model_info['Release Date'], "%Y-%m-%d")
            data.append({'model': model_name, 'score': score, 'release_date': release_date})
        else:
            print(f"[WARNING] Model '{model_name}' not found in models.jsonl")

    # Stable sort: models released on the same day keep leaderboard order.
    data.sort(key=lambda x: x['release_date'])
    x_dates = [d['release_date'] for d in data]

    # Running maximum seeded from the data itself (not from 0), so the step
    # line always starts at the first model's score, even when that score
    # is negative.
    y_scores = []
    best_score = None
    for entry in data:
        if best_score is None or entry['score'] > best_score:
            best_score = entry['score']
        y_scores.append(best_score)

    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=x_dates,
        y=y_scores,
        mode='lines',
        line=dict(shape='hv', width=2),
        name='Best Score to Date'
    ))

    # One labelled marker per model that raised the best score; the earliest
    # model always gets one.
    for i, entry in enumerate(data):
        if i == 0 or y_scores[i] > y_scores[i - 1]:
            fig.add_trace(go.Scatter(
                x=[entry['release_date']],
                y=[entry['score']],
                mode='markers+text',
                marker=dict(size=10),
                text=[entry['model']],
                textposition="top center",
                name=entry['model']
            ))

    fig.update_layout(
        title=f'{name} Over Time',
        xaxis_title='Release Date',
        yaxis_title=name,
        hovermode='x unified',
        xaxis=dict(
            range=[start_date, end_date],
            type='date'
        ),
        yaxis=dict(
            range=[min_value, max_value]
        ),
        height=800
    )
    return fig
# Build the Gradio UI: a set of tabs that each hold one gr.Plot, followed by
# the event wiring that fills a plot when its tab is selected.
# NOTE(review): the plots are only produced by the select handlers registered
# below; whether the initially shown tab fires a select event on page load is
# not established here -- confirm that the first tab is not left empty.
with gr.Blocks() as demo:
with gr.Tab("System Performance Over Time"):
# Benchmark tabs that have a select handler registered further down.
with gr.Tab("ARC-AGI-Pub") as arc_agi_tab:
arc_agi_plot: gr.Plot = gr.Plot()
with gr.Tab("Simple Bench") as simple_bench_tab:
simple_bench_plot: gr.Plot = gr.Plot()
with gr.Tab("PlanBench") as planbench_tab:
planbench_plot: gr.Plot = gr.Plot()
# Source attribution rendered as Markdown alongside the PlanBench plot.
planbench_markdown: gr.Markdown = gr.Markdown(
value="""Source: [Valmeekam et al. 2024](https://arxiv.org/abs/2409.13373)"""
)
with gr.Tab("Codeforces") as codeforces_tab:
with gr.Tab("General-Purpose Systems"):
codeforces_plot: gr.Plot = gr.Plot()
# The tabs below are created with interactive=False and no select handler is
# registered for them in this file; their plots stay empty.
with gr.Tab("BigCodeBench", interactive=False):
bigcodebench_plot: gr.Plot = gr.Plot()
with gr.Tab("GAIA", interactive=False):
gaia_plot: gr.Plot = gr.Plot()
with gr.Tab("GPQA", interactive=False):
gpqa_plot: gr.Plot = gr.Plot()
with gr.Tab("HumanEval", interactive=False):
humaneval_plot: gr.Plot = gr.Plot()
with gr.Tab("Chatbot Arena", interactive=False):
chatbot_arena_plot: gr.Plot = gr.Plot()
with gr.Tab("MATH", interactive=False):
math_plot: gr.Plot = gr.Plot()
with gr.Tab("OpenCompass", interactive=False):
opencompass_plot: gr.Plot = gr.Plot()
with gr.Tab("SWE-bench", interactive=False):
swe_bench_plot: gr.Plot = gr.Plot()
with gr.Tab("WebArena", interactive=False):
webarena_plot: gr.Plot = gr.Plot()
with gr.Tab("ZeroEval", interactive=False):
zeroeval_plot: gr.Plot = gr.Plot()
with gr.Tab("Finance") as finance_tab:
with gr.Tab("Big Five Capex") as big_five_capex_tab:
big_five_capex_plot: gr.Plot = gr.Plot()
# Disabled tab; nvidia_revenue is not referenced again in this file.
with gr.Tab("NVIDIA Revenue", interactive=False) as nvidia_revenue:
nvidia_revenue_plot: gr.Plot = gr.Plot()
# Selecting either finance_tab or big_five_capex_tab rebuilds the same capex
# plot.
big_five_capex_tab.select(fn=create_big_five_capex_plot, outputs=big_five_capex_plot)
finance_tab.select(fn=create_big_five_capex_plot, outputs=big_five_capex_plot)
# For the handlers below, the constant arguments of create_simple_plot are
# wrapped in gr.State and passed positionally in the order
# (data_path, name, start_date, end_date[, min_value, max_value]).
# When the last two are omitted the function's defaults (0 and 100) apply.
arc_agi_tab.select(fn=create_simple_plot,
inputs=[gr.State("arc_agi_leaderboard.jsonl"), gr.State("ARC-AGI-Pub (Public Eval) Score"),
gr.State(date(2024, 5, 13)), gr.State(date(2024, 12, 20))],
outputs=arc_agi_plot)
simple_bench_tab.select(fn=create_simple_plot,
inputs=[gr.State("simple_bench_leaderboard.jsonl"), gr.State("Simple Bench Score"),
gr.State(date(2023, 6, 13)), gr.State(date(2024, 8, 14))],
outputs=simple_bench_plot)
# Codeforces is the only call that overrides the y-axis range (800 to 3000).
codeforces_tab.select(fn=create_simple_plot,
inputs=[gr.State("codeforces_leaderboard.jsonl"), gr.State("Codeforces (Elo Rating)"),
gr.State(date(2024, 5, 13)), gr.State(date(2024, 12, 20)),
gr.State(800), gr.State(3000)],
outputs=codeforces_plot)
planbench_tab.select(fn=create_simple_plot,
inputs=[gr.State("planbench_leaderboard.jsonl"), gr.State("PlanBench (Mystery Blocksworld, 0-shot) Score"),
gr.State(date(2023, 3, 14)), gr.State(date(2024, 9, 23))],
outputs=planbench_plot)
# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
demo.launch()