"""Gradio dashboard plotting AI benchmark progress and Big Tech capital expenditures.

The module defines two Plotly figure builders and a Gradio ``Blocks`` app whose
tabs render those figures when they are selected.
"""

import json
from datetime import date, datetime

import gradio as gr
import plotly.graph_objects as go

# Bar colour used for each company in the stacked capex chart. The keys are
# also the field names read from every record of big_five_capex.jsonl.
_CAPEX_COMPANY_COLORS: dict[str, str] = {
    'Microsoft': '#80bb00',
    'Google': '#ee161f',
    'Meta': '#0065e3',
    'Apple': '#000000',
    'Amazon': '#ff6200',
}


def _read_jsonl(path: str) -> list[dict]:
    """Parse a JSON Lines file into a list, skipping blank lines.

    Args:
        path: Path of the JSON Lines file to read.

    Returns:
        One parsed JSON value per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(path, "r", encoding="utf-8") as file:
        return [json.loads(line) for line in file if line.strip()]


def create_big_five_capex_plot() -> go.Figure:
    """Build a stacked bar chart of quarterly capex for five tech companies.

    Reads ``big_five_capex.jsonl`` from the current working directory. Every
    record must provide a ``"Quarter"`` label and one value per company listed
    in ``_CAPEX_COMPANY_COLORS``.

    A dotted vertical line labelled "AI arms race begins" is drawn midway
    between the "2023 Q1" and "2023 Q2" bars. If either quarter is absent from
    the data, the line is drawn at x = 0 instead.

    Returns:
        The assembled Plotly figure.
    """
    data = _read_jsonl("big_five_capex.jsonl")

    quarters: list[str] = [entry["Quarter"] for entry in data]
    # Bars are placed on integer positions (relabelled with the quarter names
    # below) so that the marker line can sit *between* two bars.
    x_positions = list(range(len(quarters)))

    traces = [
        go.Bar(
            name=company,
            x=x_positions,
            y=[entry[company] for entry in data],
            marker_color=color,
        )
        for company, color in _CAPEX_COMPANY_COLORS.items()
    ]

    fig = go.Figure(data=traces)
    fig.update_layout(
        barmode="stack",
        title="Capital Expenditures of the Big Five Tech Companies in Millions of USD per Quarter",
        xaxis_title="Quarter",
        yaxis_title="Capital Expenditures (Millions USD)",
        xaxis=dict(
            tickmode='array',
            tickvals=x_positions,
            ticktext=quarters,
        ),
        height=600,
    )

    # The marker line belongs between "2023 Q1" and "2023 Q2".
    try:
        idx_q1 = quarters.index("2023 Q1")
        idx_q2 = quarters.index("2023 Q2")
    except ValueError:
        # Fall back to the left edge if either quarter is missing.
        vline_x = 0
    else:
        vline_x = (idx_q1 + idx_q2) / 2

    # yref="paper" makes y0=0 .. y1=1 span the full height of the plot area
    # regardless of the data range.
    fig.add_shape(
        type="line",
        xref="x",
        yref="paper",
        x0=vline_x,
        y0=0,
        x1=vline_x,
        y1=1,
        line=dict(
            color="black",
            dash="dot",
            width=2,
        ),
    )

    # y=1.05 in paper coordinates places the label just above the plot area.
    fig.add_annotation(
        x=vline_x,
        y=1.05,
        xref="x",
        yref="paper",
        text="AI arms race begins",
        showarrow=False,
        font=dict(
            color="black",
            size=12,
        ),
        align="center",
    )

    return fig


def create_simple_plot(data_path: str,
                       name: str,
                       start_date: date,
                       end_date: date,
                       min_value: int = 0,
                       max_value: int = 100) -> go.Figure:
    """Plot the best benchmark score achieved so far against model release date.

    Leaderboard entries (records with ``"model"`` and ``"score"``) are read
    from ``data_path`` and joined on model name with ``models.jsonl`` (records
    with ``"Name"`` and a ``"Release Date"`` in ``YYYY-MM-DD`` format).
    Leaderboard entries whose model is not listed in ``models.jsonl`` are
    skipped and a warning is printed.

    The figure contains a step line of the running maximum score (starting
    from 0) plus one labelled marker for the first model and for every model
    that raised the running maximum.

    Args:
        data_path: Path of the leaderboard JSON Lines file.
        name: Metric name; used as the y-axis title and in the figure title.
        start_date: Left bound of the x-axis range.
        end_date: Right bound of the x-axis range.
        min_value: Lower bound of the y-axis range.
        max_value: Upper bound of the y-axis range.

    Returns:
        The assembled Plotly figure.
    """
    leaderboard = _read_jsonl(data_path)

    # Index the model metadata once instead of scanning the list for every
    # leaderboard entry. setdefault keeps the first record for a duplicated
    # name, which is what a front-to-back linear search would return.
    models_by_name: dict[str, dict] = {}
    for model in _read_jsonl("models.jsonl"):
        model_name = model.get("Name")
        if model_name is not None:
            models_by_name.setdefault(model_name, model)

    data = []
    for entry in leaderboard:
        model_name = entry['model']
        model_info = models_by_name.get(model_name)
        if model_info is None:
            print(f"[WARNING] Model '{model_name}' not found in models.jsonl")
            continue
        data.append({
            'model': model_name,
            'score': entry['score'],
            'release_date': datetime.strptime(model_info['Release Date'], "%Y-%m-%d"),
        })

    data.sort(key=lambda item: item['release_date'])

    x_dates = [item['release_date'] for item in data]

    # Running maximum of the scores in release order, floored at 0.
    y_scores = []
    max_score = 0
    for item in data:
        max_score = max(max_score, item['score'])
        y_scores.append(max_score)

    fig = go.Figure()

    # shape='hv' draws a step line: the best score stays flat until the next
    # improvement is released.
    fig.add_trace(go.Scatter(
        x=x_dates,
        y=y_scores,
        mode='lines',
        line=dict(shape='hv', width=2),
        name='Best Score to Date',
    ))

    # Label the first model and each model that raised the running maximum.
    for i, item in enumerate(data):
        if i == 0 or y_scores[i] > y_scores[i - 1]:
            fig.add_trace(go.Scatter(
                x=[item['release_date']],
                y=[item['score']],
                mode='markers+text',
                marker=dict(size=10),
                text=[item['model']],
                textposition="top center",
                name=item['model'],
            ))

    fig.update_layout(
        title=f'{name} Over Time',
        xaxis_title='Release Date',
        yaxis_title=name,
        hovermode='x unified',
        xaxis=dict(
            range=[start_date, end_date],
            type='date',
        ),
        yaxis=dict(
            range=[min_value, max_value],
        ),
        height=800,
    )

    return fig


# NOTE(review): the tab nesting below was reconstructed from the order of the
# `with` statements; confirm that the disabled benchmark tabs are meant to be
# siblings of "Codeforces" and that "Finance" is a top-level tab.
with gr.Blocks() as demo:
    with gr.Tab("System Performance Over Time"):
        with gr.Tab("ARC-AGI-Pub") as arc_agi_tab:
            arc_agi_plot: gr.Plot = gr.Plot()
        with gr.Tab("Simple Bench") as simple_bench_tab:
            simple_bench_plot: gr.Plot = gr.Plot()
        with gr.Tab("PlanBench") as planbench_tab:
            planbench_plot: gr.Plot = gr.Plot()
            planbench_markdown: gr.Markdown = gr.Markdown(
                value="""Source: [Valmeekam et al. 2024](https://arxiv.org/abs/2409.13373)"""
            )
        with gr.Tab("Codeforces") as codeforces_tab:
            with gr.Tab("General-Purpose Systems"):
                codeforces_plot: gr.Plot = gr.Plot()
        # The tabs below are created with interactive=False and have no
        # event listener attached, so their plots stay empty.
        with gr.Tab("BigCodeBench", interactive=False):
            bigcodebench_plot: gr.Plot = gr.Plot()
        with gr.Tab("GAIA", interactive=False):
            gaia_plot: gr.Plot = gr.Plot()
        with gr.Tab("GPQA", interactive=False):
            gpqa_plot: gr.Plot = gr.Plot()
        with gr.Tab("HumanEval", interactive=False):
            humaneval_plot: gr.Plot = gr.Plot()
        with gr.Tab("Chatbot Arena", interactive=False):
            chatbot_arena_plot: gr.Plot = gr.Plot()
        with gr.Tab("MATH", interactive=False):
            math_plot: gr.Plot = gr.Plot()
        with gr.Tab("OpenCompass", interactive=False):
            opencompass_plot: gr.Plot = gr.Plot()
        with gr.Tab("SWE-bench", interactive=False):
            swe_bench_plot: gr.Plot = gr.Plot()
        with gr.Tab("WebArena", interactive=False):
            webarena_plot: gr.Plot = gr.Plot()
        with gr.Tab("ZeroEval", interactive=False):
            zeroeval_plot: gr.Plot = gr.Plot()
    with gr.Tab("Finance") as finance_tab:
        with gr.Tab("Big Five Capex") as big_five_capex_tab:
            big_five_capex_plot: gr.Plot = gr.Plot()
        with gr.Tab("NVIDIA Revenue", interactive=False) as nvidia_revenue:
            nvidia_revenue_plot: gr.Plot = gr.Plot()

    # Figures are built when a tab is selected. The capex plot is wired to
    # both its own tab and the enclosing "Finance" tab.
    big_five_capex_tab.select(fn=create_big_five_capex_plot, outputs=big_five_capex_plot)
    finance_tab.select(fn=create_big_five_capex_plot, outputs=big_five_capex_plot)

    # Constant arguments are handed to create_simple_plot through gr.State
    # components, in the positional order of its parameters.
    arc_agi_tab.select(
        fn=create_simple_plot,
        inputs=[
            gr.State("arc_agi_leaderboard.jsonl"),
            gr.State("ARC-AGI-Pub (Public Eval) Score"),
            gr.State(date(2024, 5, 13)),
            gr.State(date(2024, 12, 20)),
        ],
        outputs=arc_agi_plot,
    )
    simple_bench_tab.select(
        fn=create_simple_plot,
        inputs=[
            gr.State("simple_bench_leaderboard.jsonl"),
            gr.State("Simple Bench Score"),
            gr.State(date(2023, 6, 13)),
            gr.State(date(2024, 8, 14)),
        ],
        outputs=simple_bench_plot,
    )
    codeforces_tab.select(
        fn=create_simple_plot,
        inputs=[
            gr.State("codeforces_leaderboard.jsonl"),
            gr.State("Codeforces (Elo Rating)"),
            gr.State(date(2024, 5, 13)),
            gr.State(date(2024, 12, 20)),
            gr.State(800),
            gr.State(3000),
        ],
        outputs=codeforces_plot,
    )
    planbench_tab.select(
        fn=create_simple_plot,
        inputs=[
            gr.State("planbench_leaderboard.jsonl"),
            gr.State("PlanBench (Mystery Blocksworld, 0-shot) Score"),
            gr.State(date(2023, 3, 14)),
            gr.State(date(2024, 9, 23)),
        ],
        outputs=planbench_plot,
    )


if __name__ == "__main__":
    demo.launch()