Spaces:
Sleeping
Sleeping
| import json | |
| from datetime import datetime, date | |
| import gradio as gr | |
| import plotly.graph_objects as go | |
def create_big_five_capex_plot() -> go.Figure:
    """Build a stacked bar chart of quarterly capital expenditures.

    Reads ``big_five_capex.jsonl`` from the current working directory. Every
    non-blank line is parsed as one JSON object; the code reads a ``"Quarter"``
    key plus one key per company name (Microsoft, Google, Meta, Apple, Amazon)
    from each record.

    A dotted vertical marker labelled "AI arms race begins" is drawn midway
    between the "2023 Q1" and "2023 Q2" bars. When either quarter is missing
    from the data the marker is omitted entirely, rather than being pinned to
    the first bar where it would mislabel an unrelated quarter.

    Returns:
        The assembled Plotly figure.

    Raises:
        OSError: If the data file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks ``"Quarter"`` or one of the company keys.
    """
    # JSON text is UTF-8 by specification; do not rely on the platform default.
    with open("big_five_capex.jsonl", "r", encoding="utf-8") as file:
        records = [json.loads(line) for line in file if line.strip()]

    quarters: list[str] = [record["Quarter"] for record in records]
    companies = ['Microsoft', 'Google', 'Meta', 'Apple', 'Amazon']
    colors = ['#80bb00', '#ee161f', '#0065e3', '#000000', '#ff6200']

    # Bars are placed on integer positions (relabelled with the quarter names
    # below) so the marker line can sit at a fractional x between two bars.
    x_positions = list(range(len(quarters)))

    traces = [
        go.Bar(
            name=company,
            x=x_positions,
            y=[record[company] for record in records],
            marker_color=color,
        )
        for company, color in zip(companies, colors)
    ]

    fig = go.Figure(data=traces)
    fig.update_layout(
        barmode="stack",
        title="Capital Expenditures of the Big Five Tech Companies in Millions of USD per Quarter",
        xaxis_title="Quarter",
        yaxis_title="Capital Expenditures (Millions USD)",
        xaxis=dict(
            tickmode='array',
            tickvals=x_positions,
            ticktext=quarters,
        ),
        height=600,
    )

    # Locate the two quarters the marker must sit between.
    try:
        idx_q1 = quarters.index("2023 Q1")
        idx_q2 = quarters.index("2023 Q2")
    except ValueError:
        # Without both reference quarters there is no meaningful position for
        # the marker, so return the plain chart.
        return fig

    vline_x = (idx_q1 + idx_q2) / 2  # midway between the two bars

    # Vertical dotted line; yref="paper" makes 0..1 span the full plot height.
    fig.add_shape(
        type="line",
        xref="x",
        yref="paper",
        x0=vline_x,
        y0=0,
        x1=vline_x,
        y1=1,
        line=dict(
            color="black",
            dash="dot",
            width=2,
        ),
    )

    # Label placed just above the top edge of the plotting area.
    fig.add_annotation(
        x=vline_x,
        y=1.05,
        xref="x",
        yref="paper",
        text="AI arms race begins",
        showarrow=False,
        font=dict(
            color="black",
            size=12,
        ),
        align="center",
    )
    return fig
def _read_jsonl(path: str) -> list:
    """Parse a JSON Lines file into a list of objects, skipping blank lines."""
    # JSON text is UTF-8 by specification; do not rely on the platform default.
    with open(path, "r", encoding="utf-8") as file:
        return [json.loads(line) for line in file if line.strip()]


def create_simple_plot(data_path: str,
                       name: str,
                       start_date: date, end_date: date,
                       min_value: int = 0, max_value: int = 100) -> go.Figure:
    """Plot the best benchmark score achieved to date against model release date.

    Leaderboard records are read from ``data_path`` (JSON Lines; the code reads
    the ``'model'`` and ``'score'`` keys of each record). Release dates come
    from ``models.jsonl`` in the current working directory, whose records are
    matched on ``'Name'`` and must carry a ``'Release Date'`` in ``YYYY-MM-DD``
    form. Leaderboard entries whose model is not listed there are skipped with
    a warning printed to stdout.

    The figure contains a step line of the running best score plus one labelled
    marker for every model that raised that best (the earliest model always
    gets a marker).

    Args:
        data_path: Path to the leaderboard JSON Lines file.
        name: Metric name, used for the y-axis title and the plot title.
        start_date: Left bound of the x-axis (``date`` or ``datetime``).
        end_date: Right bound of the x-axis (``date`` or ``datetime``).
        min_value: Lower bound of the y-axis.
        max_value: Upper bound of the y-axis.

    Returns:
        The assembled Plotly figure.

    Raises:
        OSError: If either data file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks one of the keys named above.
        ValueError: If a release date does not match ``%Y-%m-%d``.
    """
    leaderboard = _read_jsonl(data_path)

    # Index models by name once instead of rescanning the list per entry.
    # setdefault keeps the first record for a duplicated name, which is the
    # record a front-to-back scan would have found.
    models_by_name: dict = {}
    for model in _read_jsonl("models.jsonl"):
        models_by_name.setdefault(model['Name'], model)

    data = []
    for entry in leaderboard:
        model_name = entry['model']
        model_info = models_by_name.get(model_name)
        if model_info is None:
            print(f"[WARNING] Model '{model_name}' not found in models.jsonl")
            continue
        release_date = datetime.strptime(model_info['Release Date'], "%Y-%m-%d")
        data.append({'model': model_name, 'score': entry['score'], 'release_date': release_date})

    # Chronological order; the sort is stable, so same-day models keep file order.
    data.sort(key=lambda record: record['release_date'])

    x_dates = [record['release_date'] for record in data]

    # Running maximum of the score. It starts at 0, so scores at or below zero
    # never lift the line.
    y_scores = []
    max_score = 0
    for record in data:
        max_score = max(max_score, record['score'])
        y_scores.append(max_score)

    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=x_dates,
        y=y_scores,
        mode='lines',
        line=dict(shape='hv', width=2),  # 'hv' draws the line as steps
        name='Best Score to Date'
    ))

    # One labelled marker per model that set a new best (plus the first model).
    for i, record in enumerate(data):
        if i == 0 or y_scores[i] > y_scores[i - 1]:
            fig.add_trace(go.Scatter(
                x=[record['release_date']],
                y=[record['score']],
                mode='markers+text',
                marker=dict(size=10),
                text=[record['model']],
                textposition="top center",
                name=record['model']
            ))

    fig.update_layout(
        title=f'{name} Over Time',
        xaxis_title='Release Date',
        yaxis_title=name,
        hovermode='x unified',
        xaxis=dict(
            range=[start_date, end_date],
            type='date'
        ),
        yaxis=dict(
            range=[min_value, max_value]
        ),
        height=800
    )
    return fig
# Dashboard layout and event wiring.
# NOTE(review): the original indentation was lost in extraction; the nesting
# below is reconstructed from the tab names and variable usage -- confirm
# against the deployed app.
with gr.Blocks() as demo:
    # Benchmark tabs. Only the tabs bound to a name with `as ...` receive a
    # select handler further down.
    with gr.Tab("System Performance Over Time"):
        with gr.Tab("ARC-AGI-Pub") as arc_agi_tab:
            arc_agi_plot: gr.Plot = gr.Plot()
        with gr.Tab("Simple Bench") as simple_bench_tab:
            simple_bench_plot: gr.Plot = gr.Plot()
        with gr.Tab("PlanBench") as planbench_tab:
            planbench_plot: gr.Plot = gr.Plot()
            # Source attribution shown alongside the PlanBench plot.
            planbench_markdown: gr.Markdown = gr.Markdown(
                value="""Source: [Valmeekam et al. 2024](https://arxiv.org/abs/2409.13373)"""
            )
        with gr.Tab("Codeforces") as codeforces_tab:
            with gr.Tab("General-Purpose Systems"):
                codeforces_plot: gr.Plot = gr.Plot()
        # The tabs below are created with interactive=False and have no select
        # handler in this block -- presumably placeholders for benchmarks that
        # are not wired up yet.
        with gr.Tab("BigCodeBench", interactive=False):
            bigcodebench_plot: gr.Plot = gr.Plot()
        with gr.Tab("GAIA", interactive=False):
            gaia_plot: gr.Plot = gr.Plot()
        with gr.Tab("GPQA", interactive=False):
            gpqa_plot: gr.Plot = gr.Plot()
        with gr.Tab("HumanEval", interactive=False):
            humaneval_plot: gr.Plot = gr.Plot()
        with gr.Tab("Chatbot Arena", interactive=False):
            chatbot_arena_plot: gr.Plot = gr.Plot()
        with gr.Tab("MATH", interactive=False):
            math_plot: gr.Plot = gr.Plot()
        with gr.Tab("OpenCompass", interactive=False):
            opencompass_plot: gr.Plot = gr.Plot()
        with gr.Tab("SWE-bench", interactive=False):
            swe_bench_plot: gr.Plot = gr.Plot()
        with gr.Tab("WebArena", interactive=False):
            webarena_plot: gr.Plot = gr.Plot()
        with gr.Tab("ZeroEval", interactive=False):
            zeroeval_plot: gr.Plot = gr.Plot()
    with gr.Tab("Finance") as finance_tab:
        with gr.Tab("Big Five Capex") as big_five_capex_tab:
            big_five_capex_plot: gr.Plot = gr.Plot()
        with gr.Tab("NVIDIA Revenue", interactive=False) as nvidia_revenue:
            nvidia_revenue_plot: gr.Plot = gr.Plot()

    # Plots are built when a tab is selected. The capex plot is bound to both
    # the outer "Finance" tab and its inner "Big Five Capex" tab, so selecting
    # either one renders it.
    big_five_capex_tab.select(fn=create_big_five_capex_plot, outputs=big_five_capex_plot)
    finance_tab.select(fn=create_big_five_capex_plot, outputs=big_five_capex_plot)

    # Each benchmark tab calls create_simple_plot with constant arguments
    # wrapped in gr.State: data file, metric name, then the x-axis start and
    # end dates.
    arc_agi_tab.select(fn=create_simple_plot,
                       inputs=[gr.State("arc_agi_leaderboard.jsonl"), gr.State("ARC-AGI-Pub (Public Eval) Score"),
                               gr.State(date(2024, 5, 13)), gr.State(date(2024, 12, 20))],
                       outputs=arc_agi_plot)
    simple_bench_tab.select(fn=create_simple_plot,
                            inputs=[gr.State("simple_bench_leaderboard.jsonl"), gr.State("Simple Bench Score"),
                                    gr.State(date(2023, 6, 13)), gr.State(date(2024, 8, 14))],
                            outputs=simple_bench_plot)
    # Codeforces supplies two extra values (800, 3000) in the positions of
    # create_simple_plot's min_value / max_value parameters, replacing the
    # default 0-100 y-axis range.
    codeforces_tab.select(fn=create_simple_plot,
                          inputs=[gr.State("codeforces_leaderboard.jsonl"), gr.State("Codeforces (Elo Rating)"),
                                  gr.State(date(2024, 5, 13)), gr.State(date(2024, 12, 20)),
                                  gr.State(800), gr.State(3000)],
                          outputs=codeforces_plot)
    planbench_tab.select(fn=create_simple_plot,
                         inputs=[gr.State("planbench_leaderboard.jsonl"), gr.State("PlanBench (Mystery Blocksworld, 0-shot) Score"),
                                 gr.State(date(2023, 3, 14)), gr.State(date(2024, 9, 23))],
                         outputs=planbench_plot)

# Launch the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()