Spaces:
Runtime error
Runtime error
File size: 1,142 Bytes
import gradio as gr
import pandas as pd
# AFDBench: Area Forecast Discussion Benchmark
# Final Real Data (Phase 2 Zero-Shot Baseline)
# Human Reference is the absolute 100% Alignment target.
# All other scores represent real zero-shot performance on 7,734 human samples.
# Leaderboard entries, one tuple per model:
#   (model name, Met-Align percentage, Style-Align score on a 0-1 scale)
_LEADERBOARD_ROWS = (
    ("Human Reference (NWS)", 100.0, 1.00),
    ("Nous/Hermes-3-Llama-3.1-8B", 11.38, 0.68),
    ("Qwen/Qwen2.5-7B-Instruct", 9.89, 0.52),
    ("Microsoft/Phi-3.5-mini", 7.13, 0.52),
    ("Mistral-7B-Instruct-v0.3", 5.69, 0.52),
)

# Transpose the rows into one list per column so `data` keeps its
# column-oriented {header: [values...]} layout.
_models, _met_align, _style_align = (
    list(column) for column in zip(*_LEADERBOARD_ROWS)
)

data = {
    "Model": _models,
    "Met-Align (%)": _met_align,
    "Style-Align (0-1)": _style_align,
}

# Table shown in the UI, ordered from highest to lowest Met-Align.
df = pd.DataFrame(data)
df = df.sort_values(by="Met-Align (%)", ascending=False)
# Metric explanations rendered, in this order, below the divider.
_METRIC_NOTES = (
    "**Met-Align**: Physical accuracy vs. Human Meteorologist choices.",
    "**Style-Align**: Linguistic alignment with NWS professional prose.",
)

# Page layout: title, tagline, the read-only leaderboard table, a divider,
# then one Markdown component per metric note.
with gr.Blocks(title="AFDBench") as demo:
    gr.Markdown("# 🌦 AFDBench Leaderboard")
    gr.Markdown("Evaluating AI alignment with professional NWS Forecast Discussions.")
    gr.DataFrame(value=df, interactive=False)
    gr.Markdown("---")
    for _note in _METRIC_NOTES:
        gr.Markdown(_note)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|