Spaces:

manmeet3591
/

AFDBench

Runtime error

AFDBench / app.py

Upload folder using huggingface_hub

bd00fe6 verified about 1 month ago

1.14 kB

	import gradio as gr
	import pandas as pd

	# AFDBench: Area Forecast Discussion Benchmark
	# Final Real Data (Phase 2 Zero-Shot Baseline)

	# Human Reference is the absolute 100% Alignment target.
	# All other scores represent real zero-shot performance on 7,734 human samples.

	# Leaderboard table, written one record per model so each model's scores
	# sit next to its name. Column names are the exact headers shown in the UI.
	_COLUMNS = ("Model", "Met-Align (%)", "Style-Align (0-1)")
	_ROWS = (
	    ("Human Reference (NWS)", 100.0, 1.00),
	    ("Nous/Hermes-3-Llama-3.1-8B", 11.38, 0.68),
	    ("Qwen/Qwen2.5-7B-Instruct", 9.89, 0.52),
	    ("Microsoft/Phi-3.5-mini", 7.13, 0.52),
	    ("Mistral-7B-Instruct-v0.3", 5.69, 0.52),
	)

	# Transpose the records into the column -> list-of-values mapping that
	# pandas consumes; key order follows _COLUMNS.
	data = {
	    column: [row[position] for row in _ROWS]
	    for position, column in enumerate(_COLUMNS)
	}

	# Rank models from highest to lowest Met-Align score for display.
	df = pd.DataFrame(data)
	df = df.sort_values(by="Met-Align (%)", ascending=False)

	# Page layout: heading and tagline, the read-only leaderboard table, then a
	# divider followed by one-line definitions of the two metrics.
	with gr.Blocks(title="AFDBench") as demo:
	    for header_text in (
	        "# 🌦 AFDBench Leaderboard",
	        "Evaluating AI alignment with professional NWS Forecast Discussions.",
	    ):
	        gr.Markdown(header_text)

	    # interactive=False renders the table as display-only.
	    gr.DataFrame(value=df, interactive=False)

	    for footer_text in (
	        "---",
	        "Met-Align: Physical accuracy vs. Human Meteorologist choices.",
	        "Style-Align: Linguistic alignment with NWS professional prose.",
	    ):
	        gr.Markdown(footer_text)

	# Launch the app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()