"""Gradio app that renders the OrgForge RAG leaderboard from a hosted CSV."""

from typing import List, Optional, Tuple

import gradio as gr
import pandas as pd

DATA_URL = "https://huggingface.co/datasets/aeriesec/orgforge/resolve/main/leaderboard/leaderboard.csv"

# Define column groups to keep things organized
CORE_COLS = ["retriever", "generator", "answer_score", "mrr_at_10", "recall_at_10", "accuracy"]

# Mapping of friendly names to the technical column pairs
DETAIL_MAP = {
    "Causal": ["mrr_CAUSAL", "score_CAUSAL"],
    "Escalation": ["mrr_ESCALATION", "score_ESCALATION"],
    "Gap Detection": ["mrr_GAP_DETECTION", "score_GAP_DETECTION"],
    "Planning": ["mrr_PLAN", "score_PLAN"],
    "Retrieval Detail": ["mrr_RETRIEVAL", "score_RETRIEVAL"],
    "Routing": ["mrr_ROUTING", "score_ROUTING"],
    "Temporal": ["mrr_TEMPORAL", "score_TEMPORAL"],
}


def load_and_filter(
    search_query: Optional[str],
    selected_details: Optional[List[str]],
    tier_type: str,
    data: Optional[pd.DataFrame] = None,
) -> pd.DataFrame:
    """Return the leaderboard rows for one tier, filtered and sorted for display.

    Args:
        search_query: Text matched literally and case-insensitively against
            the ``generator`` and ``retriever`` columns. ``None``, empty, or
            whitespace-only means "no filter".
        selected_details: Keys of ``DETAIL_MAP`` whose column pairs are
            appended after ``CORE_COLS``. ``None`` is treated as empty.
        tier_type: Value the ``tier`` column must equal.
        data: Already-loaded leaderboard frame. When ``None`` the CSV at
            ``DATA_URL`` is read. The frame passed in is never mutated.

    Returns:
        The selected columns sorted by ``answer_score`` descending and rounded
        to three decimals. On any failure, a one-cell frame with an ``Error``
        column holding the exception message, so the UI shows the problem
        instead of crashing.
    """
    try:
        df = pd.read_csv(DATA_URL) if data is None else data
        df = df[df["tier"] == tier_type]

        # 1. Search Filter
        needle = (search_query or "").strip()
        if needle:
            # regex=False: the text comes straight from a search box, so
            # characters such as "(" or "+" must not be parsed as a pattern.
            # na=False: rows with a missing name simply do not match, which
            # keeps the mask strictly boolean.
            in_generator = df["generator"].str.contains(
                needle, case=False, na=False, regex=False
            )
            in_retriever = df["retriever"].str.contains(
                needle, case=False, na=False, regex=False
            )
            df = df[in_generator | in_retriever]

        # 2. Column Selection
        cols_to_show = CORE_COLS.copy()
        for detail in selected_details or []:
            cols_to_show.extend(DETAIL_MAP[detail])

        # 3. Final Formatting
        display_df = df[cols_to_show].sort_values(by="answer_score", ascending=False)
        return display_df.round(3)
    except Exception as e:  # UI boundary: surface the failure in the table
        return pd.DataFrame({"Error": [str(e)]})


def refresh(search, cols) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Build the (Tier 1+2, Tier 1) tables from a single download of the CSV.

    If the download itself fails, both tables show the same error frame.
    """
    try:
        data = pd.read_csv(DATA_URL)
    except Exception as e:  # UI boundary: show the failure in both tabs
        failure = pd.DataFrame({"Error": [str(e)]})
        return failure, failure
    return (
        load_and_filter(search, cols, "1+2", data),
        load_and_filter(search, cols, "1", data),
    )


with gr.Blocks(title="OrgForge RAG Leaderboard") as demo:
    gr.Markdown("# 🏆 OrgForge Enterprise RAG Benchmark")

    with gr.Row():
        search_bar = gr.Textbox(
            placeholder="Search models (e.g., 'Claude', 'Llama')...",
            label="Filter by Model Name",
            scale=2,
        )
        column_filter = gr.CheckboxGroup(
            choices=list(DETAIL_MAP.keys()),
            label="Show Detailed Category Scores",
            scale=3,
        )

    # One fetch populates both tabs at startup.
    initial_t2, initial_t1 = refresh("", [])

    with gr.Tabs():
        with gr.Tab("🎯 Tier 1+2 (Full RAG)"):
            out_t2 = gr.Dataframe(
                value=initial_t2,
                interactive=False,
                max_height=600,
            )
        with gr.Tab("🔍 Tier 1 (Retrieval Only)"):
            out_t1 = gr.Dataframe(
                value=initial_t1,
                interactive=False,
                max_height=600,
            )

    inputs = [search_bar, column_filter]
    search_bar.change(fn=refresh, inputs=inputs, outputs=[out_t2, out_t1])
    column_filter.change(fn=refresh, inputs=inputs, outputs=[out_t2, out_t1])

demo.launch()