"""FysicsWorld leaderboard Space.

Downloads leaderboard result CSVs from a Hugging Face dataset repo,
renders them as Gradio tables, and appends validated JSON submissions
back to the dataset repo.
"""

import json
import os
from pathlib import Path

import gradio as gr
import numpy as np
import pandas as pd
from huggingface_hub import HfApi, snapshot_download

# =========================
# Basic Config
# =========================
# NOTE: "Leaderborad" is how the upstream repo id is actually spelled —
# do not "fix" it here or downloads will 404.
DATASET_REPO = "Fysics-AI/FysicsWorld-Leaderborad-Result"
HF_TOKEN = os.environ.get("HF_TOKEN")

TRACK_TO_CSV = {
    "omni-mllm": "omni-mllm.csv",
    "image-gen": "image-gen.csv",
    "video-gen": "video-gen.csv",
}

# Benchmark names accepted in submissions. "OmniWorld" is kept for
# backward compatibility with older submission files.
ACCEPTED_BENCHMARKS = {"FysicsWorld", "OmniWorld"}

# =========================
# Download Dataset (once, at import time)
# =========================
LOCAL_DATA_DIR = Path(
    snapshot_download(
        repo_id=DATASET_REPO,
        repo_type="dataset",
        token=HF_TOKEN,
    )
)
print("📂 Dataset dir:", LOCAL_DATA_DIR)
print("📄 Files:", [p.name for p in LOCAL_DATA_DIR.iterdir()])

# =========================
# Column Rename Maps (key display fix: raw CSV task ids -> readable headers)
# =========================
OMNI_MLLM_RENAME = {
    "Task1-1": "Image\nUnderstanding",
    "Task1-2": "Video\nUnderstanding",
    "Task2-1": "Speech-Driven\nImage Understanding",
    "Task2-2": "Image-Audio\nReasoning",
    "Task2-3": "Speech-Based\nImage QA",
    "Task2-4": "Speech Generation\nfrom Image",
    "Task2-5": "Audio Matching\nfrom Image",
    "Task3-1": "Speech-Driven\nVideo Understanding",
    "Task3-2": "Video-Audio\nReasoning",
    "Task3-3": "Speech-Based\nVideo QA",
    "Task3-4": "Speech Generation\nfrom Video",
    "Task3-5": "Audio Matching\nfrom Video",
    "Task3-6": "Next-Action\nPrediction",
}

AUDIO_RENAME = {
    "Task1-3": "Audio Reasoning",
}

IMAGE_GEN_RENAME = {
    "WIScore": "WIScore",
    "SC": "Semantic\nConsistency",
    "PQ": "Perceptual\nQuality",
    "OR": "Overall\nQuality",
}

VIDEO_GEN_RENAME = {
    "Imaging": "Imaging",
    "Aesthetic": "Aesthetic",
    "Motion": "Motion",
    "Temporal": "Temporal",
}


# =========================
# Utils
# =========================
def format_numeric_columns(df, decimals=2):
    """Return a copy of *df* with every numeric column rendered as a
    fixed-point string with *decimals* places (NaN becomes "")."""
    df = df.copy()
    numeric_cols = df.select_dtypes(include=[np.number]).columns
    for col in numeric_cols:
        df[col] = df[col].map(
            lambda x: f"{x:.{decimals}f}" if pd.notnull(x) else ""
        )
    return df


def load_csv(filename, sort_key=None, ascending=False):
    """Load a leaderboard CSV from the local snapshot.

    Optionally sorts by *sort_key* (silently skipped if the column is
    absent, so a malformed CSV still renders), then formats numeric
    columns as strings for display.
    """
    csv_path = LOCAL_DATA_DIR / filename
    df = pd.read_csv(csv_path)
    if sort_key and sort_key in df.columns:
        df = df.sort_values(sort_key, ascending=ascending)
    return format_numeric_columns(df, decimals=2)


# =========================
# Submission Logic (kept behavior-compatible)
# =========================
api = HfApi()


def parse_submission(file_bytes):
    """Parse and validate a JSON submission.

    Accepts raw JSON as ``bytes``/``bytearray``, a filesystem path
    (Gradio's ``gr.File`` passes a path string with its default
    ``type="filepath"``), or a raw JSON string.

    Raises:
        ValueError: on a missing field, unknown benchmark, or unknown track.
    """
    if isinstance(file_bytes, (bytes, bytearray)):
        raw = bytes(file_bytes).decode("utf-8")
    elif isinstance(file_bytes, str) and os.path.exists(file_bytes):
        raw = Path(file_bytes).read_text(encoding="utf-8")
    else:
        raw = file_bytes
    data = json.loads(raw)

    required = ["benchmark", "track", "model", "type", "metrics"]
    for k in required:
        if k not in data:
            raise ValueError(f"Missing field: {k}")
    if data["benchmark"] not in ACCEPTED_BENCHMARKS:
        raise ValueError("Invalid benchmark")
    if data["track"] not in TRACK_TO_CSV:
        raise ValueError("Invalid track")
    return data


def append_submission(data):
    """Append a validated submission row to its track CSV and push it
    back to the dataset repo.

    Raises:
        ValueError: if the model name already exists in the leaderboard.
    """
    csv_name = TRACK_TO_CSV[data["track"]]
    # NOTE(review): this writes into the snapshot_download cache directory,
    # which is not guaranteed writable/persistent; the authoritative copy
    # is the one uploaded to the hub below.
    csv_path = LOCAL_DATA_DIR / csv_name
    df = pd.read_csv(csv_path)

    if data["model"] in df["Model"].values:
        raise ValueError("Model already exists in leaderboard")

    row = {
        "Model": data["model"],
        "Type": data["type"],
    }
    row.update(data["metrics"])

    df = pd.concat([df, pd.DataFrame([row])], ignore_index=True)
    df.to_csv(csv_path, index=False)

    api.upload_file(
        path_or_fileobj=str(csv_path),
        path_in_repo=csv_name,
        repo_id=DATASET_REPO,
        repo_type="dataset",
        token=HF_TOKEN,
    )


def handle_submit(file):
    """UI callback: parse, validate, and persist an uploaded submission.

    Returns a user-facing status string; this is the top-level UI
    boundary, so all exceptions are caught and reported as text.
    """
    if file is None:
        return "❌ No file uploaded"
    try:
        data = parse_submission(file)
        append_submission(data)
        return "✅ Submission successful! Please refresh leaderboard."
    except Exception as e:
        return f"❌ Error: {str(e)}"


def _refresh_all():
    """Reload every leaderboard table from disk (wired to the Refresh button)."""
    return (
        load_csv("omni-mllm.csv", "Overall").rename(columns=OMNI_MLLM_RENAME),
        load_csv("image-gen.csv", "Overall").rename(columns=IMAGE_GEN_RENAME),
        load_csv("video-gen.csv", "Overall").rename(columns=VIDEO_GEN_RENAME),
        load_csv("audio-reasoning.csv", "Task1-3").rename(columns=AUDIO_RENAME),
    )


# =========================
# Gradio UI
# =========================
with gr.Blocks(
    theme=gr.themes.Soft(),
    css="""
.container { max-width: 1200px; margin: auto; }
.leaderboard-links a { display: inline-block; margin: 0 8px; padding: 6px 12px; border-radius: 20px; background: #f4f4f5; color: #111827; text-decoration: none; font-weight: 500; font-size: 14px; }
.leaderboard-links a:hover { background: #e5e7eb; }
.description { max-width: 900px; margin: 18px auto 30px auto; font-size: 16px; line-height: 1.7; color: #374151; text-align: center; }
body, .gradio-container { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, "Noto Sans", "Liberation Sans", sans-serif; }
/* OmniLLM 表格:第 1 列(Model) */
table th:nth-child(1), table td:nth-child(1) { min-width: 220px; max-width: 220px; white-space: nowrap; }
/* 第 2 列(Type) */
table th:nth-child(2), table td:nth-child(2) { min-width: 120px; max-width: 120px; }
.overall-definition { max-width: 900px; margin: 30px auto 40px auto; padding: 22px 28px; background: #f9fafb; border: 1px solid #e5e7eb; border-radius: 14px; font-size: 15px; line-height: 1.7; color: #1f2937; }
.overall-definition h3 { text-align: center; font-size: 22px; margin-bottom: 16px; }
.overall-definition strong { color: #111827; }
""",
) as demo:
    gr.Markdown(
        """

🏆 FysicsWorld Leaderboard

We introduce FysicsWorld, the first unified full-modality benchmark that supports bidirectional input-output across image, video, audio, and text, enabling comprehensive any-to-any evaluation across understanding, generation, and reasoning. Our systematic design spans uni-modal perception tasks to fusion-dependent reasoning under strong cross-modal coupling, allowing us to diagnose, with unprecedented clarity, the limitations and emerging strengths of modern multimodal and omni-modal architectures.
"""
    )

    with gr.Tabs():
        # ---------- OmniLLM / MLLM ----------
        with gr.Tab("🧠 OmniLLM / MLLM"):
            gr.Markdown("Evaluation results for OmniLLM / MLLM models.")
            df_omni = load_csv("omni-mllm.csv", sort_key="Overall")
            df_omni = df_omni.rename(columns=OMNI_MLLM_RENAME)
            omni_table = gr.Dataframe(
                value=df_omni,
                interactive=False,
                wrap=True,
            )

        # ---------- Image Generation ----------
        with gr.Tab("🎨 Image Generation"):
            gr.Markdown("Evaluation results for image generation models.")
            df_img = load_csv("image-gen.csv", sort_key="Overall")
            df_img = df_img.rename(columns=IMAGE_GEN_RENAME)
            image_table = gr.Dataframe(
                value=df_img,
                interactive=False,
            )

        # ---------- Video Generation ----------
        with gr.Tab("🎬 Video Generation"):
            gr.Markdown("Evaluation results for video generation models.")
            df_vid = load_csv("video-gen.csv", sort_key="Overall")
            df_vid = df_vid.rename(columns=VIDEO_GEN_RENAME)
            video_table = gr.Dataframe(
                value=df_vid,
                interactive=False,
            )

        # ---------- Audio Reasoning ----------
        with gr.Tab("🎵 Audio Reasoning"):
            gr.Markdown("Evaluation results for OmniLLMs, MLLMs, and AudioLLMs.")
            df_aud = load_csv("audio-reasoning.csv", sort_key="Task1-3")
            df_aud = df_aud.rename(columns=AUDIO_RENAME)
            audio_table = gr.Dataframe(
                value=df_aud,
                interactive=False,
            )

    # ---------- Refresh ----------
    gr.Button("🔄 Refresh All").click(
        fn=_refresh_all,
        outputs=[omni_table, image_table, video_table, audio_table],
    )

    gr.Markdown(
        r"""
### 📊 Overall Score Definition

To facilitate clearer and more consistent comparison across models, we introduce an **Overall** score for each leaderboard track.

**1. OmniLLM / MLLM**

The **Overall** score is computed as the arithmetic mean of all reported task-specific scores.

**2. Image Generation**

The evaluation involves metrics defined on different numerical scales. **WIScore** is used for image generation, while **VIEScore** (averaged over three dimensions) is used for image editing. The **Overall** score is defined as:

$$ \text{Overall}=\frac{(\text{WIScore}\times 10)+\left(\frac{\sum \text{VIEScore}}{3}\right)}{2} $$

This normalization-based formulation ensures a balanced contribution from both image generation and image editing performance.

**3. Video Generation**

The **Overall** score is calculated as the arithmetic mean of all evaluated dimensions, including imaging quality, aesthetics, motion, and temporal consistency.
"""
    )


if __name__ == "__main__":
    demo.launch()