# Hugging Face Space: opencompass/ATLAS (status: Sleeping)
# File size: 17,636 Bytes

import os
import json
import datetime
import requests
from email.utils import parseaddr
import gradio as gr
import pandas as pd
import numpy as np

from src.about import (
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
    EVALUATION_QUEUE_TEXT,
    INTRODUCTION_TEXT,
    LLM_BENCHMARKS_TEXT,
    TITLE,
)
from src.display.css_html_js import custom_css
from src.display.utils import (
    BENCHMARK_COLS,
    COLS,
    EVAL_COLS,
    EVAL_TYPES,
    AutoEvalColumn,
    ModelType,
    fields,
    WeightType,
    Precision
)

# ATLAS result loaders are taken from src.populate so that this app does not
# need the transformers dependency.
try:
    from src.populate import process_sage_results_for_leaderboard, get_sage_leaderboard_df
except ImportError as import_error:
    print(f"Warning: ATLAS modules not available: {import_error}")
    SAGE_MODULES_AVAILABLE = False
else:
    # NOTE(review): the `is not None` test suggests src.populate may export
    # None for this name when its backend is missing - confirm in src.populate.
    SAGE_MODULES_AVAILABLE = process_sage_results_for_leaderboard is not None
    print(
        "✅ ATLAS modules loaded successfully"
        if SAGE_MODULES_AVAILABLE
        else "❌ ATLAS modules not available"
    )


# Configuration
# Value of the HF_TOKEN environment variable, or None when it is not set.
TOKEN = os.getenv("HF_TOKEN")
OWNER = "opencompass"

# OSS object location of the per-user submission log:
# the full object key is SUBMISSION_TRACKING_PATH + SUBMISSION_HISTORY_FILE.
SUBMISSION_TRACKING_PATH = "atlas_eval/submissions/user_tracking/"
SUBMISSION_HISTORY_FILE = "submission_history.json"

def format_error(msg):
    """Wrap *msg* in a centered, red, 20px HTML paragraph (error styling).

    The message is inserted as-is; no HTML escaping is applied here.
    """
    css = "color: red; font-size: 20px; text-align: center;"
    return f"<p style='{css}'>{msg}</p>"

def format_warning(msg):
    """Wrap *msg* in a centered, orange, 20px HTML paragraph (warning styling).

    The message is inserted as-is; no HTML escaping is applied here.
    """
    css = "color: orange; font-size: 20px; text-align: center;"
    return f"<p style='{css}'>{msg}</p>"

def format_log(msg):
    """Wrap *msg* in a centered, green, 20px HTML paragraph (informational styling).

    The message is inserted as-is; no HTML escaping is applied here.
    """
    css = "color: green; font-size: 20px; text-align: center;"
    return f"<p style='{css}'>{msg}</p>"

def model_hyperlink(link, model_name):
    """Render *model_name* as an HTML link to *link* when a usable URL is given.

    Args:
        link: Target URL; only used when it is truthy and starts with "http".
        model_name: Text to display.

    Returns:
        An ``<a target="_blank">`` element wrapping ``model_name``, or
        ``model_name`` unchanged when ``link`` is empty or not an http(s) URL.
    """
    if not link or not link.startswith("http"):
        return model_name

    anchor_style = "color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"
    return f'<a target="_blank" href="{link}" style="{anchor_style}">{model_name}</a>'

def load_submission_history():
    """Fetch the user submission history stored on OSS.

    Returns:
        The JSON-decoded content of the history object, or an empty dict when
        the download yields nothing or any step (import, download, decoding)
        raises.
    """
    try:
        # Imported lazily so a missing OSS module is reported through the
        # except branch below instead of failing at module import time.
        from src.oss.oss_file_manager import OSSFileManager

        storage = OSSFileManager()
        raw_history = storage.download_file_content(
            SUBMISSION_TRACKING_PATH + SUBMISSION_HISTORY_FILE
        )

        if not raw_history:
            # Nothing stored yet (or empty content): start from a clean slate
            print("📝 Creating new submission history")
            return {}

        return json.loads(raw_history)

    except Exception as e:
        print(f"⚠️ Failed to load submission history: {e}")
        return {}

def save_submission_history(history):
    """Serialize *history* to JSON and upload it to the OSS history object.

    Args:
        history: JSON-serializable submission history.

    Returns:
        Whatever ``OSSFileManager.upload_file_content`` returns, or ``False``
        when any step (import, serialization, upload) raises.
    """
    try:
        from src.oss.oss_file_manager import OSSFileManager

        storage = OSSFileManager()

        # ensure_ascii=False keeps non-ASCII text readable in the stored file
        payload = json.dumps(history, indent=2, ensure_ascii=False)
        return storage.upload_file_content(
            content=payload,
            object_key=SUBMISSION_TRACKING_PATH + SUBMISSION_HISTORY_FILE,
        )

    except Exception as e:
        print(f"❌ Failed to save submission history: {e}")
        return False

def check_user_submission_eligibility(profile: gr.OAuthProfile, org_name: str):
    """Check whether the logged-in user is currently allowed to submit.

    Two rules are enforced:

    1. The Hugging Face account (looked up through the public users API) must
       be at least 60 days old.
    2. The user must have fewer than 2 submissions recorded for today in the
       submission history.

    Args:
        profile: OAuth profile of the submitting user; only ``username`` is read.
        org_name: Accepted for interface compatibility; not used by the checks.

    Returns:
        A ``(eligible, message)`` tuple. ``eligible`` is ``False`` both when a
        rule is violated and when the check itself fails (network error,
        unexpected API payload, ...); ``message`` explains the outcome.
    """
    min_account_age = datetime.timedelta(days=60)
    max_daily_submissions = 2
    # The API timestamp may or may not carry fractional seconds.
    created_at_formats = ('%Y-%m-%dT%H:%M:%S.%fZ', '%Y-%m-%dT%H:%M:%SZ')

    try:
        # 1. Check account age limit (60 days)
        # An explicit timeout is required: without it requests can block the
        # submission handler indefinitely if the API does not answer.
        user_data = requests.get(
            f"https://huggingface.co/api/users/{profile.username}/overview",
            timeout=10,
        )
        if user_data.status_code != 200:
            return False, "Unable to verify account information. Please try again later."

        creation_date = json.loads(user_data.content)["createdAt"]
        created_at = None
        for timestamp_format in created_at_formats:
            try:
                created_at = datetime.datetime.strptime(creation_date, timestamp_format)
                break
            except ValueError:
                continue
        if created_at is None:
            # Handled by the outer except: reported as a system check error
            raise ValueError(f"Unrecognized createdAt timestamp: {creation_date!r}")

        # The trailing "Z" marks the timestamp as UTC, so compare it with the
        # current UTC time rather than the server's local time.
        created_at = created_at.replace(tzinfo=datetime.timezone.utc)
        account_age = datetime.datetime.now(datetime.timezone.utc) - created_at

        if account_age < min_account_age:
            return False, "This account does not meet the submission requirement. Account age must exceed 60 days."

        # 2. Check daily submission limit
        submission_history = load_submission_history()
        user_submissions = submission_history.get(profile.username, [])

        # Same local-date format that record_user_submission stores in "date"
        today = datetime.datetime.today().strftime('%Y-%m-%d')
        submissions_today = sum(1 for s in user_submissions if s.get("date", "") == today)

        if submissions_today >= max_daily_submissions:
            return False, "You have already submitted twice today. Please try again tomorrow."

        return True, "Eligibility check passed"

    except Exception as e:
        print(f"❌ User eligibility check failed: {e}")
        return False, f"System check error, please try again later: {str(e)}"

def record_user_submission(profile: gr.OAuthProfile, model_name: str, org_name: str, email: str):
    """Append one submission record for the user to the stored history.

    Args:
        profile: OAuth profile of the submitting user; ``username`` is the
            history key and is also stored in the record.
        model_name: Model name entered in the submission form.
        org_name: Organization name entered in the submission form.
        email: Contact email entered in the submission form.

    Returns:
        The result of ``save_submission_history`` for the updated history, or
        ``False`` when loading, updating or saving raises.
    """
    try:
        submission_history = load_submission_history()

        # Read the clock once so "date" and "time" always describe the same
        # instant, even for a submission recorded right around midnight.
        submitted_at = datetime.datetime.now()

        # Record this submission
        submission_record = {
            "date": submitted_at.strftime('%Y-%m-%d'),
            "time": submitted_at.strftime('%H:%M:%S'),
            "model": model_name,
            "organization": org_name,
            "email": email,
            "username": profile.username
        }

        # NOTE(review): load_submission_history returns {} on failure as well
        # as for a genuinely empty history, so a transient load error followed
        # by this save would overwrite earlier records - confirm whether that
        # is acceptable.
        submission_history.setdefault(profile.username, []).append(submission_record)

        # Save submission history
        return save_submission_history(submission_history)

    except Exception as e:
        print(f"❌ Failed to record submission history: {e}")
        return False

def get_leaderboard_dataframe():
    """Build the leaderboard dataframe from the ATLAS results.

    Returns:
        The dataframe produced by ``get_sage_leaderboard_df`` when it is
        non-empty; otherwise an empty ``pd.DataFrame`` (modules unavailable,
        no results, or an error while loading - errors are printed with a
        traceback rather than raised).
    """
    print("🔄 Loading ATLAS leaderboard data...")

    if not SAGE_MODULES_AVAILABLE:
        print("❌ ATLAS modules not available")
        return pd.DataFrame()

    try:
        results = get_sage_leaderboard_df()

        if not results.empty:
            print(f"✅ Generated dataframe with {len(results)} rows")
            return results

        print("❌ No ATLAS results found")

    except Exception as e:
        print(f"❌ Error generating leaderboard dataframe: {e}")
        import traceback
        traceback.print_exc()

    # Every non-success path ends with an empty frame
    return pd.DataFrame()

def refresh_leaderboard():
    """Reload the leaderboard and return the freshly built dataframe."""
    print("🔄 Refreshing leaderboard data...")
    refreshed = get_leaderboard_dataframe()
    return refreshed

# Build the initial leaderboard snapshot once, at import time
print("🚀 Initializing ATLAS leaderboard...")
leaderboard_df = get_leaderboard_dataframe()
print(f"📈 Leaderboard initialized with {len(leaderboard_df)} rows")

# Gradio datatype of each leaderboard column, in display order
COLUMN_TYPES = [
    "markdown",  # Model
    "str",       # Organization
    "number",    # Accuracy
    "number",    # mG-Pass@2
    "number",    # mG-Pass@4
    "str",       # Submission Date
]


# Create Gradio interface
# Top-level Blocks container. The inline CSS enlarges the font of elements
# carrying the "markdown-text" class and renders the element with id
# "citation-button" in a monospace font.
demo = gr.Blocks(css="""
.markdown-text {
    font-size: 16px !important;
}
#citation-button {
    font-family: monospace;
}
""")

with demo:
    # Page header: title HTML followed by the introduction text
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    # Citation section - directly visible
    gr.Markdown("## 📙 Citation", elem_classes="markdown-text")
    # Read-only textbox holding the citation text; its elem_id is the id
    # targeted by the "#citation-button" CSS rule given to gr.Blocks.
    citation_button = gr.Textbox(
        value=CITATION_BUTTON_TEXT,
        label=CITATION_BUTTON_LABEL,
        elem_id="citation-button",
        lines=6,
        max_lines=10,
        interactive=False
    )

    # Main leaderboard table - COMMENTED OUT
    # gr.Markdown("## 🏆 ATLAS Benchmark Results", elem_classes="markdown-text")
    
    # # Debug information - dynamic component
    # results_count = gr.Markdown(f"📊 **Showing {len(leaderboard_df)} results**")
    
    # leaderboard_table = gr.Dataframe(
    #     value=leaderboard_df,
    #     datatype=COLUMN_TYPES,
    #     interactive=False,
    #     wrap=True,
    #     column_widths=["30%", "20%", "12%", "12%", "12%", "14%"]
    # )

    # # Refresh button
    # refresh_button = gr.Button("🔄 Refresh Leaderboard")
    
    # def refresh_leaderboard_with_count():
    #     """Refresh leaderboard and update count display"""
    #     df = refresh_leaderboard()
    #     count_text = f"📊 **Showing {len(df)} results**"
    #     return df, count_text
    
    # refresh_button.click(
    #     refresh_leaderboard_with_count,
    #     inputs=[],
    #     outputs=[leaderboard_table, results_count]
    # )

    # Submission section (collapsed by default)
    with gr.Accordion("🎯 Submit Your ATLAS Results", open=False):
        gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")

        # Requirements shown to the user. The parts wrapped in HTML comments
        # (login / account age / frequency, security policy) are kept in the
        # markdown source but hidden from the rendered page.
        gr.Markdown("""
### 📋 Submission Requirements
<!-- 
- Login required: You must log in with a Hugging Face account
- Account age: Account must be older than 60 days  
- Submission frequency: Each user can submit up to 2 times per day
-->
- File format: Upload a JSON file in the ATLAS format
- Organization: Provide the exact organization name (shown on the leaderboard)
- Contact email: Provide a valid email for notifications
- Auto evaluation: After submission, the system will run LLM-based evaluation and update the leaderboard

<!-- 
### 🔐 Security Policy
To prevent spam and ensure evaluation quality, we enforce:
- New accounts must wait 60 days before submitting (prevents abuse)
- Daily submission limits to ensure leaderboard quality and system stability
- Duplicate checks to avoid multiple submissions for the same organization
-->
        """, elem_classes="markdown-text")

        # Form layout: text fields in the left column, file upload on the right
        with gr.Row():
            with gr.Column():
                model_textbox = gr.Textbox(
                    label="Model Name - will be shown on the leaderboard", 
                    placeholder="Your Model Name (e.g., GPT-4, Llama-2-70B)"
                )
                org_textbox = gr.Textbox(
                    label="Organization Name - will be shown on the leaderboard", 
                    placeholder="Your Organization"
                )
                email_textbox = gr.Textbox(
                    label="Contact Email - used for contact, not publicly visible", 
                    placeholder="contact@example.com"
                )
            with gr.Column():
                # type="filepath": the handler receives a path to the uploaded file
                file_upload = gr.File(
                    label="Upload ATLAS Results (JSON)",
                    file_types=[".json"],
                    type="filepath"
                )

        # Login and submit buttons, side by side. (An earlier note here said
        # the login feature was temporarily commented out; the LoginButton
        # below is active.)
        with gr.Row():
            login_button = gr.LoginButton("🔐 Login with HuggingFace", size="lg")
            submit_button = gr.Button("Submit Results", variant="primary", size="lg")
        # Login status and user info. Only the commented-out on_login handler
        # below references these two components; no active handler in this
        # file's visible code updates them.
        profile_state = gr.State()
        login_status = gr.Markdown(visible=True)

        # def on_login(profile: gr.OAuthProfile):
        #     try:
        #         if profile and getattr(profile, "name", None):
        #             name = profile.name
        #             text = f"✅ Logged in as: **{name}**"
        #         else:
        #             text = "❌ Login failed, please try again"
        #         return profile, text
        #     except Exception:
        #         return None, "❌ Login failed, please try again"
        # login_button.click(on_login, inputs=None, outputs=[profile_state, login_status])
        
        # Progress display and result display areas; both are filled by the
        # submit handler (they are the two outputs of submit_button.click).
        progress_info = gr.HTML()
        submission_result = gr.HTML()
        
        def show_progress(step, message, total_steps=4):
            """Show progress information"""
            progress_percentage = int((step / total_steps) * 100)
            progress_html = f"""
<div style="background-color: #e7f3ff; border: 1px solid #4dabf7; border-radius: 5px; padding: 15px; margin: 10px 0;">
    <div style="display: flex; align-items: center; margin-bottom: 10px;">
        <h4 style="color: #1971c2; margin: 0; flex-grow: 1;">⏳ Processing submission...</h4>
        <span style="color: #1971c2; font-weight: bold;">{progress_percentage}%</span>
    </div>
    <p style="color: #1971c2; margin: 5px 0;"><strong>Step {step}/{total_steps}:</strong> {message}</p>
    <div style="background-color: #fff; border-radius: 10px; height: 20px; margin: 10px 0; border: 1px solid #dee2e6;">
        <div style="background: linear-gradient(90deg, #4dabf7, #74c0fc); height: 100%; width: {progress_percentage}%; border-radius: 10px; transition: width 0.5s ease; display: flex; align-items: center; justify-content: center;">
            {f'<span style="color: white; font-size: 12px; font-weight: bold;">{progress_percentage}%</span>' if progress_percentage > 20 else ''}
        </div>
    </div>
    <p style="color: #495057; font-size: 14px; margin: 5px 0;">
        {'✨ Almost done, please wait...' if step >= total_steps else '📤 Please wait, processing your submission...'}
    </p>
</div>
            """
            return progress_html
        
        def handle_submission(file_upload, model_name, org_name, email, user_profile: gr.OAuthProfile):
            try:
                # 步骤1: 基本验证
                yield show_progress(1, "Validating submission info"), ""
                
                # 校验登录
                if user_profile is None or getattr(user_profile, "name", None) is None:
                    yield "", format_error("Please log in with Hugging Face before submitting")
                    return
                print(f"user_profile: {user_profile}")
                print(f"user_profile.name: {user_profile.name}")

                if not file_upload:
                    yield "", format_error("Please select a file to upload")
                    return
                if not model_name or not model_name.strip():
                    yield "", format_error("Please enter model name")
                    return
                if not org_name or not org_name.strip():
                    yield "", format_error("Please enter organization name")
                    return
                if not email or not email.strip():
                    yield "", format_error("Please enter email address")
                    return
                
                # 验证邮箱格式
                _, parsed_email = parseaddr(email)
                if "@" not in parsed_email:
                    yield "", format_warning("Please provide a valid email address")
                    return
                
                # 步骤2: 文件验证和读取
                yield show_progress(2, "Validating file format and content"), ""
                
                import time
                time.sleep(0.5)  # allow users to see progress update
                
                # 用户资格检查（账号年龄/频率/重复提交）
                eligible, msg = check_user_submission_eligibility(user_profile, org_name)
                if not eligible:
                    yield "", format_error(msg)
                    return

                # 步骤3: 上传到OSS
                yield show_progress(3, "Uploading file to OSS storage"), ""
                
                # 处理文件提交
                from src.submission.submit import process_sage_submission_simple
                result = process_sage_submission_simple(file_upload, model_name, org_name, email)
                
                # 步骤4: 完成
                yield show_progress(4, "Submission completed, preparing evaluation"), ""
                
                time.sleep(0.5)  # allow users to see completion state
                
                # 记录提交历史
                try:
                    record_user_submission(user_profile, model_name, org_name, email)
                except Exception:
                    pass

                # 生成成功信息
                success_info = f"""
<div style="background-color: #d4edda; border: 1px solid #c3e6cb; border-radius: 5px; padding: 15px; margin: 10px 0;">
    <h4 style="color: #155724; margin-top: 0;">🎉 Submission successful!</h4>
    <p style="color: #155724; margin: 5px 0;"><strong>Model:</strong> {model_name}</p>
    <p style="color: #155724; margin: 5px 0;"><strong>Organization:</strong> {org_name}</p>
    <p style="color: #155724; margin: 5px 0;"><strong>Email:</strong> {email}</p>
    <p style="color: #155724; margin: 5px 0;"><strong>Submitted at:</strong> {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}</p>
    <p style="color: #155724; margin-bottom: 0;">Your results have been submitted via OSS. LLM evaluation will complete in 5-10 minutes and the leaderboard will be updated.</p>
</div>
                """
                
                # 清除进度条，显示最终结果
                yield "", success_info + result
                
            except ImportError as e:
                yield "", format_error(f"Submission system modules unavailable: {e}")
            except Exception as e:
                import traceback
                traceback.print_exc()
                yield "", format_error(f"An error occurred during submission: {str(e)}")
        
        # Wire the submit button to the streaming handler. The handler yields
        # (progress_html, result_html) pairs, matching the two outputs below.
        # NOTE(review): handle_submission takes a fifth parameter
        # (user_profile) that is not listed in inputs; presumably Gradio
        # supplies it from the gr.OAuthProfile annotation - confirm. Passing
        # profile_state instead was considered and left disabled.
        submit_button.click(
            handle_submission,
            inputs=[file_upload, model_textbox, org_textbox, email_textbox], # profile_state
            outputs=[progress_info, submission_result]
        )

# Launch the app (only when this file is executed as a script, not on import)
if __name__ == "__main__":
    # Disable SSR mode for better OAuth compatibility
    # Note: OAuth is handled internally via gr.LoginButton, not at launch level
    demo.launch(ssr_mode=False)