""" app.py - DISBench Leaderboard Main Application Startup Flow: 1. Space rebuild (triggered by PR merge) → Docker container starts 2. Call evaluate.run_evaluation() to scan new submissions in submissions/ 3. Calculate EM/F1 scores for new submissions, update leaderboard_data.json 4. Commit updated data back to repository (persistence) 5. Start Flask Web server """ import os import json import logging from datetime import datetime from flask import Flask, render_template, request, redirect, url_for, jsonify from huggingface_hub import HfApi, CommitOperationAdd # Evaluation module from evaluate import run_evaluation, commit_leaderboard_to_repo logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s") logger = logging.getLogger(__name__) app = Flask(__name__) app.secret_key = os.environ.get("SECRET_KEY", "disbench-leaderboard-secret-key") # --- Configuration --- LEADERBOARD_FILE = "leaderboard_data.json" SUBMISSIONS_DIR = "submissions" os.makedirs(SUBMISSIONS_DIR, exist_ok=True) # HuggingFace Space configuration HF_TOKEN = os.environ.get("HF_TOKEN") SPACE_ID = os.environ.get("SPACE_ID", "RUC-NLPIR/DISBench-Leaderboard") # ============================================================ # Automatic Evaluation on Startup # ============================================================ def startup_evaluation(): """ Automatically run evaluation when the app starts. When maintainers merge a PR containing new submission files, HF Space will automatically rebuild and restart, and this function will be called: - Scan all files in submissions/ directory - Re-evaluate all submissions (deduplicate using configuration combinations) - Compare with groundtruth.jsonl to calculate scores - Update leaderboard_data.json - Commit results back to repository for persistence Note: - Every startup re-evaluates all files, making the logic simpler - submissions/ is the single source of truth - Evaluation is fast and won't affect startup speed """ logger.info("=" * 60) logger.info("DISBench: Running startup evaluation...") logger.info("=" * 60) try: total, _ = run_evaluation() if total > 0: logger.info(f"Evaluated all submissions. Committing to repo...") commit_leaderboard_to_repo() else: logger.info("No submissions found.") logger.info(f"Leaderboard has {total} unique configurations. Ready to serve.") except Exception as e: logger.error(f"Startup evaluation failed: {e}") logger.info("Continuing with existing leaderboard data...") # Execute startup evaluation startup_evaluation() # ============================================================ # Data Loading # ============================================================ def load_leaderboard(): if os.path.exists(LEADERBOARD_FILE): with open(LEADERBOARD_FILE, 'r', encoding='utf-8') as f: return json.load(f) return [] # ============================================================ # Submission Validation # ============================================================ def validate_submission(submission): errors = [] if not isinstance(submission, dict): return ["Submission must be a JSON object with 'meta' and 'predictions' fields."] meta = submission.get("meta") preds = submission.get("predictions") if not meta or not isinstance(meta, dict): errors.append("Missing or invalid 'meta' field.") else: required_meta = ["method_name"] for field in required_meta: if field not in meta: errors.append(f"Missing required field: meta.{field}") valid_tracks = ["Standard", "Open"] if meta.get("track") and meta["track"] not in valid_tracks: errors.append(f"meta.track must be one of: {valid_tracks}") if not preds or not isinstance(preds, dict): errors.append("Missing or invalid 'predictions' field.") return errors # ============================================================ # PR Creation # ============================================================ def create_pr_submission(submission_json, method_name): """Create a PR via HF Hub API, upload submission file to submissions/ directory""" if not HF_TOKEN: raise RuntimeError( "HF_TOKEN not configured. Please set the HF_TOKEN secret in your Space settings." ) api = HfApi(token=HF_TOKEN) timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") safe_name = method_name.replace(" ", "-").replace("/", "_") filename = f"{safe_name}_{timestamp}.json" path_in_repo = f"submissions/{filename}" content = json.dumps(submission_json, indent=2, ensure_ascii=False).encode("utf-8") commit_info = api.create_commit( repo_id=SPACE_ID, repo_type="space", operations=[ CommitOperationAdd( path_in_repo=path_in_repo, path_or_fileobj=content, ) ], commit_message=f"[Submission] Add results for {method_name}", commit_description=( f"**Method**: {method_name}\n" f"**Organization**: {submission_json.get('meta', {}).get('organization', 'N/A')}\n" f"**Track**: {submission_json.get('meta', {}).get('track', 'N/A')}\n" f"**Agent**: {submission_json.get('meta', {}).get('agent_framework', 'N/A')}\n" f"**Backbone**: {submission_json.get('meta', {}).get('backbone_model', 'N/A')}\n" f"**Retriever**: {submission_json.get('meta', {}).get('retriever_model', 'N/A')}\n\n" f"Submitted at {datetime.now().strftime('%Y-%m-%d %H:%M:%S UTC')}" ), create_pr=True, ) return commit_info # ============================================================ # Routes # ============================================================ @app.route('/') def index(): data = load_leaderboard() return render_template('index.html', data=data) @app.route('/upload', methods=['POST']) def upload_file(): """Handle submission: validate → create PR → return result""" if 'file' not in request.files: return jsonify({"success": False, "error": "No file uploaded."}), 400 file = request.files['file'] if file.filename == '': return jsonify({"success": False, "error": "No file selected."}), 400 try: submission = json.load(file) except json.JSONDecodeError as e: return jsonify({"success": False, "error": f"Invalid JSON file: {e}"}), 400 errors = validate_submission(submission) if errors: return jsonify({"success": False, "error": "Validation failed.", "details": errors}), 400 method_name = submission["meta"]["method_name"] # Local backup safe_name = method_name.replace(" ", "-").replace("/", "_") timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") local_path = os.path.join(SUBMISSIONS_DIR, f"{safe_name}_{timestamp}.json") with open(local_path, 'w', encoding='utf-8') as f: json.dump(submission, f, indent=2, ensure_ascii=False) # Create PR try: commit_info = create_pr_submission(submission, method_name) pr_url = getattr(commit_info, 'pr_url', None) return jsonify({ "success": True, "message": f"Submission for '{method_name}' has been submitted as a Pull Request!", "pr_url": pr_url or f"https://huggingface.co/spaces/{SPACE_ID}/discussions", }) except RuntimeError as e: return jsonify({ "success": True, "message": ( f"Submission for '{method_name}' saved locally. " f"PR creation skipped: {str(e)}. " f"Maintainers will review it manually." ), "pr_url": None, }) except Exception as e: return jsonify({ "success": True, "message": ( f"Submission for '{method_name}' saved locally, " f"but PR creation failed: {str(e)}. " f"Please contact the maintainers." ), "pr_url": None, }) if __name__ == '__main__': app.run(debug=False, host="0.0.0.0", port=7860)