ChenlongDeng's picture
Upload 12 files
45c9afd verified
"""
app.py - DISBench Leaderboard Main Application
Startup Flow:
1. Space rebuild (triggered by PR merge) → Docker container starts
2. Call evaluate.run_evaluation() to scan all submissions in submissions/
3. Calculate EM/F1 scores for every submission, update leaderboard_data.json
4. Commit updated data back to repository (persistence)
5. Start Flask Web server
"""
import os
import json
import logging
from datetime import datetime
from flask import Flask, render_template, request, redirect, url_for, jsonify
from huggingface_hub import HfApi, CommitOperationAdd
# Evaluation module
from evaluate import run_evaluation, commit_leaderboard_to_repo
# Configure root logging once at import time: INFO level, timestamp + level prefix.
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
logger = logging.getLogger(__name__)
app = Flask(__name__)
# NOTE(review): when SECRET_KEY is unset the app falls back to a fixed key that
# is hard-coded in this source file; set the SECRET_KEY environment variable in
# any deployment that relies on signed sessions/cookies.
app.secret_key = os.environ.get("SECRET_KEY", "disbench-leaderboard-secret-key")
# --- Configuration ---
# Both paths are relative, so they resolve against the process working directory.
LEADERBOARD_FILE = "leaderboard_data.json"
SUBMISSIONS_DIR = "submissions"
# Ensure the submissions directory exists at import time so upload_file() can
# write local backups into it.
os.makedirs(SUBMISSIONS_DIR, exist_ok=True)
# HuggingFace Space configuration
# HF_TOKEN is None when the variable is unset; create_pr_submission() raises
# RuntimeError in that case.
HF_TOKEN = os.environ.get("HF_TOKEN")
SPACE_ID = os.environ.get("SPACE_ID", "RUC-NLPIR/DISBench-Leaderboard")
# ============================================================
# Automatic Evaluation on Startup
# ============================================================
def startup_evaluation():
    """
    Run the evaluation pass once when the app starts.

    When maintainers merge a PR containing new submission files, the HF Space
    rebuilds and restarts, and this function is called. It delegates to
    ``run_evaluation()`` and, when that reports at least one configuration,
    to ``commit_leaderboard_to_repo()`` so the results are persisted.

    Design notes (carried over from the project's description of the
    evaluate module, which is not visible in this file):
    - Every startup re-evaluates all files in submissions/, deduplicating by
      configuration combination and scoring against groundtruth.jsonl.
    - submissions/ is the single source of truth.
    - Evaluation is expected to be fast enough not to affect startup.

    Returns:
        None. Any exception is logged (with traceback) and swallowed so the
        web server still starts with whatever leaderboard data already exists.
    """
    logger.info("=" * 60)
    logger.info("DISBench: Running startup evaluation...")
    logger.info("=" * 60)
    try:
        # Only the first element of the returned pair is used here.
        total, _ = run_evaluation()
        if total > 0:
            logger.info("Evaluated all submissions. Committing to repo...")
            commit_leaderboard_to_repo()
        else:
            logger.info("No submissions found.")
        logger.info(f"Leaderboard has {total} unique configurations. Ready to serve.")
    except Exception as e:
        # Top-level boundary: startup must not abort because evaluation or the
        # commit failed. logger.exception keeps the traceback, which the
        # previous logger.error(str(e)) call discarded.
        logger.exception("Startup evaluation failed: %s", e)
        logger.info("Continuing with existing leaderboard data...")
# Execute startup evaluation
# This call sits at module level rather than under the __main__ guard, so it
# runs whenever the module is imported, not only when the file is executed
# directly as a script.
startup_evaluation()
# ============================================================
# Data Loading
# ============================================================
def load_leaderboard(path=None):
    """
    Load the leaderboard entries from a JSON file.

    Args:
        path: File to read. Defaults to ``LEADERBOARD_FILE`` when omitted,
            which is what existing callers rely on.

    Returns:
        The decoded JSON content of the file, or an empty list when the file
        does not exist or cannot be decoded.
    """
    if path is None:
        path = LEADERBOARD_FILE
    try:
        # Open directly instead of checking os.path.exists() first: the file
        # could disappear between the check and the open.
        with open(path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError:
        return []
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        # A corrupt data file should not turn every page load into a 500;
        # report it and serve an empty leaderboard instead.
        logger.error("Could not decode leaderboard file %s: %s", path, e)
        return []
# ============================================================
# Submission Validation
# ============================================================
def validate_submission(submission):
    """
    Check that an uploaded submission has the expected top-level structure.

    Args:
        submission: The decoded JSON payload of the uploaded file.

    Returns:
        A list of human-readable error strings; an empty list means the
        submission passed validation.

    Rules enforced:
    - The payload must be a JSON object (dict).
    - ``meta`` must be a non-empty dict whose required fields are present and
      are non-empty strings (``method_name`` is later used to build file
      names, so a non-string value would crash the upload handler).
    - ``meta.track`` is optional; when present and truthy it must be one of
      the known tracks.
    - ``predictions`` must be a non-empty dict.
    """
    if not isinstance(submission, dict):
        return ["Submission must be a JSON object with 'meta' and 'predictions' fields."]

    errors = []
    meta = submission.get("meta")
    preds = submission.get("predictions")

    if not meta or not isinstance(meta, dict):
        errors.append("Missing or invalid 'meta' field.")
    else:
        required_meta = ["method_name"]
        for field in required_meta:
            if field not in meta:
                errors.append(f"Missing required field: meta.{field}")
            elif not isinstance(meta[field], str) or not meta[field].strip():
                # Present but unusable (None, number, list, blank string).
                errors.append(f"meta.{field} must be a non-empty string.")

        # Kept as a list so its repr in the error message stays unchanged.
        valid_tracks = ["Standard", "Open"]
        if meta.get("track") and meta["track"] not in valid_tracks:
            errors.append(f"meta.track must be one of: {valid_tracks}")

    if not preds or not isinstance(preds, dict):
        errors.append("Missing or invalid 'predictions' field.")

    return errors
# ============================================================
# PR Creation
# ============================================================
def create_pr_submission(submission_json, method_name):
    """
    Open a pull request on the Space that adds the submission file.

    The submission is serialized as pretty-printed UTF-8 JSON and committed
    to ``submissions/<safe_name>_<timestamp>.json`` with ``create_pr=True``.

    Args:
        submission_json: The validated submission payload (a dict).
        method_name: Method name used for the file name and commit message.

    Returns:
        Whatever ``HfApi.create_commit`` returns for the created commit/PR.

    Raises:
        RuntimeError: If ``HF_TOKEN`` is not configured.
        Exception: Any error raised by the Hub client is propagated.
    """
    # Local import: the file's top-level import only brings in `datetime`.
    from datetime import timezone

    if not HF_TOKEN:
        raise RuntimeError(
            "HF_TOKEN not configured. Please set the HF_TOKEN secret in your Space settings."
        )

    api = HfApi(token=HF_TOKEN)

    # File-name timestamp keeps the same format as the local backup written
    # by the upload handler.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    safe_name = method_name.replace(" ", "-").replace("/", "_")
    filename = f"{safe_name}_{timestamp}.json"
    path_in_repo = f"submissions/{filename}"
    content = json.dumps(submission_json, indent=2, ensure_ascii=False).encode("utf-8")

    # Read meta once; `or {}` also covers an explicit null value.
    meta = submission_json.get("meta") or {}
    # The description labels this time as UTC, so it must actually be UTC
    # rather than the container's local time.
    submitted_at = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')
    description = (
        f"**Method**: {method_name}\n"
        f"**Organization**: {meta.get('organization', 'N/A')}\n"
        f"**Track**: {meta.get('track', 'N/A')}\n"
        f"**Agent**: {meta.get('agent_framework', 'N/A')}\n"
        f"**Backbone**: {meta.get('backbone_model', 'N/A')}\n"
        f"**Retriever**: {meta.get('retriever_model', 'N/A')}\n\n"
        f"Submitted at {submitted_at}"
    )

    commit_info = api.create_commit(
        repo_id=SPACE_ID,
        repo_type="space",
        operations=[
            CommitOperationAdd(
                path_in_repo=path_in_repo,
                path_or_fileobj=content,
            )
        ],
        commit_message=f"[Submission] Add results for {method_name}",
        commit_description=description,
        create_pr=True,
    )
    return commit_info
# ============================================================
# Routes
# ============================================================
@app.route('/')
def index():
    """Render the leaderboard page from the currently stored leaderboard data."""
    return render_template('index.html', data=load_leaderboard())
@app.route('/upload', methods=['POST'])
def upload_file():
    """
    Handle a submission upload: validate, back up locally, then create a PR.

    Expects a multipart form upload with a ``file`` part containing JSON.

    Returns:
        A JSON response. Status 400 with ``success: False`` when the upload is
        missing, is not decodable JSON, or fails validation. Otherwise status
        200 with ``success: True``; ``pr_url`` is None when the PR could not
        be created and only the local backup exists.
    """
    if 'file' not in request.files:
        return jsonify({"success": False, "error": "No file uploaded."}), 400

    uploaded = request.files['file']
    if uploaded.filename == '':
        return jsonify({"success": False, "error": "No file selected."}), 400

    try:
        submission = json.load(uploaded)
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        # Uploads are read as bytes; undecodable bytes raise
        # UnicodeDecodeError, which is not a JSONDecodeError.
        return jsonify({"success": False, "error": f"Invalid JSON file: {e}"}), 400

    errors = validate_submission(submission)
    if errors:
        return jsonify({"success": False, "error": "Validation failed.", "details": errors}), 400

    method_name = submission["meta"]["method_name"]
    if not isinstance(method_name, str):
        # The name is used with str.replace() below; reject other JSON types
        # here instead of failing with a server error.
        return jsonify({
            "success": False,
            "error": "Validation failed.",
            "details": ["meta.method_name must be a string."],
        }), 400

    # Local backup
    safe_name = method_name.replace(" ", "-").replace("/", "_")
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    local_path = os.path.join(SUBMISSIONS_DIR, f"{safe_name}_{timestamp}.json")
    with open(local_path, 'w', encoding='utf-8') as f:
        json.dump(submission, f, indent=2, ensure_ascii=False)

    # Create PR
    try:
        commit_info = create_pr_submission(submission, method_name)
        pr_url = getattr(commit_info, 'pr_url', None)
        return jsonify({
            "success": True,
            "message": f"Submission for '{method_name}' has been submitted as a Pull Request!",
            "pr_url": pr_url or f"https://huggingface.co/spaces/{SPACE_ID}/discussions",
        })
    except RuntimeError as e:
        # Raised by create_pr_submission when HF_TOKEN is not configured.
        logger.warning("PR creation skipped for %r: %s", method_name, e)
        return jsonify({
            "success": True,
            "message": (
                f"Submission for '{method_name}' saved locally. "
                f"PR creation skipped: {str(e)}. "
                f"Maintainers will review it manually."
            ),
            "pr_url": None,
        })
    except Exception as e:
        # Request boundary: the submission is already saved locally, so
        # report the failure to the user and record the traceback.
        logger.exception("PR creation failed for %r", method_name)
        return jsonify({
            "success": True,
            "message": (
                f"Submission for '{method_name}' saved locally, "
                f"but PR creation failed: {str(e)}. "
                f"Please contact the maintainers."
            ),
            "pr_url": None,
        })
if __name__ == '__main__':
    # Direct-execution entry point: serve on all interfaces, port 7860,
    # with the Flask debugger disabled.
    app.run(host="0.0.0.0", port=7860, debug=False)