Borchmann's picture
Upload folder using huggingface_hub
87993b5 verified
raw
history blame
7.67 kB
import json
import os
import shutil
from datetime import datetime, timezone

from src.display.formatting import styled_error, styled_message, styled_warning
from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, RESULTS_REPO
from src.submission.check_validity import already_submitted_models
REQUESTED_MODELS = None
USERS_TO_SUBMISSION_DATES = None
def validate_jsonl_submission(file_path):
    """Validate the structure and content of a submission JSONL file.

    Each non-blank line must be a JSON object with the fields:
    "question" (str), "answer" (list), "citations" (list of dicts, each
    with "file" and "page" keys), "iterations" (non-negative int, bools
    rejected) and "id" (str).

    Args:
        file_path: Path to the JSONL file to validate.

    Returns:
        tuple: (is_valid, error_message, num_predictions). On failure
        is_valid is False, error_message describes the first problem
        found, and num_predictions is 0.
    """
    required_fields = ["question", "answer", "citations", "iterations", "id"]
    # Only the read itself gets the try/except, so a genuine bug in the
    # validation logic below is not mislabeled as a file-read error.
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            lines = f.readlines()
    except (OSError, UnicodeDecodeError) as e:
        return False, f"Error reading file: {str(e)}", 0
    if len(lines) == 0:
        return False, "File is empty", 0
    predictions = []
    for line_num, line in enumerate(lines, 1):
        line = line.strip()
        if not line:
            continue  # Skip blank lines (e.g. a trailing newline).
        try:
            pred = json.loads(line)
        except json.JSONDecodeError as e:
            return False, f"Line {line_num}: Invalid JSON - {str(e)}", 0
        # Check required fields
        for field in required_fields:
            if field not in pred:
                return False, f"Line {line_num}: Missing required field '{field}'", 0
        # Validate field types
        if not isinstance(pred["question"], str):
            return False, f"Line {line_num}: 'question' must be a string", 0
        if not isinstance(pred["answer"], list):
            return False, f"Line {line_num}: 'answer' must be a list", 0
        if not isinstance(pred["citations"], list):
            return False, f"Line {line_num}: 'citations' must be a list", 0
        # Reject bools explicitly: bool subclasses int, so a bare
        # isinstance(..., int) check would accept true/false here.
        if (
            not isinstance(pred["iterations"], int)
            or isinstance(pred["iterations"], bool)
            or pred["iterations"] < 0
        ):
            return False, f"Line {line_num}: 'iterations' must be a non-negative integer", 0
        if not isinstance(pred["id"], str):
            return False, f"Line {line_num}: 'id' must be a string", 0
        # Validate citations structure
        for cit_idx, citation in enumerate(pred["citations"]):
            if not isinstance(citation, dict):
                return False, f"Line {line_num}, citation {cit_idx}: Must be a dict with 'file' and 'page'", 0
            if "file" not in citation or "page" not in citation:
                return False, f"Line {line_num}, citation {cit_idx}: Must have 'file' and 'page' fields", 0
        predictions.append(pred)
    # A file of only blank lines has no predictions; treat it like an
    # empty file instead of reporting a successful 0-prediction upload.
    if not predictions:
        return False, "File is empty", 0
    return True, "", len(predictions)
def add_new_eval(
    model_name: str,
    organization: str,
    model_type: str,
    predictions_file,
    link: str = "",
):
    """Validate and register a new leaderboard submission.

    Validates the uploaded predictions JSONL, writes a (placeholder)
    results file and an eval-request entry locally, uploads all three
    files to the results/queue dataset repos, then removes the local
    copies on success.

    Args:
        model_name: Display name of the submitted model; "/" and spaces
            are replaced with "_" for filenames.
        organization: Submitting organization; used as a subdirectory name.
        model_type: Model-type selection from the form ("API" or
            "Open-weight").
        predictions_file: Path to the uploaded predictions JSONL file.
        link: Optional URL with more information about the model.

    Returns:
        A styled HTML status string (error, warning, or success message).
    """
    global REQUESTED_MODELS
    global USERS_TO_SUBMISSION_DATES
    # Lazily populate the duplicate-submission cache on first call.
    if not REQUESTED_MODELS:
        REQUESTED_MODELS, USERS_TO_SUBMISSION_DATES = already_submitted_models(EVAL_REQUESTS_PATH)
    # NOTE(review): this timestamp contains ":" and ends up in filenames —
    # fine on Linux/HF Spaces, but not portable to Windows. Confirm if that
    # ever matters before changing the format (it is embedded in repo paths).
    current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    # Validate inputs
    if not model_name or model_name.strip() == "":
        return styled_error("Please provide a model name.")
    if not organization or organization.strip() == "":
        return styled_error("Please provide your organization name.")
    if model_type is None or model_type == "":
        return styled_error("Please select a model type (API or Open-weight).")
    if predictions_file is None:
        return styled_error("Please upload a predictions JSONL file.")
    # Validate JSONL structure
    is_valid, error_msg, num_predictions = validate_jsonl_submission(predictions_file)
    if not is_valid:
        return styled_error(f"Invalid submission format: {error_msg}")
    print(f"Validated {num_predictions} predictions")
    # Create safe filename
    safe_model_name = model_name.replace("/", "_").replace(" ", "_")
    # Check for duplicate submission
    if safe_model_name in REQUESTED_MODELS:
        return styled_warning("This model has already been submitted.")
    print("Adding new eval")
    # Prepare directories
    OUT_DIR = f"{EVAL_REQUESTS_PATH}/{organization}"
    PREDICTIONS_DIR = f"{EVAL_RESULTS_PATH}/{organization}"
    os.makedirs(OUT_DIR, exist_ok=True)
    os.makedirs(PREDICTIONS_DIR, exist_ok=True)
    # Save predictions file: copy the uploaded file into the results tree.
    predictions_path = f"{PREDICTIONS_DIR}/{safe_model_name}_predictions_{current_time}.jsonl"
    shutil.copy(predictions_file, predictions_path)
    # TODO: Run evaluation here
    # from src.evaluation.evaluator import evaluate_predictions
    # results = evaluate_predictions(predictions_path)
    # For now, create placeholder results
    # This will be replaced with actual evaluation
    placeholder_results = {
        "model_name": model_name,
        "results": {
            "overall": {"anls": 0.50},
            "single_evidence": {"anls": 0.50},
            "multi_evidence_same_doc": {"anls": 0.50},
            "multi_evidence_multi_doc": {"anls": 0.50},
        },
        "metadata": {
            "agent_steps": num_predictions,  # Use total predictions as placeholder
            "cost_usd": 0.0,  # Placeholder
            "model_type": model_type.lower(),
        },
        "organization": organization,
        "submission_date": current_time,
        "num_predictions": num_predictions,
    "link": link.strip() if link else "",
    }
    # Save results file
    results_path = f"{PREDICTIONS_DIR}/{safe_model_name}_results_{current_time}.json"
    with open(results_path, "w") as f:
        json.dump(placeholder_results, f, indent=2)
    # Create request entry for queue
    eval_request = {
        "model": model_name,
        "organization": organization,
        "model_type": model_type,
        "status": "PENDING",  # Will be set to FINISHED after evaluation
        "submitted_time": current_time,
        "link": link.strip() if link else "",
    }
    # Save request file
    request_path = f"{OUT_DIR}/{safe_model_name}_eval_request_{current_time}.json"
    with open(request_path, "w") as f:
        json.dump(eval_request, f, indent=2)
    print("Uploading files")
    # NOTE(review): the path_in_repo derivation below assumes the literal
    # "eval-results/" / "eval-queue/" appears in EVAL_RESULTS_PATH /
    # EVAL_REQUESTS_PATH; if either constant changes, split(...)[1] raises
    # IndexError. Verify against src.envs before renaming those dirs.
    try:
        # Upload predictions file
        API.upload_file(
            path_or_fileobj=predictions_path,
            path_in_repo=predictions_path.split("eval-results/")[1],
            repo_id=RESULTS_REPO,
            repo_type="dataset",
            commit_message=f"Add predictions for {model_name}",
        )
        # Upload results file
        API.upload_file(
            path_or_fileobj=results_path,
            path_in_repo=results_path.split("eval-results/")[1],
            repo_id=RESULTS_REPO,
            repo_type="dataset",
            commit_message=f"Add results for {model_name}",
        )
        # Upload request file to queue repo
        API.upload_file(
            path_or_fileobj=request_path,
            path_in_repo=request_path.split("eval-queue/")[1],
            repo_id=QUEUE_REPO,
            repo_type="dataset",
            commit_message=f"Add {model_name} to eval queue",
        )
    except Exception as e:
        # Local copies are intentionally kept on failure for debugging.
        return styled_error(f"Error uploading files: {str(e)}")
    # Remove local files now that they live in the hub repos.
    os.remove(request_path)
    os.remove(predictions_path)
    os.remove(results_path)
    return styled_message(
        f"Your submission for '{model_name}' has been successfully submitted!\n"
        f"Validated {num_predictions} predictions.\n"
        f"⚠️ Note: Currently using placeholder scores. Implement the evaluator to compute actual ANLS scores.\n"
        f"Please wait for the leaderboard to refresh."
    )