math-piqa-backend / src /backend /manage_requests.py
stellaathena's picture
Initial commit: MATH & PIQA Backend
81afbdf verified
"""Manage evaluation requests and their status."""
import json
import os
from dataclasses import dataclass
from pathlib import Path
from typing import List, Optional
from huggingface_hub import HfApi
# Status constants: string values stored under the "status" key of a request
# JSON file.
# PENDING_STATUS is also the fallback used by get_eval_requests when a request
# file carries no "status" key at all.
PENDING_STATUS = "PENDING"
RUNNING_STATUS = "RUNNING"
FINISHED_STATUS = "FINISHED"
FAILED_STATUS = "FAILED"
@dataclass
class EvalRequest:
    """A single evaluation request, as loaded from a request JSON file."""

    # Fields without defaults: must always be supplied by the caller.
    model: str
    revision: str
    precision: str
    weight_type: str
    model_type: str
    status: str
    submitted_time: str

    # Optional metadata; each falls back to an "empty" value.
    base_model: str = ""
    likes: int = 0
    params: float = 0.0
    license: str = ""
    private: bool = False
    # Path of the JSON file this request was read from ("" when unknown).
    json_filepath: str = ""

    def get_model_args(self) -> str:
        """Build the comma-separated ``key=value`` model-args string for lm-eval.

        Returns:
            A string that always starts with ``pretrained=<model>`` and always
            ends with ``trust_remote_code=True``. ``revision=...`` is included
            only when the revision is non-empty and not ``"main"``;
            ``dtype=...`` is included only when a precision is set.
        """
        pieces = [f"pretrained={self.model}"]

        has_custom_revision = bool(self.revision) and self.revision != "main"
        if has_custom_revision:
            pieces.append(f"revision={self.revision}")

        if self.precision:
            pieces.append(f"dtype={self.precision}")

        # NOTE(review): this flag is appended unconditionally. The previous
        # comment described it as being "for safety"; presumably it is there
        # so models shipping custom code can load -- confirm this is intended
        # for every submitted model.
        pieces.append("trust_remote_code=True")

        return ",".join(pieces)
def get_eval_requests(
    job_status: List[str],
    hf_repo: str,
    local_dir: str,
) -> List[EvalRequest]:
    """
    Load evaluation requests with specified status.

    Every ``*.json`` file found recursively under ``local_dir`` is parsed.
    Files that cannot be read, cannot be decoded/parsed, or whose top-level
    JSON value is not an object are reported on stdout and skipped, so one bad
    file never aborts the scan.

    Args:
        job_status: List of status values to filter by
        hf_repo: HuggingFace dataset repo ID (not used by this function; kept
            for interface compatibility)
        local_dir: Local directory with cached requests

    Returns:
        List of EvalRequest objects whose status is in ``job_status``. The
        list is empty when ``local_dir`` does not exist. A request file with
        no "status" key is treated as PENDING_STATUS.
    """
    requests: List[EvalRequest] = []
    requests_dir = Path(local_dir)
    if not requests_dir.exists():
        return requests

    for json_file in requests_dir.rglob("*.json"):
        # Keep the try body to the I/O + parse step only.
        try:
            with open(json_file, "r", encoding="utf-8") as f:
                data = json.load(f)
        except (ValueError, OSError) as e:
            # ValueError covers both json.JSONDecodeError and
            # UnicodeDecodeError (file is not valid UTF-8).
            print(f"Error loading {json_file}: {e}")
            continue

        # A valid JSON file may still hold a list/number/string at top level;
        # such a file is not a request and has no .get().
        if not isinstance(data, dict):
            print(
                f"Error loading {json_file}: "
                f"expected a JSON object, got {type(data).__name__}"
            )
            continue

        status = data.get("status", PENDING_STATUS)
        if status not in job_status:
            continue

        requests.append(
            EvalRequest(
                model=data.get("model", ""),
                revision=data.get("revision", "main"),
                precision=data.get("precision", "float16"),
                weight_type=data.get("weight_type", "Original"),
                model_type=data.get("model_type", ""),
                status=status,
                submitted_time=data.get("submitted_time", ""),
                base_model=data.get("base_model", ""),
                likes=data.get("likes", 0),
                params=data.get("params", 0.0),
                license=data.get("license", ""),
                private=data.get("private", False),
                json_filepath=str(json_file),
            )
        )

    return requests
def set_eval_request(
    api: HfApi,
    eval_request: EvalRequest,
    set_to_status: str,
    hf_repo: str,
    local_dir: str,
) -> None:
    """
    Update the status of an evaluation request.

    The request's JSON file is rewritten locally with the new status and then
    uploaded to the Hub dataset repo. The upload is best-effort: a failure is
    printed and otherwise ignored, leaving the local file already updated.

    Args:
        api: HuggingFace API client
        eval_request: The request to update (its ``json_filepath`` locates the
            file; the object itself is not modified)
        set_to_status: New status value
        hf_repo: HuggingFace dataset repo ID
        local_dir: Local directory with cached requests

    Raises:
        json.JSONDecodeError: If the existing request file is not valid JSON.
        OSError: If the request file cannot be read or written.
    """
    json_filepath = Path(eval_request.json_filepath)
    # is_file() rather than exists(): an empty json_filepath (the dataclass
    # default) becomes Path("."), which exists but is a directory.
    if not json_filepath.is_file():
        print(f"Request file not found: {json_filepath}")
        return

    # Load current data
    with open(json_filepath, "r", encoding="utf-8") as f:
        data = json.load(f)

    # Update status
    data["status"] = set_to_status

    # Save locally
    with open(json_filepath, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2)

    # Path inside the repo = file path relative to local_dir, with forward
    # slashes. Path-based computation tolerates a trailing slash or "./"
    # prefix on local_dir, which plain string replacement did not.
    try:
        repo_path = json_filepath.relative_to(local_dir).as_posix()
    except ValueError:
        # File is not located under local_dir; fall back to the path as given.
        repo_path = json_filepath.as_posix()

    # Upload to Hub
    try:
        api.upload_file(
            path_or_fileobj=str(json_filepath),
            path_in_repo=repo_path,
            repo_id=hf_repo,
            repo_type="dataset",
            commit_message=f"Update status to {set_to_status} for {eval_request.model}",
        )
    except Exception as e:
        # Deliberately broad: a Hub/network failure must not crash the caller.
        print(f"Failed to upload status update: {e}")
def check_completed_evals(
    api: HfApi,
    checked_status: str,
    completed_status: str,
    failed_status: str,
    hf_repo: str,
    local_dir: str,
    hf_repo_results: str,
    local_dir_results: str,
) -> None:
    """
    Promote requests whose result files are present locally.

    For each request currently in ``checked_status``, look under
    ``<local_dir_results>/<model>`` (recursively) for any ``results_*.json``
    file. When at least one is found, the request is moved to
    ``completed_status``. Requests with no results directory or no result
    file are left untouched.

    Args:
        api: HuggingFace API client
        checked_status: Status of the requests to inspect (e.g., RUNNING)
        completed_status: Status to set when results exist
        failed_status: Accepted but not used by this function body
        hf_repo: Requests dataset repo ID
        local_dir: Local requests directory
        hf_repo_results: Accepted but not used by this function body
        local_dir_results: Local results directory
    """
    results_root = Path(local_dir_results)

    for request in get_eval_requests([checked_status], hf_repo, local_dir):
        candidate_dir = results_root / request.model

        # No results directory for this model yet: nothing to do.
        if not candidate_dir.exists():
            continue

        # Directory exists but holds no result file: nothing to do.
        found = list(candidate_dir.rglob("results_*.json"))
        if not found:
            continue

        # Results found, mark as completed
        set_eval_request(api, request, completed_status, hf_repo, local_dir)
        print(f"Marked {request.model} as {completed_status}")