bbkdevops's picture
download
raw
8.78 kB
"""Official leaderboard readiness packet builder.
This module does not fabricate external scores. It creates auditable packets
for the official paths that currently require public model access or a Hub
submission flow: LMArena, Artificial Analysis, and Hugging Face Open LLM
Leaderboard.
"""
from __future__ import annotations
import json
from pathlib import Path
from typing import Iterable
# Reference URLs for the official evaluation paths named in the module
# docstring. They are plain strings that this module copies into its output;
# nothing here fetches them.
HF_SUBMISSION_DOC = "https://huggingface.co/docs/leaderboards/open_llm_leaderboard/submitting"
LMARENA_POLICY_URL = "https://news.lmarena.ai/policy/"
ARTIFICIAL_ANALYSIS_URL = "https://artificialanalysis.ai/"
ARTIFICIAL_ANALYSIS_API_DOC = "https://artificialanalysis.ai/api-reference"
def _exists(path: str | Path | None) -> bool:
return bool(path) and Path(path).exists()
def _load_config(hub_dir: str | Path | None) -> dict:
config_path = Path(hub_dir) / "config.json" if hub_dir else None
if config_path and config_path.exists():
return json.loads(config_path.read_text(encoding="utf-8"))
return {}
def _hf_status(hub_dir: str | Path | None, config: dict) -> dict:
    """Assess whether a local Hub export is ready for the HF Open LLM Leaderboard.

    The check is purely local: it inspects which files exist under ``hub_dir``
    and two keys of the already-loaded ``config``. Nothing is uploaded or
    fetched.

    Args:
        hub_dir: Local directory holding the model export, or ``None``/empty
            when there is none.
        config: Parsed ``config.json`` content (an empty dict when unavailable).

    Returns:
        A dict with ``status`` (``"ready"`` or ``"blocked"``), ``official_path``,
        the ``missing`` file labels, the ``blockers`` found in ``config``, a
        static ``requirements`` list, and ``can_submit_immediately``.
    """
    hub = Path(hub_dir) if hub_dir else None
    missing: list[str] = []
    blockers: list[str] = []

    if hub is None or not hub.exists():
        missing.append("hub_dir")

    if hub is not None:
        if not (hub / "config.json").exists():
            missing.append("config.json")

        # Weights may be one file or a sharded checkpoint (an index file plus
        # ``model-XXXXX-of-YYYYY.safetensors`` shards). Both layouts count; the
        # label reported when neither is present stays "model.safetensors".
        single_file = (hub / "model.safetensors").exists()
        sharded = (hub / "model.safetensors.index.json").exists() and any(
            hub.glob("model-*-of-*.safetensors")
        )
        if not (single_file or sharded):
            missing.append("model.safetensors")

        has_tokenizer = (hub / "tokenizer.json").exists() or (hub / "tokenizer_config.json").exists()
        if not has_tokenizer:
            missing.append("tokenizer")

    # A populated ``auto_map`` is treated as a sign that the model needs custom
    # (remote) code to load.
    if config.get("auto_map"):
        blockers.append("custom_architecture_requires_remote_code")
    # Only an explicit ``False`` blocks; an absent key is not a blocker.
    if config.get("official_open_llm_leaderboard_ready") is False:
        blockers.append("package_declares_official_leaderboard_not_ready")

    status = "ready" if not missing and not blockers else "blocked"
    return {
        "status": status,
        "official_path": HF_SUBMISSION_DOC,
        "missing": missing,
        "blockers": blockers,
        "requirements": [
            "public Hugging Face model repo",
            "AutoConfig/AutoModel/AutoTokenizer load without use_remote_code",
            "safetensors weights",
            "model card metadata",
            "fixed commit revision at submission time",
        ],
        "can_submit_immediately": status == "ready",
    }
def _public_api_status(public_api_url: str | None, target: str) -> dict:
missing = []
if not public_api_url:
missing.append("public_api_url")
elif not public_api_url.startswith(("https://", "http://")):
missing.append("public_api_url:http_or_https")
return {
"status": "ready_to_contact" if not missing else "blocked",
"missing": missing,
"requires_human_or_provider_followup": True,
"can_submit_immediately": not missing,
"target": target,
}
def _write(path: Path, text: str) -> str:
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(text, encoding="utf-8")
return str(path)
def _provider_packet(
*,
title: str,
model_id: str,
public_api_url: str | None,
evidence_paths: list[str],
official_url: str,
notes: list[str],
) -> str:
lines = [
f"# {title}",
"",
f"- Model ID: `{model_id}`",
f"- Public API URL: `{public_api_url or 'MISSING'}`",
f"- Official URL: {official_url}",
"- Claim policy: no official score is claimed until the target publishes or confirms results.",
"",
"## Evidence",
]
if evidence_paths:
lines.extend(f"- `{path}`" for path in evidence_paths)
else:
lines.append("- None attached")
lines.extend(["", "## Notes"])
lines.extend(f"- {note}" for note in notes)
lines.append("")
return "\n".join(lines)
def _hf_check_script(model_id: str) -> str:
return f'''"""HF Open LLM Leaderboard preflight check for TinyMind.
Run after publishing a public Hub repo:
python hf_open_llm_preflight.py
"""
from transformers import AutoConfig, AutoModel, AutoTokenizer
MODEL_ID = "{model_id}"
REVISION = None
config = AutoConfig.from_pretrained(MODEL_ID, revision=REVISION)
model = AutoModel.from_pretrained(MODEL_ID, revision=REVISION)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, revision=REVISION)
print("config", type(config).__name__)
print("model", type(model).__name__)
print("tokenizer", type(tokenizer).__name__)
print("HF preflight passed")
'''
def build_official_eval_pack(
    out_dir: str | Path,
    model_id: str,
    hub_dir: str | Path | None = None,
    public_api_url: str | None = None,
    evidence_paths: Iterable[str | Path] = (),
) -> dict:
    """Build the readiness report and supporting packets under ``out_dir``.

    Files written: two Markdown provider packets (LMArena and Artificial
    Analysis), a Hub preflight script, a Markdown readiness summary and the
    JSON readiness report. No external score is produced or claimed.

    Args:
        out_dir: Output directory; created if it does not exist.
        model_id: Model identifier embedded in every artifact.
        hub_dir: Optional local Hub export directory to inspect.
        public_api_url: Optional public endpoint for API-based targets.
        evidence_paths: Candidate evidence files; entries that do not exist
            on disk are left out of the report.

    Returns:
        The report dict that is also serialized to
        ``official_eval_readiness.json``. It contains the per-target statuses,
        the artifact paths, the retained evidence paths and ``report_path``.
    """
    out = Path(out_dir)
    out.mkdir(parents=True, exist_ok=True)

    evidence = [str(path) for path in evidence_paths if _exists(path)]
    config = _load_config(hub_dir)
    hf = _hf_status(hub_dir, config)

    lmarena = _public_api_status(public_api_url, "lmarena")
    lmarena.update(
        {
            "official_path": LMARENA_POLICY_URL,
            "requirements": [
                "publicly available model via weights, API, service, or LMArena early release",
                "model access remains available for at least 30 days",
                "community battle testing accumulates enough votes, at least 1000 and usually more",
                "leaderboard score is published only after rating stabilizes",
            ],
        }
    )

    artificial = _public_api_status(public_api_url, "artificial_analysis")
    artificial.update(
        {
            "official_path": ARTIFICIAL_ANALYSIS_URL,
            "api_reference": ARTIFICIAL_ANALYSIS_API_DOC,
            "requirements": [
                "public provider endpoint or provider-accessible API",
                "pricing and access documentation",
                "independent evaluation by Artificial Analysis, not self-reported scores",
            ],
        }
    )

    targets = {
        "lmarena": lmarena,
        "artificial_analysis": artificial,
        "huggingface_open_llm_leaderboard": hf,
    }

    artifacts = {
        "lmarena_packet": _write(
            out / "lmarena_provider_packet.md",
            _provider_packet(
                title="LMArena Provider Packet",
                model_id=model_id,
                public_api_url=public_api_url,
                evidence_paths=evidence,
                official_url=LMARENA_POLICY_URL,
                notes=[
                    "Submit only after the model is public or accessible to LMArena.",
                    "Official rank requires community votes and cannot be produced instantly by local code.",
                ],
            ),
        ),
        "artificial_analysis_packet": _write(
            out / "artificial_analysis_provider_packet.md",
            _provider_packet(
                title="Artificial Analysis Provider Packet",
                model_id=model_id,
                public_api_url=public_api_url,
                evidence_paths=evidence,
                official_url=ARTIFICIAL_ANALYSIS_URL,
                notes=[
                    "Artificial Analysis publishes independent evaluations; this packet is a request/support artifact.",
                    "Do not claim Artificial Analysis score until it appears in their official data or written confirmation.",
                ],
            ),
        ),
        "hf_preflight_script": _write(out / "hf_open_llm_preflight.py", _hf_check_script(model_id)),
    }

    # Human-readable summary: one bullet per target.
    summary_lines = ["# TinyMind Official Eval Readiness", "", f"- Model ID: `{model_id}`", ""]
    summary_lines.extend(
        f"- {name}: status={target['status']}, can_submit_immediately={target['can_submit_immediately']}"
        for name, target in targets.items()
    )
    summary_lines.extend(
        ["", "World-best claim: blocked until official results from external sources exist.", ""]
    )
    artifacts["readiness_markdown"] = _write(
        out / "official_eval_readiness.md", "\n".join(summary_lines)
    )

    report_path = out / "official_eval_readiness.json"
    report = {
        "schema_version": "tinymind-official-eval-readiness-v1",
        "model_id": model_id,
        "hub_dir": str(hub_dir) if hub_dir else None,
        "public_api_url": public_api_url,
        "targets": targets,
        "artifacts": artifacts,
        "evidence_paths": evidence,
        "official_results_claimed": False,
        "world_best_claim_allowed": False,
    }
    report["report_path"] = str(report_path)

    # Serialize once, after every artifact path is known, so the file on disk
    # is never an incomplete copy lacking ``readiness_markdown``.
    _write(report_path, json.dumps(report, ensure_ascii=False, indent=2, sort_keys=True))
    return report

Xet Storage Details

Size:
8.78 kB
·
Xet hash:
67d46997650e6ccc977d36a431afaeac6a46c32ac583b4cbc1027d2e340e9024

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.