Buckets:
bbkdevops/unicosys-hypergraph-bucket / tinymind-native-colab-handoff /bundle /evaluation /official_eval.py
| """Official leaderboard readiness packet builder. | |
| This module does not fabricate external scores. It creates auditable packets | |
| for the official paths that currently require public model access or a Hub | |
| submission flow: LMArena, Artificial Analysis, and Hugging Face Open LLM | |
| Leaderboard. | |
| """ | |
| from __future__ import annotations | |
| import json | |
| from pathlib import Path | |
| from typing import Iterable | |
# Reported as the "official_path" of the Hugging Face Open LLM Leaderboard target.
HF_SUBMISSION_DOC = "https://huggingface.co/docs/leaderboards/open_llm_leaderboard/submitting"
# Reported as the "official_path" of the LMArena target and printed in its provider packet.
LMARENA_POLICY_URL = "https://news.lmarena.ai/policy/"
# Reported as the "official_path" of the Artificial Analysis target and printed in its provider packet.
ARTIFICIAL_ANALYSIS_URL = "https://artificialanalysis.ai/"
# Reported as the "api_reference" of the Artificial Analysis target.
ARTIFICIAL_ANALYSIS_API_DOC = "https://artificialanalysis.ai/api-reference"
| def _exists(path: str | Path | None) -> bool: | |
| return bool(path) and Path(path).exists() | |
| def _load_config(hub_dir: str | Path | None) -> dict: | |
| config_path = Path(hub_dir) / "config.json" if hub_dir else None | |
| if config_path and config_path.exists(): | |
| return json.loads(config_path.read_text(encoding="utf-8")) | |
| return {} | |
def _hf_status(hub_dir: str | Path | None, config: dict) -> dict:
    """Assess readiness for the Hugging Face Open LLM Leaderboard path.

    Args:
        hub_dir: Local directory expected to hold the Hub package, or ``None``.
        config: Parsed ``config.json`` content (an empty dict when unavailable).

    Returns:
        A dict with ``status`` (``"ready"`` or ``"blocked"``), the
        ``official_path`` URL, the ``missing`` files, the ``blockers`` derived
        from ``config``, a static ``requirements`` list, and
        ``can_submit_immediately`` (true only when the status is ``"ready"``).
    """
    root = Path(hub_dir) if hub_dir else None

    absent: list[str] = []
    if root is None or not root.exists():
        absent.append("hub_dir")
    if root is not None:
        # Either tokenizer file is accepted as proof that a tokenizer ships.
        tokenizer_present = any(
            (root / name).exists() for name in ("tokenizer.json", "tokenizer_config.json")
        )
        checks = (
            ("config.json", (root / "config.json").exists()),
            ("model.safetensors", (root / "model.safetensors").exists()),
            ("tokenizer", tokenizer_present),
        )
        absent.extend(label for label, present in checks if not present)

    blocking: list[str] = []
    if config.get("auto_map"):
        blocking.append("custom_architecture_requires_remote_code")
    # Only an explicit ``False`` blocks; a missing key does not.
    if config.get("official_open_llm_leaderboard_ready") is False:
        blocking.append("package_declares_official_leaderboard_not_ready")

    is_ready = not (absent or blocking)
    verdict = "ready" if is_ready else "blocked"
    return {
        "status": verdict,
        "official_path": HF_SUBMISSION_DOC,
        "missing": absent,
        "blockers": blocking,
        "requirements": [
            "public Hugging Face model repo",
            "AutoConfig/AutoModel/AutoTokenizer load without use_remote_code",
            "safetensors weights",
            "model card metadata",
            "fixed commit revision at submission time",
        ],
        "can_submit_immediately": verdict == "ready",
    }
| def _public_api_status(public_api_url: str | None, target: str) -> dict: | |
| missing = [] | |
| if not public_api_url: | |
| missing.append("public_api_url") | |
| elif not public_api_url.startswith(("https://", "http://")): | |
| missing.append("public_api_url:http_or_https") | |
| return { | |
| "status": "ready_to_contact" if not missing else "blocked", | |
| "missing": missing, | |
| "requires_human_or_provider_followup": True, | |
| "can_submit_immediately": not missing, | |
| "target": target, | |
| } | |
| def _write(path: Path, text: str) -> str: | |
| path.parent.mkdir(parents=True, exist_ok=True) | |
| path.write_text(text, encoding="utf-8") | |
| return str(path) | |
| def _provider_packet( | |
| *, | |
| title: str, | |
| model_id: str, | |
| public_api_url: str | None, | |
| evidence_paths: list[str], | |
| official_url: str, | |
| notes: list[str], | |
| ) -> str: | |
| lines = [ | |
| f"# {title}", | |
| "", | |
| f"- Model ID: `{model_id}`", | |
| f"- Public API URL: `{public_api_url or 'MISSING'}`", | |
| f"- Official URL: {official_url}", | |
| "- Claim policy: no official score is claimed until the target publishes or confirms results.", | |
| "", | |
| "## Evidence", | |
| ] | |
| if evidence_paths: | |
| lines.extend(f"- `{path}`" for path in evidence_paths) | |
| else: | |
| lines.append("- None attached") | |
| lines.extend(["", "## Notes"]) | |
| lines.extend(f"- {note}" for note in notes) | |
| lines.append("") | |
| return "\n".join(lines) | |
| def _hf_check_script(model_id: str) -> str: | |
| return f'''"""HF Open LLM Leaderboard preflight check for TinyMind. | |
| Run after publishing a public Hub repo: | |
| python hf_open_llm_preflight.py | |
| """ | |
| from transformers import AutoConfig, AutoModel, AutoTokenizer | |
| MODEL_ID = "{model_id}" | |
| REVISION = None | |
| config = AutoConfig.from_pretrained(MODEL_ID, revision=REVISION) | |
| model = AutoModel.from_pretrained(MODEL_ID, revision=REVISION) | |
| tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, revision=REVISION) | |
| print("config", type(config).__name__) | |
| print("model", type(model).__name__) | |
| print("tokenizer", type(tokenizer).__name__) | |
| print("HF preflight passed") | |
| ''' | |
def build_official_eval_pack(
    out_dir: str | Path,
    model_id: str,
    hub_dir: str | Path | None = None,
    public_api_url: str | None = None,
    evidence_paths: Iterable[str | Path] = (),
) -> dict:
    """Build the official-leaderboard readiness pack inside ``out_dir``.

    Writes two provider packets (LMArena, Artificial Analysis), a Hugging Face
    preflight script, a JSON readiness report and a Markdown summary. No
    external score is produced or claimed.

    Args:
        out_dir: Output directory; created if missing.
        model_id: Model identifier embedded in every artifact.
        hub_dir: Optional local Hub package directory to inspect.
        public_api_url: Optional public endpoint of the model.
        evidence_paths: Paths to attach as evidence. Paths that do not exist
            are dropped. A single ``str``/``Path`` is accepted and treated as
            one path; ``None`` is treated as no evidence.

    Returns:
        The readiness report dict, identical to what is stored in
        ``official_eval_readiness.json``.
    """
    out = Path(out_dir)
    out.mkdir(parents=True, exist_ok=True)

    # A bare string would otherwise be iterated character by character, and
    # characters such as "." or "/" exist on disk and would be recorded as
    # bogus evidence. Treat a lone path as a one-element collection.
    if evidence_paths is None:
        evidence_paths = ()
    elif isinstance(evidence_paths, (str, Path)):
        evidence_paths = (evidence_paths,)
    evidence = [str(path) for path in evidence_paths if _exists(path)]

    config = _load_config(hub_dir)
    hf = _hf_status(hub_dir, config)

    lmarena = _public_api_status(public_api_url, "lmarena")
    lmarena.update(
        {
            "official_path": LMARENA_POLICY_URL,
            "requirements": [
                "publicly available model via weights, API, service, or LMArena early release",
                "model access remains available for at least 30 days",
                "community battle testing accumulates enough votes, at least 1000 and usually more",
                "leaderboard score is published only after rating stabilizes",
            ],
        }
    )

    artificial = _public_api_status(public_api_url, "artificial_analysis")
    artificial.update(
        {
            "official_path": ARTIFICIAL_ANALYSIS_URL,
            "api_reference": ARTIFICIAL_ANALYSIS_API_DOC,
            "requirements": [
                "public provider endpoint or provider-accessible API",
                "pricing and access documentation",
                "independent evaluation by Artificial Analysis, not self-reported scores",
            ],
        }
    )

    artifacts = {
        "lmarena_packet": _write(
            out / "lmarena_provider_packet.md",
            _provider_packet(
                title="LMArena Provider Packet",
                model_id=model_id,
                public_api_url=public_api_url,
                evidence_paths=evidence,
                official_url=LMARENA_POLICY_URL,
                notes=[
                    "Submit only after the model is public or accessible to LMArena.",
                    "Official rank requires community votes and cannot be produced instantly by local code.",
                ],
            ),
        ),
        "artificial_analysis_packet": _write(
            out / "artificial_analysis_provider_packet.md",
            _provider_packet(
                title="Artificial Analysis Provider Packet",
                model_id=model_id,
                public_api_url=public_api_url,
                evidence_paths=evidence,
                official_url=ARTIFICIAL_ANALYSIS_URL,
                notes=[
                    "Artificial Analysis publishes independent evaluations; this packet is a request/support artifact.",
                    "Do not claim Artificial Analysis score until it appears in their official data or written confirmation.",
                ],
            ),
        ),
        "hf_preflight_script": _write(out / "hf_open_llm_preflight.py", _hf_check_script(model_id)),
    }

    report_path = out / "official_eval_readiness.json"
    report = {
        "schema_version": "tinymind-official-eval-readiness-v1",
        "model_id": model_id,
        "hub_dir": str(hub_dir) if hub_dir else None,
        "public_api_url": public_api_url,
        "targets": {
            "lmarena": lmarena,
            "artificial_analysis": artificial,
            "huggingface_open_llm_leaderboard": hf,
        },
        "artifacts": artifacts,
        "evidence_paths": evidence,
        "official_results_claimed": False,
        "world_best_claim_allowed": False,
        "report_path": str(report_path),
    }

    lines = ["# TinyMind Official Eval Readiness", "", f"- Model ID: `{model_id}`", ""]
    for name, target in report["targets"].items():
        lines.append(f"- {name}: status={target['status']}, can_submit_immediately={target['can_submit_immediately']}")
    lines.extend(["", "World-best claim: blocked until official results from external sources exist.", ""])
    # ``artifacts`` is the same dict object referenced by ``report``, so the
    # Markdown path registered here is part of the JSON written below.
    artifacts["readiness_markdown"] = _write(out / "official_eval_readiness.md", "\n".join(lines))

    # Write the JSON report once, after every artifact path is known.
    report_path.write_text(json.dumps(report, ensure_ascii=False, indent=2, sort_keys=True), encoding="utf-8")
    return report
Xet Storage Details
- Size:
- 8.78 kB
- Xet hash:
- 67d46997650e6ccc977d36a431afaeac6a46c32ac583b4cbc1027d2e340e9024
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.