Buckets:

linvest21
/

shft-artifacts

Files

xet

linvest21/shft-artifacts/code/self_healing_finetuning/eval/frozen_suite.py

linvest21

about 2 months ago

download

raw

3.38 kB

	from __future__ import annotations

	import hashlib
	import json
	from pathlib import Path
	from typing import Any

	from n21.config import load_structured, write_json
	from n21.settings import REPO_ROOT, SHFT_WORKSPACE_ROOT
	from observability.audit_log import utc_now


	# Keys each eval row is expected to carry; validate_frozen_suite reports the ones a row lacks.
	REQUIRED_EVAL_FIELDS = {"id", "task", "prompt", "expected_points", "critical_checks"}


	def _load_jsonl(path: Path) -> list[dict[str, Any]]:
	rows: list[dict[str, Any]] = []
	for line_no, line in enumerate(path.read_text(encoding="utf-8").splitlines(), start=1):
	if not line.strip():
	continue
	try:
	item = json.loads(line)
	except json.JSONDecodeError as exc:
	raise ValueError(f"invalid JSONL at {path}:{line_no}: {exc}") from exc
	if not isinstance(item, dict):
	raise ValueError(f"eval item must be an object at {path}:{line_no}")
	rows.append(item)
	return rows


	def _sha256(path: Path) -> str:
	h = hashlib.sha256()
	h.update(path.read_bytes())
	return h.hexdigest()


	def validate_frozen_suite(manifest_path: Path) -> dict[str, Any]:
	    """Validate the eval suite named by a manifest and write a report.

	    The manifest is loaded with ``load_structured`` and its ``path`` entry is
	    joined onto ``REPO_ROOT`` to locate the JSONL suite file. Every row is
	    checked for the required fields, a non-empty and unique ``id``, a
	    non-blank ``prompt`` and non-empty ``expected_points`` /
	    ``critical_checks`` lists. The row total is then compared with the
	    manifest's ``sample_count``. Problems found by these checks are collected
	    as messages in the report instead of being raised.

	    The report is passed to ``write_json`` with a target under
	    ``SHFT_WORKSPACE_ROOT / "registry" / "eval_suites"`` and is also returned.

	    Args:
	        manifest_path: Manifest file describing the suite.

	    Returns:
	        The report dict. ``ok`` is true only when ``errors`` is empty;
	        ``sha256`` is ``None`` when the suite file does not exist.

	    Raises:
	        ValueError: If the suite file holds malformed JSONL (from
	            ``_load_jsonl``), or ``sample_count`` cannot be parsed by
	            ``int()``.
	        TypeError: If ``sample_count`` is a value ``int()`` does not accept.

	    NOTE(review): a manifest without a ``path`` entry fails at the subscript
	    lookup; presumably a ``KeyError`` if ``load_structured`` returns a dict.
	    """
	    manifest = load_structured(manifest_path)
	    suite_path = REPO_ROOT / str(manifest["path"])
	    errors: list[str] = []
	    rows: list[dict[str, Any]] = []

	    # Checked once so the row loading and the digest below agree on the answer.
	    suite_exists = suite_path.exists()
	    if suite_exists:
	        rows = _load_jsonl(suite_path)
	    else:
	        errors.append(f"eval suite file does not exist: {suite_path}")

	    ids: set[str] = set()
	    task_counts: dict[str, int] = {}
	    for idx, row in enumerate(rows, start=1):
	        missing = sorted(REQUIRED_EVAL_FIELDS - set(row))
	        if missing:
	            errors.append(f"row {idx} missing fields: {', '.join(missing)}")

	        # A JSON null must count as empty: str(None) is the truthy text "None".
	        raw_id = row.get("id")
	        row_id = "" if raw_id is None else str(raw_id)
	        if not row_id:
	            errors.append(f"row {idx} has empty id")
	        elif row_id in ids:
	            errors.append(f"duplicate eval id: {row_id}")
	        else:
	            ids.add(row_id)

	        prompt = row.get("prompt")
	        if prompt is None or not str(prompt).strip():
	            errors.append(f"row {idx} has empty prompt")

	        expected_points = row.get("expected_points")
	        if not isinstance(expected_points, list) or not expected_points:
	            errors.append(f"row {idx} expected_points must be a non-empty list")

	        critical_checks = row.get("critical_checks")
	        if not isinstance(critical_checks, list) or not critical_checks:
	            errors.append(f"row {idx} critical_checks must be a non-empty list")

	        task = str(row.get("task", "unknown"))
	        task_counts[task] = task_counts.get(task, 0) + 1

	    # -1 can never equal a row count, so an absent sample_count is always reported.
	    expected_count = int(manifest.get("sample_count", -1))
	    if expected_count != len(rows):
	        errors.append(f"manifest sample_count {expected_count} does not match actual {len(rows)}")

	    report = {
	        "ok": not errors,
	        "errors": errors,
	        "eval_suite_id": manifest.get("eval_suite_id"),
	        "version": manifest.get("version"),
	        "status": manifest.get("status"),
	        "sample_count": len(rows),
	        "task_counts": task_counts,
	        "sha256": _sha256(suite_path) if suite_exists else None,
	        "manifest_path": str(manifest_path),
	        "suite_path": str(suite_path),
	        "created_at": utc_now(),
	    }
	    out = SHFT_WORKSPACE_ROOT / "registry" / "eval_suites" / f"{manifest.get('eval_suite_id', 'unknown')}_validation.json"
	    write_json(out, report)
	    return report

Xet Storage Details

Size: 3.38 kB
Xet hash: b8dda4b1a64128c419e9164097ba3d08d9664b5fb2be5b6dce90b4615b760847

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.