# MarisUK's picture
# Maris AI model sync
# f440f03 verified
"""Modeļa distilācija — mazāks, ātrāks modelis."""
from __future__ import annotations
import json
import logging
from datetime import UTC, datetime
from pathlib import Path
logger = logging.getLogger(__name__)
def _validate_model_ref(model_ref: str, label: str) -> str:
value = model_ref.strip()
if not value:
raise ValueError(f"Parametrs '{label}' nedrīkst būt tukšs.")
return value
def _describe_model_ref(model_ref: str) -> dict[str, str | bool]:
path = Path(model_ref)
if path.exists():
config_path = path / "config.json" if path.is_dir() else path
return {
"model_ref": model_ref,
"is_local_path": True,
"config_exists": config_path.is_file(),
}
return {
"model_ref": model_ref,
"is_local_path": False,
"config_exists": False,
}
def distill(
    teacher_model: str,
    student_model: str,
    output_dir: str = "./distilled",
) -> None:
    """Prepare a repeatable distillation plan and its artifacts on disk.

    This function does not run any training. It validates both model
    references, creates the output directory, and writes two files into it:
    ``distillation-plan.json`` (a manifest describing teacher, student, and
    the planned steps) and ``README.md`` (a short human-readable summary).

    Args:
        teacher_model: Reference to the teacher model; must not be blank.
        student_model: Reference to the student model; must not be blank.
        output_dir: Directory that receives the generated files. ``~`` is
            expanded and the path is resolved to an absolute one; missing
            parent directories are created.

    Raises:
        ValueError: If either model reference is empty after stripping
            whitespace (raised by ``_validate_model_ref``).
    """
    # Validate both inputs before touching the filesystem.
    teacher = _validate_model_ref(teacher_model, "teacher_model")
    student = _validate_model_ref(student_model, "student_model")

    destination = Path(output_dir).expanduser().resolve()
    destination.mkdir(parents=True, exist_ok=True)
    plan_file = destination / "distillation-plan.json"
    readme_file = destination / "README.md"

    logger.info("Distilācija: %s -> %s", teacher, student)

    # One timestamp shared by the manifest and the README so they agree.
    timestamp = datetime.now(UTC).isoformat()

    planned_steps = [
        "Load teacher and student tokenizers/configuration",
        "Run response/logit collection on teacher prompts",
        "Fine-tune the student against distilled supervision",
        "Export distilled student artifacts to the output directory",
    ]
    plan = {
        "status": "prepared",
        "generated_at": timestamp,
        "teacher": _describe_model_ref(teacher),
        "student": _describe_model_ref(student),
        "output_dir": str(destination),
        "steps": planned_steps,
    }
    serialized_plan = json.dumps(plan, indent=2, ensure_ascii=False)
    plan_file.write_text(serialized_plan, encoding="utf-8")

    readme_lines = [
        "# Distillation Plan",
        "",
        f"- Generated at: `{timestamp}`",
        f"- Teacher: `{teacher}`",
        f"- Student: `{student}`",
        "",
        "This directory was prepared successfully and no longer fails with a hard crash.",
        "Use the generated plan file as the input contract for the full KD training stage.",
    ]
    readme_file.write_text("\n".join(readme_lines), encoding="utf-8")