| """ |
| Quick verification for Component 5 training pipeline. |
| Runs a tiny 5-step training smoke test. |
| """ |
|
|
| from __future__ import annotations |
|
|
| import sys |
| from pathlib import Path |
|
|
| |
# Put the parent of the directory containing this file on sys.path (at most
# once, and ahead of everything else) so that the project-local
# ``scripts.train_component5`` import that follows can be resolved when this
# file is run directly. NOTE(review): presumably that directory is the
# repository root — confirm against the repo layout.
PROJECT_ROOT = Path(__file__).resolve().parents[1]
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))
|
|
| from scripts.train_component5 import main as train_main |
|
|
|
|
def smoke_test_overrides(cfg: object, steps: int = 5) -> dict:
    """Apply the smoke-test overrides to a parsed training config, in place.

    Sets ``training.max_steps``, ``training.save_every`` and
    ``training.eval_every`` to *steps*, ``training.log_every`` to ``1`` and
    ``resume.resume_from`` to ``"none"``. Every other key is left untouched,
    and existing keys keep their position so the dumped YAML stays in the
    same order as the input.

    Args:
        cfg: The object returned by parsing the config file. It must be a
            dict whose ``training`` and ``resume`` entries are dicts.
        steps: Value used for the step count and the save/eval intervals.

    Returns:
        The same dict object that was passed in, after modification.

    Raises:
        ValueError: If *cfg* is not a dict, or if the ``training`` or
            ``resume`` section is missing or is not a dict. Validation
            happens before any mutation, so a rejected config is unchanged.
    """
    if not isinstance(cfg, dict):
        raise ValueError(
            f"config must be a mapping at the top level, got {type(cfg).__name__}"
        )
    for section in ("training", "resume"):
        if not isinstance(cfg.get(section), dict):
            raise ValueError(
                f"config section {section!r} is missing or is not a mapping"
            )

    cfg["training"].update(
        max_steps=steps,
        save_every=steps,
        eval_every=steps,
        log_every=1,
    )
    cfg["resume"]["resume_from"] = "none"
    return cfg


def main() -> None:
    """Run the 5-step training smoke test and report the outcome on stdout.

    Loads the base training config, applies the smoke-test overrides, writes
    the result to a separate ``.verify.yaml`` file and calls ``train_main``
    (the training entry point imported at the top of this file) with
    ``sys.argv`` pointing at that file via ``--config``.

    Raises:
        SystemExit: With exit code 1 if any ``Exception`` is raised while
            preparing the config or running training. The original exception
            is attached as ``__cause__``.
    """
    # NOTE(review): both paths are relative to the current working directory,
    # not to PROJECT_ROOT, so the script presumably has to be launched from
    # the project root — confirm before relying on it from elsewhere.
    base_cfg_path = Path("configs/component5_training_config.yaml")
    verify_cfg_path = Path("configs/component5_training_config.verify.yaml")

    try:
        # Imported inside the try so a missing PyYAML install is reported
        # through the same failure path as every other error.
        import yaml

        cfg = smoke_test_overrides(
            yaml.safe_load(base_cfg_path.read_text(encoding="utf-8"))
        )

        # NOTE(review): the generated file is left on disk after the run;
        # consider cleaning it up if it should not linger in configs/.
        verify_cfg_path.write_text(
            yaml.safe_dump(cfg, sort_keys=False), encoding="utf-8"
        )

        # The only channel used to hand the config to the training entry
        # point is sys.argv, so it is replaced wholesale right before the call.
        sys.argv = [
            "verify_component5_training_pipeline.py",
            "--config",
            str(verify_cfg_path),
        ]
        train_main()
        print("")
        print("Component 5 verification passed.")
    except Exception as exc:
        print("Component 5 verification failed.")
        print(f"What went wrong: {exc}")
        print("Fix suggestion: ensure CUDA is available and tokenized dataset path is correct.")
        # Chain the cause so the underlying error stays attached to the exit.
        raise SystemExit(1) from exc


if __name__ == "__main__":
    main()
|
|