| { | |
| "model_name": "HuggingFaceTB/SmolLM2-1.7B-Instruct", | |
| "method": "full_finetune_bf16", | |
| "epochs": 3, | |
| "batch_size": 64, | |
| "grad_accum": 1, | |
| "lr": 2e-05, | |
| "max_seq_len": 512, | |
| "seed": 42, | |
| "merge_val": false, | |
| "trainable_params": 1711376384, | |
| "train_size": 32368, | |
| "val_size": 3142, | |
| "test_size": 8273, | |
| "eval_samples": 200, | |
| "train_metrics": { | |
| "train_runtime": 66555.6435, | |
| "train_samples_per_second": 1.459, | |
| "train_steps_per_second": 0.023, | |
| "total_flos": 4.462399430364365e+17, | |
| "train_loss": 1.2469815944023284, | |
| "epoch": 3.0 | |
| }, | |
| "eval_results": { | |
| "overall": { | |
| "total": 200, | |
| "json_valid_rate": 0.935, | |
| "schema_valid_rate": 0.935 | |
| }, | |
| "per_type": { | |
| "semantic": { | |
| "total": 141, | |
| "json_valid_rate": 0.9078, | |
| "schema_valid_rate": 0.9078, | |
| "mean_rouge_l": 0.3824 | |
| }, | |
| "episodic": { | |
| "total": 55, | |
| "json_valid_rate": 1.0, | |
| "schema_valid_rate": 1.0, | |
| "mean_rouge_l": 0.5173 | |
| }, | |
| "procedural": { | |
| "total": 4, | |
| "json_valid_rate": 1.0, | |
| "schema_valid_rate": 1.0, | |
| "mean_rouge_l": 0.3471 | |
| } | |
| } | |
| } | |
| } |