{ "model_name": "Is Even", "model_id": "is-even", "author": "SnurfyAI", "device": "cpu", "dataset_path": "data\\is_even_chatml.jsonl", "dataset_generated": false, "dataset_examples": 9960, "train_examples": 7470, "validation_examples": 2490, "epochs": 60, "batch_size": 64, "learning_rate": 0.001, "weight_decay": 0.01, "layers": 4, "heads": 4, "embedding_size": 64, "seed": 1337, "best_validation_accuracy": 1.0, "final_holdout_accuracy": 1.0, "history": [ { "epoch": 1, "loss": 0.697788, "validation_accuracy": 0.5 }, { "epoch": 2, "loss": 0.694164, "validation_accuracy": 0.513655 }, { "epoch": 3, "loss": 0.686167, "validation_accuracy": 0.511245 }, { "epoch": 4, "loss": 0.680494, "validation_accuracy": 0.530924 }, { "epoch": 5, "loss": 0.689283, "validation_accuracy": 0.527711 }, { "epoch": 6, "loss": 0.681289, "validation_accuracy": 0.573896 }, { "epoch": 7, "loss": 0.670433, "validation_accuracy": 0.575904 }, { "epoch": 8, "loss": 0.661171, "validation_accuracy": 0.565462 }, { "epoch": 9, "loss": 0.656629, "validation_accuracy": 0.56988 }, { "epoch": 10, "loss": 0.643007, "validation_accuracy": 0.603614 }, { "epoch": 11, "loss": 0.632225, "validation_accuracy": 0.609639 }, { "epoch": 12, "loss": 0.615953, "validation_accuracy": 0.610442 }, { "epoch": 13, "loss": 0.562408, "validation_accuracy": 0.674699 }, { "epoch": 14, "loss": 0.478761, "validation_accuracy": 0.740161 }, { "epoch": 15, "loss": 0.34804, "validation_accuracy": 0.811647 }, { "epoch": 16, "loss": 0.250844, "validation_accuracy": 0.859839 }, { "epoch": 17, "loss": 0.169116, "validation_accuracy": 0.959438 }, { "epoch": 18, "loss": 0.104428, "validation_accuracy": 0.938956 }, { "epoch": 19, "loss": 0.092213, "validation_accuracy": 0.950201 }, { "epoch": 20, "loss": 0.070184, "validation_accuracy": 0.979518 }, { "epoch": 21, "loss": 0.031573, "validation_accuracy": 0.98996 }, { "epoch": 22, "loss": 0.022625, "validation_accuracy": 0.985542 }, { "epoch": 23, "loss": 0.016124, "validation_accuracy": 0.989558 }, { "epoch": 24, "loss": 0.015279, "validation_accuracy": 0.974297 }, { "epoch": 25, "loss": 0.014489, "validation_accuracy": 0.996787 }, { "epoch": 26, "loss": 0.012525, "validation_accuracy": 0.997189 }, { "epoch": 27, "loss": 0.005206, "validation_accuracy": 0.999197 }, { "epoch": 28, "loss": 0.000203, "validation_accuracy": 0.998394 }, { "epoch": 29, "loss": 0.003485, "validation_accuracy": 0.999598 }, { "epoch": 30, "loss": 0.000164, "validation_accuracy": 1.0 }, { "epoch": 31, "loss": 0.000112, "validation_accuracy": 1.0 }, { "epoch": 32, "loss": 9.7e-05, "validation_accuracy": 1.0 }, { "epoch": 33, "loss": 8.6e-05, "validation_accuracy": 1.0 }, { "epoch": 34, "loss": 7.8e-05, "validation_accuracy": 1.0 }, { "epoch": 35, "loss": 7.1e-05, "validation_accuracy": 1.0 }, { "epoch": 36, "loss": 6.5e-05, "validation_accuracy": 1.0 }, { "epoch": 37, "loss": 6e-05, "validation_accuracy": 1.0 }, { "epoch": 38, "loss": 5.5e-05, "validation_accuracy": 1.0 }, { "epoch": 39, "loss": 5.1e-05, "validation_accuracy": 1.0 }, { "epoch": 40, "loss": 4.8e-05, "validation_accuracy": 1.0 }, { "epoch": 41, "loss": 4.5e-05, "validation_accuracy": 1.0 }, { "epoch": 42, "loss": 4.2e-05, "validation_accuracy": 1.0 }, { "epoch": 43, "loss": 4e-05, "validation_accuracy": 1.0 }, { "epoch": 44, "loss": 3.8e-05, "validation_accuracy": 1.0 }, { "epoch": 45, "loss": 3.6e-05, "validation_accuracy": 1.0 }, { "epoch": 46, "loss": 3.4e-05, "validation_accuracy": 1.0 }, { "epoch": 47, "loss": 3.3e-05, "validation_accuracy": 1.0 }, { "epoch": 48, "loss": 3.1e-05, "validation_accuracy": 1.0 }, { "epoch": 49, "loss": 3e-05, "validation_accuracy": 1.0 }, { "epoch": 50, "loss": 2.9e-05, "validation_accuracy": 1.0 }, { "epoch": 51, "loss": 2.8e-05, "validation_accuracy": 1.0 }, { "epoch": 52, "loss": 2.7e-05, "validation_accuracy": 1.0 }, { "epoch": 53, "loss": 2.6e-05, "validation_accuracy": 1.0 }, { "epoch": 54, "loss": 2.6e-05, "validation_accuracy": 1.0 }, { "epoch": 55, "loss": 2.5e-05, "validation_accuracy": 1.0 }, { "epoch": 56, "loss": 2.4e-05, "validation_accuracy": 1.0 }, { "epoch": 57, "loss": 2.4e-05, "validation_accuracy": 1.0 }, { "epoch": 58, "loss": 2.4e-05, "validation_accuracy": 1.0 }, { "epoch": 59, "loss": 2.3e-05, "validation_accuracy": 1.0 }, { "epoch": 60, "loss": 2.3e-05, "validation_accuracy": 1.0 } ] }