QomSSLab
/

Legal-gemma3-12b-it

Safetensors

gemma3

Model card Files Files and versions

xet

Community

QomSSLab commited on Jul 1, 2025

Commit

00d0a98

verified ·

1 Parent(s): 1ccb709

Upload model and tokenizer

Browse files

Files changed (1) hide show

checkpoint-4937/trainer_state.json +167 -0

checkpoint-4937/trainer_state.json ADDED Viewed

	@@ -0,0 +1,167 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.9998987341772152,
+  "eval_steps": 500,
+  "global_step": 4937,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.05063291139240506,
+      "grad_norm": 43.75,
+      "learning_rate": 9.960000000000001e-06,
+      "loss": 1.6749,
+      "step": 250
+    },
+    {
+      "epoch": 0.10126582278481013,
+      "grad_norm": 34.0,
+      "learning_rate": 9.930523139001199e-06,
+      "loss": 2.2096,
+      "step": 500
+    },
+    {
+      "epoch": 0.1518987341772152,
+      "grad_norm": 52.5,
+      "learning_rate": 9.722924278929705e-06,
+      "loss": 2.0945,
+      "step": 750
+    },
+    {
+      "epoch": 0.20253164556962025,
+      "grad_norm": 44.5,
+      "learning_rate": 9.383017947787972e-06,
+      "loss": 2.2143,
+      "step": 1000
+    },
+    {
+      "epoch": 0.25316455696202533,
+      "grad_norm": 44.25,
+      "learning_rate": 8.920326243415839e-06,
+      "loss": 2.244,
+      "step": 1250
+    },
+    {
+      "epoch": 0.3037974683544304,
+      "grad_norm": 60.0,
+      "learning_rate": 8.347810959328346e-06,
+      "loss": 2.1818,
+      "step": 1500
+    },
+    {
+      "epoch": 0.35443037974683544,
+      "grad_norm": 39.0,
+      "learning_rate": 7.68151047448588e-06,
+      "loss": 2.0913,
+      "step": 1750
+    },
+    {
+      "epoch": 0.4050632911392405,
+      "grad_norm": 33.0,
+      "learning_rate": 6.940090455944688e-06,
+      "loss": 2.0647,
+      "step": 2000
+    },
+    {
+      "epoch": 0.45569620253164556,
+      "grad_norm": 47.75,
+      "learning_rate": 6.144320960953343e-06,
+      "loss": 1.9985,
+      "step": 2250
+    },
+    {
+      "epoch": 0.5063291139240507,
+      "grad_norm": 32.25,
+      "learning_rate": 5.316494586898351e-06,
+      "loss": 2.037,
+      "step": 2500
+    },
+    {
+      "epoch": 0.5569620253164557,
+      "grad_norm": 30.5,
+      "learning_rate": 4.479801968981108e-06,
+      "loss": 1.917,
+      "step": 2750
+    },
+    {
+      "epoch": 0.6075949367088608,
+      "grad_norm": 23.0,
+      "learning_rate": 3.6576821203642086e-06,
+      "loss": 1.8975,
+      "step": 3000
+    },
+    {
+      "epoch": 0.6582278481012658,
+      "grad_norm": 51.5,
+      "learning_rate": 2.8731658142854126e-06,
+      "loss": 1.8655,
+      "step": 3250
+    },
+    {
+      "epoch": 0.7088607594936709,
+      "grad_norm": 24.5,
+      "learning_rate": 2.1482304025590883e-06,
+      "loss": 1.9389,
+      "step": 3500
+    },
+    {
+      "epoch": 0.759493670886076,
+      "grad_norm": 27.625,
+      "learning_rate": 1.5031841445072532e-06,
+      "loss": 1.8563,
+      "step": 3750
+    },
+    {
+      "epoch": 0.810126582278481,
+      "grad_norm": 28.75,
+      "learning_rate": 9.56097293660526e-07,
+      "loss": 1.8943,
+      "step": 4000
+    },
+    {
+      "epoch": 0.8607594936708861,
+      "grad_norm": 34.5,
+      "learning_rate": 5.222958797023036e-07,
+      "loss": 1.8757,
+      "step": 4250
+    },
+    {
+      "epoch": 0.9113924050632911,
+      "grad_norm": 60.25,
+      "learning_rate": 2.1393236679203845e-07,
+      "loss": 1.917,
+      "step": 4500
+    },
+    {
+      "epoch": 0.9620253164556962,
+      "grad_norm": 21.0,
+      "learning_rate": 3.964521579718605e-08,
+      "loss": 1.8739,
+      "step": 4750
+    }
+  ],
+  "logging_steps": 250,
+  "max_steps": 4937,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 250,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 8.26174299954206e+17,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}