Upload folder using huggingface_hub
Browse files- checkpoint-50/adapter_config.json +2 -2
- checkpoint-50/adapter_model.safetensors +1 -1
- checkpoint-50/optimizer.pt +1 -1
- checkpoint-50/rng_state_0.pth +2 -2
- checkpoint-50/rng_state_1.pth +2 -2
- checkpoint-50/rng_state_2.pth +1 -1
- checkpoint-50/rng_state_3.pth +1 -1
- checkpoint-50/trainer_state.json +34 -34
- checkpoint-50/training_args.bin +1 -1
checkpoint-50/adapter_config.json
CHANGED
|
@@ -19,9 +19,9 @@
|
|
| 19 |
"rank_pattern": {},
|
| 20 |
"revision": null,
|
| 21 |
"target_modules": [
|
| 22 |
-
"q_proj",
|
| 23 |
-
"k_proj",
|
| 24 |
"v_proj",
|
|
|
|
|
|
|
| 25 |
"o_proj"
|
| 26 |
],
|
| 27 |
"task_type": "CAUSAL_LM",
|
|
|
|
| 19 |
"rank_pattern": {},
|
| 20 |
"revision": null,
|
| 21 |
"target_modules": [
|
|
|
|
|
|
|
| 22 |
"v_proj",
|
| 23 |
+
"k_proj",
|
| 24 |
+
"q_proj",
|
| 25 |
"o_proj"
|
| 26 |
],
|
| 27 |
"task_type": "CAUSAL_LM",
|
checkpoint-50/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 436242776
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8124a081a7ccfa909b9e072bfa0377d8895934427a97f45a78b899294c019006
|
| 3 |
size 436242776
|
checkpoint-50/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 872568314
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9ea58ef1f259cc64213e083069205c6a7c87f3bda280b0cedd8d82aa9ea9a001
|
| 3 |
size 872568314
|
checkpoint-50/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:37f8cf6ff0104f126e962b837e6aadc34920baa4dba5c947d76a4d280d03d435
|
| 3 |
+
size 15024
|
checkpoint-50/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:522e793c65745183643c72542cf75d1f9178a0262c7be626f122c78da45ba43b
|
| 3 |
+
size 15024
|
checkpoint-50/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:715a6d2d00c49a8f827f26e430939815ec4b310cc1838bb170e0cd29a335df83
|
| 3 |
size 15024
|
checkpoint-50/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:56b9eef68233418cf695ee351c740b2435854b7f56a1de55cda0a2cb934925b9
|
| 3 |
size 15024
|
checkpoint-50/trainer_state.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
-
"best_metric": 0.
|
| 3 |
"best_model_checkpoint": "./mistral/01-03-24-Weni-ZeroShot-3.3.18-Mistral-7b-Multilanguage-3.2.0_Zeroshot-2_max_steps-100_batch_16_2024-03-01_ppid_7/checkpoint-50",
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 10,
|
| 6 |
"global_step": 50,
|
| 7 |
"is_hyper_param_search": false,
|
|
@@ -9,57 +9,57 @@
|
|
| 9 |
"is_world_process_zero": true,
|
| 10 |
"log_history": [
|
| 11 |
{
|
| 12 |
-
"epoch": 0.
|
| 13 |
-
"eval_loss": 0.
|
| 14 |
-
"eval_runtime":
|
| 15 |
-
"eval_samples_per_second":
|
| 16 |
-
"eval_steps_per_second": 0.
|
| 17 |
"step": 10
|
| 18 |
},
|
| 19 |
{
|
| 20 |
-
"epoch": 0.
|
| 21 |
-
"grad_norm": 0.
|
| 22 |
"learning_rate": 0.00019510565162951537,
|
| 23 |
-
"loss": 0.
|
| 24 |
"step": 20
|
| 25 |
},
|
| 26 |
{
|
| 27 |
-
"epoch": 0.
|
| 28 |
-
"eval_loss": 0.
|
| 29 |
-
"eval_runtime":
|
| 30 |
-
"eval_samples_per_second":
|
| 31 |
-
"eval_steps_per_second": 0.
|
| 32 |
"step": 20
|
| 33 |
},
|
| 34 |
{
|
| 35 |
-
"epoch": 0.
|
| 36 |
-
"eval_loss": 0.
|
| 37 |
-
"eval_runtime":
|
| 38 |
-
"eval_samples_per_second":
|
| 39 |
-
"eval_steps_per_second": 0.
|
| 40 |
"step": 30
|
| 41 |
},
|
| 42 |
{
|
| 43 |
-
"epoch": 0.
|
| 44 |
-
"grad_norm": 0.
|
| 45 |
"learning_rate": 0.0001529919264233205,
|
| 46 |
-
"loss": 0.
|
| 47 |
"step": 40
|
| 48 |
},
|
| 49 |
{
|
| 50 |
-
"epoch": 0.
|
| 51 |
-
"eval_loss": 0.
|
| 52 |
-
"eval_runtime":
|
| 53 |
-
"eval_samples_per_second":
|
| 54 |
-
"eval_steps_per_second": 0.
|
| 55 |
"step": 40
|
| 56 |
},
|
| 57 |
{
|
| 58 |
-
"epoch": 0.
|
| 59 |
-
"eval_loss": 0.
|
| 60 |
-
"eval_runtime":
|
| 61 |
-
"eval_samples_per_second":
|
| 62 |
-
"eval_steps_per_second": 0.
|
| 63 |
"step": 50
|
| 64 |
}
|
| 65 |
],
|
|
@@ -68,7 +68,7 @@
|
|
| 68 |
"num_input_tokens_seen": 0,
|
| 69 |
"num_train_epochs": 1,
|
| 70 |
"save_steps": 10,
|
| 71 |
-
"total_flos":
|
| 72 |
"train_batch_size": 16,
|
| 73 |
"trial_name": null,
|
| 74 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_metric": 0.40609535574913025,
|
| 3 |
"best_model_checkpoint": "./mistral/01-03-24-Weni-ZeroShot-3.3.18-Mistral-7b-Multilanguage-3.2.0_Zeroshot-2_max_steps-100_batch_16_2024-03-01_ppid_7/checkpoint-50",
|
| 4 |
+
"epoch": 0.49504950495049505,
|
| 5 |
"eval_steps": 10,
|
| 6 |
"global_step": 50,
|
| 7 |
"is_hyper_param_search": false,
|
|
|
|
| 9 |
"is_world_process_zero": true,
|
| 10 |
"log_history": [
|
| 11 |
{
|
| 12 |
+
"epoch": 0.1,
|
| 13 |
+
"eval_loss": 0.6292536854743958,
|
| 14 |
+
"eval_runtime": 93.2576,
|
| 15 |
+
"eval_samples_per_second": 30.743,
|
| 16 |
+
"eval_steps_per_second": 0.965,
|
| 17 |
"step": 10
|
| 18 |
},
|
| 19 |
{
|
| 20 |
+
"epoch": 0.2,
|
| 21 |
+
"grad_norm": 0.24918170273303986,
|
| 22 |
"learning_rate": 0.00019510565162951537,
|
| 23 |
+
"loss": 0.9604,
|
| 24 |
"step": 20
|
| 25 |
},
|
| 26 |
{
|
| 27 |
+
"epoch": 0.2,
|
| 28 |
+
"eval_loss": 0.46683359146118164,
|
| 29 |
+
"eval_runtime": 93.2154,
|
| 30 |
+
"eval_samples_per_second": 30.757,
|
| 31 |
+
"eval_steps_per_second": 0.966,
|
| 32 |
"step": 20
|
| 33 |
},
|
| 34 |
{
|
| 35 |
+
"epoch": 0.3,
|
| 36 |
+
"eval_loss": 0.4341636002063751,
|
| 37 |
+
"eval_runtime": 93.1765,
|
| 38 |
+
"eval_samples_per_second": 30.77,
|
| 39 |
+
"eval_steps_per_second": 0.966,
|
| 40 |
"step": 30
|
| 41 |
},
|
| 42 |
{
|
| 43 |
+
"epoch": 0.4,
|
| 44 |
+
"grad_norm": 0.16474689543247223,
|
| 45 |
"learning_rate": 0.0001529919264233205,
|
| 46 |
+
"loss": 0.4381,
|
| 47 |
"step": 40
|
| 48 |
},
|
| 49 |
{
|
| 50 |
+
"epoch": 0.4,
|
| 51 |
+
"eval_loss": 0.40781059861183167,
|
| 52 |
+
"eval_runtime": 93.1933,
|
| 53 |
+
"eval_samples_per_second": 30.764,
|
| 54 |
+
"eval_steps_per_second": 0.966,
|
| 55 |
"step": 40
|
| 56 |
},
|
| 57 |
{
|
| 58 |
+
"epoch": 0.5,
|
| 59 |
+
"eval_loss": 0.40609535574913025,
|
| 60 |
+
"eval_runtime": 93.1926,
|
| 61 |
+
"eval_samples_per_second": 30.764,
|
| 62 |
+
"eval_steps_per_second": 0.966,
|
| 63 |
"step": 50
|
| 64 |
}
|
| 65 |
],
|
|
|
|
| 68 |
"num_input_tokens_seen": 0,
|
| 69 |
"num_train_epochs": 1,
|
| 70 |
"save_steps": 10,
|
| 71 |
+
"total_flos": 4.5255742262450586e+17,
|
| 72 |
"train_batch_size": 16,
|
| 73 |
"trial_name": null,
|
| 74 |
"trial_params": null
|
checkpoint-50/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5176
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:daf4ca86cd3cb3688a488e999435cb5aec8a4b0702902c7de81df751b2180e05
|
| 3 |
size 5176
|