Upload folder using huggingface_hub
Browse files
- checkpoint-200/adapter_config.json +3 -3
- checkpoint-200/adapter_model.safetensors +3 -0
- checkpoint-200/optimizer.pt +3 -0
- checkpoint-200/scheduler.pt +1 -1
- checkpoint-200/trainer_state.json +12 -12
- checkpoint-200/training_args.bin +1 -1
- checkpoint-210/adapter_config.json +3 -3
- checkpoint-210/adapter_model.safetensors +3 -0
- checkpoint-210/optimizer.pt +1 -1
- checkpoint-210/scheduler.pt +1 -1
- checkpoint-210/trainer_state.json +23 -23
- checkpoint-210/training_args.bin +1 -1
checkpoint-200/adapter_config.json
CHANGED
|
@@ -33,12 +33,12 @@
|
|
| 33 |
"rank_pattern": {},
|
| 34 |
"revision": null,
|
| 35 |
"target_modules": [
|
| 36 |
-
"q_proj",
|
| 37 |
"gate_proj",
|
| 38 |
-
"
|
|
|
|
| 39 |
"v_proj",
|
| 40 |
"o_proj",
|
| 41 |
-
"
|
| 42 |
"up_proj"
|
| 43 |
],
|
| 44 |
"target_parameters": null,
|
|
|
|
| 33 |
"rank_pattern": {},
|
| 34 |
"revision": null,
|
| 35 |
"target_modules": [
|
|
|
|
| 36 |
"gate_proj",
|
| 37 |
+
"q_proj",
|
| 38 |
+
"k_proj",
|
| 39 |
"v_proj",
|
| 40 |
"o_proj",
|
| 41 |
+
"down_proj",
|
| 42 |
"up_proj"
|
| 43 |
],
|
| 44 |
"target_parameters": null,
|
checkpoint-200/adapter_model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aa4d55a4104e38eebca405dd4486abc01e5dc526138f5d6fe9f6019e710a88bb
|
| 3 |
+
size 528550256
|
checkpoint-200/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:56ab6b742fdcf189ee3a2b88d7419b1f60c77156d8bb2601b4d013dd2ef41b1e
|
| 3 |
+
size 1057397963
|
checkpoint-200/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b4e31671a48f9cd58f7aa0506207aac3399505fb2f97613873fa5f366e0fd1e8
|
| 3 |
size 1465
|
checkpoint-200/trainer_state.json
CHANGED
|
@@ -448,25 +448,25 @@
|
|
| 448 |
},
|
| 449 |
{
|
| 450 |
"epoch": 1.7576099210822997,
|
| 451 |
-
"eval_loss": 1.
|
| 452 |
-
"eval_runtime":
|
| 453 |
-
"eval_samples_per_second":
|
| 454 |
-
"eval_steps_per_second":
|
| 455 |
"step": 195
|
| 456 |
},
|
| 457 |
{
|
| 458 |
"epoch": 1.8027057497181511,
|
| 459 |
-
"grad_norm": 1.
|
| 460 |
-
"learning_rate": 3.
|
| 461 |
-
"loss": 1.
|
| 462 |
"step": 200
|
| 463 |
},
|
| 464 |
{
|
| 465 |
"epoch": 1.8027057497181511,
|
| 466 |
-
"eval_loss": 1.
|
| 467 |
-
"eval_runtime":
|
| 468 |
-
"eval_samples_per_second": 4.
|
| 469 |
-
"eval_steps_per_second": 2.
|
| 470 |
"step": 200
|
| 471 |
}
|
| 472 |
],
|
|
@@ -487,7 +487,7 @@
|
|
| 487 |
"attributes": {}
|
| 488 |
}
|
| 489 |
},
|
| 490 |
-
"total_flos": 6.
|
| 491 |
"train_batch_size": 2,
|
| 492 |
"trial_name": null,
|
| 493 |
"trial_params": null
|
|
|
|
| 448 |
},
|
| 449 |
{
|
| 450 |
"epoch": 1.7576099210822997,
|
| 451 |
+
"eval_loss": 1.4964991807937622,
|
| 452 |
+
"eval_runtime": 48.3648,
|
| 453 |
+
"eval_samples_per_second": 3.866,
|
| 454 |
+
"eval_steps_per_second": 1.944,
|
| 455 |
"step": 195
|
| 456 |
},
|
| 457 |
{
|
| 458 |
"epoch": 1.8027057497181511,
|
| 459 |
+
"grad_norm": 1.575948772369884e-05,
|
| 460 |
+
"learning_rate": 3.91304347826087e-06,
|
| 461 |
+
"loss": 1.4529,
|
| 462 |
"step": 200
|
| 463 |
},
|
| 464 |
{
|
| 465 |
"epoch": 1.8027057497181511,
|
| 466 |
+
"eval_loss": 1.486992359161377,
|
| 467 |
+
"eval_runtime": 46.1615,
|
| 468 |
+
"eval_samples_per_second": 4.051,
|
| 469 |
+
"eval_steps_per_second": 2.036,
|
| 470 |
"step": 200
|
| 471 |
}
|
| 472 |
],
|
|
|
|
| 487 |
"attributes": {}
|
| 488 |
}
|
| 489 |
},
|
| 490 |
+
"total_flos": 6.213149044511539e+16,
|
| 491 |
"train_batch_size": 2,
|
| 492 |
"trial_name": null,
|
| 493 |
"trial_params": null
|
checkpoint-200/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5841
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a1c6ee9e73e6fc72e15749b15bb0baa391d3380a7113028297856ba7cbc98304
|
| 3 |
size 5841
|
checkpoint-210/adapter_config.json
CHANGED
|
@@ -33,12 +33,12 @@
|
|
| 33 |
"rank_pattern": {},
|
| 34 |
"revision": null,
|
| 35 |
"target_modules": [
|
| 36 |
-
"q_proj",
|
| 37 |
"gate_proj",
|
| 38 |
-
"
|
|
|
|
| 39 |
"v_proj",
|
| 40 |
"o_proj",
|
| 41 |
-
"
|
| 42 |
"up_proj"
|
| 43 |
],
|
| 44 |
"target_parameters": null,
|
|
|
|
| 33 |
"rank_pattern": {},
|
| 34 |
"revision": null,
|
| 35 |
"target_modules": [
|
|
|
|
| 36 |
"gate_proj",
|
| 37 |
+
"q_proj",
|
| 38 |
+
"k_proj",
|
| 39 |
"v_proj",
|
| 40 |
"o_proj",
|
| 41 |
+
"down_proj",
|
| 42 |
"up_proj"
|
| 43 |
],
|
| 44 |
"target_parameters": null,
|
checkpoint-210/adapter_model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1ecacff68af455fbb529438b0827f96309f71400443efdd0a15e0d5a3d49a787
|
| 3 |
+
size 528550256
|
checkpoint-210/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1057397963
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3a645364045044066373fd0bdbba55cb9e0190b2c1084789d7aedefe6e38f7cf
|
| 3 |
size 1057397963
|
checkpoint-210/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0db5f604b9634909dd24be4ffc74e09ba25916453088c45515338d684ec436f3
|
| 3 |
size 1465
|
checkpoint-210/trainer_state.json
CHANGED
|
@@ -448,48 +448,48 @@
|
|
| 448 |
},
|
| 449 |
{
|
| 450 |
"epoch": 1.7576099210822997,
|
| 451 |
-
"eval_loss": 1.
|
| 452 |
-
"eval_runtime":
|
| 453 |
-
"eval_samples_per_second":
|
| 454 |
-
"eval_steps_per_second":
|
| 455 |
"step": 195
|
| 456 |
},
|
| 457 |
{
|
| 458 |
"epoch": 1.8027057497181511,
|
| 459 |
-
"grad_norm": 1.
|
| 460 |
-
"learning_rate": 3.
|
| 461 |
-
"loss": 1.
|
| 462 |
"step": 200
|
| 463 |
},
|
| 464 |
{
|
| 465 |
"epoch": 1.8027057497181511,
|
| 466 |
-
"eval_loss": 1.
|
| 467 |
-
"eval_runtime":
|
| 468 |
-
"eval_samples_per_second": 4.
|
| 469 |
-
"eval_steps_per_second": 2.
|
| 470 |
"step": 200
|
| 471 |
},
|
| 472 |
{
|
| 473 |
"epoch": 1.8478015783540023,
|
| 474 |
-
"eval_loss": 1.
|
| 475 |
-
"eval_runtime":
|
| 476 |
-
"eval_samples_per_second": 4.
|
| 477 |
-
"eval_steps_per_second": 2.
|
| 478 |
"step": 205
|
| 479 |
},
|
| 480 |
{
|
| 481 |
"epoch": 1.8928974069898534,
|
| 482 |
-
"grad_norm": 1.
|
| 483 |
-
"learning_rate":
|
| 484 |
-
"loss": 1.
|
| 485 |
"step": 210
|
| 486 |
},
|
| 487 |
{
|
| 488 |
"epoch": 1.8928974069898534,
|
| 489 |
-
"eval_loss": 1.
|
| 490 |
-
"eval_runtime":
|
| 491 |
-
"eval_samples_per_second": 4.
|
| 492 |
-
"eval_steps_per_second": 2.
|
| 493 |
"step": 210
|
| 494 |
}
|
| 495 |
],
|
|
@@ -510,7 +510,7 @@
|
|
| 510 |
"attributes": {}
|
| 511 |
}
|
| 512 |
},
|
| 513 |
-
"total_flos": 6.
|
| 514 |
"train_batch_size": 2,
|
| 515 |
"trial_name": null,
|
| 516 |
"trial_params": null
|
|
|
|
| 448 |
},
|
| 449 |
{
|
| 450 |
"epoch": 1.7576099210822997,
|
| 451 |
+
"eval_loss": 1.4964991807937622,
|
| 452 |
+
"eval_runtime": 48.3648,
|
| 453 |
+
"eval_samples_per_second": 3.866,
|
| 454 |
+
"eval_steps_per_second": 1.944,
|
| 455 |
"step": 195
|
| 456 |
},
|
| 457 |
{
|
| 458 |
"epoch": 1.8027057497181511,
|
| 459 |
+
"grad_norm": 1.575948772369884e-05,
|
| 460 |
+
"learning_rate": 3.91304347826087e-06,
|
| 461 |
+
"loss": 1.4529,
|
| 462 |
"step": 200
|
| 463 |
},
|
| 464 |
{
|
| 465 |
"epoch": 1.8027057497181511,
|
| 466 |
+
"eval_loss": 1.486992359161377,
|
| 467 |
+
"eval_runtime": 46.1615,
|
| 468 |
+
"eval_samples_per_second": 4.051,
|
| 469 |
+
"eval_steps_per_second": 2.036,
|
| 470 |
"step": 200
|
| 471 |
},
|
| 472 |
{
|
| 473 |
"epoch": 1.8478015783540023,
|
| 474 |
+
"eval_loss": 1.470670461654663,
|
| 475 |
+
"eval_runtime": 45.9542,
|
| 476 |
+
"eval_samples_per_second": 4.069,
|
| 477 |
+
"eval_steps_per_second": 2.046,
|
| 478 |
"step": 205
|
| 479 |
},
|
| 480 |
{
|
| 481 |
"epoch": 1.8928974069898534,
|
| 482 |
+
"grad_norm": 1.8683402231545188e-05,
|
| 483 |
+
"learning_rate": 8.260869565217392e-06,
|
| 484 |
+
"loss": 1.4523,
|
| 485 |
"step": 210
|
| 486 |
},
|
| 487 |
{
|
| 488 |
"epoch": 1.8928974069898534,
|
| 489 |
+
"eval_loss": 1.4473758935928345,
|
| 490 |
+
"eval_runtime": 46.0635,
|
| 491 |
+
"eval_samples_per_second": 4.06,
|
| 492 |
+
"eval_steps_per_second": 2.041,
|
| 493 |
"step": 210
|
| 494 |
}
|
| 495 |
],
|
|
|
|
| 510 |
"attributes": {}
|
| 511 |
}
|
| 512 |
},
|
| 513 |
+
"total_flos": 6.519918556481126e+16,
|
| 514 |
"train_batch_size": 2,
|
| 515 |
"trial_name": null,
|
| 516 |
"trial_params": null
|
checkpoint-210/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5841
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a1c6ee9e73e6fc72e15749b15bb0baa391d3380a7113028297856ba7cbc98304
|
| 3 |
size 5841
|