penfever committed on
Commit
8f869af
·
verified ·
1 Parent(s): 2b87c9c

Model save

Browse files
README.md CHANGED
@@ -1,7 +1,5 @@
1
  ---
2
  library_name: transformers
3
- license: apache-2.0
4
- base_model: Qwen/Qwen3-8B
5
  tags:
6
  - llama-factory
7
  - full
@@ -16,7 +14,7 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  # swesmith-stack-undr7030
18
 
19
- This model is a fine-tuned version of [Qwen/Qwen3-8B](https://huggingface.co/Qwen/Qwen3-8B) on the penfever/GLM-4.6-swesmith-32ep-131k-nosumm-reasoning and the penfever/GLM-4.6-stackexchange-overflow-sandboxes-32eps-65k-reasoning datasets.
20
 
21
  ## Model description
22
 
 
1
  ---
2
  library_name: transformers
 
 
3
  tags:
4
  - llama-factory
5
  - full
 
14
 
15
  # swesmith-stack-undr7030
16
 
17
+ This model was trained from scratch on the None dataset.
18
 
19
  ## Model description
20
 
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e424fcf440a650a92cea34b016894414bf752a451f1ee64854ef5724fd0b9e71
3
  size 4902257696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:141a8129811f0ed309766d5e65b1f8b31fcfbc752bbbe53c75cd0d0e9a6c6c23
3
  size 4902257696
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da0f296913a79575663d3432de4fdd6ea67a2f23da01d019c6610062726d3c4d
3
  size 4915960368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:453bb5d4e8cb2d04e81dcc76ff89699e224cf2949c8b5bc47367461ad2b3942d
3
  size 4915960368
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d0f47b3938e80e625e103fc4883c01f2ebe11969370117954047fa4b0af689b3
3
  size 4983068496
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dff3979f49e3379a94cba00e5aca4c0b528f038fab6b77acb9b48d75c51ec83a
3
  size 4983068496
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4af79f14d450d1aa19b46fd3c120e8a8bd06a81ea34735566475fec19a25a73b
3
  size 1580230264
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1cdfd6cfbff0fd9ce41320103fda16d035ccf784a5c88c80471a3408a3131bfd
3
  size 1580230264
trainer_log.jsonl CHANGED
@@ -742,3 +742,24 @@
742
  {"current_steps": 3695, "total_steps": 3703, "loss": 0.1561, "lr": 7.200657808792422e-10, "epoch": 6.9877010406811735, "percentage": 99.78, "elapsed_time": "12:50:24", "remaining_time": "0:01:40"}
743
  {"current_steps": 3700, "total_steps": 3703, "loss": 0.1492, "lr": 1.4223590088180416e-10, "epoch": 6.997161778618732, "percentage": 99.92, "elapsed_time": "12:53:27", "remaining_time": "0:00:37"}
744
  {"current_steps": 3702, "total_steps": 3703, "epoch": 7.0, "percentage": 99.97, "elapsed_time": "12:54:17", "remaining_time": "0:00:12"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
742
  {"current_steps": 3695, "total_steps": 3703, "loss": 0.1561, "lr": 7.200657808792422e-10, "epoch": 6.9877010406811735, "percentage": 99.78, "elapsed_time": "12:50:24", "remaining_time": "0:01:40"}
743
  {"current_steps": 3700, "total_steps": 3703, "loss": 0.1492, "lr": 1.4223590088180416e-10, "epoch": 6.997161778618732, "percentage": 99.92, "elapsed_time": "12:53:27", "remaining_time": "0:00:37"}
744
  {"current_steps": 3702, "total_steps": 3703, "epoch": 7.0, "percentage": 99.97, "elapsed_time": "12:54:17", "remaining_time": "0:00:12"}
745
+ {"current_steps": 3605, "total_steps": 3703, "loss": 0.1448, "lr": 8.706523924000066e-08, "epoch": 6.815515610217597, "percentage": 97.35, "elapsed_time": "0:03:09", "remaining_time": "0:00:05"}
746
+ {"current_steps": 3610, "total_steps": 3703, "loss": 0.1496, "lr": 7.849846547778983e-08, "epoch": 6.824976348155156, "percentage": 97.49, "elapsed_time": "0:06:11", "remaining_time": "0:00:09"}
747
+ {"current_steps": 3615, "total_steps": 3703, "loss": 0.1498, "lr": 7.03744340325252e-08, "epoch": 6.8344370860927155, "percentage": 97.62, "elapsed_time": "0:09:09", "remaining_time": "0:00:13"}
748
+ {"current_steps": 3620, "total_steps": 3703, "loss": 0.1473, "lr": 6.269332545548068e-08, "epoch": 6.843897824030274, "percentage": 97.76, "elapsed_time": "0:12:14", "remaining_time": "0:00:16"}
749
+ {"current_steps": 3625, "total_steps": 3703, "loss": 0.1514, "lr": 5.5455310454259894e-08, "epoch": 6.853358561967833, "percentage": 97.89, "elapsed_time": "0:15:14", "remaining_time": "0:00:19"}
750
+ {"current_steps": 3630, "total_steps": 3703, "loss": 0.1464, "lr": 4.866054988900581e-08, "epoch": 6.862819299905393, "percentage": 98.03, "elapsed_time": "0:18:16", "remaining_time": "0:00:22"}
751
+ {"current_steps": 3635, "total_steps": 3703, "loss": 0.1487, "lr": 4.230919476881479e-08, "epoch": 6.872280037842952, "percentage": 98.16, "elapsed_time": "0:21:15", "remaining_time": "0:00:23"}
752
+ {"current_steps": 3640, "total_steps": 3703, "loss": 0.1429, "lr": 3.640138624839695e-08, "epoch": 6.881740775780511, "percentage": 98.3, "elapsed_time": "0:24:11", "remaining_time": "0:00:25"}
753
+ {"current_steps": 3645, "total_steps": 3703, "loss": 0.1445, "lr": 3.093725562492544e-08, "epoch": 6.8912015137180695, "percentage": 98.43, "elapsed_time": "0:27:05", "remaining_time": "0:00:25"}
754
+ {"current_steps": 3650, "total_steps": 3703, "loss": 0.148, "lr": 2.591692433511872e-08, "epoch": 6.900662251655629, "percentage": 98.57, "elapsed_time": "0:29:59", "remaining_time": "0:00:26"}
755
+ {"current_steps": 3655, "total_steps": 3703, "loss": 0.1454, "lr": 2.1340503952551606e-08, "epoch": 6.910122989593188, "percentage": 98.7, "elapsed_time": "0:32:48", "remaining_time": "0:00:25"}
756
+ {"current_steps": 3660, "total_steps": 3703, "loss": 0.1392, "lr": 1.720809618516839e-08, "epoch": 6.919583727530747, "percentage": 98.84, "elapsed_time": "0:35:53", "remaining_time": "0:00:25"}
757
+ {"current_steps": 3665, "total_steps": 3703, "loss": 0.1519, "lr": 1.351979287302463e-08, "epoch": 6.929044465468307, "percentage": 98.97, "elapsed_time": "0:38:49", "remaining_time": "0:00:24"}
758
+ {"current_steps": 3670, "total_steps": 3703, "loss": 0.1502, "lr": 1.0275675986242128e-08, "epoch": 6.938505203405866, "percentage": 99.11, "elapsed_time": "0:41:45", "remaining_time": "0:00:22"}
759
+ {"current_steps": 3675, "total_steps": 3703, "loss": 0.1505, "lr": 7.475817623194826e-09, "epoch": 6.9479659413434245, "percentage": 99.24, "elapsed_time": "0:44:34", "remaining_time": "0:00:20"}
760
+ {"current_steps": 3680, "total_steps": 3703, "loss": 0.1467, "lr": 5.120280008901191e-09, "epoch": 6.957426679280984, "percentage": 99.38, "elapsed_time": "0:47:26", "remaining_time": "0:00:17"}
761
+ {"current_steps": 3685, "total_steps": 3703, "loss": 0.1559, "lr": 3.2091154936386705e-09, "epoch": 6.966887417218543, "percentage": 99.51, "elapsed_time": "0:50:27", "remaining_time": "0:00:14"}
762
+ {"current_steps": 3690, "total_steps": 3703, "loss": 0.1487, "lr": 1.7423665517868338e-09, "epoch": 6.976348155156102, "percentage": 99.65, "elapsed_time": "0:53:23", "remaining_time": "0:00:11"}
763
+ {"current_steps": 3695, "total_steps": 3703, "loss": 0.1516, "lr": 7.200657808792422e-10, "epoch": 6.985808893093662, "percentage": 99.78, "elapsed_time": "0:56:19", "remaining_time": "0:00:07"}
764
+ {"current_steps": 3700, "total_steps": 3703, "loss": 0.1429, "lr": 1.4223590088180416e-10, "epoch": 6.995269631031221, "percentage": 99.92, "elapsed_time": "0:59:16", "remaining_time": "0:00:02"}
765
+ {"current_steps": 3702, "total_steps": 3703, "epoch": 6.999053926206244, "percentage": 99.97, "elapsed_time": "1:00:42", "remaining_time": "0:00:00"}
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f6193931cf16e75d3fe954f4e5c4e4a841a67cdad5f57f6a8bf44c9ebfede748
3
  size 8657
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67a7ae46b29cadb9f5b193cd283f0ee1c7dd373bb344784a7b2a44b03782efbd
3
  size 8657