rbelanec commited on
Commit
dafa695
·
verified ·
1 Parent(s): 2c775b6

Training in progress, step 270

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +18 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c410bba6d892fb1bb33cca0d4dfc1b3464ab400d1b745e636ad4c669100e182
3
  size 58745928
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7341b76583f7277ca45baf9d3b9754b7d09b72faa61d5dee1e4a9fa24aa2f96
3
  size 58745928
trainer_log.jsonl CHANGED
@@ -45,3 +45,21 @@
45
  {"current_steps": 215, "total_steps": 1800, "loss": 0.0034, "lr": 4.994567744093703e-05, "epoch": 2.388888888888889, "percentage": 11.94, "elapsed_time": "0:00:48", "remaining_time": "0:05:59", "throughput": 1386.65, "total_tokens": 67552}
46
  {"current_steps": 220, "total_steps": 1800, "loss": 0.0821, "lr": 4.992853359792444e-05, "epoch": 2.4444444444444446, "percentage": 12.22, "elapsed_time": "0:00:49", "remaining_time": "0:05:57", "throughput": 1390.46, "total_tokens": 69120}
47
  {"current_steps": 225, "total_steps": 1800, "loss": 0.1169, "lr": 4.9909046049328846e-05, "epoch": 2.5, "percentage": 12.5, "elapsed_time": "0:00:50", "remaining_time": "0:05:54", "throughput": 1394.4, "total_tokens": 70688}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  {"current_steps": 215, "total_steps": 1800, "loss": 0.0034, "lr": 4.994567744093703e-05, "epoch": 2.388888888888889, "percentage": 11.94, "elapsed_time": "0:00:48", "remaining_time": "0:05:59", "throughput": 1386.65, "total_tokens": 67552}
46
  {"current_steps": 220, "total_steps": 1800, "loss": 0.0821, "lr": 4.992853359792444e-05, "epoch": 2.4444444444444446, "percentage": 12.22, "elapsed_time": "0:00:49", "remaining_time": "0:05:57", "throughput": 1390.46, "total_tokens": 69120}
47
  {"current_steps": 225, "total_steps": 1800, "loss": 0.1169, "lr": 4.9909046049328846e-05, "epoch": 2.5, "percentage": 12.5, "elapsed_time": "0:00:50", "remaining_time": "0:05:54", "throughput": 1394.4, "total_tokens": 70688}
48
+ {"current_steps": 230, "total_steps": 1800, "loss": 0.0178, "lr": 4.988721662731083e-05, "epoch": 2.5555555555555554, "percentage": 12.78, "elapsed_time": "0:00:51", "remaining_time": "0:05:52", "throughput": 1398.34, "total_tokens": 72288}
49
+ {"current_steps": 235, "total_steps": 1800, "loss": 0.0005, "lr": 4.9863047384206835e-05, "epoch": 2.611111111111111, "percentage": 13.06, "elapsed_time": "0:00:52", "remaining_time": "0:05:50", "throughput": 1401.67, "total_tokens": 73856}
50
+ {"current_steps": 240, "total_steps": 1800, "loss": 0.0029, "lr": 4.983654059233626e-05, "epoch": 2.6666666666666665, "percentage": 13.33, "elapsed_time": "0:00:53", "remaining_time": "0:05:48", "throughput": 1404.39, "total_tokens": 75392}
51
+ {"current_steps": 245, "total_steps": 1800, "loss": 0.0065, "lr": 4.9807698743787744e-05, "epoch": 2.7222222222222223, "percentage": 13.61, "elapsed_time": "0:00:54", "remaining_time": "0:05:47", "throughput": 1407.52, "total_tokens": 76960}
52
+ {"current_steps": 250, "total_steps": 1800, "loss": 0.0153, "lr": 4.9776524550184965e-05, "epoch": 2.7777777777777777, "percentage": 13.89, "elapsed_time": "0:00:55", "remaining_time": "0:05:45", "throughput": 1410.04, "total_tokens": 78496}
53
+ {"current_steps": 255, "total_steps": 1800, "loss": 0.0017, "lr": 4.974302094243164e-05, "epoch": 2.8333333333333335, "percentage": 14.17, "elapsed_time": "0:00:56", "remaining_time": "0:05:43", "throughput": 1411.73, "total_tokens": 80000}
54
+ {"current_steps": 260, "total_steps": 1800, "loss": 0.0003, "lr": 4.970719107043595e-05, "epoch": 2.888888888888889, "percentage": 14.44, "elapsed_time": "0:00:57", "remaining_time": "0:05:41", "throughput": 1414.46, "total_tokens": 81568}
55
+ {"current_steps": 265, "total_steps": 1800, "loss": 0.0911, "lr": 4.966903830281449e-05, "epoch": 2.9444444444444446, "percentage": 14.72, "elapsed_time": "0:00:58", "remaining_time": "0:05:39", "throughput": 1417.73, "total_tokens": 83168}
56
+ {"current_steps": 270, "total_steps": 1800, "loss": 0.0001, "lr": 4.962856622657541e-05, "epoch": 3.0, "percentage": 15.0, "elapsed_time": "0:00:59", "remaining_time": "0:05:38", "throughput": 1419.81, "total_tokens": 84736}
57
+ {"current_steps": 270, "total_steps": 1800, "eval_loss": 0.031980086117982864, "epoch": 3.0, "percentage": 15.0, "elapsed_time": "0:01:00", "remaining_time": "0:05:41", "throughput": 1405.29, "total_tokens": 84736}
58
+ {"current_steps": 275, "total_steps": 1800, "loss": 0.0, "lr": 4.9585778646781364e-05, "epoch": 3.0555555555555554, "percentage": 15.28, "elapsed_time": "0:01:02", "remaining_time": "0:05:47", "throughput": 1377.06, "total_tokens": 86304}
59
+ {"current_steps": 280, "total_steps": 1800, "loss": 0.0, "lr": 4.9540679586191605e-05, "epoch": 3.111111111111111, "percentage": 15.56, "elapsed_time": "0:01:03", "remaining_time": "0:05:45", "throughput": 1380.66, "total_tokens": 87904}
60
+ {"current_steps": 285, "total_steps": 1800, "loss": 0.0002, "lr": 4.9493273284883854e-05, "epoch": 3.1666666666666665, "percentage": 15.83, "elapsed_time": "0:01:04", "remaining_time": "0:05:43", "throughput": 1382.5, "total_tokens": 89408}
61
+ {"current_steps": 290, "total_steps": 1800, "loss": 0.0431, "lr": 4.9443564199855666e-05, "epoch": 3.2222222222222223, "percentage": 16.11, "elapsed_time": "0:01:06", "remaining_time": "0:05:43", "throughput": 1377.76, "total_tokens": 91008}
62
+ {"current_steps": 295, "total_steps": 1800, "loss": 0.0001, "lr": 4.939155700460536e-05, "epoch": 3.2777777777777777, "percentage": 16.39, "elapsed_time": "0:01:07", "remaining_time": "0:05:42", "throughput": 1379.43, "total_tokens": 92512}
63
+ {"current_steps": 300, "total_steps": 1800, "loss": 0.0001, "lr": 4.933725658869267e-05, "epoch": 3.3333333333333335, "percentage": 16.67, "elapsed_time": "0:01:08", "remaining_time": "0:05:40", "throughput": 1382.03, "total_tokens": 94080}
64
+ {"current_steps": 305, "total_steps": 1800, "loss": 0.0003, "lr": 4.9280668057279014e-05, "epoch": 3.388888888888889, "percentage": 16.94, "elapsed_time": "0:01:09", "remaining_time": "0:05:38", "throughput": 1385.12, "total_tokens": 95680}
65
+ {"current_steps": 310, "total_steps": 1800, "loss": 0.0001, "lr": 4.9221796730647516e-05, "epoch": 3.4444444444444446, "percentage": 17.22, "elapsed_time": "0:01:10", "remaining_time": "0:05:36", "throughput": 1387.7, "total_tokens": 97248}