rbelanec commited on
Commit
8f009a1
·
verified ·
1 Parent(s): e83a6e3

Training in progress, step 5360

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +50 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:50efa0f6a7f8849abc517fadb2f8a0195435d66b5ecfe2fff7a31c9180c720cb
3
  size 26214528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa9424d0e7b2a16fb66db6aae2adcbb0ed8b34e7486d838cbd29f765d5d82122
3
  size 26214528
trainer_log.jsonl CHANGED
@@ -1041,3 +1041,53 @@
1041
  {"current_steps": 5110, "total_steps": 5360, "loss": 5.6455, "lr": 3.332539971966836e-07, "epoch": 9.533582089552239, "percentage": 95.34, "elapsed_time": "0:08:51", "remaining_time": "0:00:26", "throughput": 2732.02, "total_tokens": 1452304}
1042
  {"current_steps": 5115, "total_steps": 5360, "loss": 6.4375, "lr": 3.201373149577247e-07, "epoch": 9.542910447761194, "percentage": 95.43, "elapsed_time": "0:08:52", "remaining_time": "0:00:25", "throughput": 2732.73, "total_tokens": 1453968}
1043
  {"current_steps": 5120, "total_steps": 5360, "loss": 5.8376, "lr": 3.0728231089907634e-07, "epoch": 9.552238805970148, "percentage": 95.52, "elapsed_time": "0:08:52", "remaining_time": "0:00:24", "throughput": 2733.07, "total_tokens": 1455376}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1041
  {"current_steps": 5110, "total_steps": 5360, "loss": 5.6455, "lr": 3.332539971966836e-07, "epoch": 9.533582089552239, "percentage": 95.34, "elapsed_time": "0:08:51", "remaining_time": "0:00:26", "throughput": 2732.02, "total_tokens": 1452304}
1042
  {"current_steps": 5115, "total_steps": 5360, "loss": 6.4375, "lr": 3.201373149577247e-07, "epoch": 9.542910447761194, "percentage": 95.43, "elapsed_time": "0:08:52", "remaining_time": "0:00:25", "throughput": 2732.73, "total_tokens": 1453968}
1043
  {"current_steps": 5120, "total_steps": 5360, "loss": 5.8376, "lr": 3.0728231089907634e-07, "epoch": 9.552238805970148, "percentage": 95.52, "elapsed_time": "0:08:52", "remaining_time": "0:00:24", "throughput": 2733.07, "total_tokens": 1455376}
1044
+ {"current_steps": 5125, "total_steps": 5360, "loss": 5.6522, "lr": 2.946891213211012e-07, "epoch": 9.561567164179104, "percentage": 95.62, "elapsed_time": "0:08:52", "remaining_time": "0:00:24", "throughput": 2733.45, "total_tokens": 1456816}
1045
+ {"current_steps": 5130, "total_steps": 5360, "loss": 6.4457, "lr": 2.823578797481574e-07, "epoch": 9.57089552238806, "percentage": 95.71, "elapsed_time": "0:08:53", "remaining_time": "0:00:23", "throughput": 2733.92, "total_tokens": 1458320}
1046
+ {"current_steps": 5135, "total_steps": 5360, "loss": 5.3658, "lr": 2.7028871692720003e-07, "epoch": 9.580223880597014, "percentage": 95.8, "elapsed_time": "0:08:53", "remaining_time": "0:00:23", "throughput": 2734.15, "total_tokens": 1459632}
1047
+ {"current_steps": 5140, "total_steps": 5360, "loss": 5.9502, "lr": 2.5848176082639007e-07, "epoch": 9.58955223880597, "percentage": 95.9, "elapsed_time": "0:08:54", "remaining_time": "0:00:22", "throughput": 2734.42, "total_tokens": 1461008}
1048
+ {"current_steps": 5145, "total_steps": 5360, "loss": 6.4102, "lr": 2.4693713663372644e-07, "epoch": 9.598880597014926, "percentage": 95.99, "elapsed_time": "0:08:54", "remaining_time": "0:00:22", "throughput": 2734.5, "total_tokens": 1462224}
1049
+ {"current_steps": 5150, "total_steps": 5360, "loss": 6.4049, "lr": 2.3565496675574118e-07, "epoch": 9.60820895522388, "percentage": 96.08, "elapsed_time": "0:08:55", "remaining_time": "0:00:21", "throughput": 2734.9, "total_tokens": 1463664}
1050
+ {"current_steps": 5155, "total_steps": 5360, "loss": 6.2554, "lr": 2.246353708161758e-07, "epoch": 9.617537313432836, "percentage": 96.18, "elapsed_time": "0:08:55", "remaining_time": "0:00:21", "throughput": 2735.37, "total_tokens": 1465168}
1051
+ {"current_steps": 5160, "total_steps": 5360, "loss": 5.9242, "lr": 2.1387846565474045e-07, "epoch": 9.626865671641792, "percentage": 96.27, "elapsed_time": "0:08:56", "remaining_time": "0:00:20", "throughput": 2735.65, "total_tokens": 1466544}
1052
+ {"current_steps": 5165, "total_steps": 5360, "loss": 5.4619, "lr": 2.0338436532584827e-07, "epoch": 9.636194029850746, "percentage": 96.36, "elapsed_time": "0:08:56", "remaining_time": "0:00:20", "throughput": 2736.25, "total_tokens": 1468176}
1053
+ {"current_steps": 5170, "total_steps": 5360, "loss": 6.3981, "lr": 1.9315318109742465e-07, "epoch": 9.645522388059701, "percentage": 96.46, "elapsed_time": "0:08:57", "remaining_time": "0:00:19", "throughput": 2736.71, "total_tokens": 1469680}
1054
+ {"current_steps": 5175, "total_steps": 5360, "loss": 6.2368, "lr": 1.831850214497194e-07, "epoch": 9.654850746268657, "percentage": 96.55, "elapsed_time": "0:08:57", "remaining_time": "0:00:19", "throughput": 2736.96, "total_tokens": 1471024}
1055
+ {"current_steps": 5180, "total_steps": 5360, "loss": 6.3969, "lr": 1.7347999207415478e-07, "epoch": 9.664179104477611, "percentage": 96.64, "elapsed_time": "0:08:57", "remaining_time": "0:00:18", "throughput": 2737.26, "total_tokens": 1472464}
1056
+ {"current_steps": 5185, "total_steps": 5360, "loss": 5.6938, "lr": 1.6403819587221814e-07, "epoch": 9.673507462686567, "percentage": 96.74, "elapsed_time": "0:08:58", "remaining_time": "0:00:18", "throughput": 2737.55, "total_tokens": 1473904}
1057
+ {"current_steps": 5190, "total_steps": 5360, "loss": 6.3233, "lr": 1.5485973295434885e-07, "epoch": 9.682835820895523, "percentage": 96.83, "elapsed_time": "0:08:58", "remaining_time": "0:00:17", "throughput": 2738.0, "total_tokens": 1475408}
1058
+ {"current_steps": 5195, "total_steps": 5360, "loss": 6.5344, "lr": 1.4594470063890308e-07, "epoch": 9.692164179104477, "percentage": 96.92, "elapsed_time": "0:08:59", "remaining_time": "0:00:17", "throughput": 2738.3, "total_tokens": 1476784}
1059
+ {"current_steps": 5200, "total_steps": 5360, "loss": 5.6072, "lr": 1.3729319345109348e-07, "epoch": 9.701492537313433, "percentage": 97.01, "elapsed_time": "0:08:59", "remaining_time": "0:00:16", "throughput": 2738.6, "total_tokens": 1478160}
1060
+ {"current_steps": 5205, "total_steps": 5360, "loss": 6.325, "lr": 1.2890530312200945e-07, "epoch": 9.710820895522389, "percentage": 97.11, "elapsed_time": "0:09:00", "remaining_time": "0:00:16", "throughput": 2738.96, "total_tokens": 1479600}
1061
+ {"current_steps": 5210, "total_steps": 5360, "loss": 6.7264, "lr": 1.207811185876373e-07, "epoch": 9.720149253731343, "percentage": 97.2, "elapsed_time": "0:09:00", "remaining_time": "0:00:15", "throughput": 2739.16, "total_tokens": 1480944}
1062
+ {"current_steps": 5215, "total_steps": 5360, "loss": 6.4153, "lr": 1.1292072598791114e-07, "epoch": 9.729477611940299, "percentage": 97.29, "elapsed_time": "0:09:01", "remaining_time": "0:00:15", "throughput": 2739.61, "total_tokens": 1482448}
1063
+ {"current_steps": 5220, "total_steps": 5360, "loss": 6.253, "lr": 1.0532420866581072e-07, "epoch": 9.738805970149254, "percentage": 97.39, "elapsed_time": "0:09:01", "remaining_time": "0:00:14", "throughput": 2740.05, "total_tokens": 1483920}
1064
+ {"current_steps": 5225, "total_steps": 5360, "loss": 6.2562, "lr": 9.799164716646769e-08, "epoch": 9.748134328358208, "percentage": 97.48, "elapsed_time": "0:09:02", "remaining_time": "0:00:14", "throughput": 2740.24, "total_tokens": 1485232}
1065
+ {"current_steps": 5230, "total_steps": 5360, "loss": 5.9339, "lr": 9.092311923632191e-08, "epoch": 9.757462686567164, "percentage": 97.57, "elapsed_time": "0:09:02", "remaining_time": "0:00:13", "throughput": 2740.56, "total_tokens": 1486640}
1066
+ {"current_steps": 5235, "total_steps": 5360, "loss": 5.9634, "lr": 8.411869982228038e-08, "epoch": 9.76679104477612, "percentage": 97.67, "elapsed_time": "0:09:02", "remaining_time": "0:00:12", "throughput": 2740.93, "total_tokens": 1488080}
1067
+ {"current_steps": 5240, "total_steps": 5360, "loss": 6.3032, "lr": 7.757846107094291e-08, "epoch": 9.776119402985074, "percentage": 97.76, "elapsed_time": "0:09:03", "remaining_time": "0:00:12", "throughput": 2741.18, "total_tokens": 1489456}
1068
+ {"current_steps": 5245, "total_steps": 5360, "loss": 5.7682, "lr": 7.130247232782216e-08, "epoch": 9.78544776119403, "percentage": 97.85, "elapsed_time": "0:09:03", "remaining_time": "0:00:11", "throughput": 2741.58, "total_tokens": 1491024}
1069
+ {"current_steps": 5250, "total_steps": 5360, "loss": 5.2717, "lr": 6.529080013661648e-08, "epoch": 9.794776119402986, "percentage": 97.95, "elapsed_time": "0:09:04", "remaining_time": "0:00:11", "throughput": 2741.93, "total_tokens": 1492432}
1070
+ {"current_steps": 5255, "total_steps": 5360, "loss": 6.1187, "lr": 5.954350823850208e-08, "epoch": 9.80410447761194, "percentage": 98.04, "elapsed_time": "0:09:04", "remaining_time": "0:00:10", "throughput": 2742.47, "total_tokens": 1494064}
1071
+ {"current_steps": 5260, "total_steps": 5360, "loss": 5.6411, "lr": 5.4060657571453064e-08, "epoch": 9.813432835820896, "percentage": 98.13, "elapsed_time": "0:09:05", "remaining_time": "0:00:10", "throughput": 2742.88, "total_tokens": 1495536}
1072
+ {"current_steps": 5265, "total_steps": 5360, "loss": 5.9149, "lr": 4.884230626960307e-08, "epoch": 9.822761194029852, "percentage": 98.23, "elapsed_time": "0:09:05", "remaining_time": "0:00:09", "throughput": 2743.09, "total_tokens": 1496848}
1073
+ {"current_steps": 5270, "total_steps": 5360, "loss": 6.223, "lr": 4.388850966261793e-08, "epoch": 9.832089552238806, "percentage": 98.32, "elapsed_time": "0:09:06", "remaining_time": "0:00:09", "throughput": 2743.44, "total_tokens": 1498352}
1074
+ {"current_steps": 5275, "total_steps": 5360, "loss": 6.0281, "lr": 3.919932027512674e-08, "epoch": 9.841417910447761, "percentage": 98.41, "elapsed_time": "0:09:06", "remaining_time": "0:00:08", "throughput": 2743.82, "total_tokens": 1499824}
1075
+ {"current_steps": 5280, "total_steps": 5360, "loss": 6.8476, "lr": 3.477478782614452e-08, "epoch": 9.850746268656717, "percentage": 98.51, "elapsed_time": "0:09:07", "remaining_time": "0:00:08", "throughput": 2744.28, "total_tokens": 1501328}
1076
+ {"current_steps": 5285, "total_steps": 5360, "loss": 6.3168, "lr": 3.061495922855873e-08, "epoch": 9.860074626865671, "percentage": 98.6, "elapsed_time": "0:09:07", "remaining_time": "0:00:07", "throughput": 2744.49, "total_tokens": 1502640}
1077
+ {"current_steps": 5290, "total_steps": 5360, "loss": 6.3805, "lr": 2.67198785886269e-08, "epoch": 9.869402985074627, "percentage": 98.69, "elapsed_time": "0:09:07", "remaining_time": "0:00:07", "throughput": 2744.94, "total_tokens": 1504144}
1078
+ {"current_steps": 5295, "total_steps": 5360, "loss": 6.0447, "lr": 2.3089587205507578e-08, "epoch": 9.878731343283581, "percentage": 98.79, "elapsed_time": "0:09:08", "remaining_time": "0:00:06", "throughput": 2745.32, "total_tokens": 1505584}
1079
+ {"current_steps": 5300, "total_steps": 5360, "loss": 5.8787, "lr": 1.972412357083009e-08, "epoch": 9.888059701492537, "percentage": 98.88, "elapsed_time": "0:09:08", "remaining_time": "0:00:06", "throughput": 2745.9, "total_tokens": 1507184}
1080
+ {"current_steps": 5305, "total_steps": 5360, "loss": 5.76, "lr": 1.662352336827544e-08, "epoch": 9.897388059701493, "percentage": 98.97, "elapsed_time": "0:09:09", "remaining_time": "0:00:05", "throughput": 2746.31, "total_tokens": 1508656}
1081
+ {"current_steps": 5310, "total_steps": 5360, "loss": 5.9212, "lr": 1.3787819473207176e-08, "epoch": 9.906716417910447, "percentage": 99.07, "elapsed_time": "0:09:09", "remaining_time": "0:00:05", "throughput": 2746.8, "total_tokens": 1510192}
1082
+ {"current_steps": 5315, "total_steps": 5360, "loss": 6.5124, "lr": 1.1217041952313323e-08, "epoch": 9.916044776119403, "percentage": 99.16, "elapsed_time": "0:09:10", "remaining_time": "0:00:04", "throughput": 2747.16, "total_tokens": 1511632}
1083
+ {"current_steps": 5320, "total_steps": 5360, "loss": 6.0284, "lr": 8.91121806330386e-09, "epoch": 9.925373134328359, "percentage": 99.25, "elapsed_time": "0:09:10", "remaining_time": "0:00:04", "throughput": 2747.45, "total_tokens": 1513008}
1084
+ {"current_steps": 5325, "total_steps": 5360, "loss": 6.1722, "lr": 6.870372254602631e-09, "epoch": 9.934701492537313, "percentage": 99.35, "elapsed_time": "0:09:11", "remaining_time": "0:00:03", "throughput": 2747.76, "total_tokens": 1514416}
1085
+ {"current_steps": 5330, "total_steps": 5360, "loss": 6.1963, "lr": 5.09452616509476e-09, "epoch": 9.944029850746269, "percentage": 99.44, "elapsed_time": "0:09:11", "remaining_time": "0:00:03", "throughput": 2748.11, "total_tokens": 1515856}
1086
+ {"current_steps": 5335, "total_steps": 5360, "loss": 5.8672, "lr": 3.58369862391017e-09, "epoch": 9.953358208955224, "percentage": 99.53, "elapsed_time": "0:09:12", "remaining_time": "0:00:02", "throughput": 2748.44, "total_tokens": 1517328}
1087
+ {"current_steps": 5340, "total_steps": 5360, "loss": 5.6686, "lr": 2.3379056502015327e-09, "epoch": 9.962686567164178, "percentage": 99.63, "elapsed_time": "0:09:12", "remaining_time": "0:00:02", "throughput": 2748.75, "total_tokens": 1518800}
1088
+ {"current_steps": 5345, "total_steps": 5360, "loss": 5.7095, "lr": 1.357160452988837e-09, "epoch": 9.972014925373134, "percentage": 99.72, "elapsed_time": "0:09:12", "remaining_time": "0:00:01", "throughput": 2749.07, "total_tokens": 1520208}
1089
+ {"current_steps": 5350, "total_steps": 5360, "loss": 6.2756, "lr": 6.414734310233872e-10, "epoch": 9.98134328358209, "percentage": 99.81, "elapsed_time": "0:09:13", "remaining_time": "0:00:01", "throughput": 2749.38, "total_tokens": 1521616}
1090
+ {"current_steps": 5355, "total_steps": 5360, "loss": 6.6308, "lr": 1.9085217266290312e-10, "epoch": 9.990671641791044, "percentage": 99.91, "elapsed_time": "0:09:13", "remaining_time": "0:00:00", "throughput": 2749.67, "total_tokens": 1522992}
1091
+ {"current_steps": 5360, "total_steps": 5360, "loss": 5.5584, "lr": 5.3014557993558144e-12, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:09:14", "remaining_time": "0:00:00", "throughput": 2749.19, "total_tokens": 1524216}
1092
+ {"current_steps": 5360, "total_steps": 5360, "eval_loss": 6.148874282836914, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:09:17", "remaining_time": "0:00:00", "throughput": 2736.46, "total_tokens": 1524216}
1093
+ {"current_steps": 5360, "total_steps": 5360, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:09:18", "remaining_time": "0:00:00", "throughput": 2731.47, "total_tokens": 1524216}