rbelanec commited on
Commit
32d7efa
·
verified ·
1 Parent(s): ef036fa

Training in progress, step 4000

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +41 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:912d356c241ccd97067350c46d0cc9e21bc972e73b5bbecabc51b871cc19084f
3
  size 182352
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41c8914c8fc4c4860c7e11a1aa07ef19fe570180d7a06261ff3328aeef345f91
3
  size 182352
trainer_log.jsonl CHANGED
@@ -777,3 +777,44 @@
777
  {"current_steps": 3795, "total_steps": 20000, "loss": 0.1618, "lr": 4.569024894915547e-05, "epoch": 0.6872043278480726, "percentage": 18.98, "elapsed_time": "2:42:31", "remaining_time": "11:33:58", "throughput": 2760.2, "total_tokens": 26915328}
778
  {"current_steps": 3800, "total_steps": 20000, "loss": 0.1459, "lr": 4.567922139802574e-05, "epoch": 0.6881097353946445, "percentage": 19.0, "elapsed_time": "2:42:38", "remaining_time": "11:33:20", "throughput": 2761.87, "total_tokens": 26950464}
779
  {"current_steps": 3800, "total_steps": 20000, "eval_loss": 0.16453325748443604, "epoch": 0.6881097353946445, "percentage": 19.0, "elapsed_time": "2:46:48", "remaining_time": "11:51:09", "throughput": 2692.67, "total_tokens": 26950464}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
777
  {"current_steps": 3795, "total_steps": 20000, "loss": 0.1618, "lr": 4.569024894915547e-05, "epoch": 0.6872043278480726, "percentage": 18.98, "elapsed_time": "2:42:31", "remaining_time": "11:33:58", "throughput": 2760.2, "total_tokens": 26915328}
778
  {"current_steps": 3800, "total_steps": 20000, "loss": 0.1459, "lr": 4.567922139802574e-05, "epoch": 0.6881097353946445, "percentage": 19.0, "elapsed_time": "2:42:38", "remaining_time": "11:33:20", "throughput": 2761.87, "total_tokens": 26950464}
779
  {"current_steps": 3800, "total_steps": 20000, "eval_loss": 0.16453325748443604, "epoch": 0.6881097353946445, "percentage": 19.0, "elapsed_time": "2:46:48", "remaining_time": "11:51:09", "throughput": 2692.67, "total_tokens": 26950464}
780
+ {"current_steps": 3805, "total_steps": 20000, "loss": 0.1604, "lr": 4.566818109091325e-05, "epoch": 0.6890151429412165, "percentage": 19.02, "elapsed_time": "2:46:57", "remaining_time": "11:50:35", "throughput": 2694.16, "total_tokens": 26987712}
781
+ {"current_steps": 3810, "total_steps": 20000, "loss": 0.1638, "lr": 4.5657128034628235e-05, "epoch": 0.6899205504877883, "percentage": 19.05, "elapsed_time": "2:47:03", "remaining_time": "11:49:55", "throughput": 2695.84, "total_tokens": 27022976}
782
+ {"current_steps": 3815, "total_steps": 20000, "loss": 0.1567, "lr": 4.5646062235988766e-05, "epoch": 0.6908259580343602, "percentage": 19.07, "elapsed_time": "2:47:10", "remaining_time": "11:49:15", "throughput": 2697.46, "total_tokens": 27057600}
783
+ {"current_steps": 3820, "total_steps": 20000, "loss": 0.1585, "lr": 4.5634983701820776e-05, "epoch": 0.6917313655809321, "percentage": 19.1, "elapsed_time": "2:47:17", "remaining_time": "11:48:35", "throughput": 2699.18, "total_tokens": 27093376}
784
+ {"current_steps": 3825, "total_steps": 20000, "loss": 0.1713, "lr": 4.5623892438958074e-05, "epoch": 0.692636773127504, "percentage": 19.12, "elapsed_time": "2:47:24", "remaining_time": "11:47:55", "throughput": 2701.02, "total_tokens": 27130560}
785
+ {"current_steps": 3830, "total_steps": 20000, "loss": 0.1624, "lr": 4.5612788454242294e-05, "epoch": 0.6935421806740759, "percentage": 19.15, "elapsed_time": "2:47:31", "remaining_time": "11:47:16", "throughput": 2702.56, "total_tokens": 27164480}
786
+ {"current_steps": 3835, "total_steps": 20000, "loss": 0.1618, "lr": 4.5601671754522944e-05, "epoch": 0.6944475882206478, "percentage": 19.18, "elapsed_time": "2:47:38", "remaining_time": "11:46:36", "throughput": 2704.3, "total_tokens": 27200448}
787
+ {"current_steps": 3840, "total_steps": 20000, "loss": 0.1547, "lr": 4.559054234665735e-05, "epoch": 0.6953529957672198, "percentage": 19.2, "elapsed_time": "2:47:45", "remaining_time": "11:45:57", "throughput": 2706.11, "total_tokens": 27237184}
788
+ {"current_steps": 3845, "total_steps": 20000, "loss": 0.1678, "lr": 4.557940023751071e-05, "epoch": 0.6962584033137916, "percentage": 19.23, "elapsed_time": "2:47:51", "remaining_time": "11:45:17", "throughput": 2707.8, "total_tokens": 27272704}
789
+ {"current_steps": 3850, "total_steps": 20000, "loss": 0.1634, "lr": 4.5568245433956024e-05, "epoch": 0.6971638108603635, "percentage": 19.25, "elapsed_time": "2:47:58", "remaining_time": "11:44:38", "throughput": 2709.48, "total_tokens": 27308096}
790
+ {"current_steps": 3855, "total_steps": 20000, "loss": 0.173, "lr": 4.555707794287414e-05, "epoch": 0.6980692184069355, "percentage": 19.28, "elapsed_time": "2:48:05", "remaining_time": "11:43:59", "throughput": 2711.13, "total_tokens": 27343296}
791
+ {"current_steps": 3860, "total_steps": 20000, "loss": 0.1474, "lr": 4.554589777115371e-05, "epoch": 0.6989746259535073, "percentage": 19.3, "elapsed_time": "2:48:12", "remaining_time": "11:43:20", "throughput": 2712.85, "total_tokens": 27379392}
792
+ {"current_steps": 3865, "total_steps": 20000, "loss": 0.1716, "lr": 4.553470492569125e-05, "epoch": 0.6998800335000792, "percentage": 19.32, "elapsed_time": "2:48:19", "remaining_time": "11:42:41", "throughput": 2714.4, "total_tokens": 27413568}
793
+ {"current_steps": 3870, "total_steps": 20000, "loss": 0.1484, "lr": 4.552349941339106e-05, "epoch": 0.7007854410466511, "percentage": 19.35, "elapsed_time": "2:48:26", "remaining_time": "11:42:01", "throughput": 2715.98, "total_tokens": 27448064}
794
+ {"current_steps": 3875, "total_steps": 20000, "loss": 0.1577, "lr": 4.551228124116526e-05, "epoch": 0.7016908485932231, "percentage": 19.38, "elapsed_time": "2:48:32", "remaining_time": "11:41:22", "throughput": 2717.68, "total_tokens": 27483840}
795
+ {"current_steps": 3880, "total_steps": 20000, "loss": 0.1637, "lr": 4.550105041593378e-05, "epoch": 0.7025962561397949, "percentage": 19.4, "elapsed_time": "2:48:39", "remaining_time": "11:40:44", "throughput": 2719.23, "total_tokens": 27518016}
796
+ {"current_steps": 3885, "total_steps": 20000, "loss": 0.1639, "lr": 4.5489806944624366e-05, "epoch": 0.7035016636863668, "percentage": 19.43, "elapsed_time": "2:48:46", "remaining_time": "11:40:05", "throughput": 2720.91, "total_tokens": 27553664}
797
+ {"current_steps": 3890, "total_steps": 20000, "loss": 0.1344, "lr": 4.547855083417256e-05, "epoch": 0.7044070712329388, "percentage": 19.45, "elapsed_time": "2:48:53", "remaining_time": "11:39:26", "throughput": 2722.67, "total_tokens": 27590144}
798
+ {"current_steps": 3895, "total_steps": 20000, "loss": 0.153, "lr": 4.546728209152168e-05, "epoch": 0.7053124787795106, "percentage": 19.48, "elapsed_time": "2:49:00", "remaining_time": "11:38:48", "throughput": 2724.28, "total_tokens": 27625152}
799
+ {"current_steps": 3900, "total_steps": 20000, "loss": 0.1645, "lr": 4.545600072362286e-05, "epoch": 0.7062178863260825, "percentage": 19.5, "elapsed_time": "2:49:07", "remaining_time": "11:38:09", "throughput": 2725.88, "total_tokens": 27659904}
800
+ {"current_steps": 3905, "total_steps": 20000, "loss": 0.1542, "lr": 4.5444706737435014e-05, "epoch": 0.7071232938726544, "percentage": 19.53, "elapsed_time": "2:49:14", "remaining_time": "11:37:31", "throughput": 2727.57, "total_tokens": 27695872}
801
+ {"current_steps": 3910, "total_steps": 20000, "loss": 0.1615, "lr": 4.543340013992484e-05, "epoch": 0.7080287014192264, "percentage": 19.55, "elapsed_time": "2:49:20", "remaining_time": "11:36:52", "throughput": 2729.1, "total_tokens": 27730048}
802
+ {"current_steps": 3915, "total_steps": 20000, "loss": 0.1663, "lr": 4.542208093806682e-05, "epoch": 0.7089341089657982, "percentage": 19.57, "elapsed_time": "2:49:27", "remaining_time": "11:36:14", "throughput": 2730.76, "total_tokens": 27765568}
803
+ {"current_steps": 3920, "total_steps": 20000, "loss": 0.1616, "lr": 4.541074913884321e-05, "epoch": 0.7098395165123701, "percentage": 19.6, "elapsed_time": "2:49:34", "remaining_time": "11:35:36", "throughput": 2732.48, "total_tokens": 27801856}
804
+ {"current_steps": 3925, "total_steps": 20000, "loss": 0.1653, "lr": 4.5399404749244026e-05, "epoch": 0.7107449240589421, "percentage": 19.62, "elapsed_time": "2:49:41", "remaining_time": "11:34:58", "throughput": 2734.27, "total_tokens": 27838848}
805
+ {"current_steps": 3930, "total_steps": 20000, "loss": 0.161, "lr": 4.538804777626705e-05, "epoch": 0.7116503316055139, "percentage": 19.65, "elapsed_time": "2:49:48", "remaining_time": "11:34:20", "throughput": 2735.83, "total_tokens": 27873472}
806
+ {"current_steps": 3935, "total_steps": 20000, "loss": 0.1472, "lr": 4.5376678226917845e-05, "epoch": 0.7125557391520858, "percentage": 19.68, "elapsed_time": "2:49:55", "remaining_time": "11:33:42", "throughput": 2737.58, "total_tokens": 27910144}
807
+ {"current_steps": 3940, "total_steps": 20000, "loss": 0.1694, "lr": 4.536529610820972e-05, "epoch": 0.7134611466986578, "percentage": 19.7, "elapsed_time": "2:50:02", "remaining_time": "11:33:04", "throughput": 2739.16, "total_tokens": 27944896}
808
+ {"current_steps": 3945, "total_steps": 20000, "loss": 0.158, "lr": 4.5353901427163725e-05, "epoch": 0.7143665542452297, "percentage": 19.73, "elapsed_time": "2:50:08", "remaining_time": "11:32:27", "throughput": 2740.87, "total_tokens": 27981120}
809
+ {"current_steps": 3950, "total_steps": 20000, "loss": 0.1641, "lr": 4.534249419080869e-05, "epoch": 0.7152719617918015, "percentage": 19.75, "elapsed_time": "2:50:15", "remaining_time": "11:31:49", "throughput": 2742.39, "total_tokens": 28015424}
810
+ {"current_steps": 3955, "total_steps": 20000, "loss": 0.1566, "lr": 4.5331074406181164e-05, "epoch": 0.7161773693383734, "percentage": 19.78, "elapsed_time": "2:50:22", "remaining_time": "11:31:11", "throughput": 2744.09, "total_tokens": 28051520}
811
+ {"current_steps": 3960, "total_steps": 20000, "loss": 0.1668, "lr": 4.531964208032544e-05, "epoch": 0.7170827768849454, "percentage": 19.8, "elapsed_time": "2:50:29", "remaining_time": "11:30:33", "throughput": 2745.69, "total_tokens": 28086592}
812
+ {"current_steps": 3965, "total_steps": 20000, "loss": 0.1413, "lr": 4.530819722029355e-05, "epoch": 0.7179881844315172, "percentage": 19.82, "elapsed_time": "2:50:36", "remaining_time": "11:29:56", "throughput": 2747.3, "total_tokens": 28121920}
813
+ {"current_steps": 3970, "total_steps": 20000, "loss": 0.1365, "lr": 4.529673983314526e-05, "epoch": 0.7188935919780891, "percentage": 19.85, "elapsed_time": "2:50:43", "remaining_time": "11:29:19", "throughput": 2748.89, "total_tokens": 28156928}
814
+ {"current_steps": 3975, "total_steps": 20000, "loss": 0.1702, "lr": 4.528526992594807e-05, "epoch": 0.7197989995246611, "percentage": 19.88, "elapsed_time": "2:50:49", "remaining_time": "11:28:41", "throughput": 2750.49, "total_tokens": 28192128}
815
+ {"current_steps": 3980, "total_steps": 20000, "loss": 0.1494, "lr": 4.52737875057772e-05, "epoch": 0.720704407071233, "percentage": 19.9, "elapsed_time": "2:50:56", "remaining_time": "11:28:04", "throughput": 2752.23, "total_tokens": 28228736}
816
+ {"current_steps": 3985, "total_steps": 20000, "loss": 0.1655, "lr": 4.5262292579715556e-05, "epoch": 0.7216098146178048, "percentage": 19.93, "elapsed_time": "2:51:03", "remaining_time": "11:27:27", "throughput": 2753.97, "total_tokens": 28265728}
817
+ {"current_steps": 3990, "total_steps": 20000, "loss": 0.1504, "lr": 4.5250785154853814e-05, "epoch": 0.7225152221643767, "percentage": 19.95, "elapsed_time": "2:51:10", "remaining_time": "11:26:50", "throughput": 2755.69, "total_tokens": 28302144}
818
+ {"current_steps": 3995, "total_steps": 20000, "loss": 0.1537, "lr": 4.523926523829032e-05, "epoch": 0.7234206297109487, "percentage": 19.98, "elapsed_time": "2:51:17", "remaining_time": "11:26:13", "throughput": 2757.43, "total_tokens": 28338880}
819
+ {"current_steps": 4000, "total_steps": 20000, "loss": 0.1521, "lr": 4.5227732837131134e-05, "epoch": 0.7243260372575205, "percentage": 20.0, "elapsed_time": "2:51:24", "remaining_time": "11:25:36", "throughput": 2759.11, "total_tokens": 28374976}
820
+ {"current_steps": 4000, "total_steps": 20000, "eval_loss": 0.16136571764945984, "epoch": 0.7243260372575205, "percentage": 20.0, "elapsed_time": "2:55:34", "remaining_time": "11:42:19", "throughput": 2693.41, "total_tokens": 28374976}