versae committed
Commit 821784a · 1 Parent(s): 40cc04e

Step... (14000/50000 | Loss: 1.7139594554901123, Acc: 0.6574689745903015): 29%|███████▋ | 14350/50000 [5:33:22<15:08:11, 1.53s/it]

Files changed (34)
  1. .gitattributes +3 -0
  2. flax_model.msgpack +1 -1
  3. outputs/checkpoints/{checkpoint-7000 → checkpoint-12000}/config.json +0 -0
  4. outputs/checkpoints/{checkpoint-7000 → checkpoint-12000}/data_collator.joblib +0 -0
  5. outputs/checkpoints/{checkpoint-7000 → checkpoint-12000}/flax_model.msgpack +1 -1
  6. outputs/checkpoints/{checkpoint-9000 → checkpoint-12000}/optimizer_state.msgpack +1 -1
  7. outputs/checkpoints/{checkpoint-7000 → checkpoint-12000}/training_args.joblib +0 -0
  8. outputs/checkpoints/checkpoint-12000/training_state.json +1 -0
  9. outputs/checkpoints/{checkpoint-8000 → checkpoint-13000}/config.json +0 -0
  10. outputs/checkpoints/{checkpoint-8000 → checkpoint-13000}/data_collator.joblib +0 -0
  11. outputs/checkpoints/{checkpoint-9000 → checkpoint-13000}/flax_model.msgpack +1 -1
  12. outputs/checkpoints/{checkpoint-7000 → checkpoint-13000}/optimizer_state.msgpack +1 -1
  13. outputs/checkpoints/{checkpoint-8000 → checkpoint-13000}/training_args.joblib +0 -0
  14. outputs/checkpoints/checkpoint-13000/training_state.json +1 -0
  15. outputs/checkpoints/{checkpoint-9000 → checkpoint-14000}/config.json +0 -0
  16. outputs/checkpoints/{checkpoint-9000 → checkpoint-14000}/data_collator.joblib +0 -0
  17. outputs/checkpoints/{checkpoint-8000 → checkpoint-14000}/flax_model.msgpack +1 -1
  18. outputs/checkpoints/{checkpoint-8000 → checkpoint-14000}/optimizer_state.msgpack +1 -1
  19. outputs/checkpoints/{checkpoint-9000 → checkpoint-14000}/training_args.joblib +0 -0
  20. outputs/checkpoints/checkpoint-14000/training_state.json +1 -0
  21. outputs/checkpoints/checkpoint-7000/training_state.json +0 -1
  22. outputs/checkpoints/checkpoint-8000/training_state.json +0 -1
  23. outputs/checkpoints/checkpoint-9000/training_state.json +0 -1
  24. outputs/events.out.tfevents.1627258355.tablespoon.3000110.3.v2 +2 -2
  25. outputs/flax_model.msgpack +1 -1
  26. outputs/optimizer_state.msgpack +1 -1
  27. outputs/training_state.json +1 -1
  28. pytorch_model.bin +1 -1
  29. run_stream.512.log +0 -0
  30. wandb/run-20210726_001233-17u6inbn/files/output.log +1732 -0
  31. wandb/run-20210726_001233-17u6inbn/files/wandb-summary.json +1 -1
  32. wandb/run-20210726_001233-17u6inbn/logs/debug-internal.log +0 -0
  33. wandb/run-20210726_001233-17u6inbn/logs/debug.log +3 -27
  34. wandb/run-20210726_001233-17u6inbn/run-17u6inbn.wandb +0 -0
.gitattributes CHANGED
@@ -15,3 +15,6 @@
 *.pt filter=lfs diff=lfs merge=lfs -text
 *.pth filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.wandb filter=lfs diff=lfs merge=lfs -text
+debug.log filter=lfs diff=lfs merge=lfs -text
+debug-internal.log filter=lfs diff=lfs merge=lfs -text
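These three new rules route the wandb run file and both debug logs through Git LFS; the usual way to add them is `git lfs track <pattern>`, which appends exactly such lines. A rough sketch of how the new patterns match paths (simplified relative to full gitattributes semantics; the helper name is made up):

```python
# Simplified illustration of the newly tracked patterns; real gitattributes
# matching has more rules than fnmatch covers. Helper name is hypothetical.
from fnmatch import fnmatch

lfs_patterns = ["*.wandb", "debug.log", "debug-internal.log"]

def routed_through_lfs(path):
    # Patterns without a slash match the basename in any directory.
    name = path.rsplit("/", 1)[-1]
    return any(fnmatch(name, pat) for pat in lfs_patterns)

print(routed_through_lfs("wandb/run-20210726_001233-17u6inbn/run-17u6inbn.wandb"))  # True
print(routed_through_lfs("outputs/training_state.json"))                            # False
```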
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:165a80d10b493e4117c19ffeb7cbc1d340e88d14e329eb9be3ab1d32f050f973
+oid sha256:40b18e55e7e0e173646f5693cf8c145dd0ec756f12776cb671210c598dafdb45
 size 249750019
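Each LFS-tracked file lives in the repository as a three-line pointer (version, sha256 oid, byte size); the msgpack diffs in this commit only swap the oid, since the tensors changed but the serialized size did not. A minimal sketch of reading such a pointer from a local checkout (hypothetical helper, not code from this repository):

```python
# Parse a Git LFS pointer file of the form shown above:
#   version https://git-lfs.github.com/spec/v1
#   oid sha256:<hex digest>
#   size <bytes>
# Hypothetical helper, not part of this repository.
def parse_lfs_pointer(path):
    fields = {}
    with open(path) as fh:
        for line in fh:
            key, _, value = line.strip().partition(" ")
            if key:
                fields[key] = value
    return fields

pointer = parse_lfs_pointer("flax_model.msgpack")
print(pointer["oid"], int(pointer["size"]))  # e.g. sha256:40b18e... 249750019
```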
outputs/checkpoints/{checkpoint-7000 → checkpoint-12000}/config.json RENAMED
File without changes
outputs/checkpoints/{checkpoint-7000 → checkpoint-12000}/data_collator.joblib RENAMED
File without changes
outputs/checkpoints/{checkpoint-7000 → checkpoint-12000}/flax_model.msgpack RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:353e62a7bbf3b5817b869c37e749c8e30fe14477d32a3cf95345a030057ed760
+oid sha256:a4df1917f93cb5be75e1a67299b85e14508ce6d594537be9e03fa1ea0d5c451b
 size 249750019
outputs/checkpoints/{checkpoint-9000 → checkpoint-12000}/optimizer_state.msgpack RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2085e2cdeca180d85963536b92e396dad244a1a40804023af28d868e886658c8
+oid sha256:d5ef9d9909e0225cdfdb08ba23fd64c8a8a881103ca5b932bc2206768a7e920b
 size 499500278
outputs/checkpoints/{checkpoint-7000 → checkpoint-12000}/training_args.joblib RENAMED
File without changes
outputs/checkpoints/checkpoint-12000/training_state.json ADDED
@@ -0,0 +1 @@
+{"step": 12001}
outputs/checkpoints/{checkpoint-8000 → checkpoint-13000}/config.json RENAMED
File without changes
outputs/checkpoints/{checkpoint-8000 → checkpoint-13000}/data_collator.joblib RENAMED
File without changes
outputs/checkpoints/{checkpoint-9000 → checkpoint-13000}/flax_model.msgpack RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:55484b434d505ef7284a42471c8326f9bebe13561d6cbe478c61990f9fd7a04d
+oid sha256:7781249560c15a41eb883214ab5f6613f40b42c1ae0886c52a020bbfa19f76fb
 size 249750019
outputs/checkpoints/{checkpoint-7000 → checkpoint-13000}/optimizer_state.msgpack RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0cd67c6ccf30e42fa238a68d1aa1ae063e8e11fc6c50bf034163444ab3f91118
+oid sha256:05c37a1e738b919e689e3c653244d8a680235541f5d91c99fb41edd65340a91d
 size 499500278
outputs/checkpoints/{checkpoint-8000 → checkpoint-13000}/training_args.joblib RENAMED
File without changes
outputs/checkpoints/checkpoint-13000/training_state.json ADDED
@@ -0,0 +1 @@
+{"step": 13001}
outputs/checkpoints/{checkpoint-9000 → checkpoint-14000}/config.json RENAMED
File without changes
outputs/checkpoints/{checkpoint-9000 → checkpoint-14000}/data_collator.joblib RENAMED
File without changes
outputs/checkpoints/{checkpoint-8000 → checkpoint-14000}/flax_model.msgpack RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fcd1e001a114c411bab4cde0ffdf4e4bc13e918b2c1c3cac7a75100e5a3f0349
+oid sha256:40b18e55e7e0e173646f5693cf8c145dd0ec756f12776cb671210c598dafdb45
 size 249750019
outputs/checkpoints/{checkpoint-8000 → checkpoint-14000}/optimizer_state.msgpack RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7070a9b0eb3c596cc8b7f538faa458611e2d751b69600e272ec31b7c5c1bbc82
+oid sha256:c3b657f7303349384c5ab4bd1d5226d2f8dbc1b641fc9355b1d5d4d2825ce382
 size 499500278
outputs/checkpoints/{checkpoint-9000 → checkpoint-14000}/training_args.joblib RENAMED
File without changes
outputs/checkpoints/checkpoint-14000/training_state.json ADDED
@@ -0,0 +1 @@
+{"step": 14001}
outputs/checkpoints/checkpoint-7000/training_state.json DELETED
@@ -1 +0,0 @@
-{"step": 7001}
 
 
outputs/checkpoints/checkpoint-8000/training_state.json DELETED
@@ -1 +0,0 @@
-{"step": 8001}
 
 
outputs/checkpoints/checkpoint-9000/training_state.json DELETED
@@ -1 +0,0 @@
-{"step": 9001}
 
 
outputs/events.out.tfevents.1627258355.tablespoon.3000110.3.v2 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0526c59728161c425d35ed5858dbea479f8f85f54dacc020da0bd7b01b4c8862
-size 1693554
+oid sha256:4576e5515e6cf1926a9625e2db3778c06552a27d5a56f4b306bfdc6dec02245d
+size 2061819
outputs/flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:165a80d10b493e4117c19ffeb7cbc1d340e88d14e329eb9be3ab1d32f050f973
+oid sha256:40b18e55e7e0e173646f5693cf8c145dd0ec756f12776cb671210c598dafdb45
 size 249750019
outputs/optimizer_state.msgpack CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:17907ad9f925f7ff5210c836be64cf4f0b87dea575a17582ec3bce13447deb03
+oid sha256:c3b657f7303349384c5ab4bd1d5226d2f8dbc1b641fc9355b1d5d4d2825ce382
 size 499500278
outputs/training_state.json CHANGED
@@ -1 +1 @@
-{"step": 11001}
+{"step": 14001}
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e31df557709db351ba2444f6f63f0229f00d2662c5659a587f36135034e99a59
+oid sha256:f0290d7d4fc3d31d587881870f70299d2262836ee8bad199236e57b27fd504a0
 size 498858859
run_stream.512.log CHANGED
The diff for this file is too large to render. See raw diff
 
wandb/run-20210726_001233-17u6inbn/files/output.log CHANGED
@@ -7731,6 +7731,1738 @@ You should probably TRAIN this model on a down-stream task to be able to use it
[The context lines and nearly all of the 1,738 added lines are blank tqdm progress-bar redraws; only the informative added lines are kept below.]
+Step... (11000/50000 | Loss: 1.7415039539337158, Acc: 0.6532756686210632): 24%|██████▍ | 12000/50000 [4:36:38<14:37:46, 1.39s/it]
+Step... (11500 | Loss: 1.8508110046386719, Learning Rate: 0.0004666667082346976)
+Step... (11000/50000 | Loss: 1.7415039539337158, Acc: 0.6532756686210632): 24%|██████▍ | 12000/50000 [4:36:40<14:37:46, 1.39s/it]
+[06:45:03] - INFO - __main__ - Saving checkpoint at 12000 steps█████████████████████████████████████████████████████| 130/130 [00:21<00:00, 4.60it/s]
+All Flax model weights were used when initializing RobertaForMaskedLM.
+Some weights of RobertaForMaskedLM were not initialized from the Flax model and are newly initialized: ['lm_head.decoder.weight', 'roberta.embeddings.position_ids', 'lm_head.decoder.bias']
+You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
+Step... (12000/50000 | Loss: 1.7264103889465332, Acc: 0.6554967761039734): 26%|███████ | 13000/50000 [5:00:28<12:23:03, 1.20s/it]
+Step... (12500 | Loss: 1.8441736698150635, Learning Rate: 0.00045454545761458576)
+Step... (12000/50000 | Loss: 1.7264103889465332, Acc: 0.6554967761039734): 26%|███████ | 13000/50000 [5:00:30<12:23:03, 1.20s/it]
+[07:08:53] - INFO - __main__ - Saving checkpoint at 13000 steps█████████████████████████████████████████████████████| 130/130 [00:21<00:00, 4.60it/s]
+All Flax model weights were used when initializing RobertaForMaskedLM.
+Some weights of RobertaForMaskedLM were not initialized from the Flax model and are newly initialized: ['lm_head.decoder.weight', 'roberta.embeddings.position_ids', 'lm_head.decoder.bias']
+You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
+Step... (13000/50000 | Loss: 1.725870966911316, Acc: 0.6557744741439819): 28%|███████▊ | 14000/50000 [5:24:30<15:07:56, 1.51s/it]
+Step... (13500 | Loss: 1.8221518993377686, Learning Rate: 0.0004424242360983044)
+Step... (14000 | Loss: 1.7394559383392334, Learning Rate: 0.0004363636835478246)
+[07:32:53] - INFO - __main__ - Saving checkpoint at 14000 steps█████████████████████████████████████████████████████| 130/130 [00:21<00:00, 4.59it/s]
+All Flax model weights were used when initializing RobertaForMaskedLM.
+Some weights of RobertaForMaskedLM were not initialized from the Flax model and are newly initialized: ['lm_head.decoder.weight', 'roberta.embeddings.position_ids', 'lm_head.decoder.bias']
+You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
wandb/run-20210726_001233-17u6inbn/files/wandb-summary.json CHANGED
@@ -1 +1 @@
-{"global_step": 11500, "_timestamp": 1627281196.535555, "train_time": 349233.125, "train_learning_rate": 0.0004666667082346976, "_step": 22931, "train_loss": 2.128620147705078, "eval_accuracy": 0.6532756686210632, "eval_loss": 1.7415039539337158}
+{"global_step": 14000, "_timestamp": 1627284744.890835, "train_time": 473944.03125, "train_learning_rate": 0.0004363636835478246, "_step": 27916, "train_loss": 1.866248369216919, "eval_accuracy": 0.6557744741439819, "eval_loss": 1.725870966911316}
wandb/run-20210726_001233-17u6inbn/logs/debug-internal.log CHANGED
The diff for this file is too large to render. See raw diff
 
wandb/run-20210726_001233-17u6inbn/logs/debug.log CHANGED
@@ -1,27 +1,3 @@
-2021-07-26 00:12:33,307 INFO MainThread:3000110 [wandb_setup.py:_flush():69] setting env: {}
-2021-07-26 00:12:33,307 INFO MainThread:3000110 [wandb_setup.py:_flush():69] setting login settings: {}
-2021-07-26 00:12:33,307 INFO MainThread:3000110 [wandb_init.py:_log_setup():337] Logging user logs to /var/hf/experiment-base-exp-512seq-stepwise/wandb/run-20210726_001233-17u6inbn/logs/debug.log
-2021-07-26 00:12:33,307 INFO MainThread:3000110 [wandb_init.py:_log_setup():338] Logging internal logs to /var/hf/experiment-base-exp-512seq-stepwise/wandb/run-20210726_001233-17u6inbn/logs/debug-internal.log
-2021-07-26 00:12:33,307 INFO MainThread:3000110 [wandb_init.py:init():370] calling init triggers
-2021-07-26 00:12:33,307 INFO MainThread:3000110 [wandb_init.py:init():375] wandb.init called with sweep_config: {}
-config: {}
-2021-07-26 00:12:33,307 INFO MainThread:3000110 [wandb_init.py:init():419] starting backend
-2021-07-26 00:12:33,307 INFO MainThread:3000110 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
-2021-07-26 00:12:33,351 INFO MainThread:3000110 [backend.py:ensure_launched():135] starting backend process...
-2021-07-26 00:12:33,394 INFO MainThread:3000110 [backend.py:ensure_launched():139] started backend process with pid: 3001431
-2021-07-26 00:12:33,396 INFO MainThread:3000110 [wandb_init.py:init():424] backend started and connected
-2021-07-26 00:12:33,399 INFO MainThread:3000110 [wandb_init.py:init():472] updated telemetry
-2021-07-26 00:12:33,400 INFO MainThread:3000110 [wandb_init.py:init():491] communicating current version
-2021-07-26 00:12:34,050 INFO MainThread:3000110 [wandb_init.py:init():496] got version response upgrade_message: "wandb version 0.11.0 is available! To upgrade, please run:\n $ pip install wandb --upgrade"
-
-2021-07-26 00:12:34,050 INFO MainThread:3000110 [wandb_init.py:init():504] communicating run to backend with 30 second timeout
-2021-07-26 00:12:34,261 INFO MainThread:3000110 [wandb_init.py:init():529] starting run threads in backend
-2021-07-26 00:12:35,502 INFO MainThread:3000110 [wandb_run.py:_console_start():1623] atexit reg
-2021-07-26 00:12:35,502 INFO MainThread:3000110 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT
-2021-07-26 00:12:35,503 INFO MainThread:3000110 [wandb_run.py:_redirect():1502] Redirecting console.
-2021-07-26 00:12:35,505 INFO MainThread:3000110 [wandb_run.py:_redirect():1558] Redirects installed.
-2021-07-26 00:12:35,505 INFO MainThread:3000110 [wandb_init.py:init():554] run started, returning control to user process
-2021-07-26 00:12:35,506 INFO MainThread:3000110 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './outputs', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 48, 'per_device_eval_batch_size': 48, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'learning_rate': 0.0006, 'weight_decay': 0.01, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-06, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './outputs/runs/Jul26_00-12-25_tablespoon', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 500, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 1000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './outputs', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'push_to_hub_model_id': 'outputs', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': '', '_n_gpu': 0, '__cached__setup_devices': 'cpu'}
-2021-07-26 00:12:35,507 INFO MainThread:3000110 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': 'bertin-project/bertin-base-stepwise', 'model_type': 'roberta', 'config_name': './configs/base', 'tokenizer_name': './configs/base', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'bfloat16'}
-2021-07-26 00:12:35,508 INFO MainThread:3000110 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': 'bertin-project/mc4-es-sampled', 'dataset_config_name': 'stepwise', 'train_file': None, 'validation_file': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 512, 'preprocessing_num_workers': None, 'mlm_probability': 0.15, 'pad_to_max_length': True, 'line_by_line': False, 'text_column_name': 'text', 'shuffle_buffer_size': 10000, 'num_train_steps': 50000, 'num_eval_samples': 50000}
-2021-07-26 00:12:35,587 INFO MainThread:3000110 [wandb_run.py:_tensorboard_callback():943] tensorboard callback: outputs, None
+version https://git-lfs.github.com/spec/v1
+oid sha256:a2dd7c5185e79b60bb93ec1d9266f770360846fbb237a1f78837006b5f8269bd
+size 5866
wandb/run-20210726_001233-17u6inbn/run-17u6inbn.wandb CHANGED
Binary files a/wandb/run-20210726_001233-17u6inbn/run-17u6inbn.wandb and b/wandb/run-20210726_001233-17u6inbn/run-17u6inbn.wandb differ