sharukat commited on
Commit
4360b70
·
verified ·
1 Parent(s): 207ada8

Training in progress, epoch 1

Browse files
runs/Mar06_14-45-27_cd2c3b1980c7/events.out.tfevents.1709736327.cd2c3b1980c7.34.4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f645083ede524cda645409ba956fa9ad914ea27655e9c752b11826e464d69c84
3
+ size 5374
runs/Mar06_14-50-20_cd2c3b1980c7/events.out.tfevents.1709736621.cd2c3b1980c7.34.5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a301590a4808a5fa5fcfa1db4131ac3207c9712745d495cc156aacd42dbd2116
3
+ size 5374
runs/Mar06_14-51-12_cd2c3b1980c7/events.out.tfevents.1709736673.cd2c3b1980c7.34.6 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:773e13429b1bf17993993809acd0e6c51078afe5d27a291d147d444234f8251c
3
+ size 6185
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5dc70d1f390dffc6ea61c4aff646ef9dc475c8030276d3b0440a503b96593826
3
  size 4856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:170b6b5c6913afd023aae993ae4a337517f868260c0c93323166526cc988705b
3
  size 4856
wandb/debug-internal.log CHANGED
@@ -874,3 +874,346 @@
874
  2024-03-06 14:44:13,758 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
875
  2024-03-06 14:44:14,301 DEBUG SystemMonitor:133 [system_monitor.py:_start():172] Starting system metrics aggregation loop
876
  2024-03-06 14:44:14,303 DEBUG SenderThread:133 [sender.py:send():382] send: stats
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
874
  2024-03-06 14:44:13,758 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
875
  2024-03-06 14:44:14,301 DEBUG SystemMonitor:133 [system_monitor.py:_start():172] Starting system metrics aggregation loop
876
  2024-03-06 14:44:14,303 DEBUG SenderThread:133 [sender.py:send():382] send: stats
877
+ 2024-03-06 14:44:15,387 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: pause
878
+ 2024-03-06 14:44:15,387 INFO HandlerThread:133 [handler.py:handle_request_pause():708] stopping system metrics thread
879
+ 2024-03-06 14:44:15,387 INFO HandlerThread:133 [system_monitor.py:finish():203] Stopping system monitor
880
+ 2024-03-06 14:44:15,387 DEBUG SystemMonitor:133 [system_monitor.py:_start():179] Finished system metrics aggregation loop
881
+ 2024-03-06 14:44:15,387 DEBUG SystemMonitor:133 [system_monitor.py:_start():183] Publishing last batch of metrics
882
+ 2024-03-06 14:44:15,388 INFO HandlerThread:133 [interfaces.py:finish():202] Joined cpu monitor
883
+ 2024-03-06 14:44:15,389 INFO HandlerThread:133 [interfaces.py:finish():202] Joined disk monitor
884
+ 2024-03-06 14:44:15,394 INFO HandlerThread:133 [interfaces.py:finish():202] Joined gpu monitor
885
+ 2024-03-06 14:44:15,395 INFO HandlerThread:133 [interfaces.py:finish():202] Joined memory monitor
886
+ 2024-03-06 14:44:15,395 INFO HandlerThread:133 [interfaces.py:finish():202] Joined network monitor
887
+ 2024-03-06 14:44:15,395 DEBUG SenderThread:133 [sender.py:send():382] send: stats
888
+ 2024-03-06 14:44:15,630 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
889
+ 2024-03-06 14:44:19,396 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
890
+ 2024-03-06 14:44:20,631 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
891
+ 2024-03-06 14:44:24,398 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
892
+ 2024-03-06 14:44:25,633 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
893
+ 2024-03-06 14:44:29,399 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
894
+ 2024-03-06 14:44:30,634 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
895
+ 2024-03-06 14:44:34,400 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
896
+ 2024-03-06 14:44:35,635 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
897
+ 2024-03-06 14:44:39,401 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
898
+ 2024-03-06 14:44:40,636 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
899
+ 2024-03-06 14:44:44,403 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
900
+ 2024-03-06 14:44:45,637 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
901
+ 2024-03-06 14:44:49,404 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
902
+ 2024-03-06 14:44:50,639 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
903
+ 2024-03-06 14:44:54,405 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
904
+ 2024-03-06 14:44:55,640 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
905
+ 2024-03-06 14:44:59,407 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
906
+ 2024-03-06 14:45:00,641 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
907
+ 2024-03-06 14:45:04,407 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
908
+ 2024-03-06 14:45:05,642 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
909
+ 2024-03-06 14:45:09,409 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
910
+ 2024-03-06 14:45:09,984 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: resume
911
+ 2024-03-06 14:45:09,984 INFO HandlerThread:133 [handler.py:handle_request_resume():699] starting system metrics thread
912
+ 2024-03-06 14:45:09,984 INFO HandlerThread:133 [system_monitor.py:start():194] Starting system monitor
913
+ 2024-03-06 14:45:09,984 INFO SystemMonitor:133 [system_monitor.py:_start():158] Starting system asset monitoring threads
914
+ 2024-03-06 14:45:09,984 INFO SystemMonitor:133 [interfaces.py:start():190] Started cpu monitoring
915
+ 2024-03-06 14:45:09,985 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: pause
916
+ 2024-03-06 14:45:09,986 INFO HandlerThread:133 [handler.py:handle_request_pause():708] stopping system metrics thread
917
+ 2024-03-06 14:45:09,986 INFO HandlerThread:133 [system_monitor.py:finish():203] Stopping system monitor
918
+ 2024-03-06 14:45:09,986 INFO SystemMonitor:133 [interfaces.py:start():190] Started disk monitoring
919
+ 2024-03-06 14:45:09,986 DEBUG SystemMonitor:133 [system_monitor.py:_start():172] Starting system metrics aggregation loop
920
+ 2024-03-06 14:45:09,986 DEBUG SystemMonitor:133 [system_monitor.py:_start():179] Finished system metrics aggregation loop
921
+ 2024-03-06 14:45:09,986 INFO HandlerThread:133 [interfaces.py:finish():202] Joined cpu monitor
922
+ 2024-03-06 14:45:09,987 DEBUG SystemMonitor:133 [system_monitor.py:_start():183] Publishing last batch of metrics
923
+ 2024-03-06 14:45:09,989 INFO HandlerThread:133 [interfaces.py:finish():202] Joined disk monitor
924
+ 2024-03-06 14:45:09,989 DEBUG SenderThread:133 [sender.py:send():382] send: stats
925
+ 2024-03-06 14:45:10,643 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
926
+ 2024-03-06 14:45:14,990 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
927
+ 2024-03-06 14:45:15,645 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
928
+ 2024-03-06 14:45:19,992 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
929
+ 2024-03-06 14:45:20,646 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
930
+ 2024-03-06 14:45:24,993 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
931
+ 2024-03-06 14:45:25,647 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
932
+ 2024-03-06 14:45:26,835 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: resume
933
+ 2024-03-06 14:45:26,836 INFO HandlerThread:133 [handler.py:handle_request_resume():699] starting system metrics thread
934
+ 2024-03-06 14:45:26,836 INFO HandlerThread:133 [system_monitor.py:start():194] Starting system monitor
935
+ 2024-03-06 14:45:26,836 INFO SystemMonitor:133 [system_monitor.py:_start():158] Starting system asset monitoring threads
936
+ 2024-03-06 14:45:26,837 INFO SystemMonitor:133 [interfaces.py:start():190] Started cpu monitoring
937
+ 2024-03-06 14:45:26,837 INFO SystemMonitor:133 [interfaces.py:start():190] Started disk monitoring
938
+ 2024-03-06 14:45:26,839 INFO SystemMonitor:133 [interfaces.py:start():190] Started gpu monitoring
939
+ 2024-03-06 14:45:26,840 INFO SystemMonitor:133 [interfaces.py:start():190] Started memory monitoring
940
+ 2024-03-06 14:45:26,840 INFO SystemMonitor:133 [interfaces.py:start():190] Started network monitoring
941
+ 2024-03-06 14:45:27,960 DEBUG SenderThread:133 [sender.py:send():382] send: config
942
+ 2024-03-06 14:45:27,962 DEBUG SenderThread:133 [sender.py:send():382] send: metric
943
+ 2024-03-06 14:45:27,962 DEBUG SenderThread:133 [sender.py:send():382] send: metric
944
+ 2024-03-06 14:45:27,962 WARNING SenderThread:133 [sender.py:send_metric():1354] Seen metric with glob (shouldn't happen)
945
+ 2024-03-06 14:45:29,624 INFO Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/output.log
946
+ 2024-03-06 14:45:30,774 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
947
+ 2024-03-06 14:45:30,963 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
948
+ 2024-03-06 14:45:35,775 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
949
+ 2024-03-06 14:45:35,964 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
950
+ 2024-03-06 14:45:40,777 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
951
+ 2024-03-06 14:45:40,965 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
952
+ 2024-03-06 14:45:45,784 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
953
+ 2024-03-06 14:45:45,966 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
954
+ 2024-03-06 14:45:50,785 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
955
+ 2024-03-06 14:45:50,972 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
956
+ 2024-03-06 14:45:51,633 INFO Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/config.yaml
957
+ 2024-03-06 14:45:54,625 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: partial_history
958
+ 2024-03-06 14:45:54,626 DEBUG SenderThread:133 [sender.py:send():382] send: history
959
+ 2024-03-06 14:45:54,626 DEBUG SenderThread:133 [sender.py:send_request():409] send_request: summary_record
960
+ 2024-03-06 14:45:54,627 INFO SenderThread:133 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end
961
+ 2024-03-06 14:45:54,634 INFO Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/wandb-summary.json
962
+ 2024-03-06 14:45:55,786 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
963
+ 2024-03-06 14:45:56,513 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: pause
964
+ 2024-03-06 14:45:56,513 INFO HandlerThread:133 [handler.py:handle_request_pause():708] stopping system metrics thread
965
+ 2024-03-06 14:45:56,513 INFO HandlerThread:133 [system_monitor.py:finish():203] Stopping system monitor
966
+ 2024-03-06 14:45:56,514 DEBUG SystemMonitor:133 [system_monitor.py:_start():172] Starting system metrics aggregation loop
967
+ 2024-03-06 14:45:56,514 DEBUG SystemMonitor:133 [system_monitor.py:_start():179] Finished system metrics aggregation loop
968
+ 2024-03-06 14:45:56,514 DEBUG SystemMonitor:133 [system_monitor.py:_start():183] Publishing last batch of metrics
969
+ 2024-03-06 14:45:56,514 INFO HandlerThread:133 [interfaces.py:finish():202] Joined cpu monitor
970
+ 2024-03-06 14:45:56,515 INFO HandlerThread:133 [interfaces.py:finish():202] Joined disk monitor
971
+ 2024-03-06 14:45:56,521 INFO HandlerThread:133 [interfaces.py:finish():202] Joined gpu monitor
972
+ 2024-03-06 14:45:56,521 INFO HandlerThread:133 [interfaces.py:finish():202] Joined memory monitor
973
+ 2024-03-06 14:45:56,521 INFO HandlerThread:133 [interfaces.py:finish():202] Joined network monitor
974
+ 2024-03-06 14:45:56,522 DEBUG SenderThread:133 [sender.py:send():382] send: stats
975
+ 2024-03-06 14:45:56,522 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
976
+ 2024-03-06 14:46:00,787 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
977
+ 2024-03-06 14:46:01,523 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
978
+ 2024-03-06 14:46:05,788 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
979
+ 2024-03-06 14:46:06,525 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
980
+ 2024-03-06 14:46:10,789 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
981
+ 2024-03-06 14:46:11,526 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
982
+ 2024-03-06 14:46:15,790 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
983
+ 2024-03-06 14:46:16,527 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
984
+ 2024-03-06 14:46:20,792 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
985
+ 2024-03-06 14:46:21,528 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
986
+ 2024-03-06 14:46:25,793 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
987
+ 2024-03-06 14:46:26,529 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
988
+ 2024-03-06 14:46:30,794 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
989
+ 2024-03-06 14:46:31,530 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
990
+ 2024-03-06 14:46:35,795 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
991
+ 2024-03-06 14:46:36,531 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
992
+ 2024-03-06 14:46:40,797 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
993
+ 2024-03-06 14:46:41,533 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
994
+ 2024-03-06 14:46:45,798 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
995
+ 2024-03-06 14:46:46,534 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
996
+ 2024-03-06 14:46:50,799 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
997
+ 2024-03-06 14:46:51,535 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
998
+ 2024-03-06 14:46:55,800 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
999
+ 2024-03-06 14:46:56,536 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1000
+ 2024-03-06 14:47:00,801 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1001
+ 2024-03-06 14:47:01,537 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1002
+ 2024-03-06 14:47:05,802 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1003
+ 2024-03-06 14:47:06,538 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1004
+ 2024-03-06 14:47:10,804 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1005
+ 2024-03-06 14:47:11,540 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1006
+ 2024-03-06 14:47:15,805 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1007
+ 2024-03-06 14:47:16,541 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1008
+ 2024-03-06 14:47:20,806 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1009
+ 2024-03-06 14:47:21,542 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1010
+ 2024-03-06 14:47:25,807 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1011
+ 2024-03-06 14:47:26,543 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1012
+ 2024-03-06 14:47:30,808 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1013
+ 2024-03-06 14:47:31,544 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1014
+ 2024-03-06 14:47:35,809 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1015
+ 2024-03-06 14:47:36,546 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1016
+ 2024-03-06 14:47:40,810 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1017
+ 2024-03-06 14:47:41,547 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1018
+ 2024-03-06 14:47:45,811 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1019
+ 2024-03-06 14:47:46,548 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1020
+ 2024-03-06 14:47:50,813 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1021
+ 2024-03-06 14:47:51,549 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1022
+ 2024-03-06 14:47:55,814 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1023
+ 2024-03-06 14:47:56,550 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1024
+ 2024-03-06 14:48:00,815 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1025
+ 2024-03-06 14:48:01,551 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1026
+ 2024-03-06 14:48:05,816 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1027
+ 2024-03-06 14:48:06,552 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1028
+ 2024-03-06 14:48:10,817 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1029
+ 2024-03-06 14:48:11,554 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1030
+ 2024-03-06 14:48:15,818 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1031
+ 2024-03-06 14:48:16,555 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1032
+ 2024-03-06 14:48:20,819 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1033
+ 2024-03-06 14:48:21,556 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1034
+ 2024-03-06 14:48:25,820 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1035
+ 2024-03-06 14:48:26,557 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1036
+ 2024-03-06 14:48:30,821 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1037
+ 2024-03-06 14:48:31,559 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1038
+ 2024-03-06 14:48:35,822 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1039
+ 2024-03-06 14:48:36,560 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1040
+ 2024-03-06 14:48:40,823 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1041
+ 2024-03-06 14:48:41,561 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1042
+ 2024-03-06 14:48:45,824 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1043
+ 2024-03-06 14:48:46,562 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1044
+ 2024-03-06 14:48:50,825 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1045
+ 2024-03-06 14:48:51,563 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1046
+ 2024-03-06 14:48:55,827 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1047
+ 2024-03-06 14:48:56,564 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1048
+ 2024-03-06 14:49:00,828 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1049
+ 2024-03-06 14:49:01,565 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1050
+ 2024-03-06 14:49:05,829 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1051
+ 2024-03-06 14:49:06,566 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1052
+ 2024-03-06 14:49:10,830 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1053
+ 2024-03-06 14:49:11,567 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1054
+ 2024-03-06 14:49:15,831 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1055
+ 2024-03-06 14:49:16,568 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1056
+ 2024-03-06 14:49:20,832 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1057
+ 2024-03-06 14:49:21,569 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1058
+ 2024-03-06 14:49:25,834 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1059
+ 2024-03-06 14:49:26,570 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1060
+ 2024-03-06 14:49:30,835 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1061
+ 2024-03-06 14:49:31,571 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1062
+ 2024-03-06 14:49:35,836 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1063
+ 2024-03-06 14:49:36,572 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1064
+ 2024-03-06 14:49:40,837 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1065
+ 2024-03-06 14:49:41,573 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1066
+ 2024-03-06 14:49:45,838 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1067
+ 2024-03-06 14:49:46,574 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1068
+ 2024-03-06 14:49:50,839 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1069
+ 2024-03-06 14:49:51,575 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1070
+ 2024-03-06 14:49:55,841 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1071
+ 2024-03-06 14:49:56,576 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1072
+ 2024-03-06 14:50:00,842 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1073
+ 2024-03-06 14:50:01,577 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1074
+ 2024-03-06 14:50:05,843 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1075
+ 2024-03-06 14:50:06,578 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1076
+ 2024-03-06 14:50:10,844 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1077
+ 2024-03-06 14:50:11,580 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1078
+ 2024-03-06 14:50:11,638 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: resume
1079
+ 2024-03-06 14:50:11,639 INFO HandlerThread:133 [handler.py:handle_request_resume():699] starting system metrics thread
1080
+ 2024-03-06 14:50:11,639 INFO HandlerThread:133 [system_monitor.py:start():194] Starting system monitor
1081
+ 2024-03-06 14:50:11,639 INFO SystemMonitor:133 [system_monitor.py:_start():158] Starting system asset monitoring threads
1082
+ 2024-03-06 14:50:11,639 INFO SystemMonitor:133 [interfaces.py:start():190] Started cpu monitoring
1083
+ 2024-03-06 14:50:11,640 INFO SystemMonitor:133 [interfaces.py:start():190] Started disk monitoring
1084
+ 2024-03-06 14:50:11,641 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: pause
1085
+ 2024-03-06 14:50:11,641 INFO HandlerThread:133 [handler.py:handle_request_pause():708] stopping system metrics thread
1086
+ 2024-03-06 14:50:11,641 INFO HandlerThread:133 [system_monitor.py:finish():203] Stopping system monitor
1087
+ 2024-03-06 14:50:11,642 INFO HandlerThread:133 [interfaces.py:finish():202] Joined cpu monitor
1088
+ 2024-03-06 14:50:11,642 INFO SystemMonitor:133 [interfaces.py:start():190] Started gpu monitoring
1089
+ 2024-03-06 14:50:11,642 DEBUG SystemMonitor:133 [system_monitor.py:_start():172] Starting system metrics aggregation loop
1090
+ 2024-03-06 14:50:11,642 DEBUG SystemMonitor:133 [system_monitor.py:_start():179] Finished system metrics aggregation loop
1091
+ 2024-03-06 14:50:11,643 DEBUG SystemMonitor:133 [system_monitor.py:_start():183] Publishing last batch of metrics
1092
+ 2024-03-06 14:50:11,645 INFO HandlerThread:133 [interfaces.py:finish():202] Joined disk monitor
1093
+ 2024-03-06 14:50:11,653 INFO HandlerThread:133 [interfaces.py:finish():202] Joined gpu monitor
1094
+ 2024-03-06 14:50:11,654 DEBUG SenderThread:133 [sender.py:send():382] send: stats
1095
+ 2024-03-06 14:50:15,846 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1096
+ 2024-03-06 14:50:16,655 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1097
+ 2024-03-06 14:50:19,879 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: resume
1098
+ 2024-03-06 14:50:19,879 INFO HandlerThread:133 [handler.py:handle_request_resume():699] starting system metrics thread
1099
+ 2024-03-06 14:50:19,879 INFO HandlerThread:133 [system_monitor.py:start():194] Starting system monitor
1100
+ 2024-03-06 14:50:19,879 INFO SystemMonitor:133 [system_monitor.py:_start():158] Starting system asset monitoring threads
1101
+ 2024-03-06 14:50:19,880 INFO SystemMonitor:133 [interfaces.py:start():190] Started cpu monitoring
1102
+ 2024-03-06 14:50:19,881 INFO SystemMonitor:133 [interfaces.py:start():190] Started disk monitoring
1103
+ 2024-03-06 14:50:19,882 INFO SystemMonitor:133 [interfaces.py:start():190] Started gpu monitoring
1104
+ 2024-03-06 14:50:19,883 INFO SystemMonitor:133 [interfaces.py:start():190] Started memory monitoring
1105
+ 2024-03-06 14:50:19,885 INFO SystemMonitor:133 [interfaces.py:start():190] Started network monitoring
1106
+ 2024-03-06 14:50:20,901 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1107
+ 2024-03-06 14:50:21,115 DEBUG SenderThread:133 [sender.py:send():382] send: config
1108
+ 2024-03-06 14:50:21,116 DEBUG SenderThread:133 [sender.py:send():382] send: metric
1109
+ 2024-03-06 14:50:21,117 DEBUG SenderThread:133 [sender.py:send():382] send: metric
1110
+ 2024-03-06 14:50:21,117 WARNING SenderThread:133 [sender.py:send_metric():1354] Seen metric with glob (shouldn't happen)
1111
+ 2024-03-06 14:50:21,743 INFO Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/output.log
1112
+ 2024-03-06 14:50:22,122 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1113
+ 2024-03-06 14:50:22,744 INFO Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/config.yaml
1114
+ 2024-03-06 14:50:25,904 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1115
+ 2024-03-06 14:50:27,366 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1116
+ 2024-03-06 14:50:30,905 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1117
+ 2024-03-06 14:50:32,367 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1118
+ 2024-03-06 14:50:35,907 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1119
+ 2024-03-06 14:50:37,368 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1120
+ 2024-03-06 14:50:40,908 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1121
+ 2024-03-06 14:50:42,369 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1122
+ 2024-03-06 14:50:45,909 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1123
+ 2024-03-06 14:50:47,370 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1124
+ 2024-03-06 14:50:47,806 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: partial_history
1125
+ 2024-03-06 14:50:47,807 DEBUG SenderThread:133 [sender.py:send():382] send: history
1126
+ 2024-03-06 14:50:47,807 DEBUG SenderThread:133 [sender.py:send_request():409] send_request: summary_record
1127
+ 2024-03-06 14:50:47,810 INFO SenderThread:133 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end
1128
+ 2024-03-06 14:50:48,754 INFO Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/wandb-summary.json
1129
+ 2024-03-06 14:50:49,741 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: pause
1130
+ 2024-03-06 14:50:49,741 INFO HandlerThread:133 [handler.py:handle_request_pause():708] stopping system metrics thread
1131
+ 2024-03-06 14:50:49,741 INFO HandlerThread:133 [system_monitor.py:finish():203] Stopping system monitor
1132
+ 2024-03-06 14:50:49,742 DEBUG SystemMonitor:133 [system_monitor.py:_start():172] Starting system metrics aggregation loop
1133
+ 2024-03-06 14:50:49,742 DEBUG SystemMonitor:133 [system_monitor.py:_start():179] Finished system metrics aggregation loop
1134
+ 2024-03-06 14:50:49,742 DEBUG SystemMonitor:133 [system_monitor.py:_start():183] Publishing last batch of metrics
1135
+ 2024-03-06 14:50:49,742 INFO HandlerThread:133 [interfaces.py:finish():202] Joined cpu monitor
1136
+ 2024-03-06 14:50:49,743 INFO HandlerThread:133 [interfaces.py:finish():202] Joined disk monitor
1137
+ 2024-03-06 14:50:49,749 INFO HandlerThread:133 [interfaces.py:finish():202] Joined gpu monitor
1138
+ 2024-03-06 14:50:49,749 INFO HandlerThread:133 [interfaces.py:finish():202] Joined memory monitor
1139
+ 2024-03-06 14:50:49,749 INFO HandlerThread:133 [interfaces.py:finish():202] Joined network monitor
1140
+ 2024-03-06 14:50:49,750 DEBUG SenderThread:133 [sender.py:send():382] send: stats
1141
+ 2024-03-06 14:50:50,910 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1142
+ 2024-03-06 14:50:52,751 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1143
+ 2024-03-06 14:50:55,911 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1144
+ 2024-03-06 14:50:57,752 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1145
+ 2024-03-06 14:51:00,913 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1146
+ 2024-03-06 14:51:02,754 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1147
+ 2024-03-06 14:51:05,914 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1148
+ 2024-03-06 14:51:07,755 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1149
+ 2024-03-06 14:51:08,770 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: resume
1150
+ 2024-03-06 14:51:08,770 INFO HandlerThread:133 [handler.py:handle_request_resume():699] starting system metrics thread
1151
+ 2024-03-06 14:51:08,771 INFO HandlerThread:133 [system_monitor.py:start():194] Starting system monitor
1152
+ 2024-03-06 14:51:08,771 INFO SystemMonitor:133 [system_monitor.py:_start():158] Starting system asset monitoring threads
1153
+ 2024-03-06 14:51:08,771 INFO SystemMonitor:133 [interfaces.py:start():190] Started cpu monitoring
1154
+ 2024-03-06 14:51:08,772 INFO SystemMonitor:133 [interfaces.py:start():190] Started disk monitoring
1155
+ 2024-03-06 14:51:08,773 INFO SystemMonitor:133 [interfaces.py:start():190] Started gpu monitoring
1156
+ 2024-03-06 14:51:08,775 INFO SystemMonitor:133 [interfaces.py:start():190] Started memory monitoring
1157
+ 2024-03-06 14:51:08,777 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: pause
1158
+ 2024-03-06 14:51:08,779 INFO HandlerThread:133 [handler.py:handle_request_pause():708] stopping system metrics thread
1159
+ 2024-03-06 14:51:08,779 INFO HandlerThread:133 [system_monitor.py:finish():203] Stopping system monitor
1160
+ 2024-03-06 14:51:08,779 INFO SystemMonitor:133 [interfaces.py:start():190] Started network monitoring
1161
+ 2024-03-06 14:51:08,779 DEBUG SystemMonitor:133 [system_monitor.py:_start():172] Starting system metrics aggregation loop
1162
+ 2024-03-06 14:51:08,779 DEBUG SystemMonitor:133 [system_monitor.py:_start():179] Finished system metrics aggregation loop
1163
+ 2024-03-06 14:51:08,780 DEBUG SystemMonitor:133 [system_monitor.py:_start():183] Publishing last batch of metrics
1164
+ 2024-03-06 14:51:08,780 INFO HandlerThread:133 [interfaces.py:finish():202] Joined cpu monitor
1165
+ 2024-03-06 14:51:08,781 INFO HandlerThread:133 [interfaces.py:finish():202] Joined disk monitor
1166
+ 2024-03-06 14:51:08,791 INFO HandlerThread:133 [interfaces.py:finish():202] Joined gpu monitor
1167
+ 2024-03-06 14:51:08,791 INFO HandlerThread:133 [interfaces.py:finish():202] Joined memory monitor
1168
+ 2024-03-06 14:51:08,791 INFO HandlerThread:133 [interfaces.py:finish():202] Joined network monitor
1169
+ 2024-03-06 14:51:08,792 DEBUG SenderThread:133 [sender.py:send():382] send: stats
1170
+ 2024-03-06 14:51:10,915 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1171
+ 2024-03-06 14:51:12,600 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: resume
1172
+ 2024-03-06 14:51:12,600 INFO HandlerThread:133 [handler.py:handle_request_resume():699] starting system metrics thread
1173
+ 2024-03-06 14:51:12,600 INFO HandlerThread:133 [system_monitor.py:start():194] Starting system monitor
1174
+ 2024-03-06 14:51:12,600 INFO SystemMonitor:133 [system_monitor.py:_start():158] Starting system asset monitoring threads
1175
+ 2024-03-06 14:51:12,601 INFO SystemMonitor:133 [interfaces.py:start():190] Started cpu monitoring
1176
+ 2024-03-06 14:51:12,602 INFO SystemMonitor:133 [interfaces.py:start():190] Started disk monitoring
1177
+ 2024-03-06 14:51:12,605 INFO SystemMonitor:133 [interfaces.py:start():190] Started gpu monitoring
1178
+ 2024-03-06 14:51:12,605 INFO SystemMonitor:133 [interfaces.py:start():190] Started memory monitoring
1179
+ 2024-03-06 14:51:12,606 INFO SystemMonitor:133 [interfaces.py:start():190] Started network monitoring
1180
+ 2024-03-06 14:51:12,793 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1181
+ 2024-03-06 14:51:13,753 DEBUG SenderThread:133 [sender.py:send():382] send: config
1182
+ 2024-03-06 14:51:13,755 DEBUG SenderThread:133 [sender.py:send():382] send: metric
1183
+ 2024-03-06 14:51:13,755 DEBUG SenderThread:133 [sender.py:send():382] send: metric
1184
+ 2024-03-06 14:51:13,755 WARNING SenderThread:133 [sender.py:send_metric():1354] Seen metric with glob (shouldn't happen)
1185
+ 2024-03-06 14:51:13,763 INFO Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/output.log
1186
+ 2024-03-06 14:51:15,919 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1187
+ 2024-03-06 14:51:18,756 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1188
+ 2024-03-06 14:51:20,922 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1189
+ 2024-03-06 14:51:23,762 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1190
+ 2024-03-06 14:51:24,767 INFO Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/config.yaml
1191
+ 2024-03-06 14:51:25,923 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1192
+ 2024-03-06 14:51:29,020 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1193
+ 2024-03-06 14:51:30,925 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1194
+ 2024-03-06 14:51:34,021 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1195
+ 2024-03-06 14:51:35,928 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1196
+ 2024-03-06 14:51:39,022 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1197
+ 2024-03-06 14:51:40,429 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: partial_history
1198
+ 2024-03-06 14:51:40,430 DEBUG SenderThread:133 [sender.py:send():382] send: history
1199
+ 2024-03-06 14:51:40,431 DEBUG SenderThread:133 [sender.py:send_request():409] send_request: summary_record
1200
+ 2024-03-06 14:51:40,432 INFO SenderThread:133 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end
1201
+ 2024-03-06 14:51:40,773 INFO Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/wandb-summary.json
1202
+ 2024-03-06 14:51:40,942 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1203
+ 2024-03-06 14:51:42,098 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: partial_history
1204
+ 2024-03-06 14:51:42,101 DEBUG SenderThread:133 [sender.py:send():382] send: metric
1205
+ 2024-03-06 14:51:42,101 DEBUG SenderThread:133 [sender.py:send():382] send: metric
1206
+ 2024-03-06 14:51:42,102 DEBUG SenderThread:133 [sender.py:send():382] send: metric
1207
+ 2024-03-06 14:51:42,102 DEBUG SenderThread:133 [sender.py:send():382] send: metric
1208
+ 2024-03-06 14:51:42,102 DEBUG SenderThread:133 [sender.py:send():382] send: history
1209
+ 2024-03-06 14:51:42,102 DEBUG SenderThread:133 [sender.py:send_request():409] send_request: summary_record
1210
+ 2024-03-06 14:51:42,103 INFO SenderThread:133 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end
1211
+ 2024-03-06 14:51:42,774 INFO Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/wandb-summary.json
1212
+ 2024-03-06 14:51:43,775 INFO Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/output.log
1213
+ 2024-03-06 14:51:44,104 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1214
+ 2024-03-06 14:51:46,440 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: partial_history
1215
+ 2024-03-06 14:51:46,441 DEBUG SenderThread:133 [sender.py:send():382] send: history
1216
+ 2024-03-06 14:51:46,442 DEBUG SenderThread:133 [sender.py:send_request():409] send_request: summary_record
1217
+ 2024-03-06 14:51:46,442 INFO SenderThread:133 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end
1218
+ 2024-03-06 14:51:46,678 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1219
+ 2024-03-06 14:51:46,776 INFO Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/wandb-summary.json
wandb/debug.log CHANGED
@@ -74,3 +74,24 @@ config: {}
74
  2024-03-06 14:40:45,818 INFO MainThread:34 [wandb_init.py:_pause_backend():437] pausing backend
75
  2024-03-06 14:43:14,295 INFO MainThread:34 [wandb_init.py:_resume_backend():442] resuming backend
76
  2024-03-06 14:43:15,473 INFO MainThread:34 [wandb_run.py:_config_callback():1343] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': None, 'finetuning_task': None, 'id2label': {0: 'Documentation Ambiguity', 1: 'Documentation Completeness', 2: 'Documentation Replicability', 3: 'Documentation Replication on Other Examples', 4: 'Inadequate Examples', 5: 'Lack of Alternative Solutions/Documentation', 6: 'Requesting (Additional) Documentation/Examples'}, 'label2id': {'Documentation Ambiguity': 0, 'Documentation Completeness': 1, 'Documentation Replicability': 2, 'Documentation Replication on Other Examples': 3, 'Inadequate Examples': 4, 'Lack of Alternative Solutions/Documentation': 5, 'Requesting (Additional) Documentation/Examples': 6}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'mmukh/SOBertBase', 'transformers_version': '4.38.1', 'model_type': 'megatron-bert', 'tokenizer_type': 'SentencePieceTokenizer', 'vocab_size': 50048, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 2048, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Mar06_14-43-14_cd2c3b1980c7', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  2024-03-06 14:40:45,818 INFO MainThread:34 [wandb_init.py:_pause_backend():437] pausing backend
75
  2024-03-06 14:43:14,295 INFO MainThread:34 [wandb_init.py:_resume_backend():442] resuming backend
76
  2024-03-06 14:43:15,473 INFO MainThread:34 [wandb_run.py:_config_callback():1343] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': None, 'finetuning_task': None, 'id2label': {0: 'Documentation Ambiguity', 1: 'Documentation Completeness', 2: 'Documentation Replicability', 3: 'Documentation Replication on Other Examples', 4: 'Inadequate Examples', 5: 'Lack of Alternative Solutions/Documentation', 6: 'Requesting (Additional) Documentation/Examples'}, 'label2id': {'Documentation Ambiguity': 0, 'Documentation Completeness': 1, 'Documentation Replicability': 2, 'Documentation Replication on Other Examples': 3, 'Inadequate Examples': 4, 'Lack of Alternative Solutions/Documentation': 5, 'Requesting (Additional) Documentation/Examples': 6}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'mmukh/SOBertBase', 'transformers_version': '4.38.1', 'model_type': 'megatron-bert', 'tokenizer_type': 'SentencePieceTokenizer', 'vocab_size': 50048, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 2048, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Mar06_14-43-14_cd2c3b1980c7', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
77
+ 2024-03-06 14:44:15,386 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
78
+ 2024-03-06 14:44:15,386 INFO MainThread:34 [wandb_init.py:_pause_backend():437] pausing backend
79
+ 2024-03-06 14:45:09,983 INFO MainThread:34 [wandb_init.py:_resume_backend():442] resuming backend
80
+ 2024-03-06 14:45:09,985 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
81
+ 2024-03-06 14:45:09,985 INFO MainThread:34 [wandb_init.py:_pause_backend():437] pausing backend
82
+ 2024-03-06 14:45:26,835 INFO MainThread:34 [wandb_init.py:_resume_backend():442] resuming backend
83
+ 2024-03-06 14:45:27,956 INFO MainThread:34 [wandb_run.py:_config_callback():1343] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': None, 'finetuning_task': None, 'id2label': {0: 'Documentation Ambiguity', 1: 'Documentation Completeness', 2: 'Documentation Replicability', 3: 'Documentation Replication on Other Examples', 4: 'Inadequate Examples', 5: 'Lack of Alternative Solutions/Documentation', 6: 'Requesting (Additional) Documentation/Examples'}, 'label2id': {'Documentation Ambiguity': 0, 'Documentation Completeness': 1, 'Documentation Replicability': 2, 'Documentation Replication on Other Examples': 3, 'Inadequate Examples': 4, 'Lack of Alternative Solutions/Documentation': 5, 'Requesting (Additional) Documentation/Examples': 6}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'mmukh/SOBertBase', 'transformers_version': '4.38.1', 'model_type': 'megatron-bert', 'tokenizer_type': 'SentencePieceTokenizer', 'vocab_size': 50048, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 2048, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Mar06_14-45-27_cd2c3b1980c7', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
84
+ 2024-03-06 14:45:56,512 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
85
+ 2024-03-06 14:45:56,512 INFO MainThread:34 [wandb_init.py:_pause_backend():437] pausing backend
86
+ 2024-03-06 14:50:11,637 INFO MainThread:34 [wandb_init.py:_resume_backend():442] resuming backend
87
+ 2024-03-06 14:50:11,640 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
88
+ 2024-03-06 14:50:11,640 INFO MainThread:34 [wandb_init.py:_pause_backend():437] pausing backend
89
+ 2024-03-06 14:50:19,878 INFO MainThread:34 [wandb_init.py:_resume_backend():442] resuming backend
90
+ 2024-03-06 14:50:21,110 INFO MainThread:34 [wandb_run.py:_config_callback():1343] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': None, 'finetuning_task': None, 'id2label': {0: 'Documentation Ambiguity', 1: 'Documentation Completeness', 2: 'Documentation Replicability', 3: 'Documentation Replication on Other Examples', 4: 'Inadequate Examples', 5: 'Lack of Alternative Solutions/Documentation', 6: 'Requesting (Additional) Documentation/Examples'}, 'label2id': {'Documentation Ambiguity': 0, 'Documentation Completeness': 1, 'Documentation Replicability': 2, 'Documentation Replication on Other Examples': 3, 'Inadequate Examples': 4, 'Lack of Alternative Solutions/Documentation': 5, 'Requesting (Additional) Documentation/Examples': 6}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'mmukh/SOBertBase', 'transformers_version': '4.38.1', 'model_type': 'megatron-bert', 'tokenizer_type': 'SentencePieceTokenizer', 'vocab_size': 50048, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 2048, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Mar06_14-50-20_cd2c3b1980c7', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
91
+ 2024-03-06 14:50:49,740 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
92
+ 2024-03-06 14:50:49,740 INFO MainThread:34 [wandb_init.py:_pause_backend():437] pausing backend
93
+ 2024-03-06 14:51:08,770 INFO MainThread:34 [wandb_init.py:_resume_backend():442] resuming backend
94
+ 2024-03-06 14:51:08,772 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
95
+ 2024-03-06 14:51:08,773 INFO MainThread:34 [wandb_init.py:_pause_backend():437] pausing backend
96
+ 2024-03-06 14:51:12,599 INFO MainThread:34 [wandb_init.py:_resume_backend():442] resuming backend
97
+ 2024-03-06 14:51:13,748 INFO MainThread:34 [wandb_run.py:_config_callback():1343] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': None, 'finetuning_task': None, 'id2label': {0: 'Documentation Ambiguity', 1: 'Documentation Completeness', 2: 'Documentation Replicability', 3: 'Documentation Replication on Other Examples', 4: 'Inadequate Examples', 5: 'Lack of Alternative Solutions/Documentation', 6: 'Requesting (Additional) Documentation/Examples'}, 'label2id': {'Documentation Ambiguity': 0, 'Documentation Completeness': 1, 'Documentation Replicability': 2, 'Documentation Replication on Other Examples': 3, 'Inadequate Examples': 4, 'Lack of Alternative Solutions/Documentation': 5, 'Requesting (Additional) Documentation/Examples': 6}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'mmukh/SOBertBase', 'transformers_version': '4.38.1', 'model_type': 'megatron-bert', 'tokenizer_type': 'SentencePieceTokenizer', 'vocab_size': 50048, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 2048, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Mar06_14-51-12_cd2c3b1980c7', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
wandb/run-20240306_142408-og3d0ld1/files/config.yaml CHANGED
@@ -422,7 +422,7 @@ log_on_each_node:
422
  value: true
423
  logging_dir:
424
  desc: null
425
- value: /kaggle/working/runs/Mar06_14-43-14_cd2c3b1980c7
426
  logging_strategy:
427
  desc: null
428
  value: epoch
 
422
  value: true
423
  logging_dir:
424
  desc: null
425
+ value: /kaggle/working/runs/Mar06_14-51-12_cd2c3b1980c7
426
  logging_strategy:
427
  desc: null
428
  value: epoch
wandb/run-20240306_142408-og3d0ld1/files/output.log CHANGED
@@ -29,3 +29,10 @@ Class Weights: tensor([0.8491, 0.9698, 0.7866, 0.6530, 0.9310, 0.8578, 0.9526],
29
  Some weights of MegatronBertForSequenceClassification were not initialized from the model checkpoint at mmukh/SOBertBase and are newly initialized: ['bert.embeddings.token_type_embeddings.weight', 'bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
30
  You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
31
  Some weights of MegatronBertForSequenceClassification were not initialized from the model checkpoint at mmukh/SOBertBase and are newly initialized: ['bert.embeddings.token_type_embeddings.weight', 'bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
 
 
 
 
 
 
 
 
29
  Some weights of MegatronBertForSequenceClassification were not initialized from the model checkpoint at mmukh/SOBertBase and are newly initialized: ['bert.embeddings.token_type_embeddings.weight', 'bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
30
  You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
31
  Some weights of MegatronBertForSequenceClassification were not initialized from the model checkpoint at mmukh/SOBertBase and are newly initialized: ['bert.embeddings.token_type_embeddings.weight', 'bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
32
+ You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
33
+ Some weights of MegatronBertForSequenceClassification were not initialized from the model checkpoint at mmukh/SOBertBase and are newly initialized: ['bert.embeddings.token_type_embeddings.weight', 'bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
34
+ You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
35
+ Some weights of MegatronBertForSequenceClassification were not initialized from the model checkpoint at mmukh/SOBertBase and are newly initialized: ['bert.embeddings.token_type_embeddings.weight', 'bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
36
+ You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
37
+ Some weights of MegatronBertForSequenceClassification were not initialized from the model checkpoint at mmukh/SOBertBase and are newly initialized: ['bert.embeddings.token_type_embeddings.weight', 'bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
38
+ You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
wandb/run-20240306_142408-og3d0ld1/files/wandb-summary.json CHANGED
@@ -1 +1 @@
1
- {"train/loss": 1.7916, "train/grad_norm": 7.054403781890869, "train/learning_rate": 3.4482758620689656e-07, "train/epoch": 1.0, "train/global_step": 58, "_timestamp": 1709736226.5010476, "_runtime": 1177.8634026050568, "_step": 5, "eval/loss": 1.811458706855774, "eval/runtime": 1.6208, "eval/samples_per_second": 50.593, "eval/steps_per_second": 6.787, "train/train_runtime": 31.0436, "train/train_samples_per_second": 14.947, "train/train_steps_per_second": 1.868, "train/total_flos": 122089010380800.0, "train/train_loss": 1.791607692323882}
 
1
+ {"train/loss": 1.7916, "train/grad_norm": 7.054403781890869, "train/learning_rate": 3.4482758620689656e-07, "train/epoch": 1.0, "train/global_step": 58, "_timestamp": 1709736706.4396355, "_runtime": 1657.8019905090332, "_step": 10, "eval/loss": 1.811458706855774, "eval/runtime": 1.6651, "eval/samples_per_second": 49.246, "eval/steps_per_second": 6.606, "train/train_runtime": 32.7083, "train/train_samples_per_second": 14.186, "train/train_steps_per_second": 1.773, "train/total_flos": 122089010380800.0, "train/train_loss": 1.791607692323882, "eval/accuracy": 0.32926829268292684, "eval/precision": 0.15702844661708878, "eval/recall": 0.32926829268292684, "eval/f1": 0.20646817673609674}
wandb/run-20240306_142408-og3d0ld1/logs/debug-internal.log CHANGED
@@ -874,3 +874,346 @@
874
  2024-03-06 14:44:13,758 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
875
  2024-03-06 14:44:14,301 DEBUG SystemMonitor:133 [system_monitor.py:_start():172] Starting system metrics aggregation loop
876
  2024-03-06 14:44:14,303 DEBUG SenderThread:133 [sender.py:send():382] send: stats
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
874
  2024-03-06 14:44:13,758 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
875
  2024-03-06 14:44:14,301 DEBUG SystemMonitor:133 [system_monitor.py:_start():172] Starting system metrics aggregation loop
876
  2024-03-06 14:44:14,303 DEBUG SenderThread:133 [sender.py:send():382] send: stats
877
+ 2024-03-06 14:44:15,387 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: pause
878
+ 2024-03-06 14:44:15,387 INFO HandlerThread:133 [handler.py:handle_request_pause():708] stopping system metrics thread
879
+ 2024-03-06 14:44:15,387 INFO HandlerThread:133 [system_monitor.py:finish():203] Stopping system monitor
880
+ 2024-03-06 14:44:15,387 DEBUG SystemMonitor:133 [system_monitor.py:_start():179] Finished system metrics aggregation loop
881
+ 2024-03-06 14:44:15,387 DEBUG SystemMonitor:133 [system_monitor.py:_start():183] Publishing last batch of metrics
882
+ 2024-03-06 14:44:15,388 INFO HandlerThread:133 [interfaces.py:finish():202] Joined cpu monitor
883
+ 2024-03-06 14:44:15,389 INFO HandlerThread:133 [interfaces.py:finish():202] Joined disk monitor
884
+ 2024-03-06 14:44:15,394 INFO HandlerThread:133 [interfaces.py:finish():202] Joined gpu monitor
885
+ 2024-03-06 14:44:15,395 INFO HandlerThread:133 [interfaces.py:finish():202] Joined memory monitor
886
+ 2024-03-06 14:44:15,395 INFO HandlerThread:133 [interfaces.py:finish():202] Joined network monitor
887
+ 2024-03-06 14:44:15,395 DEBUG SenderThread:133 [sender.py:send():382] send: stats
888
+ 2024-03-06 14:44:15,630 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
889
+ 2024-03-06 14:44:19,396 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
890
+ 2024-03-06 14:44:20,631 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
891
+ 2024-03-06 14:44:24,398 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
892
+ 2024-03-06 14:44:25,633 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
893
+ 2024-03-06 14:44:29,399 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
894
+ 2024-03-06 14:44:30,634 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
895
+ 2024-03-06 14:44:34,400 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
896
+ 2024-03-06 14:44:35,635 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
897
+ 2024-03-06 14:44:39,401 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
898
+ 2024-03-06 14:44:40,636 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
899
+ 2024-03-06 14:44:44,403 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
900
+ 2024-03-06 14:44:45,637 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
901
+ 2024-03-06 14:44:49,404 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
902
+ 2024-03-06 14:44:50,639 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
903
+ 2024-03-06 14:44:54,405 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
904
+ 2024-03-06 14:44:55,640 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
905
+ 2024-03-06 14:44:59,407 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
906
+ 2024-03-06 14:45:00,641 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
907
+ 2024-03-06 14:45:04,407 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
908
+ 2024-03-06 14:45:05,642 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
909
+ 2024-03-06 14:45:09,409 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
910
+ 2024-03-06 14:45:09,984 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: resume
911
+ 2024-03-06 14:45:09,984 INFO HandlerThread:133 [handler.py:handle_request_resume():699] starting system metrics thread
912
+ 2024-03-06 14:45:09,984 INFO HandlerThread:133 [system_monitor.py:start():194] Starting system monitor
913
+ 2024-03-06 14:45:09,984 INFO SystemMonitor:133 [system_monitor.py:_start():158] Starting system asset monitoring threads
914
+ 2024-03-06 14:45:09,984 INFO SystemMonitor:133 [interfaces.py:start():190] Started cpu monitoring
915
+ 2024-03-06 14:45:09,985 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: pause
916
+ 2024-03-06 14:45:09,986 INFO HandlerThread:133 [handler.py:handle_request_pause():708] stopping system metrics thread
917
+ 2024-03-06 14:45:09,986 INFO HandlerThread:133 [system_monitor.py:finish():203] Stopping system monitor
918
+ 2024-03-06 14:45:09,986 INFO SystemMonitor:133 [interfaces.py:start():190] Started disk monitoring
919
+ 2024-03-06 14:45:09,986 DEBUG SystemMonitor:133 [system_monitor.py:_start():172] Starting system metrics aggregation loop
920
+ 2024-03-06 14:45:09,986 DEBUG SystemMonitor:133 [system_monitor.py:_start():179] Finished system metrics aggregation loop
921
+ 2024-03-06 14:45:09,986 INFO HandlerThread:133 [interfaces.py:finish():202] Joined cpu monitor
922
+ 2024-03-06 14:45:09,987 DEBUG SystemMonitor:133 [system_monitor.py:_start():183] Publishing last batch of metrics
923
+ 2024-03-06 14:45:09,989 INFO HandlerThread:133 [interfaces.py:finish():202] Joined disk monitor
924
+ 2024-03-06 14:45:09,989 DEBUG SenderThread:133 [sender.py:send():382] send: stats
925
+ 2024-03-06 14:45:10,643 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
926
+ 2024-03-06 14:45:14,990 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
927
+ 2024-03-06 14:45:15,645 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
928
+ 2024-03-06 14:45:19,992 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
929
+ 2024-03-06 14:45:20,646 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
930
+ 2024-03-06 14:45:24,993 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
931
+ 2024-03-06 14:45:25,647 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
932
+ 2024-03-06 14:45:26,835 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: resume
933
+ 2024-03-06 14:45:26,836 INFO HandlerThread:133 [handler.py:handle_request_resume():699] starting system metrics thread
934
+ 2024-03-06 14:45:26,836 INFO HandlerThread:133 [system_monitor.py:start():194] Starting system monitor
935
+ 2024-03-06 14:45:26,836 INFO SystemMonitor:133 [system_monitor.py:_start():158] Starting system asset monitoring threads
936
+ 2024-03-06 14:45:26,837 INFO SystemMonitor:133 [interfaces.py:start():190] Started cpu monitoring
937
+ 2024-03-06 14:45:26,837 INFO SystemMonitor:133 [interfaces.py:start():190] Started disk monitoring
938
+ 2024-03-06 14:45:26,839 INFO SystemMonitor:133 [interfaces.py:start():190] Started gpu monitoring
939
+ 2024-03-06 14:45:26,840 INFO SystemMonitor:133 [interfaces.py:start():190] Started memory monitoring
940
+ 2024-03-06 14:45:26,840 INFO SystemMonitor:133 [interfaces.py:start():190] Started network monitoring
941
+ 2024-03-06 14:45:27,960 DEBUG SenderThread:133 [sender.py:send():382] send: config
942
+ 2024-03-06 14:45:27,962 DEBUG SenderThread:133 [sender.py:send():382] send: metric
943
+ 2024-03-06 14:45:27,962 DEBUG SenderThread:133 [sender.py:send():382] send: metric
944
+ 2024-03-06 14:45:27,962 WARNING SenderThread:133 [sender.py:send_metric():1354] Seen metric with glob (shouldn't happen)
945
+ 2024-03-06 14:45:29,624 INFO Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/output.log
946
+ 2024-03-06 14:45:30,774 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
947
+ 2024-03-06 14:45:30,963 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
948
+ 2024-03-06 14:45:35,775 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
949
+ 2024-03-06 14:45:35,964 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
950
+ 2024-03-06 14:45:40,777 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
951
+ 2024-03-06 14:45:40,965 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
952
+ 2024-03-06 14:45:45,784 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
953
+ 2024-03-06 14:45:45,966 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
954
+ 2024-03-06 14:45:50,785 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
955
+ 2024-03-06 14:45:50,972 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
956
+ 2024-03-06 14:45:51,633 INFO Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/config.yaml
957
+ 2024-03-06 14:45:54,625 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: partial_history
958
+ 2024-03-06 14:45:54,626 DEBUG SenderThread:133 [sender.py:send():382] send: history
959
+ 2024-03-06 14:45:54,626 DEBUG SenderThread:133 [sender.py:send_request():409] send_request: summary_record
960
+ 2024-03-06 14:45:54,627 INFO SenderThread:133 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end
961
+ 2024-03-06 14:45:54,634 INFO Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/wandb-summary.json
962
+ 2024-03-06 14:45:55,786 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
963
+ 2024-03-06 14:45:56,513 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: pause
964
+ 2024-03-06 14:45:56,513 INFO HandlerThread:133 [handler.py:handle_request_pause():708] stopping system metrics thread
965
+ 2024-03-06 14:45:56,513 INFO HandlerThread:133 [system_monitor.py:finish():203] Stopping system monitor
966
+ 2024-03-06 14:45:56,514 DEBUG SystemMonitor:133 [system_monitor.py:_start():172] Starting system metrics aggregation loop
967
+ 2024-03-06 14:45:56,514 DEBUG SystemMonitor:133 [system_monitor.py:_start():179] Finished system metrics aggregation loop
968
+ 2024-03-06 14:45:56,514 DEBUG SystemMonitor:133 [system_monitor.py:_start():183] Publishing last batch of metrics
969
+ 2024-03-06 14:45:56,514 INFO HandlerThread:133 [interfaces.py:finish():202] Joined cpu monitor
970
+ 2024-03-06 14:45:56,515 INFO HandlerThread:133 [interfaces.py:finish():202] Joined disk monitor
971
+ 2024-03-06 14:45:56,521 INFO HandlerThread:133 [interfaces.py:finish():202] Joined gpu monitor
972
+ 2024-03-06 14:45:56,521 INFO HandlerThread:133 [interfaces.py:finish():202] Joined memory monitor
973
+ 2024-03-06 14:45:56,521 INFO HandlerThread:133 [interfaces.py:finish():202] Joined network monitor
974
+ 2024-03-06 14:45:56,522 DEBUG SenderThread:133 [sender.py:send():382] send: stats
975
+ 2024-03-06 14:45:56,522 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
976
+ 2024-03-06 14:46:00,787 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
977
+ 2024-03-06 14:46:01,523 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
978
+ 2024-03-06 14:46:05,788 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
979
+ 2024-03-06 14:46:06,525 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
980
+ 2024-03-06 14:46:10,789 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
981
+ 2024-03-06 14:46:11,526 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
982
+ 2024-03-06 14:46:15,790 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
983
+ 2024-03-06 14:46:16,527 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
984
+ 2024-03-06 14:46:20,792 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
985
+ 2024-03-06 14:46:21,528 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
986
+ 2024-03-06 14:46:25,793 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
987
+ 2024-03-06 14:46:26,529 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
988
+ 2024-03-06 14:46:30,794 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
989
+ 2024-03-06 14:46:31,530 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
990
+ 2024-03-06 14:46:35,795 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
991
+ 2024-03-06 14:46:36,531 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
992
+ 2024-03-06 14:46:40,797 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
993
+ 2024-03-06 14:46:41,533 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
994
+ 2024-03-06 14:46:45,798 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
995
+ 2024-03-06 14:46:46,534 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
996
+ 2024-03-06 14:46:50,799 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
997
+ 2024-03-06 14:46:51,535 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
998
+ 2024-03-06 14:46:55,800 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
999
+ 2024-03-06 14:46:56,536 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1000
+ 2024-03-06 14:47:00,801 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1001
+ 2024-03-06 14:47:01,537 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1002
+ 2024-03-06 14:47:05,802 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1003
+ 2024-03-06 14:47:06,538 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1004
+ 2024-03-06 14:47:10,804 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1005
+ 2024-03-06 14:47:11,540 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1006
+ 2024-03-06 14:47:15,805 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1007
+ 2024-03-06 14:47:16,541 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1008
+ 2024-03-06 14:47:20,806 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1009
+ 2024-03-06 14:47:21,542 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1010
+ 2024-03-06 14:47:25,807 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1011
+ 2024-03-06 14:47:26,543 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1012
+ 2024-03-06 14:47:30,808 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1013
+ 2024-03-06 14:47:31,544 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1014
+ 2024-03-06 14:47:35,809 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1015
+ 2024-03-06 14:47:36,546 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1016
+ 2024-03-06 14:47:40,810 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1017
+ 2024-03-06 14:47:41,547 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1018
+ 2024-03-06 14:47:45,811 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1019
+ 2024-03-06 14:47:46,548 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1020
+ 2024-03-06 14:47:50,813 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1021
+ 2024-03-06 14:47:51,549 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1022
+ 2024-03-06 14:47:55,814 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1023
+ 2024-03-06 14:47:56,550 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1024
+ 2024-03-06 14:48:00,815 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1025
+ 2024-03-06 14:48:01,551 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1026
+ 2024-03-06 14:48:05,816 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1027
+ 2024-03-06 14:48:06,552 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1028
+ 2024-03-06 14:48:10,817 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1029
+ 2024-03-06 14:48:11,554 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1030
+ 2024-03-06 14:48:15,818 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1031
+ 2024-03-06 14:48:16,555 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1032
+ 2024-03-06 14:48:20,819 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1033
+ 2024-03-06 14:48:21,556 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1034
+ 2024-03-06 14:48:25,820 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1035
+ 2024-03-06 14:48:26,557 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1036
+ 2024-03-06 14:48:30,821 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1037
+ 2024-03-06 14:48:31,559 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1038
+ 2024-03-06 14:48:35,822 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1039
+ 2024-03-06 14:48:36,560 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1040
+ 2024-03-06 14:48:40,823 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1041
+ 2024-03-06 14:48:41,561 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1042
+ 2024-03-06 14:48:45,824 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1043
+ 2024-03-06 14:48:46,562 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1044
+ 2024-03-06 14:48:50,825 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1045
+ 2024-03-06 14:48:51,563 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1046
+ 2024-03-06 14:48:55,827 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1047
+ 2024-03-06 14:48:56,564 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1048
+ 2024-03-06 14:49:00,828 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1049
+ 2024-03-06 14:49:01,565 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1050
+ 2024-03-06 14:49:05,829 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1051
+ 2024-03-06 14:49:06,566 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1052
+ 2024-03-06 14:49:10,830 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1053
+ 2024-03-06 14:49:11,567 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1054
+ 2024-03-06 14:49:15,831 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1055
+ 2024-03-06 14:49:16,568 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1056
+ 2024-03-06 14:49:20,832 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1057
+ 2024-03-06 14:49:21,569 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1058
+ 2024-03-06 14:49:25,834 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1059
+ 2024-03-06 14:49:26,570 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1060
+ 2024-03-06 14:49:30,835 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1061
+ 2024-03-06 14:49:31,571 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1062
+ 2024-03-06 14:49:35,836 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1063
+ 2024-03-06 14:49:36,572 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1064
+ 2024-03-06 14:49:40,837 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1065
+ 2024-03-06 14:49:41,573 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1066
+ 2024-03-06 14:49:45,838 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1067
+ 2024-03-06 14:49:46,574 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1068
+ 2024-03-06 14:49:50,839 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1069
+ 2024-03-06 14:49:51,575 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1070
+ 2024-03-06 14:49:55,841 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1071
+ 2024-03-06 14:49:56,576 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1072
+ 2024-03-06 14:50:00,842 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1073
+ 2024-03-06 14:50:01,577 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1074
+ 2024-03-06 14:50:05,843 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1075
+ 2024-03-06 14:50:06,578 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1076
+ 2024-03-06 14:50:10,844 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1077
+ 2024-03-06 14:50:11,580 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1078
+ 2024-03-06 14:50:11,638 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: resume
1079
+ 2024-03-06 14:50:11,639 INFO HandlerThread:133 [handler.py:handle_request_resume():699] starting system metrics thread
1080
+ 2024-03-06 14:50:11,639 INFO HandlerThread:133 [system_monitor.py:start():194] Starting system monitor
1081
+ 2024-03-06 14:50:11,639 INFO SystemMonitor:133 [system_monitor.py:_start():158] Starting system asset monitoring threads
1082
+ 2024-03-06 14:50:11,639 INFO SystemMonitor:133 [interfaces.py:start():190] Started cpu monitoring
1083
+ 2024-03-06 14:50:11,640 INFO SystemMonitor:133 [interfaces.py:start():190] Started disk monitoring
1084
+ 2024-03-06 14:50:11,641 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: pause
1085
+ 2024-03-06 14:50:11,641 INFO HandlerThread:133 [handler.py:handle_request_pause():708] stopping system metrics thread
1086
+ 2024-03-06 14:50:11,641 INFO HandlerThread:133 [system_monitor.py:finish():203] Stopping system monitor
1087
+ 2024-03-06 14:50:11,642 INFO HandlerThread:133 [interfaces.py:finish():202] Joined cpu monitor
1088
+ 2024-03-06 14:50:11,642 INFO SystemMonitor:133 [interfaces.py:start():190] Started gpu monitoring
1089
+ 2024-03-06 14:50:11,642 DEBUG SystemMonitor:133 [system_monitor.py:_start():172] Starting system metrics aggregation loop
1090
+ 2024-03-06 14:50:11,642 DEBUG SystemMonitor:133 [system_monitor.py:_start():179] Finished system metrics aggregation loop
1091
+ 2024-03-06 14:50:11,643 DEBUG SystemMonitor:133 [system_monitor.py:_start():183] Publishing last batch of metrics
1092
+ 2024-03-06 14:50:11,645 INFO HandlerThread:133 [interfaces.py:finish():202] Joined disk monitor
1093
+ 2024-03-06 14:50:11,653 INFO HandlerThread:133 [interfaces.py:finish():202] Joined gpu monitor
1094
+ 2024-03-06 14:50:11,654 DEBUG SenderThread:133 [sender.py:send():382] send: stats
1095
+ 2024-03-06 14:50:15,846 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1096
+ 2024-03-06 14:50:16,655 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1097
+ 2024-03-06 14:50:19,879 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: resume
1098
+ 2024-03-06 14:50:19,879 INFO HandlerThread:133 [handler.py:handle_request_resume():699] starting system metrics thread
1099
+ 2024-03-06 14:50:19,879 INFO HandlerThread:133 [system_monitor.py:start():194] Starting system monitor
1100
+ 2024-03-06 14:50:19,879 INFO SystemMonitor:133 [system_monitor.py:_start():158] Starting system asset monitoring threads
1101
+ 2024-03-06 14:50:19,880 INFO SystemMonitor:133 [interfaces.py:start():190] Started cpu monitoring
1102
+ 2024-03-06 14:50:19,881 INFO SystemMonitor:133 [interfaces.py:start():190] Started disk monitoring
1103
+ 2024-03-06 14:50:19,882 INFO SystemMonitor:133 [interfaces.py:start():190] Started gpu monitoring
1104
+ 2024-03-06 14:50:19,883 INFO SystemMonitor:133 [interfaces.py:start():190] Started memory monitoring
1105
+ 2024-03-06 14:50:19,885 INFO SystemMonitor:133 [interfaces.py:start():190] Started network monitoring
1106
+ 2024-03-06 14:50:20,901 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1107
+ 2024-03-06 14:50:21,115 DEBUG SenderThread:133 [sender.py:send():382] send: config
1108
+ 2024-03-06 14:50:21,116 DEBUG SenderThread:133 [sender.py:send():382] send: metric
1109
+ 2024-03-06 14:50:21,117 DEBUG SenderThread:133 [sender.py:send():382] send: metric
1110
+ 2024-03-06 14:50:21,117 WARNING SenderThread:133 [sender.py:send_metric():1354] Seen metric with glob (shouldn't happen)
1111
+ 2024-03-06 14:50:21,743 INFO Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/output.log
1112
+ 2024-03-06 14:50:22,122 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1113
+ 2024-03-06 14:50:22,744 INFO Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/config.yaml
1114
+ 2024-03-06 14:50:25,904 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1115
+ 2024-03-06 14:50:27,366 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1116
+ 2024-03-06 14:50:30,905 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1117
+ 2024-03-06 14:50:32,367 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1118
+ 2024-03-06 14:50:35,907 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1119
+ 2024-03-06 14:50:37,368 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1120
+ 2024-03-06 14:50:40,908 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1121
+ 2024-03-06 14:50:42,369 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1122
+ 2024-03-06 14:50:45,909 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1123
+ 2024-03-06 14:50:47,370 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1124
+ 2024-03-06 14:50:47,806 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: partial_history
1125
+ 2024-03-06 14:50:47,807 DEBUG SenderThread:133 [sender.py:send():382] send: history
1126
+ 2024-03-06 14:50:47,807 DEBUG SenderThread:133 [sender.py:send_request():409] send_request: summary_record
1127
+ 2024-03-06 14:50:47,810 INFO SenderThread:133 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end
1128
+ 2024-03-06 14:50:48,754 INFO Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/wandb-summary.json
1129
+ 2024-03-06 14:50:49,741 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: pause
1130
+ 2024-03-06 14:50:49,741 INFO HandlerThread:133 [handler.py:handle_request_pause():708] stopping system metrics thread
1131
+ 2024-03-06 14:50:49,741 INFO HandlerThread:133 [system_monitor.py:finish():203] Stopping system monitor
1132
+ 2024-03-06 14:50:49,742 DEBUG SystemMonitor:133 [system_monitor.py:_start():172] Starting system metrics aggregation loop
1133
+ 2024-03-06 14:50:49,742 DEBUG SystemMonitor:133 [system_monitor.py:_start():179] Finished system metrics aggregation loop
1134
+ 2024-03-06 14:50:49,742 DEBUG SystemMonitor:133 [system_monitor.py:_start():183] Publishing last batch of metrics
1135
+ 2024-03-06 14:50:49,742 INFO HandlerThread:133 [interfaces.py:finish():202] Joined cpu monitor
1136
+ 2024-03-06 14:50:49,743 INFO HandlerThread:133 [interfaces.py:finish():202] Joined disk monitor
1137
+ 2024-03-06 14:50:49,749 INFO HandlerThread:133 [interfaces.py:finish():202] Joined gpu monitor
1138
+ 2024-03-06 14:50:49,749 INFO HandlerThread:133 [interfaces.py:finish():202] Joined memory monitor
1139
+ 2024-03-06 14:50:49,749 INFO HandlerThread:133 [interfaces.py:finish():202] Joined network monitor
1140
+ 2024-03-06 14:50:49,750 DEBUG SenderThread:133 [sender.py:send():382] send: stats
1141
+ 2024-03-06 14:50:50,910 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1142
+ 2024-03-06 14:50:52,751 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1143
+ 2024-03-06 14:50:55,911 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1144
+ 2024-03-06 14:50:57,752 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1145
+ 2024-03-06 14:51:00,913 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1146
+ 2024-03-06 14:51:02,754 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1147
+ 2024-03-06 14:51:05,914 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1148
+ 2024-03-06 14:51:07,755 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1149
+ 2024-03-06 14:51:08,770 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: resume
1150
+ 2024-03-06 14:51:08,770 INFO HandlerThread:133 [handler.py:handle_request_resume():699] starting system metrics thread
1151
+ 2024-03-06 14:51:08,771 INFO HandlerThread:133 [system_monitor.py:start():194] Starting system monitor
1152
+ 2024-03-06 14:51:08,771 INFO SystemMonitor:133 [system_monitor.py:_start():158] Starting system asset monitoring threads
1153
+ 2024-03-06 14:51:08,771 INFO SystemMonitor:133 [interfaces.py:start():190] Started cpu monitoring
1154
+ 2024-03-06 14:51:08,772 INFO SystemMonitor:133 [interfaces.py:start():190] Started disk monitoring
1155
+ 2024-03-06 14:51:08,773 INFO SystemMonitor:133 [interfaces.py:start():190] Started gpu monitoring
1156
+ 2024-03-06 14:51:08,775 INFO SystemMonitor:133 [interfaces.py:start():190] Started memory monitoring
1157
+ 2024-03-06 14:51:08,777 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: pause
1158
+ 2024-03-06 14:51:08,779 INFO HandlerThread:133 [handler.py:handle_request_pause():708] stopping system metrics thread
1159
+ 2024-03-06 14:51:08,779 INFO HandlerThread:133 [system_monitor.py:finish():203] Stopping system monitor
1160
+ 2024-03-06 14:51:08,779 INFO SystemMonitor:133 [interfaces.py:start():190] Started network monitoring
1161
+ 2024-03-06 14:51:08,779 DEBUG SystemMonitor:133 [system_monitor.py:_start():172] Starting system metrics aggregation loop
1162
+ 2024-03-06 14:51:08,779 DEBUG SystemMonitor:133 [system_monitor.py:_start():179] Finished system metrics aggregation loop
1163
+ 2024-03-06 14:51:08,780 DEBUG SystemMonitor:133 [system_monitor.py:_start():183] Publishing last batch of metrics
1164
+ 2024-03-06 14:51:08,780 INFO HandlerThread:133 [interfaces.py:finish():202] Joined cpu monitor
1165
+ 2024-03-06 14:51:08,781 INFO HandlerThread:133 [interfaces.py:finish():202] Joined disk monitor
1166
+ 2024-03-06 14:51:08,791 INFO HandlerThread:133 [interfaces.py:finish():202] Joined gpu monitor
1167
+ 2024-03-06 14:51:08,791 INFO HandlerThread:133 [interfaces.py:finish():202] Joined memory monitor
1168
+ 2024-03-06 14:51:08,791 INFO HandlerThread:133 [interfaces.py:finish():202] Joined network monitor
1169
+ 2024-03-06 14:51:08,792 DEBUG SenderThread:133 [sender.py:send():382] send: stats
1170
+ 2024-03-06 14:51:10,915 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1171
+ 2024-03-06 14:51:12,600 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: resume
1172
+ 2024-03-06 14:51:12,600 INFO HandlerThread:133 [handler.py:handle_request_resume():699] starting system metrics thread
1173
+ 2024-03-06 14:51:12,600 INFO HandlerThread:133 [system_monitor.py:start():194] Starting system monitor
1174
+ 2024-03-06 14:51:12,600 INFO SystemMonitor:133 [system_monitor.py:_start():158] Starting system asset monitoring threads
1175
+ 2024-03-06 14:51:12,601 INFO SystemMonitor:133 [interfaces.py:start():190] Started cpu monitoring
1176
+ 2024-03-06 14:51:12,602 INFO SystemMonitor:133 [interfaces.py:start():190] Started disk monitoring
1177
+ 2024-03-06 14:51:12,605 INFO SystemMonitor:133 [interfaces.py:start():190] Started gpu monitoring
1178
+ 2024-03-06 14:51:12,605 INFO SystemMonitor:133 [interfaces.py:start():190] Started memory monitoring
1179
+ 2024-03-06 14:51:12,606 INFO SystemMonitor:133 [interfaces.py:start():190] Started network monitoring
1180
+ 2024-03-06 14:51:12,793 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1181
+ 2024-03-06 14:51:13,753 DEBUG SenderThread:133 [sender.py:send():382] send: config
1182
+ 2024-03-06 14:51:13,755 DEBUG SenderThread:133 [sender.py:send():382] send: metric
1183
+ 2024-03-06 14:51:13,755 DEBUG SenderThread:133 [sender.py:send():382] send: metric
1184
+ 2024-03-06 14:51:13,755 WARNING SenderThread:133 [sender.py:send_metric():1354] Seen metric with glob (shouldn't happen)
1185
+ 2024-03-06 14:51:13,763 INFO Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/output.log
1186
+ 2024-03-06 14:51:15,919 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1187
+ 2024-03-06 14:51:18,756 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1188
+ 2024-03-06 14:51:20,922 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1189
+ 2024-03-06 14:51:23,762 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1190
+ 2024-03-06 14:51:24,767 INFO Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/config.yaml
1191
+ 2024-03-06 14:51:25,923 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1192
+ 2024-03-06 14:51:29,020 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1193
+ 2024-03-06 14:51:30,925 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1194
+ 2024-03-06 14:51:34,021 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1195
+ 2024-03-06 14:51:35,928 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1196
+ 2024-03-06 14:51:39,022 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1197
+ 2024-03-06 14:51:40,429 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: partial_history
1198
+ 2024-03-06 14:51:40,430 DEBUG SenderThread:133 [sender.py:send():382] send: history
1199
+ 2024-03-06 14:51:40,431 DEBUG SenderThread:133 [sender.py:send_request():409] send_request: summary_record
1200
+ 2024-03-06 14:51:40,432 INFO SenderThread:133 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end
1201
+ 2024-03-06 14:51:40,773 INFO Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/wandb-summary.json
1202
+ 2024-03-06 14:51:40,942 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1203
+ 2024-03-06 14:51:42,098 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: partial_history
1204
+ 2024-03-06 14:51:42,101 DEBUG SenderThread:133 [sender.py:send():382] send: metric
1205
+ 2024-03-06 14:51:42,101 DEBUG SenderThread:133 [sender.py:send():382] send: metric
1206
+ 2024-03-06 14:51:42,102 DEBUG SenderThread:133 [sender.py:send():382] send: metric
1207
+ 2024-03-06 14:51:42,102 DEBUG SenderThread:133 [sender.py:send():382] send: metric
1208
+ 2024-03-06 14:51:42,102 DEBUG SenderThread:133 [sender.py:send():382] send: history
1209
+ 2024-03-06 14:51:42,102 DEBUG SenderThread:133 [sender.py:send_request():409] send_request: summary_record
1210
+ 2024-03-06 14:51:42,103 INFO SenderThread:133 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end
1211
+ 2024-03-06 14:51:42,774 INFO Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/wandb-summary.json
1212
+ 2024-03-06 14:51:43,775 INFO Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/output.log
1213
+ 2024-03-06 14:51:44,104 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
1214
+ 2024-03-06 14:51:46,440 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: partial_history
1215
+ 2024-03-06 14:51:46,441 DEBUG SenderThread:133 [sender.py:send():382] send: history
1216
+ 2024-03-06 14:51:46,442 DEBUG SenderThread:133 [sender.py:send_request():409] send_request: summary_record
1217
+ 2024-03-06 14:51:46,442 INFO SenderThread:133 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end
1218
+ 2024-03-06 14:51:46,678 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
1219
+ 2024-03-06 14:51:46,776 INFO Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/wandb-summary.json
wandb/run-20240306_142408-og3d0ld1/logs/debug.log CHANGED
@@ -74,3 +74,24 @@ config: {}
74
  2024-03-06 14:40:45,818 INFO MainThread:34 [wandb_init.py:_pause_backend():437] pausing backend
75
  2024-03-06 14:43:14,295 INFO MainThread:34 [wandb_init.py:_resume_backend():442] resuming backend
76
  2024-03-06 14:43:15,473 INFO MainThread:34 [wandb_run.py:_config_callback():1343] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': None, 'finetuning_task': None, 'id2label': {0: 'Documentation Ambiguity', 1: 'Documentation Completeness', 2: 'Documentation Replicability', 3: 'Documentation Replication on Other Examples', 4: 'Inadequate Examples', 5: 'Lack of Alternative Solutions/Documentation', 6: 'Requesting (Additional) Documentation/Examples'}, 'label2id': {'Documentation Ambiguity': 0, 'Documentation Completeness': 1, 'Documentation Replicability': 2, 'Documentation Replication on Other Examples': 3, 'Inadequate Examples': 4, 'Lack of Alternative Solutions/Documentation': 5, 'Requesting (Additional) Documentation/Examples': 6}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'mmukh/SOBertBase', 'transformers_version': '4.38.1', 'model_type': 'megatron-bert', 'tokenizer_type': 'SentencePieceTokenizer', 'vocab_size': 50048, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 2048, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Mar06_14-43-14_cd2c3b1980c7', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  2024-03-06 14:40:45,818 INFO MainThread:34 [wandb_init.py:_pause_backend():437] pausing backend
75
  2024-03-06 14:43:14,295 INFO MainThread:34 [wandb_init.py:_resume_backend():442] resuming backend
76
  2024-03-06 14:43:15,473 INFO MainThread:34 [wandb_run.py:_config_callback():1343] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': None, 'finetuning_task': None, 'id2label': {0: 'Documentation Ambiguity', 1: 'Documentation Completeness', 2: 'Documentation Replicability', 3: 'Documentation Replication on Other Examples', 4: 'Inadequate Examples', 5: 'Lack of Alternative Solutions/Documentation', 6: 'Requesting (Additional) Documentation/Examples'}, 'label2id': {'Documentation Ambiguity': 0, 'Documentation Completeness': 1, 'Documentation Replicability': 2, 'Documentation Replication on Other Examples': 3, 'Inadequate Examples': 4, 'Lack of Alternative Solutions/Documentation': 5, 'Requesting (Additional) Documentation/Examples': 6}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'mmukh/SOBertBase', 'transformers_version': '4.38.1', 'model_type': 'megatron-bert', 'tokenizer_type': 'SentencePieceTokenizer', 'vocab_size': 50048, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 2048, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Mar06_14-43-14_cd2c3b1980c7', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
77
+ 2024-03-06 14:44:15,386 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
78
+ 2024-03-06 14:44:15,386 INFO MainThread:34 [wandb_init.py:_pause_backend():437] pausing backend
79
+ 2024-03-06 14:45:09,983 INFO MainThread:34 [wandb_init.py:_resume_backend():442] resuming backend
80
+ 2024-03-06 14:45:09,985 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
81
+ 2024-03-06 14:45:09,985 INFO MainThread:34 [wandb_init.py:_pause_backend():437] pausing backend
82
+ 2024-03-06 14:45:26,835 INFO MainThread:34 [wandb_init.py:_resume_backend():442] resuming backend
83
+ 2024-03-06 14:45:27,956 INFO MainThread:34 [wandb_run.py:_config_callback():1343] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': None, 'finetuning_task': None, 'id2label': {0: 'Documentation Ambiguity', 1: 'Documentation Completeness', 2: 'Documentation Replicability', 3: 'Documentation Replication on Other Examples', 4: 'Inadequate Examples', 5: 'Lack of Alternative Solutions/Documentation', 6: 'Requesting (Additional) Documentation/Examples'}, 'label2id': {'Documentation Ambiguity': 0, 'Documentation Completeness': 1, 'Documentation Replicability': 2, 'Documentation Replication on Other Examples': 3, 'Inadequate Examples': 4, 'Lack of Alternative Solutions/Documentation': 5, 'Requesting (Additional) Documentation/Examples': 6}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'mmukh/SOBertBase', 'transformers_version': '4.38.1', 'model_type': 'megatron-bert', 'tokenizer_type': 'SentencePieceTokenizer', 'vocab_size': 50048, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 2048, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Mar06_14-45-27_cd2c3b1980c7', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
84
+ 2024-03-06 14:45:56,512 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
85
+ 2024-03-06 14:45:56,512 INFO MainThread:34 [wandb_init.py:_pause_backend():437] pausing backend
86
+ 2024-03-06 14:50:11,637 INFO MainThread:34 [wandb_init.py:_resume_backend():442] resuming backend
87
+ 2024-03-06 14:50:11,640 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
88
+ 2024-03-06 14:50:11,640 INFO MainThread:34 [wandb_init.py:_pause_backend():437] pausing backend
89
+ 2024-03-06 14:50:19,878 INFO MainThread:34 [wandb_init.py:_resume_backend():442] resuming backend
90
+ 2024-03-06 14:50:21,110 INFO MainThread:34 [wandb_run.py:_config_callback():1343] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': None, 'finetuning_task': None, 'id2label': {0: 'Documentation Ambiguity', 1: 'Documentation Completeness', 2: 'Documentation Replicability', 3: 'Documentation Replication on Other Examples', 4: 'Inadequate Examples', 5: 'Lack of Alternative Solutions/Documentation', 6: 'Requesting (Additional) Documentation/Examples'}, 'label2id': {'Documentation Ambiguity': 0, 'Documentation Completeness': 1, 'Documentation Replicability': 2, 'Documentation Replication on Other Examples': 3, 'Inadequate Examples': 4, 'Lack of Alternative Solutions/Documentation': 5, 'Requesting (Additional) Documentation/Examples': 6}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'mmukh/SOBertBase', 'transformers_version': '4.38.1', 'model_type': 'megatron-bert', 'tokenizer_type': 'SentencePieceTokenizer', 'vocab_size': 50048, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 2048, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Mar06_14-50-20_cd2c3b1980c7', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
91
+ 2024-03-06 14:50:49,740 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
92
+ 2024-03-06 14:50:49,740 INFO MainThread:34 [wandb_init.py:_pause_backend():437] pausing backend
93
+ 2024-03-06 14:51:08,770 INFO MainThread:34 [wandb_init.py:_resume_backend():442] resuming backend
94
+ 2024-03-06 14:51:08,772 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
95
+ 2024-03-06 14:51:08,773 INFO MainThread:34 [wandb_init.py:_pause_backend():437] pausing backend
96
+ 2024-03-06 14:51:12,599 INFO MainThread:34 [wandb_init.py:_resume_backend():442] resuming backend
97
+ 2024-03-06 14:51:13,748 INFO MainThread:34 [wandb_run.py:_config_callback():1343] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': None, 'finetuning_task': None, 'id2label': {0: 'Documentation Ambiguity', 1: 'Documentation Completeness', 2: 'Documentation Replicability', 3: 'Documentation Replication on Other Examples', 4: 'Inadequate Examples', 5: 'Lack of Alternative Solutions/Documentation', 6: 'Requesting (Additional) Documentation/Examples'}, 'label2id': {'Documentation Ambiguity': 0, 'Documentation Completeness': 1, 'Documentation Replicability': 2, 'Documentation Replication on Other Examples': 3, 'Inadequate Examples': 4, 'Lack of Alternative Solutions/Documentation': 5, 'Requesting (Additional) Documentation/Examples': 6}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'mmukh/SOBertBase', 'transformers_version': '4.38.1', 'model_type': 'megatron-bert', 'tokenizer_type': 'SentencePieceTokenizer', 'vocab_size': 50048, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 2048, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Mar06_14-51-12_cd2c3b1980c7', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
wandb/run-20240306_142408-og3d0ld1/run-og3d0ld1.wandb CHANGED
Binary files a/wandb/run-20240306_142408-og3d0ld1/run-og3d0ld1.wandb and b/wandb/run-20240306_142408-og3d0ld1/run-og3d0ld1.wandb differ