Training in progress, epoch 1
Browse files- runs/Mar06_14-45-27_cd2c3b1980c7/events.out.tfevents.1709736327.cd2c3b1980c7.34.4 +3 -0
- runs/Mar06_14-50-20_cd2c3b1980c7/events.out.tfevents.1709736621.cd2c3b1980c7.34.5 +3 -0
- runs/Mar06_14-51-12_cd2c3b1980c7/events.out.tfevents.1709736673.cd2c3b1980c7.34.6 +3 -0
- training_args.bin +1 -1
- wandb/debug-internal.log +343 -0
- wandb/debug.log +21 -0
- wandb/run-20240306_142408-og3d0ld1/files/config.yaml +1 -1
- wandb/run-20240306_142408-og3d0ld1/files/output.log +7 -0
- wandb/run-20240306_142408-og3d0ld1/files/wandb-summary.json +1 -1
- wandb/run-20240306_142408-og3d0ld1/logs/debug-internal.log +343 -0
- wandb/run-20240306_142408-og3d0ld1/logs/debug.log +21 -0
- wandb/run-20240306_142408-og3d0ld1/run-og3d0ld1.wandb +0 -0
runs/Mar06_14-45-27_cd2c3b1980c7/events.out.tfevents.1709736327.cd2c3b1980c7.34.4
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f645083ede524cda645409ba956fa9ad914ea27655e9c752b11826e464d69c84
|
| 3 |
+
size 5374
|
runs/Mar06_14-50-20_cd2c3b1980c7/events.out.tfevents.1709736621.cd2c3b1980c7.34.5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a301590a4808a5fa5fcfa1db4131ac3207c9712745d495cc156aacd42dbd2116
|
| 3 |
+
size 5374
|
runs/Mar06_14-51-12_cd2c3b1980c7/events.out.tfevents.1709736673.cd2c3b1980c7.34.6
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:773e13429b1bf17993993809acd0e6c51078afe5d27a291d147d444234f8251c
|
| 3 |
+
size 6185
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4856
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:170b6b5c6913afd023aae993ae4a337517f868260c0c93323166526cc988705b
|
| 3 |
size 4856
|
wandb/debug-internal.log
CHANGED
|
@@ -874,3 +874,346 @@
|
|
| 874 |
2024-03-06 14:44:13,758 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 875 |
2024-03-06 14:44:14,301 DEBUG SystemMonitor:133 [system_monitor.py:_start():172] Starting system metrics aggregation loop
|
| 876 |
2024-03-06 14:44:14,303 DEBUG SenderThread:133 [sender.py:send():382] send: stats
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 874 |
2024-03-06 14:44:13,758 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 875 |
2024-03-06 14:44:14,301 DEBUG SystemMonitor:133 [system_monitor.py:_start():172] Starting system metrics aggregation loop
|
| 876 |
2024-03-06 14:44:14,303 DEBUG SenderThread:133 [sender.py:send():382] send: stats
|
| 877 |
+
2024-03-06 14:44:15,387 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: pause
|
| 878 |
+
2024-03-06 14:44:15,387 INFO HandlerThread:133 [handler.py:handle_request_pause():708] stopping system metrics thread
|
| 879 |
+
2024-03-06 14:44:15,387 INFO HandlerThread:133 [system_monitor.py:finish():203] Stopping system monitor
|
| 880 |
+
2024-03-06 14:44:15,387 DEBUG SystemMonitor:133 [system_monitor.py:_start():179] Finished system metrics aggregation loop
|
| 881 |
+
2024-03-06 14:44:15,387 DEBUG SystemMonitor:133 [system_monitor.py:_start():183] Publishing last batch of metrics
|
| 882 |
+
2024-03-06 14:44:15,388 INFO HandlerThread:133 [interfaces.py:finish():202] Joined cpu monitor
|
| 883 |
+
2024-03-06 14:44:15,389 INFO HandlerThread:133 [interfaces.py:finish():202] Joined disk monitor
|
| 884 |
+
2024-03-06 14:44:15,394 INFO HandlerThread:133 [interfaces.py:finish():202] Joined gpu monitor
|
| 885 |
+
2024-03-06 14:44:15,395 INFO HandlerThread:133 [interfaces.py:finish():202] Joined memory monitor
|
| 886 |
+
2024-03-06 14:44:15,395 INFO HandlerThread:133 [interfaces.py:finish():202] Joined network monitor
|
| 887 |
+
2024-03-06 14:44:15,395 DEBUG SenderThread:133 [sender.py:send():382] send: stats
|
| 888 |
+
2024-03-06 14:44:15,630 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 889 |
+
2024-03-06 14:44:19,396 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 890 |
+
2024-03-06 14:44:20,631 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 891 |
+
2024-03-06 14:44:24,398 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 892 |
+
2024-03-06 14:44:25,633 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 893 |
+
2024-03-06 14:44:29,399 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 894 |
+
2024-03-06 14:44:30,634 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 895 |
+
2024-03-06 14:44:34,400 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 896 |
+
2024-03-06 14:44:35,635 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 897 |
+
2024-03-06 14:44:39,401 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 898 |
+
2024-03-06 14:44:40,636 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 899 |
+
2024-03-06 14:44:44,403 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 900 |
+
2024-03-06 14:44:45,637 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 901 |
+
2024-03-06 14:44:49,404 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 902 |
+
2024-03-06 14:44:50,639 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 903 |
+
2024-03-06 14:44:54,405 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 904 |
+
2024-03-06 14:44:55,640 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 905 |
+
2024-03-06 14:44:59,407 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 906 |
+
2024-03-06 14:45:00,641 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 907 |
+
2024-03-06 14:45:04,407 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 908 |
+
2024-03-06 14:45:05,642 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 909 |
+
2024-03-06 14:45:09,409 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 910 |
+
2024-03-06 14:45:09,984 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: resume
|
| 911 |
+
2024-03-06 14:45:09,984 INFO HandlerThread:133 [handler.py:handle_request_resume():699] starting system metrics thread
|
| 912 |
+
2024-03-06 14:45:09,984 INFO HandlerThread:133 [system_monitor.py:start():194] Starting system monitor
|
| 913 |
+
2024-03-06 14:45:09,984 INFO SystemMonitor:133 [system_monitor.py:_start():158] Starting system asset monitoring threads
|
| 914 |
+
2024-03-06 14:45:09,984 INFO SystemMonitor:133 [interfaces.py:start():190] Started cpu monitoring
|
| 915 |
+
2024-03-06 14:45:09,985 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: pause
|
| 916 |
+
2024-03-06 14:45:09,986 INFO HandlerThread:133 [handler.py:handle_request_pause():708] stopping system metrics thread
|
| 917 |
+
2024-03-06 14:45:09,986 INFO HandlerThread:133 [system_monitor.py:finish():203] Stopping system monitor
|
| 918 |
+
2024-03-06 14:45:09,986 INFO SystemMonitor:133 [interfaces.py:start():190] Started disk monitoring
|
| 919 |
+
2024-03-06 14:45:09,986 DEBUG SystemMonitor:133 [system_monitor.py:_start():172] Starting system metrics aggregation loop
|
| 920 |
+
2024-03-06 14:45:09,986 DEBUG SystemMonitor:133 [system_monitor.py:_start():179] Finished system metrics aggregation loop
|
| 921 |
+
2024-03-06 14:45:09,986 INFO HandlerThread:133 [interfaces.py:finish():202] Joined cpu monitor
|
| 922 |
+
2024-03-06 14:45:09,987 DEBUG SystemMonitor:133 [system_monitor.py:_start():183] Publishing last batch of metrics
|
| 923 |
+
2024-03-06 14:45:09,989 INFO HandlerThread:133 [interfaces.py:finish():202] Joined disk monitor
|
| 924 |
+
2024-03-06 14:45:09,989 DEBUG SenderThread:133 [sender.py:send():382] send: stats
|
| 925 |
+
2024-03-06 14:45:10,643 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 926 |
+
2024-03-06 14:45:14,990 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 927 |
+
2024-03-06 14:45:15,645 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 928 |
+
2024-03-06 14:45:19,992 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 929 |
+
2024-03-06 14:45:20,646 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 930 |
+
2024-03-06 14:45:24,993 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 931 |
+
2024-03-06 14:45:25,647 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 932 |
+
2024-03-06 14:45:26,835 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: resume
|
| 933 |
+
2024-03-06 14:45:26,836 INFO HandlerThread:133 [handler.py:handle_request_resume():699] starting system metrics thread
|
| 934 |
+
2024-03-06 14:45:26,836 INFO HandlerThread:133 [system_monitor.py:start():194] Starting system monitor
|
| 935 |
+
2024-03-06 14:45:26,836 INFO SystemMonitor:133 [system_monitor.py:_start():158] Starting system asset monitoring threads
|
| 936 |
+
2024-03-06 14:45:26,837 INFO SystemMonitor:133 [interfaces.py:start():190] Started cpu monitoring
|
| 937 |
+
2024-03-06 14:45:26,837 INFO SystemMonitor:133 [interfaces.py:start():190] Started disk monitoring
|
| 938 |
+
2024-03-06 14:45:26,839 INFO SystemMonitor:133 [interfaces.py:start():190] Started gpu monitoring
|
| 939 |
+
2024-03-06 14:45:26,840 INFO SystemMonitor:133 [interfaces.py:start():190] Started memory monitoring
|
| 940 |
+
2024-03-06 14:45:26,840 INFO SystemMonitor:133 [interfaces.py:start():190] Started network monitoring
|
| 941 |
+
2024-03-06 14:45:27,960 DEBUG SenderThread:133 [sender.py:send():382] send: config
|
| 942 |
+
2024-03-06 14:45:27,962 DEBUG SenderThread:133 [sender.py:send():382] send: metric
|
| 943 |
+
2024-03-06 14:45:27,962 DEBUG SenderThread:133 [sender.py:send():382] send: metric
|
| 944 |
+
2024-03-06 14:45:27,962 WARNING SenderThread:133 [sender.py:send_metric():1354] Seen metric with glob (shouldn't happen)
|
| 945 |
+
2024-03-06 14:45:29,624 INFO Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/output.log
|
| 946 |
+
2024-03-06 14:45:30,774 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 947 |
+
2024-03-06 14:45:30,963 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 948 |
+
2024-03-06 14:45:35,775 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 949 |
+
2024-03-06 14:45:35,964 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 950 |
+
2024-03-06 14:45:40,777 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 951 |
+
2024-03-06 14:45:40,965 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 952 |
+
2024-03-06 14:45:45,784 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 953 |
+
2024-03-06 14:45:45,966 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 954 |
+
2024-03-06 14:45:50,785 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 955 |
+
2024-03-06 14:45:50,972 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 956 |
+
2024-03-06 14:45:51,633 INFO Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/config.yaml
|
| 957 |
+
2024-03-06 14:45:54,625 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: partial_history
|
| 958 |
+
2024-03-06 14:45:54,626 DEBUG SenderThread:133 [sender.py:send():382] send: history
|
| 959 |
+
2024-03-06 14:45:54,626 DEBUG SenderThread:133 [sender.py:send_request():409] send_request: summary_record
|
| 960 |
+
2024-03-06 14:45:54,627 INFO SenderThread:133 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end
|
| 961 |
+
2024-03-06 14:45:54,634 INFO Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/wandb-summary.json
|
| 962 |
+
2024-03-06 14:45:55,786 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 963 |
+
2024-03-06 14:45:56,513 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: pause
|
| 964 |
+
2024-03-06 14:45:56,513 INFO HandlerThread:133 [handler.py:handle_request_pause():708] stopping system metrics thread
|
| 965 |
+
2024-03-06 14:45:56,513 INFO HandlerThread:133 [system_monitor.py:finish():203] Stopping system monitor
|
| 966 |
+
2024-03-06 14:45:56,514 DEBUG SystemMonitor:133 [system_monitor.py:_start():172] Starting system metrics aggregation loop
|
| 967 |
+
2024-03-06 14:45:56,514 DEBUG SystemMonitor:133 [system_monitor.py:_start():179] Finished system metrics aggregation loop
|
| 968 |
+
2024-03-06 14:45:56,514 DEBUG SystemMonitor:133 [system_monitor.py:_start():183] Publishing last batch of metrics
|
| 969 |
+
2024-03-06 14:45:56,514 INFO HandlerThread:133 [interfaces.py:finish():202] Joined cpu monitor
|
| 970 |
+
2024-03-06 14:45:56,515 INFO HandlerThread:133 [interfaces.py:finish():202] Joined disk monitor
|
| 971 |
+
2024-03-06 14:45:56,521 INFO HandlerThread:133 [interfaces.py:finish():202] Joined gpu monitor
|
| 972 |
+
2024-03-06 14:45:56,521 INFO HandlerThread:133 [interfaces.py:finish():202] Joined memory monitor
|
| 973 |
+
2024-03-06 14:45:56,521 INFO HandlerThread:133 [interfaces.py:finish():202] Joined network monitor
|
| 974 |
+
2024-03-06 14:45:56,522 DEBUG SenderThread:133 [sender.py:send():382] send: stats
|
| 975 |
+
2024-03-06 14:45:56,522 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 976 |
+
2024-03-06 14:46:00,787 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 977 |
+
2024-03-06 14:46:01,523 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 978 |
+
2024-03-06 14:46:05,788 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 979 |
+
2024-03-06 14:46:06,525 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 980 |
+
2024-03-06 14:46:10,789 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 981 |
+
2024-03-06 14:46:11,526 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 982 |
+
2024-03-06 14:46:15,790 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 983 |
+
2024-03-06 14:46:16,527 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 984 |
+
2024-03-06 14:46:20,792 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 985 |
+
2024-03-06 14:46:21,528 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 986 |
+
2024-03-06 14:46:25,793 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 987 |
+
2024-03-06 14:46:26,529 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 988 |
+
2024-03-06 14:46:30,794 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 989 |
+
2024-03-06 14:46:31,530 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 990 |
+
2024-03-06 14:46:35,795 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 991 |
+
2024-03-06 14:46:36,531 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 992 |
+
2024-03-06 14:46:40,797 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 993 |
+
2024-03-06 14:46:41,533 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 994 |
+
2024-03-06 14:46:45,798 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 995 |
+
2024-03-06 14:46:46,534 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 996 |
+
2024-03-06 14:46:50,799 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 997 |
+
2024-03-06 14:46:51,535 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 998 |
+
2024-03-06 14:46:55,800 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 999 |
+
2024-03-06 14:46:56,536 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1000 |
+
2024-03-06 14:47:00,801 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1001 |
+
2024-03-06 14:47:01,537 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1002 |
+
2024-03-06 14:47:05,802 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1003 |
+
2024-03-06 14:47:06,538 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1004 |
+
2024-03-06 14:47:10,804 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1005 |
+
2024-03-06 14:47:11,540 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1006 |
+
2024-03-06 14:47:15,805 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1007 |
+
2024-03-06 14:47:16,541 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1008 |
+
2024-03-06 14:47:20,806 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1009 |
+
2024-03-06 14:47:21,542 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1010 |
+
2024-03-06 14:47:25,807 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1011 |
+
2024-03-06 14:47:26,543 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1012 |
+
2024-03-06 14:47:30,808 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1013 |
+
2024-03-06 14:47:31,544 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1014 |
+
2024-03-06 14:47:35,809 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1015 |
+
2024-03-06 14:47:36,546 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1016 |
+
2024-03-06 14:47:40,810 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1017 |
+
2024-03-06 14:47:41,547 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1018 |
+
2024-03-06 14:47:45,811 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1019 |
+
2024-03-06 14:47:46,548 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1020 |
+
2024-03-06 14:47:50,813 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1021 |
+
2024-03-06 14:47:51,549 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1022 |
+
2024-03-06 14:47:55,814 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1023 |
+
2024-03-06 14:47:56,550 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1024 |
+
2024-03-06 14:48:00,815 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1025 |
+
2024-03-06 14:48:01,551 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1026 |
+
2024-03-06 14:48:05,816 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1027 |
+
2024-03-06 14:48:06,552 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1028 |
+
2024-03-06 14:48:10,817 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1029 |
+
2024-03-06 14:48:11,554 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1030 |
+
2024-03-06 14:48:15,818 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1031 |
+
2024-03-06 14:48:16,555 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1032 |
+
2024-03-06 14:48:20,819 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1033 |
+
2024-03-06 14:48:21,556 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1034 |
+
2024-03-06 14:48:25,820 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1035 |
+
2024-03-06 14:48:26,557 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1036 |
+
2024-03-06 14:48:30,821 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1037 |
+
2024-03-06 14:48:31,559 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1038 |
+
2024-03-06 14:48:35,822 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1039 |
+
2024-03-06 14:48:36,560 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1040 |
+
2024-03-06 14:48:40,823 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1041 |
+
2024-03-06 14:48:41,561 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1042 |
+
2024-03-06 14:48:45,824 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1043 |
+
2024-03-06 14:48:46,562 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1044 |
+
2024-03-06 14:48:50,825 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1045 |
+
2024-03-06 14:48:51,563 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1046 |
+
2024-03-06 14:48:55,827 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1047 |
+
2024-03-06 14:48:56,564 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1048 |
+
2024-03-06 14:49:00,828 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1049 |
+
2024-03-06 14:49:01,565 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1050 |
+
2024-03-06 14:49:05,829 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1051 |
+
2024-03-06 14:49:06,566 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1052 |
+
2024-03-06 14:49:10,830 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1053 |
+
2024-03-06 14:49:11,567 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1054 |
+
2024-03-06 14:49:15,831 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1055 |
+
2024-03-06 14:49:16,568 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1056 |
+
2024-03-06 14:49:20,832 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1057 |
+
2024-03-06 14:49:21,569 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1058 |
+
2024-03-06 14:49:25,834 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1059 |
+
2024-03-06 14:49:26,570 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1060 |
+
2024-03-06 14:49:30,835 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1061 |
+
2024-03-06 14:49:31,571 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1062 |
+
2024-03-06 14:49:35,836 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1063 |
+
2024-03-06 14:49:36,572 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1064 |
+
2024-03-06 14:49:40,837 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1065 |
+
2024-03-06 14:49:41,573 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1066 |
+
2024-03-06 14:49:45,838 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1067 |
+
2024-03-06 14:49:46,574 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1068 |
+
2024-03-06 14:49:50,839 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1069 |
+
2024-03-06 14:49:51,575 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1070 |
+
2024-03-06 14:49:55,841 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1071 |
+
2024-03-06 14:49:56,576 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1072 |
+
2024-03-06 14:50:00,842 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1073 |
+
2024-03-06 14:50:01,577 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1074 |
+
2024-03-06 14:50:05,843 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1075 |
+
2024-03-06 14:50:06,578 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1076 |
+
2024-03-06 14:50:10,844 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1077 |
+
2024-03-06 14:50:11,580 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1078 |
+
2024-03-06 14:50:11,638 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: resume
|
| 1079 |
+
2024-03-06 14:50:11,639 INFO HandlerThread:133 [handler.py:handle_request_resume():699] starting system metrics thread
|
| 1080 |
+
2024-03-06 14:50:11,639 INFO HandlerThread:133 [system_monitor.py:start():194] Starting system monitor
|
| 1081 |
+
2024-03-06 14:50:11,639 INFO SystemMonitor:133 [system_monitor.py:_start():158] Starting system asset monitoring threads
|
| 1082 |
+
2024-03-06 14:50:11,639 INFO SystemMonitor:133 [interfaces.py:start():190] Started cpu monitoring
|
| 1083 |
+
2024-03-06 14:50:11,640 INFO SystemMonitor:133 [interfaces.py:start():190] Started disk monitoring
|
| 1084 |
+
2024-03-06 14:50:11,641 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: pause
|
| 1085 |
+
2024-03-06 14:50:11,641 INFO HandlerThread:133 [handler.py:handle_request_pause():708] stopping system metrics thread
|
| 1086 |
+
2024-03-06 14:50:11,641 INFO HandlerThread:133 [system_monitor.py:finish():203] Stopping system monitor
|
| 1087 |
+
2024-03-06 14:50:11,642 INFO HandlerThread:133 [interfaces.py:finish():202] Joined cpu monitor
|
| 1088 |
+
2024-03-06 14:50:11,642 INFO SystemMonitor:133 [interfaces.py:start():190] Started gpu monitoring
|
| 1089 |
+
2024-03-06 14:50:11,642 DEBUG SystemMonitor:133 [system_monitor.py:_start():172] Starting system metrics aggregation loop
|
| 1090 |
+
2024-03-06 14:50:11,642 DEBUG SystemMonitor:133 [system_monitor.py:_start():179] Finished system metrics aggregation loop
|
| 1091 |
+
2024-03-06 14:50:11,643 DEBUG SystemMonitor:133 [system_monitor.py:_start():183] Publishing last batch of metrics
|
| 1092 |
+
2024-03-06 14:50:11,645 INFO HandlerThread:133 [interfaces.py:finish():202] Joined disk monitor
|
| 1093 |
+
2024-03-06 14:50:11,653 INFO HandlerThread:133 [interfaces.py:finish():202] Joined gpu monitor
|
| 1094 |
+
2024-03-06 14:50:11,654 DEBUG SenderThread:133 [sender.py:send():382] send: stats
|
| 1095 |
+
2024-03-06 14:50:15,846 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1096 |
+
2024-03-06 14:50:16,655 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1097 |
+
2024-03-06 14:50:19,879 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: resume
|
| 1098 |
+
2024-03-06 14:50:19,879 INFO HandlerThread:133 [handler.py:handle_request_resume():699] starting system metrics thread
|
| 1099 |
+
2024-03-06 14:50:19,879 INFO HandlerThread:133 [system_monitor.py:start():194] Starting system monitor
|
| 1100 |
+
2024-03-06 14:50:19,879 INFO SystemMonitor:133 [system_monitor.py:_start():158] Starting system asset monitoring threads
|
| 1101 |
+
2024-03-06 14:50:19,880 INFO SystemMonitor:133 [interfaces.py:start():190] Started cpu monitoring
|
| 1102 |
+
2024-03-06 14:50:19,881 INFO SystemMonitor:133 [interfaces.py:start():190] Started disk monitoring
|
| 1103 |
+
2024-03-06 14:50:19,882 INFO SystemMonitor:133 [interfaces.py:start():190] Started gpu monitoring
|
| 1104 |
+
2024-03-06 14:50:19,883 INFO SystemMonitor:133 [interfaces.py:start():190] Started memory monitoring
|
| 1105 |
+
2024-03-06 14:50:19,885 INFO SystemMonitor:133 [interfaces.py:start():190] Started network monitoring
|
| 1106 |
+
2024-03-06 14:50:20,901 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1107 |
+
2024-03-06 14:50:21,115 DEBUG SenderThread:133 [sender.py:send():382] send: config
|
| 1108 |
+
2024-03-06 14:50:21,116 DEBUG SenderThread:133 [sender.py:send():382] send: metric
|
| 1109 |
+
2024-03-06 14:50:21,117 DEBUG SenderThread:133 [sender.py:send():382] send: metric
|
| 1110 |
+
2024-03-06 14:50:21,117 WARNING SenderThread:133 [sender.py:send_metric():1354] Seen metric with glob (shouldn't happen)
|
| 1111 |
+
2024-03-06 14:50:21,743 INFO Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/output.log
|
| 1112 |
+
2024-03-06 14:50:22,122 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1113 |
+
2024-03-06 14:50:22,744 INFO Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/config.yaml
|
| 1114 |
+
2024-03-06 14:50:25,904 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1115 |
+
2024-03-06 14:50:27,366 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1116 |
+
2024-03-06 14:50:30,905 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1117 |
+
2024-03-06 14:50:32,367 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1118 |
+
2024-03-06 14:50:35,907 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1119 |
+
2024-03-06 14:50:37,368 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1120 |
+
2024-03-06 14:50:40,908 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1121 |
+
2024-03-06 14:50:42,369 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1122 |
+
2024-03-06 14:50:45,909 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1123 |
+
2024-03-06 14:50:47,370 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1124 |
+
2024-03-06 14:50:47,806 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: partial_history
|
| 1125 |
+
2024-03-06 14:50:47,807 DEBUG SenderThread:133 [sender.py:send():382] send: history
|
| 1126 |
+
2024-03-06 14:50:47,807 DEBUG SenderThread:133 [sender.py:send_request():409] send_request: summary_record
|
| 1127 |
+
2024-03-06 14:50:47,810 INFO SenderThread:133 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end
|
| 1128 |
+
2024-03-06 14:50:48,754 INFO Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/wandb-summary.json
|
| 1129 |
+
2024-03-06 14:50:49,741 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: pause
|
| 1130 |
+
2024-03-06 14:50:49,741 INFO HandlerThread:133 [handler.py:handle_request_pause():708] stopping system metrics thread
|
| 1131 |
+
2024-03-06 14:50:49,741 INFO HandlerThread:133 [system_monitor.py:finish():203] Stopping system monitor
|
| 1132 |
+
2024-03-06 14:50:49,742 DEBUG SystemMonitor:133 [system_monitor.py:_start():172] Starting system metrics aggregation loop
|
| 1133 |
+
2024-03-06 14:50:49,742 DEBUG SystemMonitor:133 [system_monitor.py:_start():179] Finished system metrics aggregation loop
|
| 1134 |
+
2024-03-06 14:50:49,742 DEBUG SystemMonitor:133 [system_monitor.py:_start():183] Publishing last batch of metrics
|
| 1135 |
+
2024-03-06 14:50:49,742 INFO HandlerThread:133 [interfaces.py:finish():202] Joined cpu monitor
|
| 1136 |
+
2024-03-06 14:50:49,743 INFO HandlerThread:133 [interfaces.py:finish():202] Joined disk monitor
|
| 1137 |
+
2024-03-06 14:50:49,749 INFO HandlerThread:133 [interfaces.py:finish():202] Joined gpu monitor
|
| 1138 |
+
2024-03-06 14:50:49,749 INFO HandlerThread:133 [interfaces.py:finish():202] Joined memory monitor
|
| 1139 |
+
2024-03-06 14:50:49,749 INFO HandlerThread:133 [interfaces.py:finish():202] Joined network monitor
|
| 1140 |
+
2024-03-06 14:50:49,750 DEBUG SenderThread:133 [sender.py:send():382] send: stats
|
| 1141 |
+
2024-03-06 14:50:50,910 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1142 |
+
2024-03-06 14:50:52,751 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1143 |
+
2024-03-06 14:50:55,911 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1144 |
+
2024-03-06 14:50:57,752 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1145 |
+
2024-03-06 14:51:00,913 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1146 |
+
2024-03-06 14:51:02,754 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1147 |
+
2024-03-06 14:51:05,914 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1148 |
+
2024-03-06 14:51:07,755 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1149 |
+
2024-03-06 14:51:08,770 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: resume
|
| 1150 |
+
2024-03-06 14:51:08,770 INFO HandlerThread:133 [handler.py:handle_request_resume():699] starting system metrics thread
|
| 1151 |
+
2024-03-06 14:51:08,771 INFO HandlerThread:133 [system_monitor.py:start():194] Starting system monitor
|
| 1152 |
+
2024-03-06 14:51:08,771 INFO SystemMonitor:133 [system_monitor.py:_start():158] Starting system asset monitoring threads
|
| 1153 |
+
2024-03-06 14:51:08,771 INFO SystemMonitor:133 [interfaces.py:start():190] Started cpu monitoring
|
| 1154 |
+
2024-03-06 14:51:08,772 INFO SystemMonitor:133 [interfaces.py:start():190] Started disk monitoring
|
| 1155 |
+
2024-03-06 14:51:08,773 INFO SystemMonitor:133 [interfaces.py:start():190] Started gpu monitoring
|
| 1156 |
+
2024-03-06 14:51:08,775 INFO SystemMonitor:133 [interfaces.py:start():190] Started memory monitoring
|
| 1157 |
+
2024-03-06 14:51:08,777 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: pause
|
| 1158 |
+
2024-03-06 14:51:08,779 INFO HandlerThread:133 [handler.py:handle_request_pause():708] stopping system metrics thread
|
| 1159 |
+
2024-03-06 14:51:08,779 INFO HandlerThread:133 [system_monitor.py:finish():203] Stopping system monitor
|
| 1160 |
+
2024-03-06 14:51:08,779 INFO SystemMonitor:133 [interfaces.py:start():190] Started network monitoring
|
| 1161 |
+
2024-03-06 14:51:08,779 DEBUG SystemMonitor:133 [system_monitor.py:_start():172] Starting system metrics aggregation loop
|
| 1162 |
+
2024-03-06 14:51:08,779 DEBUG SystemMonitor:133 [system_monitor.py:_start():179] Finished system metrics aggregation loop
|
| 1163 |
+
2024-03-06 14:51:08,780 DEBUG SystemMonitor:133 [system_monitor.py:_start():183] Publishing last batch of metrics
|
| 1164 |
+
2024-03-06 14:51:08,780 INFO HandlerThread:133 [interfaces.py:finish():202] Joined cpu monitor
|
| 1165 |
+
2024-03-06 14:51:08,781 INFO HandlerThread:133 [interfaces.py:finish():202] Joined disk monitor
|
| 1166 |
+
2024-03-06 14:51:08,791 INFO HandlerThread:133 [interfaces.py:finish():202] Joined gpu monitor
|
| 1167 |
+
2024-03-06 14:51:08,791 INFO HandlerThread:133 [interfaces.py:finish():202] Joined memory monitor
|
| 1168 |
+
2024-03-06 14:51:08,791 INFO HandlerThread:133 [interfaces.py:finish():202] Joined network monitor
|
| 1169 |
+
2024-03-06 14:51:08,792 DEBUG SenderThread:133 [sender.py:send():382] send: stats
|
| 1170 |
+
2024-03-06 14:51:10,915 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1171 |
+
2024-03-06 14:51:12,600 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: resume
|
| 1172 |
+
2024-03-06 14:51:12,600 INFO HandlerThread:133 [handler.py:handle_request_resume():699] starting system metrics thread
|
| 1173 |
+
2024-03-06 14:51:12,600 INFO HandlerThread:133 [system_monitor.py:start():194] Starting system monitor
|
| 1174 |
+
2024-03-06 14:51:12,600 INFO SystemMonitor:133 [system_monitor.py:_start():158] Starting system asset monitoring threads
|
| 1175 |
+
2024-03-06 14:51:12,601 INFO SystemMonitor:133 [interfaces.py:start():190] Started cpu monitoring
|
| 1176 |
+
2024-03-06 14:51:12,602 INFO SystemMonitor:133 [interfaces.py:start():190] Started disk monitoring
|
| 1177 |
+
2024-03-06 14:51:12,605 INFO SystemMonitor:133 [interfaces.py:start():190] Started gpu monitoring
|
| 1178 |
+
2024-03-06 14:51:12,605 INFO SystemMonitor:133 [interfaces.py:start():190] Started memory monitoring
|
| 1179 |
+
2024-03-06 14:51:12,606 INFO SystemMonitor:133 [interfaces.py:start():190] Started network monitoring
|
| 1180 |
+
2024-03-06 14:51:12,793 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1181 |
+
2024-03-06 14:51:13,753 DEBUG SenderThread:133 [sender.py:send():382] send: config
|
| 1182 |
+
2024-03-06 14:51:13,755 DEBUG SenderThread:133 [sender.py:send():382] send: metric
|
| 1183 |
+
2024-03-06 14:51:13,755 DEBUG SenderThread:133 [sender.py:send():382] send: metric
|
| 1184 |
+
2024-03-06 14:51:13,755 WARNING SenderThread:133 [sender.py:send_metric():1354] Seen metric with glob (shouldn't happen)
|
| 1185 |
+
2024-03-06 14:51:13,763 INFO Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/output.log
|
| 1186 |
+
2024-03-06 14:51:15,919 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1187 |
+
2024-03-06 14:51:18,756 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1188 |
+
2024-03-06 14:51:20,922 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1189 |
+
2024-03-06 14:51:23,762 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1190 |
+
2024-03-06 14:51:24,767 INFO Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/config.yaml
|
| 1191 |
+
2024-03-06 14:51:25,923 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1192 |
+
2024-03-06 14:51:29,020 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1193 |
+
2024-03-06 14:51:30,925 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1194 |
+
2024-03-06 14:51:34,021 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1195 |
+
2024-03-06 14:51:35,928 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1196 |
+
2024-03-06 14:51:39,022 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1197 |
+
2024-03-06 14:51:40,429 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: partial_history
|
| 1198 |
+
2024-03-06 14:51:40,430 DEBUG SenderThread:133 [sender.py:send():382] send: history
|
| 1199 |
+
2024-03-06 14:51:40,431 DEBUG SenderThread:133 [sender.py:send_request():409] send_request: summary_record
|
| 1200 |
+
2024-03-06 14:51:40,432 INFO SenderThread:133 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end
|
| 1201 |
+
2024-03-06 14:51:40,773 INFO Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/wandb-summary.json
|
| 1202 |
+
2024-03-06 14:51:40,942 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1203 |
+
2024-03-06 14:51:42,098 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: partial_history
|
| 1204 |
+
2024-03-06 14:51:42,101 DEBUG SenderThread:133 [sender.py:send():382] send: metric
|
| 1205 |
+
2024-03-06 14:51:42,101 DEBUG SenderThread:133 [sender.py:send():382] send: metric
|
| 1206 |
+
2024-03-06 14:51:42,102 DEBUG SenderThread:133 [sender.py:send():382] send: metric
|
| 1207 |
+
2024-03-06 14:51:42,102 DEBUG SenderThread:133 [sender.py:send():382] send: metric
|
| 1208 |
+
2024-03-06 14:51:42,102 DEBUG SenderThread:133 [sender.py:send():382] send: history
|
| 1209 |
+
2024-03-06 14:51:42,102 DEBUG SenderThread:133 [sender.py:send_request():409] send_request: summary_record
|
| 1210 |
+
2024-03-06 14:51:42,103 INFO SenderThread:133 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end
|
| 1211 |
+
2024-03-06 14:51:42,774 INFO Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/wandb-summary.json
|
| 1212 |
+
2024-03-06 14:51:43,775 INFO Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/output.log
|
| 1213 |
+
2024-03-06 14:51:44,104 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1214 |
+
2024-03-06 14:51:46,440 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: partial_history
|
| 1215 |
+
2024-03-06 14:51:46,441 DEBUG SenderThread:133 [sender.py:send():382] send: history
|
| 1216 |
+
2024-03-06 14:51:46,442 DEBUG SenderThread:133 [sender.py:send_request():409] send_request: summary_record
|
| 1217 |
+
2024-03-06 14:51:46,442 INFO SenderThread:133 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end
|
| 1218 |
+
2024-03-06 14:51:46,678 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1219 |
+
2024-03-06 14:51:46,776 INFO Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/wandb-summary.json
|
wandb/debug.log
CHANGED
|
@@ -74,3 +74,24 @@ config: {}
|
|
| 74 |
2024-03-06 14:40:45,818 INFO MainThread:34 [wandb_init.py:_pause_backend():437] pausing backend
|
| 75 |
2024-03-06 14:43:14,295 INFO MainThread:34 [wandb_init.py:_resume_backend():442] resuming backend
|
| 76 |
2024-03-06 14:43:15,473 INFO MainThread:34 [wandb_run.py:_config_callback():1343] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': None, 'finetuning_task': None, 'id2label': {0: 'Documentation Ambiguity', 1: 'Documentation Completeness', 2: 'Documentation Replicability', 3: 'Documentation Replication on Other Examples', 4: 'Inadequate Examples', 5: 'Lack of Alternative Solutions/Documentation', 6: 'Requesting (Additional) Documentation/Examples'}, 'label2id': {'Documentation Ambiguity': 0, 'Documentation Completeness': 1, 'Documentation Replicability': 2, 'Documentation Replication on Other Examples': 3, 'Inadequate Examples': 4, 'Lack of Alternative Solutions/Documentation': 5, 'Requesting (Additional) Documentation/Examples': 6}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'mmukh/SOBertBase', 'transformers_version': '4.38.1', 'model_type': 'megatron-bert', 'tokenizer_type': 'SentencePieceTokenizer', 'vocab_size': 50048, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 2048, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Mar06_14-43-14_cd2c3b1980c7', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
2024-03-06 14:40:45,818 INFO MainThread:34 [wandb_init.py:_pause_backend():437] pausing backend
|
| 75 |
2024-03-06 14:43:14,295 INFO MainThread:34 [wandb_init.py:_resume_backend():442] resuming backend
|
| 76 |
2024-03-06 14:43:15,473 INFO MainThread:34 [wandb_run.py:_config_callback():1343] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': None, 'finetuning_task': None, 'id2label': {0: 'Documentation Ambiguity', 1: 'Documentation Completeness', 2: 'Documentation Replicability', 3: 'Documentation Replication on Other Examples', 4: 'Inadequate Examples', 5: 'Lack of Alternative Solutions/Documentation', 6: 'Requesting (Additional) Documentation/Examples'}, 'label2id': {'Documentation Ambiguity': 0, 'Documentation Completeness': 1, 'Documentation Replicability': 2, 'Documentation Replication on Other Examples': 3, 'Inadequate Examples': 4, 'Lack of Alternative Solutions/Documentation': 5, 'Requesting (Additional) Documentation/Examples': 6}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'mmukh/SOBertBase', 'transformers_version': '4.38.1', 'model_type': 'megatron-bert', 'tokenizer_type': 'SentencePieceTokenizer', 'vocab_size': 50048, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 2048, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Mar06_14-43-14_cd2c3b1980c7', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
|
| 77 |
+
2024-03-06 14:44:15,386 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
| 78 |
+
2024-03-06 14:44:15,386 INFO MainThread:34 [wandb_init.py:_pause_backend():437] pausing backend
|
| 79 |
+
2024-03-06 14:45:09,983 INFO MainThread:34 [wandb_init.py:_resume_backend():442] resuming backend
|
| 80 |
+
2024-03-06 14:45:09,985 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
| 81 |
+
2024-03-06 14:45:09,985 INFO MainThread:34 [wandb_init.py:_pause_backend():437] pausing backend
|
| 82 |
+
2024-03-06 14:45:26,835 INFO MainThread:34 [wandb_init.py:_resume_backend():442] resuming backend
|
| 83 |
+
2024-03-06 14:45:27,956 INFO MainThread:34 [wandb_run.py:_config_callback():1343] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': None, 'finetuning_task': None, 'id2label': {0: 'Documentation Ambiguity', 1: 'Documentation Completeness', 2: 'Documentation Replicability', 3: 'Documentation Replication on Other Examples', 4: 'Inadequate Examples', 5: 'Lack of Alternative Solutions/Documentation', 6: 'Requesting (Additional) Documentation/Examples'}, 'label2id': {'Documentation Ambiguity': 0, 'Documentation Completeness': 1, 'Documentation Replicability': 2, 'Documentation Replication on Other Examples': 3, 'Inadequate Examples': 4, 'Lack of Alternative Solutions/Documentation': 5, 'Requesting (Additional) Documentation/Examples': 6}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'mmukh/SOBertBase', 'transformers_version': '4.38.1', 'model_type': 'megatron-bert', 'tokenizer_type': 'SentencePieceTokenizer', 'vocab_size': 50048, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 2048, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Mar06_14-45-27_cd2c3b1980c7', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
|
| 84 |
+
2024-03-06 14:45:56,512 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
| 85 |
+
2024-03-06 14:45:56,512 INFO MainThread:34 [wandb_init.py:_pause_backend():437] pausing backend
|
| 86 |
+
2024-03-06 14:50:11,637 INFO MainThread:34 [wandb_init.py:_resume_backend():442] resuming backend
|
| 87 |
+
2024-03-06 14:50:11,640 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
| 88 |
+
2024-03-06 14:50:11,640 INFO MainThread:34 [wandb_init.py:_pause_backend():437] pausing backend
|
| 89 |
+
2024-03-06 14:50:19,878 INFO MainThread:34 [wandb_init.py:_resume_backend():442] resuming backend
|
| 90 |
+
2024-03-06 14:50:21,110 INFO MainThread:34 [wandb_run.py:_config_callback():1343] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': None, 'finetuning_task': None, 'id2label': {0: 'Documentation Ambiguity', 1: 'Documentation Completeness', 2: 'Documentation Replicability', 3: 'Documentation Replication on Other Examples', 4: 'Inadequate Examples', 5: 'Lack of Alternative Solutions/Documentation', 6: 'Requesting (Additional) Documentation/Examples'}, 'label2id': {'Documentation Ambiguity': 0, 'Documentation Completeness': 1, 'Documentation Replicability': 2, 'Documentation Replication on Other Examples': 3, 'Inadequate Examples': 4, 'Lack of Alternative Solutions/Documentation': 5, 'Requesting (Additional) Documentation/Examples': 6}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'mmukh/SOBertBase', 'transformers_version': '4.38.1', 'model_type': 'megatron-bert', 'tokenizer_type': 'SentencePieceTokenizer', 'vocab_size': 50048, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 2048, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Mar06_14-50-20_cd2c3b1980c7', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
|
| 91 |
+
2024-03-06 14:50:49,740 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
| 92 |
+
2024-03-06 14:50:49,740 INFO MainThread:34 [wandb_init.py:_pause_backend():437] pausing backend
|
| 93 |
+
2024-03-06 14:51:08,770 INFO MainThread:34 [wandb_init.py:_resume_backend():442] resuming backend
|
| 94 |
+
2024-03-06 14:51:08,772 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
| 95 |
+
2024-03-06 14:51:08,773 INFO MainThread:34 [wandb_init.py:_pause_backend():437] pausing backend
|
| 96 |
+
2024-03-06 14:51:12,599 INFO MainThread:34 [wandb_init.py:_resume_backend():442] resuming backend
|
| 97 |
+
2024-03-06 14:51:13,748 INFO MainThread:34 [wandb_run.py:_config_callback():1343] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': None, 'finetuning_task': None, 'id2label': {0: 'Documentation Ambiguity', 1: 'Documentation Completeness', 2: 'Documentation Replicability', 3: 'Documentation Replication on Other Examples', 4: 'Inadequate Examples', 5: 'Lack of Alternative Solutions/Documentation', 6: 'Requesting (Additional) Documentation/Examples'}, 'label2id': {'Documentation Ambiguity': 0, 'Documentation Completeness': 1, 'Documentation Replicability': 2, 'Documentation Replication on Other Examples': 3, 'Inadequate Examples': 4, 'Lack of Alternative Solutions/Documentation': 5, 'Requesting (Additional) Documentation/Examples': 6}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'mmukh/SOBertBase', 'transformers_version': '4.38.1', 'model_type': 'megatron-bert', 'tokenizer_type': 'SentencePieceTokenizer', 'vocab_size': 50048, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 2048, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Mar06_14-51-12_cd2c3b1980c7', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
|
wandb/run-20240306_142408-og3d0ld1/files/config.yaml
CHANGED
|
@@ -422,7 +422,7 @@ log_on_each_node:
|
|
| 422 |
value: true
|
| 423 |
logging_dir:
|
| 424 |
desc: null
|
| 425 |
-
value: /kaggle/working/runs/Mar06_14-
|
| 426 |
logging_strategy:
|
| 427 |
desc: null
|
| 428 |
value: epoch
|
|
|
|
| 422 |
value: true
|
| 423 |
logging_dir:
|
| 424 |
desc: null
|
| 425 |
+
value: /kaggle/working/runs/Mar06_14-51-12_cd2c3b1980c7
|
| 426 |
logging_strategy:
|
| 427 |
desc: null
|
| 428 |
value: epoch
|
wandb/run-20240306_142408-og3d0ld1/files/output.log
CHANGED
|
@@ -29,3 +29,10 @@ Class Weights: tensor([0.8491, 0.9698, 0.7866, 0.6530, 0.9310, 0.8578, 0.9526],
|
|
| 29 |
Some weights of MegatronBertForSequenceClassification were not initialized from the model checkpoint at mmukh/SOBertBase and are newly initialized: ['bert.embeddings.token_type_embeddings.weight', 'bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
|
| 30 |
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
|
| 31 |
Some weights of MegatronBertForSequenceClassification were not initialized from the model checkpoint at mmukh/SOBertBase and are newly initialized: ['bert.embeddings.token_type_embeddings.weight', 'bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
Some weights of MegatronBertForSequenceClassification were not initialized from the model checkpoint at mmukh/SOBertBase and are newly initialized: ['bert.embeddings.token_type_embeddings.weight', 'bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
|
| 30 |
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
|
| 31 |
Some weights of MegatronBertForSequenceClassification were not initialized from the model checkpoint at mmukh/SOBertBase and are newly initialized: ['bert.embeddings.token_type_embeddings.weight', 'bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
|
| 32 |
+
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
|
| 33 |
+
Some weights of MegatronBertForSequenceClassification were not initialized from the model checkpoint at mmukh/SOBertBase and are newly initialized: ['bert.embeddings.token_type_embeddings.weight', 'bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
|
| 34 |
+
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
|
| 35 |
+
Some weights of MegatronBertForSequenceClassification were not initialized from the model checkpoint at mmukh/SOBertBase and are newly initialized: ['bert.embeddings.token_type_embeddings.weight', 'bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
|
| 36 |
+
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
|
| 37 |
+
Some weights of MegatronBertForSequenceClassification were not initialized from the model checkpoint at mmukh/SOBertBase and are newly initialized: ['bert.embeddings.token_type_embeddings.weight', 'bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
|
| 38 |
+
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
|
wandb/run-20240306_142408-og3d0ld1/files/wandb-summary.json
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
{"train/loss": 1.7916, "train/grad_norm": 7.054403781890869, "train/learning_rate": 3.4482758620689656e-07, "train/epoch": 1.0, "train/global_step": 58, "_timestamp":
|
|
|
|
| 1 |
+
{"train/loss": 1.7916, "train/grad_norm": 7.054403781890869, "train/learning_rate": 3.4482758620689656e-07, "train/epoch": 1.0, "train/global_step": 58, "_timestamp": 1709736706.4396355, "_runtime": 1657.8019905090332, "_step": 10, "eval/loss": 1.811458706855774, "eval/runtime": 1.6651, "eval/samples_per_second": 49.246, "eval/steps_per_second": 6.606, "train/train_runtime": 32.7083, "train/train_samples_per_second": 14.186, "train/train_steps_per_second": 1.773, "train/total_flos": 122089010380800.0, "train/train_loss": 1.791607692323882, "eval/accuracy": 0.32926829268292684, "eval/precision": 0.15702844661708878, "eval/recall": 0.32926829268292684, "eval/f1": 0.20646817673609674}
|
wandb/run-20240306_142408-og3d0ld1/logs/debug-internal.log
CHANGED
|
@@ -874,3 +874,346 @@
|
|
| 874 |
2024-03-06 14:44:13,758 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 875 |
2024-03-06 14:44:14,301 DEBUG SystemMonitor:133 [system_monitor.py:_start():172] Starting system metrics aggregation loop
|
| 876 |
2024-03-06 14:44:14,303 DEBUG SenderThread:133 [sender.py:send():382] send: stats
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 874 |
2024-03-06 14:44:13,758 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 875 |
2024-03-06 14:44:14,301 DEBUG SystemMonitor:133 [system_monitor.py:_start():172] Starting system metrics aggregation loop
|
| 876 |
2024-03-06 14:44:14,303 DEBUG SenderThread:133 [sender.py:send():382] send: stats
|
| 877 |
+
2024-03-06 14:44:15,387 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: pause
|
| 878 |
+
2024-03-06 14:44:15,387 INFO HandlerThread:133 [handler.py:handle_request_pause():708] stopping system metrics thread
|
| 879 |
+
2024-03-06 14:44:15,387 INFO HandlerThread:133 [system_monitor.py:finish():203] Stopping system monitor
|
| 880 |
+
2024-03-06 14:44:15,387 DEBUG SystemMonitor:133 [system_monitor.py:_start():179] Finished system metrics aggregation loop
|
| 881 |
+
2024-03-06 14:44:15,387 DEBUG SystemMonitor:133 [system_monitor.py:_start():183] Publishing last batch of metrics
|
| 882 |
+
2024-03-06 14:44:15,388 INFO HandlerThread:133 [interfaces.py:finish():202] Joined cpu monitor
|
| 883 |
+
2024-03-06 14:44:15,389 INFO HandlerThread:133 [interfaces.py:finish():202] Joined disk monitor
|
| 884 |
+
2024-03-06 14:44:15,394 INFO HandlerThread:133 [interfaces.py:finish():202] Joined gpu monitor
|
| 885 |
+
2024-03-06 14:44:15,395 INFO HandlerThread:133 [interfaces.py:finish():202] Joined memory monitor
|
| 886 |
+
2024-03-06 14:44:15,395 INFO HandlerThread:133 [interfaces.py:finish():202] Joined network monitor
|
| 887 |
+
2024-03-06 14:44:15,395 DEBUG SenderThread:133 [sender.py:send():382] send: stats
|
| 888 |
+
2024-03-06 14:44:15,630 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 889 |
+
2024-03-06 14:44:19,396 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 890 |
+
2024-03-06 14:44:20,631 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 891 |
+
2024-03-06 14:44:24,398 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 892 |
+
2024-03-06 14:44:25,633 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 893 |
+
2024-03-06 14:44:29,399 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 894 |
+
2024-03-06 14:44:30,634 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 895 |
+
2024-03-06 14:44:34,400 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 896 |
+
2024-03-06 14:44:35,635 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 897 |
+
2024-03-06 14:44:39,401 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 898 |
+
2024-03-06 14:44:40,636 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 899 |
+
2024-03-06 14:44:44,403 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 900 |
+
2024-03-06 14:44:45,637 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 901 |
+
2024-03-06 14:44:49,404 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 902 |
+
2024-03-06 14:44:50,639 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 903 |
+
2024-03-06 14:44:54,405 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 904 |
+
2024-03-06 14:44:55,640 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 905 |
+
2024-03-06 14:44:59,407 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 906 |
+
2024-03-06 14:45:00,641 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 907 |
+
2024-03-06 14:45:04,407 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 908 |
+
2024-03-06 14:45:05,642 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 909 |
+
2024-03-06 14:45:09,409 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 910 |
+
2024-03-06 14:45:09,984 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: resume
|
| 911 |
+
2024-03-06 14:45:09,984 INFO HandlerThread:133 [handler.py:handle_request_resume():699] starting system metrics thread
|
| 912 |
+
2024-03-06 14:45:09,984 INFO HandlerThread:133 [system_monitor.py:start():194] Starting system monitor
|
| 913 |
+
2024-03-06 14:45:09,984 INFO SystemMonitor:133 [system_monitor.py:_start():158] Starting system asset monitoring threads
|
| 914 |
+
2024-03-06 14:45:09,984 INFO SystemMonitor:133 [interfaces.py:start():190] Started cpu monitoring
|
| 915 |
+
2024-03-06 14:45:09,985 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: pause
|
| 916 |
+
2024-03-06 14:45:09,986 INFO HandlerThread:133 [handler.py:handle_request_pause():708] stopping system metrics thread
|
| 917 |
+
2024-03-06 14:45:09,986 INFO HandlerThread:133 [system_monitor.py:finish():203] Stopping system monitor
|
| 918 |
+
2024-03-06 14:45:09,986 INFO SystemMonitor:133 [interfaces.py:start():190] Started disk monitoring
|
| 919 |
+
2024-03-06 14:45:09,986 DEBUG SystemMonitor:133 [system_monitor.py:_start():172] Starting system metrics aggregation loop
|
| 920 |
+
2024-03-06 14:45:09,986 DEBUG SystemMonitor:133 [system_monitor.py:_start():179] Finished system metrics aggregation loop
|
| 921 |
+
2024-03-06 14:45:09,986 INFO HandlerThread:133 [interfaces.py:finish():202] Joined cpu monitor
|
| 922 |
+
2024-03-06 14:45:09,987 DEBUG SystemMonitor:133 [system_monitor.py:_start():183] Publishing last batch of metrics
|
| 923 |
+
2024-03-06 14:45:09,989 INFO HandlerThread:133 [interfaces.py:finish():202] Joined disk monitor
|
| 924 |
+
2024-03-06 14:45:09,989 DEBUG SenderThread:133 [sender.py:send():382] send: stats
|
| 925 |
+
2024-03-06 14:45:10,643 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 926 |
+
2024-03-06 14:45:14,990 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 927 |
+
2024-03-06 14:45:15,645 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 928 |
+
2024-03-06 14:45:19,992 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 929 |
+
2024-03-06 14:45:20,646 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 930 |
+
2024-03-06 14:45:24,993 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 931 |
+
2024-03-06 14:45:25,647 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 932 |
+
2024-03-06 14:45:26,835 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: resume
|
| 933 |
+
2024-03-06 14:45:26,836 INFO HandlerThread:133 [handler.py:handle_request_resume():699] starting system metrics thread
|
| 934 |
+
2024-03-06 14:45:26,836 INFO HandlerThread:133 [system_monitor.py:start():194] Starting system monitor
|
| 935 |
+
2024-03-06 14:45:26,836 INFO SystemMonitor:133 [system_monitor.py:_start():158] Starting system asset monitoring threads
|
| 936 |
+
2024-03-06 14:45:26,837 INFO SystemMonitor:133 [interfaces.py:start():190] Started cpu monitoring
|
| 937 |
+
2024-03-06 14:45:26,837 INFO SystemMonitor:133 [interfaces.py:start():190] Started disk monitoring
|
| 938 |
+
2024-03-06 14:45:26,839 INFO SystemMonitor:133 [interfaces.py:start():190] Started gpu monitoring
|
| 939 |
+
2024-03-06 14:45:26,840 INFO SystemMonitor:133 [interfaces.py:start():190] Started memory monitoring
|
| 940 |
+
2024-03-06 14:45:26,840 INFO SystemMonitor:133 [interfaces.py:start():190] Started network monitoring
|
| 941 |
+
2024-03-06 14:45:27,960 DEBUG SenderThread:133 [sender.py:send():382] send: config
|
| 942 |
+
2024-03-06 14:45:27,962 DEBUG SenderThread:133 [sender.py:send():382] send: metric
|
| 943 |
+
2024-03-06 14:45:27,962 DEBUG SenderThread:133 [sender.py:send():382] send: metric
|
| 944 |
+
2024-03-06 14:45:27,962 WARNING SenderThread:133 [sender.py:send_metric():1354] Seen metric with glob (shouldn't happen)
|
| 945 |
+
2024-03-06 14:45:29,624 INFO Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/output.log
|
| 946 |
+
2024-03-06 14:45:30,774 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 947 |
+
2024-03-06 14:45:30,963 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 948 |
+
2024-03-06 14:45:35,775 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 949 |
+
2024-03-06 14:45:35,964 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 950 |
+
2024-03-06 14:45:40,777 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 951 |
+
2024-03-06 14:45:40,965 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 952 |
+
2024-03-06 14:45:45,784 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 953 |
+
2024-03-06 14:45:45,966 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 954 |
+
2024-03-06 14:45:50,785 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 955 |
+
2024-03-06 14:45:50,972 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 956 |
+
2024-03-06 14:45:51,633 INFO Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/config.yaml
|
| 957 |
+
2024-03-06 14:45:54,625 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: partial_history
|
| 958 |
+
2024-03-06 14:45:54,626 DEBUG SenderThread:133 [sender.py:send():382] send: history
|
| 959 |
+
2024-03-06 14:45:54,626 DEBUG SenderThread:133 [sender.py:send_request():409] send_request: summary_record
|
| 960 |
+
2024-03-06 14:45:54,627 INFO SenderThread:133 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end
|
| 961 |
+
2024-03-06 14:45:54,634 INFO Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/wandb-summary.json
|
| 962 |
+
2024-03-06 14:45:55,786 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 963 |
+
2024-03-06 14:45:56,513 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: pause
|
| 964 |
+
2024-03-06 14:45:56,513 INFO HandlerThread:133 [handler.py:handle_request_pause():708] stopping system metrics thread
|
| 965 |
+
2024-03-06 14:45:56,513 INFO HandlerThread:133 [system_monitor.py:finish():203] Stopping system monitor
|
| 966 |
+
2024-03-06 14:45:56,514 DEBUG SystemMonitor:133 [system_monitor.py:_start():172] Starting system metrics aggregation loop
|
| 967 |
+
2024-03-06 14:45:56,514 DEBUG SystemMonitor:133 [system_monitor.py:_start():179] Finished system metrics aggregation loop
|
| 968 |
+
2024-03-06 14:45:56,514 DEBUG SystemMonitor:133 [system_monitor.py:_start():183] Publishing last batch of metrics
|
| 969 |
+
2024-03-06 14:45:56,514 INFO HandlerThread:133 [interfaces.py:finish():202] Joined cpu monitor
|
| 970 |
+
2024-03-06 14:45:56,515 INFO HandlerThread:133 [interfaces.py:finish():202] Joined disk monitor
|
| 971 |
+
2024-03-06 14:45:56,521 INFO HandlerThread:133 [interfaces.py:finish():202] Joined gpu monitor
|
| 972 |
+
2024-03-06 14:45:56,521 INFO HandlerThread:133 [interfaces.py:finish():202] Joined memory monitor
|
| 973 |
+
2024-03-06 14:45:56,521 INFO HandlerThread:133 [interfaces.py:finish():202] Joined network monitor
|
| 974 |
+
2024-03-06 14:45:56,522 DEBUG SenderThread:133 [sender.py:send():382] send: stats
|
| 975 |
+
2024-03-06 14:45:56,522 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 976 |
+
2024-03-06 14:46:00,787 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 977 |
+
2024-03-06 14:46:01,523 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 978 |
+
2024-03-06 14:46:05,788 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 979 |
+
2024-03-06 14:46:06,525 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 980 |
+
2024-03-06 14:46:10,789 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 981 |
+
2024-03-06 14:46:11,526 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 982 |
+
2024-03-06 14:46:15,790 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 983 |
+
2024-03-06 14:46:16,527 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 984 |
+
2024-03-06 14:46:20,792 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 985 |
+
2024-03-06 14:46:21,528 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 986 |
+
2024-03-06 14:46:25,793 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 987 |
+
2024-03-06 14:46:26,529 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 988 |
+
2024-03-06 14:46:30,794 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 989 |
+
2024-03-06 14:46:31,530 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 990 |
+
2024-03-06 14:46:35,795 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 991 |
+
2024-03-06 14:46:36,531 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 992 |
+
2024-03-06 14:46:40,797 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 993 |
+
2024-03-06 14:46:41,533 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 994 |
+
2024-03-06 14:46:45,798 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 995 |
+
2024-03-06 14:46:46,534 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 996 |
+
2024-03-06 14:46:50,799 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 997 |
+
2024-03-06 14:46:51,535 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 998 |
+
2024-03-06 14:46:55,800 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 999 |
+
2024-03-06 14:46:56,536 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1000 |
+
2024-03-06 14:47:00,801 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1001 |
+
2024-03-06 14:47:01,537 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1002 |
+
2024-03-06 14:47:05,802 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1003 |
+
2024-03-06 14:47:06,538 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1004 |
+
2024-03-06 14:47:10,804 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1005 |
+
2024-03-06 14:47:11,540 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1006 |
+
2024-03-06 14:47:15,805 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1007 |
+
2024-03-06 14:47:16,541 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1008 |
+
2024-03-06 14:47:20,806 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1009 |
+
2024-03-06 14:47:21,542 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1010 |
+
2024-03-06 14:47:25,807 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1011 |
+
2024-03-06 14:47:26,543 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1012 |
+
2024-03-06 14:47:30,808 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1013 |
+
2024-03-06 14:47:31,544 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1014 |
+
2024-03-06 14:47:35,809 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1015 |
+
2024-03-06 14:47:36,546 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1016 |
+
2024-03-06 14:47:40,810 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1017 |
+
2024-03-06 14:47:41,547 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1018 |
+
2024-03-06 14:47:45,811 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1019 |
+
2024-03-06 14:47:46,548 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1020 |
+
2024-03-06 14:47:50,813 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1021 |
+
2024-03-06 14:47:51,549 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1022 |
+
2024-03-06 14:47:55,814 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1023 |
+
2024-03-06 14:47:56,550 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1024 |
+
2024-03-06 14:48:00,815 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1025 |
+
2024-03-06 14:48:01,551 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1026 |
+
2024-03-06 14:48:05,816 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1027 |
+
2024-03-06 14:48:06,552 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1028 |
+
2024-03-06 14:48:10,817 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1029 |
+
2024-03-06 14:48:11,554 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1030 |
+
2024-03-06 14:48:15,818 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1031 |
+
2024-03-06 14:48:16,555 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1032 |
+
2024-03-06 14:48:20,819 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1033 |
+
2024-03-06 14:48:21,556 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1034 |
+
2024-03-06 14:48:25,820 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1035 |
+
2024-03-06 14:48:26,557 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1036 |
+
2024-03-06 14:48:30,821 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1037 |
+
2024-03-06 14:48:31,559 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1038 |
+
2024-03-06 14:48:35,822 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1039 |
+
2024-03-06 14:48:36,560 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1040 |
+
2024-03-06 14:48:40,823 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1041 |
+
2024-03-06 14:48:41,561 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1042 |
+
2024-03-06 14:48:45,824 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1043 |
+
2024-03-06 14:48:46,562 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1044 |
+
2024-03-06 14:48:50,825 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1045 |
+
2024-03-06 14:48:51,563 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1046 |
+
2024-03-06 14:48:55,827 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1047 |
+
2024-03-06 14:48:56,564 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1048 |
+
2024-03-06 14:49:00,828 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1049 |
+
2024-03-06 14:49:01,565 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1050 |
+
2024-03-06 14:49:05,829 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1051 |
+
2024-03-06 14:49:06,566 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1052 |
+
2024-03-06 14:49:10,830 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1053 |
+
2024-03-06 14:49:11,567 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1054 |
+
2024-03-06 14:49:15,831 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1055 |
+
2024-03-06 14:49:16,568 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1056 |
+
2024-03-06 14:49:20,832 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1057 |
+
2024-03-06 14:49:21,569 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1058 |
+
2024-03-06 14:49:25,834 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1059 |
+
2024-03-06 14:49:26,570 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1060 |
+
2024-03-06 14:49:30,835 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1061 |
+
2024-03-06 14:49:31,571 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1062 |
+
2024-03-06 14:49:35,836 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1063 |
+
2024-03-06 14:49:36,572 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1064 |
+
2024-03-06 14:49:40,837 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1065 |
+
2024-03-06 14:49:41,573 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1066 |
+
2024-03-06 14:49:45,838 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1067 |
+
2024-03-06 14:49:46,574 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1068 |
+
2024-03-06 14:49:50,839 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1069 |
+
2024-03-06 14:49:51,575 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1070 |
+
2024-03-06 14:49:55,841 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1071 |
+
2024-03-06 14:49:56,576 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1072 |
+
2024-03-06 14:50:00,842 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1073 |
+
2024-03-06 14:50:01,577 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1074 |
+
2024-03-06 14:50:05,843 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1075 |
+
2024-03-06 14:50:06,578 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1076 |
+
2024-03-06 14:50:10,844 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1077 |
+
2024-03-06 14:50:11,580 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1078 |
+
2024-03-06 14:50:11,638 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: resume
|
| 1079 |
+
2024-03-06 14:50:11,639 INFO HandlerThread:133 [handler.py:handle_request_resume():699] starting system metrics thread
|
| 1080 |
+
2024-03-06 14:50:11,639 INFO HandlerThread:133 [system_monitor.py:start():194] Starting system monitor
|
| 1081 |
+
2024-03-06 14:50:11,639 INFO SystemMonitor:133 [system_monitor.py:_start():158] Starting system asset monitoring threads
|
| 1082 |
+
2024-03-06 14:50:11,639 INFO SystemMonitor:133 [interfaces.py:start():190] Started cpu monitoring
|
| 1083 |
+
2024-03-06 14:50:11,640 INFO SystemMonitor:133 [interfaces.py:start():190] Started disk monitoring
|
| 1084 |
+
2024-03-06 14:50:11,641 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: pause
|
| 1085 |
+
2024-03-06 14:50:11,641 INFO HandlerThread:133 [handler.py:handle_request_pause():708] stopping system metrics thread
|
| 1086 |
+
2024-03-06 14:50:11,641 INFO HandlerThread:133 [system_monitor.py:finish():203] Stopping system monitor
|
| 1087 |
+
2024-03-06 14:50:11,642 INFO HandlerThread:133 [interfaces.py:finish():202] Joined cpu monitor
|
| 1088 |
+
2024-03-06 14:50:11,642 INFO SystemMonitor:133 [interfaces.py:start():190] Started gpu monitoring
|
| 1089 |
+
2024-03-06 14:50:11,642 DEBUG SystemMonitor:133 [system_monitor.py:_start():172] Starting system metrics aggregation loop
|
| 1090 |
+
2024-03-06 14:50:11,642 DEBUG SystemMonitor:133 [system_monitor.py:_start():179] Finished system metrics aggregation loop
|
| 1091 |
+
2024-03-06 14:50:11,643 DEBUG SystemMonitor:133 [system_monitor.py:_start():183] Publishing last batch of metrics
|
| 1092 |
+
2024-03-06 14:50:11,645 INFO HandlerThread:133 [interfaces.py:finish():202] Joined disk monitor
|
| 1093 |
+
2024-03-06 14:50:11,653 INFO HandlerThread:133 [interfaces.py:finish():202] Joined gpu monitor
|
| 1094 |
+
2024-03-06 14:50:11,654 DEBUG SenderThread:133 [sender.py:send():382] send: stats
|
| 1095 |
+
2024-03-06 14:50:15,846 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1096 |
+
2024-03-06 14:50:16,655 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1097 |
+
2024-03-06 14:50:19,879 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: resume
|
| 1098 |
+
2024-03-06 14:50:19,879 INFO HandlerThread:133 [handler.py:handle_request_resume():699] starting system metrics thread
|
| 1099 |
+
2024-03-06 14:50:19,879 INFO HandlerThread:133 [system_monitor.py:start():194] Starting system monitor
|
| 1100 |
+
2024-03-06 14:50:19,879 INFO SystemMonitor:133 [system_monitor.py:_start():158] Starting system asset monitoring threads
|
| 1101 |
+
2024-03-06 14:50:19,880 INFO SystemMonitor:133 [interfaces.py:start():190] Started cpu monitoring
|
| 1102 |
+
2024-03-06 14:50:19,881 INFO SystemMonitor:133 [interfaces.py:start():190] Started disk monitoring
|
| 1103 |
+
2024-03-06 14:50:19,882 INFO SystemMonitor:133 [interfaces.py:start():190] Started gpu monitoring
|
| 1104 |
+
2024-03-06 14:50:19,883 INFO SystemMonitor:133 [interfaces.py:start():190] Started memory monitoring
|
| 1105 |
+
2024-03-06 14:50:19,885 INFO SystemMonitor:133 [interfaces.py:start():190] Started network monitoring
|
| 1106 |
+
2024-03-06 14:50:20,901 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1107 |
+
2024-03-06 14:50:21,115 DEBUG SenderThread:133 [sender.py:send():382] send: config
|
| 1108 |
+
2024-03-06 14:50:21,116 DEBUG SenderThread:133 [sender.py:send():382] send: metric
|
| 1109 |
+
2024-03-06 14:50:21,117 DEBUG SenderThread:133 [sender.py:send():382] send: metric
|
| 1110 |
+
2024-03-06 14:50:21,117 WARNING SenderThread:133 [sender.py:send_metric():1354] Seen metric with glob (shouldn't happen)
|
| 1111 |
+
2024-03-06 14:50:21,743 INFO Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/output.log
|
| 1112 |
+
2024-03-06 14:50:22,122 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1113 |
+
2024-03-06 14:50:22,744 INFO Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/config.yaml
|
| 1114 |
+
2024-03-06 14:50:25,904 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1115 |
+
2024-03-06 14:50:27,366 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1116 |
+
2024-03-06 14:50:30,905 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1117 |
+
2024-03-06 14:50:32,367 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1118 |
+
2024-03-06 14:50:35,907 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1119 |
+
2024-03-06 14:50:37,368 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1120 |
+
2024-03-06 14:50:40,908 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1121 |
+
2024-03-06 14:50:42,369 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1122 |
+
2024-03-06 14:50:45,909 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1123 |
+
2024-03-06 14:50:47,370 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1124 |
+
2024-03-06 14:50:47,806 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: partial_history
|
| 1125 |
+
2024-03-06 14:50:47,807 DEBUG SenderThread:133 [sender.py:send():382] send: history
|
| 1126 |
+
2024-03-06 14:50:47,807 DEBUG SenderThread:133 [sender.py:send_request():409] send_request: summary_record
|
| 1127 |
+
2024-03-06 14:50:47,810 INFO SenderThread:133 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end
|
| 1128 |
+
2024-03-06 14:50:48,754 INFO Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/wandb-summary.json
|
| 1129 |
+
2024-03-06 14:50:49,741 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: pause
|
| 1130 |
+
2024-03-06 14:50:49,741 INFO HandlerThread:133 [handler.py:handle_request_pause():708] stopping system metrics thread
|
| 1131 |
+
2024-03-06 14:50:49,741 INFO HandlerThread:133 [system_monitor.py:finish():203] Stopping system monitor
|
| 1132 |
+
2024-03-06 14:50:49,742 DEBUG SystemMonitor:133 [system_monitor.py:_start():172] Starting system metrics aggregation loop
|
| 1133 |
+
2024-03-06 14:50:49,742 DEBUG SystemMonitor:133 [system_monitor.py:_start():179] Finished system metrics aggregation loop
|
| 1134 |
+
2024-03-06 14:50:49,742 DEBUG SystemMonitor:133 [system_monitor.py:_start():183] Publishing last batch of metrics
|
| 1135 |
+
2024-03-06 14:50:49,742 INFO HandlerThread:133 [interfaces.py:finish():202] Joined cpu monitor
|
| 1136 |
+
2024-03-06 14:50:49,743 INFO HandlerThread:133 [interfaces.py:finish():202] Joined disk monitor
|
| 1137 |
+
2024-03-06 14:50:49,749 INFO HandlerThread:133 [interfaces.py:finish():202] Joined gpu monitor
|
| 1138 |
+
2024-03-06 14:50:49,749 INFO HandlerThread:133 [interfaces.py:finish():202] Joined memory monitor
|
| 1139 |
+
2024-03-06 14:50:49,749 INFO HandlerThread:133 [interfaces.py:finish():202] Joined network monitor
|
| 1140 |
+
2024-03-06 14:50:49,750 DEBUG SenderThread:133 [sender.py:send():382] send: stats
|
| 1141 |
+
2024-03-06 14:50:50,910 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1142 |
+
2024-03-06 14:50:52,751 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1143 |
+
2024-03-06 14:50:55,911 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1144 |
+
2024-03-06 14:50:57,752 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1145 |
+
2024-03-06 14:51:00,913 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1146 |
+
2024-03-06 14:51:02,754 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1147 |
+
2024-03-06 14:51:05,914 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1148 |
+
2024-03-06 14:51:07,755 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1149 |
+
2024-03-06 14:51:08,770 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: resume
|
| 1150 |
+
2024-03-06 14:51:08,770 INFO HandlerThread:133 [handler.py:handle_request_resume():699] starting system metrics thread
|
| 1151 |
+
2024-03-06 14:51:08,771 INFO HandlerThread:133 [system_monitor.py:start():194] Starting system monitor
|
| 1152 |
+
2024-03-06 14:51:08,771 INFO SystemMonitor:133 [system_monitor.py:_start():158] Starting system asset monitoring threads
|
| 1153 |
+
2024-03-06 14:51:08,771 INFO SystemMonitor:133 [interfaces.py:start():190] Started cpu monitoring
|
| 1154 |
+
2024-03-06 14:51:08,772 INFO SystemMonitor:133 [interfaces.py:start():190] Started disk monitoring
|
| 1155 |
+
2024-03-06 14:51:08,773 INFO SystemMonitor:133 [interfaces.py:start():190] Started gpu monitoring
|
| 1156 |
+
2024-03-06 14:51:08,775 INFO SystemMonitor:133 [interfaces.py:start():190] Started memory monitoring
|
| 1157 |
+
2024-03-06 14:51:08,777 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: pause
|
| 1158 |
+
2024-03-06 14:51:08,779 INFO HandlerThread:133 [handler.py:handle_request_pause():708] stopping system metrics thread
|
| 1159 |
+
2024-03-06 14:51:08,779 INFO HandlerThread:133 [system_monitor.py:finish():203] Stopping system monitor
|
| 1160 |
+
2024-03-06 14:51:08,779 INFO SystemMonitor:133 [interfaces.py:start():190] Started network monitoring
|
| 1161 |
+
2024-03-06 14:51:08,779 DEBUG SystemMonitor:133 [system_monitor.py:_start():172] Starting system metrics aggregation loop
|
| 1162 |
+
2024-03-06 14:51:08,779 DEBUG SystemMonitor:133 [system_monitor.py:_start():179] Finished system metrics aggregation loop
|
| 1163 |
+
2024-03-06 14:51:08,780 DEBUG SystemMonitor:133 [system_monitor.py:_start():183] Publishing last batch of metrics
|
| 1164 |
+
2024-03-06 14:51:08,780 INFO HandlerThread:133 [interfaces.py:finish():202] Joined cpu monitor
|
| 1165 |
+
2024-03-06 14:51:08,781 INFO HandlerThread:133 [interfaces.py:finish():202] Joined disk monitor
|
| 1166 |
+
2024-03-06 14:51:08,791 INFO HandlerThread:133 [interfaces.py:finish():202] Joined gpu monitor
|
| 1167 |
+
2024-03-06 14:51:08,791 INFO HandlerThread:133 [interfaces.py:finish():202] Joined memory monitor
|
| 1168 |
+
2024-03-06 14:51:08,791 INFO HandlerThread:133 [interfaces.py:finish():202] Joined network monitor
|
| 1169 |
+
2024-03-06 14:51:08,792 DEBUG SenderThread:133 [sender.py:send():382] send: stats
|
| 1170 |
+
2024-03-06 14:51:10,915 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1171 |
+
2024-03-06 14:51:12,600 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: resume
|
| 1172 |
+
2024-03-06 14:51:12,600 INFO HandlerThread:133 [handler.py:handle_request_resume():699] starting system metrics thread
|
| 1173 |
+
2024-03-06 14:51:12,600 INFO HandlerThread:133 [system_monitor.py:start():194] Starting system monitor
|
| 1174 |
+
2024-03-06 14:51:12,600 INFO SystemMonitor:133 [system_monitor.py:_start():158] Starting system asset monitoring threads
|
| 1175 |
+
2024-03-06 14:51:12,601 INFO SystemMonitor:133 [interfaces.py:start():190] Started cpu monitoring
|
| 1176 |
+
2024-03-06 14:51:12,602 INFO SystemMonitor:133 [interfaces.py:start():190] Started disk monitoring
|
| 1177 |
+
2024-03-06 14:51:12,605 INFO SystemMonitor:133 [interfaces.py:start():190] Started gpu monitoring
|
| 1178 |
+
2024-03-06 14:51:12,605 INFO SystemMonitor:133 [interfaces.py:start():190] Started memory monitoring
|
| 1179 |
+
2024-03-06 14:51:12,606 INFO SystemMonitor:133 [interfaces.py:start():190] Started network monitoring
|
| 1180 |
+
2024-03-06 14:51:12,793 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1181 |
+
2024-03-06 14:51:13,753 DEBUG SenderThread:133 [sender.py:send():382] send: config
|
| 1182 |
+
2024-03-06 14:51:13,755 DEBUG SenderThread:133 [sender.py:send():382] send: metric
|
| 1183 |
+
2024-03-06 14:51:13,755 DEBUG SenderThread:133 [sender.py:send():382] send: metric
|
| 1184 |
+
2024-03-06 14:51:13,755 WARNING SenderThread:133 [sender.py:send_metric():1354] Seen metric with glob (shouldn't happen)
|
| 1185 |
+
2024-03-06 14:51:13,763 INFO Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/output.log
|
| 1186 |
+
2024-03-06 14:51:15,919 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1187 |
+
2024-03-06 14:51:18,756 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1188 |
+
2024-03-06 14:51:20,922 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1189 |
+
2024-03-06 14:51:23,762 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1190 |
+
2024-03-06 14:51:24,767 INFO Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/config.yaml
|
| 1191 |
+
2024-03-06 14:51:25,923 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1192 |
+
2024-03-06 14:51:29,020 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1193 |
+
2024-03-06 14:51:30,925 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1194 |
+
2024-03-06 14:51:34,021 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1195 |
+
2024-03-06 14:51:35,928 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1196 |
+
2024-03-06 14:51:39,022 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1197 |
+
2024-03-06 14:51:40,429 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: partial_history
|
| 1198 |
+
2024-03-06 14:51:40,430 DEBUG SenderThread:133 [sender.py:send():382] send: history
|
| 1199 |
+
2024-03-06 14:51:40,431 DEBUG SenderThread:133 [sender.py:send_request():409] send_request: summary_record
|
| 1200 |
+
2024-03-06 14:51:40,432 INFO SenderThread:133 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end
|
| 1201 |
+
2024-03-06 14:51:40,773 INFO Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/wandb-summary.json
|
| 1202 |
+
2024-03-06 14:51:40,942 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1203 |
+
2024-03-06 14:51:42,098 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: partial_history
|
| 1204 |
+
2024-03-06 14:51:42,101 DEBUG SenderThread:133 [sender.py:send():382] send: metric
|
| 1205 |
+
2024-03-06 14:51:42,101 DEBUG SenderThread:133 [sender.py:send():382] send: metric
|
| 1206 |
+
2024-03-06 14:51:42,102 DEBUG SenderThread:133 [sender.py:send():382] send: metric
|
| 1207 |
+
2024-03-06 14:51:42,102 DEBUG SenderThread:133 [sender.py:send():382] send: metric
|
| 1208 |
+
2024-03-06 14:51:42,102 DEBUG SenderThread:133 [sender.py:send():382] send: history
|
| 1209 |
+
2024-03-06 14:51:42,102 DEBUG SenderThread:133 [sender.py:send_request():409] send_request: summary_record
|
| 1210 |
+
2024-03-06 14:51:42,103 INFO SenderThread:133 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end
|
| 1211 |
+
2024-03-06 14:51:42,774 INFO Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/wandb-summary.json
|
| 1212 |
+
2024-03-06 14:51:43,775 INFO Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/output.log
|
| 1213 |
+
2024-03-06 14:51:44,104 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
|
| 1214 |
+
2024-03-06 14:51:46,440 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: partial_history
|
| 1215 |
+
2024-03-06 14:51:46,441 DEBUG SenderThread:133 [sender.py:send():382] send: history
|
| 1216 |
+
2024-03-06 14:51:46,442 DEBUG SenderThread:133 [sender.py:send_request():409] send_request: summary_record
|
| 1217 |
+
2024-03-06 14:51:46,442 INFO SenderThread:133 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end
|
| 1218 |
+
2024-03-06 14:51:46,678 DEBUG HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
|
| 1219 |
+
2024-03-06 14:51:46,776 INFO Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/wandb-summary.json
|
wandb/run-20240306_142408-og3d0ld1/logs/debug.log
CHANGED
|
@@ -74,3 +74,24 @@ config: {}
|
|
| 74 |
2024-03-06 14:40:45,818 INFO MainThread:34 [wandb_init.py:_pause_backend():437] pausing backend
|
| 75 |
2024-03-06 14:43:14,295 INFO MainThread:34 [wandb_init.py:_resume_backend():442] resuming backend
|
| 76 |
2024-03-06 14:43:15,473 INFO MainThread:34 [wandb_run.py:_config_callback():1343] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': None, 'finetuning_task': None, 'id2label': {0: 'Documentation Ambiguity', 1: 'Documentation Completeness', 2: 'Documentation Replicability', 3: 'Documentation Replication on Other Examples', 4: 'Inadequate Examples', 5: 'Lack of Alternative Solutions/Documentation', 6: 'Requesting (Additional) Documentation/Examples'}, 'label2id': {'Documentation Ambiguity': 0, 'Documentation Completeness': 1, 'Documentation Replicability': 2, 'Documentation Replication on Other Examples': 3, 'Inadequate Examples': 4, 'Lack of Alternative Solutions/Documentation': 5, 'Requesting (Additional) Documentation/Examples': 6}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'mmukh/SOBertBase', 'transformers_version': '4.38.1', 'model_type': 'megatron-bert', 'tokenizer_type': 'SentencePieceTokenizer', 'vocab_size': 50048, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 2048, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Mar06_14-43-14_cd2c3b1980c7', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
2024-03-06 14:40:45,818 INFO MainThread:34 [wandb_init.py:_pause_backend():437] pausing backend
|
| 75 |
2024-03-06 14:43:14,295 INFO MainThread:34 [wandb_init.py:_resume_backend():442] resuming backend
|
| 76 |
2024-03-06 14:43:15,473 INFO MainThread:34 [wandb_run.py:_config_callback():1343] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': None, 'finetuning_task': None, 'id2label': {0: 'Documentation Ambiguity', 1: 'Documentation Completeness', 2: 'Documentation Replicability', 3: 'Documentation Replication on Other Examples', 4: 'Inadequate Examples', 5: 'Lack of Alternative Solutions/Documentation', 6: 'Requesting (Additional) Documentation/Examples'}, 'label2id': {'Documentation Ambiguity': 0, 'Documentation Completeness': 1, 'Documentation Replicability': 2, 'Documentation Replication on Other Examples': 3, 'Inadequate Examples': 4, 'Lack of Alternative Solutions/Documentation': 5, 'Requesting (Additional) Documentation/Examples': 6}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'mmukh/SOBertBase', 'transformers_version': '4.38.1', 'model_type': 'megatron-bert', 'tokenizer_type': 'SentencePieceTokenizer', 'vocab_size': 50048, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 2048, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Mar06_14-43-14_cd2c3b1980c7', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
|
| 77 |
+
2024-03-06 14:44:15,386 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
| 78 |
+
2024-03-06 14:44:15,386 INFO MainThread:34 [wandb_init.py:_pause_backend():437] pausing backend
|
| 79 |
+
2024-03-06 14:45:09,983 INFO MainThread:34 [wandb_init.py:_resume_backend():442] resuming backend
|
| 80 |
+
2024-03-06 14:45:09,985 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
| 81 |
+
2024-03-06 14:45:09,985 INFO MainThread:34 [wandb_init.py:_pause_backend():437] pausing backend
|
| 82 |
+
2024-03-06 14:45:26,835 INFO MainThread:34 [wandb_init.py:_resume_backend():442] resuming backend
|
| 83 |
+
2024-03-06 14:45:27,956 INFO MainThread:34 [wandb_run.py:_config_callback():1343] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': None, 'finetuning_task': None, 'id2label': {0: 'Documentation Ambiguity', 1: 'Documentation Completeness', 2: 'Documentation Replicability', 3: 'Documentation Replication on Other Examples', 4: 'Inadequate Examples', 5: 'Lack of Alternative Solutions/Documentation', 6: 'Requesting (Additional) Documentation/Examples'}, 'label2id': {'Documentation Ambiguity': 0, 'Documentation Completeness': 1, 'Documentation Replicability': 2, 'Documentation Replication on Other Examples': 3, 'Inadequate Examples': 4, 'Lack of Alternative Solutions/Documentation': 5, 'Requesting (Additional) Documentation/Examples': 6}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'mmukh/SOBertBase', 'transformers_version': '4.38.1', 'model_type': 'megatron-bert', 'tokenizer_type': 'SentencePieceTokenizer', 'vocab_size': 50048, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 2048, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Mar06_14-45-27_cd2c3b1980c7', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
|
| 84 |
+
2024-03-06 14:45:56,512 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
| 85 |
+
2024-03-06 14:45:56,512 INFO MainThread:34 [wandb_init.py:_pause_backend():437] pausing backend
|
| 86 |
+
2024-03-06 14:50:11,637 INFO MainThread:34 [wandb_init.py:_resume_backend():442] resuming backend
|
| 87 |
+
2024-03-06 14:50:11,640 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
| 88 |
+
2024-03-06 14:50:11,640 INFO MainThread:34 [wandb_init.py:_pause_backend():437] pausing backend
|
| 89 |
+
2024-03-06 14:50:19,878 INFO MainThread:34 [wandb_init.py:_resume_backend():442] resuming backend
|
| 90 |
+
2024-03-06 14:50:21,110 INFO MainThread:34 [wandb_run.py:_config_callback():1343] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': None, 'finetuning_task': None, 'id2label': {0: 'Documentation Ambiguity', 1: 'Documentation Completeness', 2: 'Documentation Replicability', 3: 'Documentation Replication on Other Examples', 4: 'Inadequate Examples', 5: 'Lack of Alternative Solutions/Documentation', 6: 'Requesting (Additional) Documentation/Examples'}, 'label2id': {'Documentation Ambiguity': 0, 'Documentation Completeness': 1, 'Documentation Replicability': 2, 'Documentation Replication on Other Examples': 3, 'Inadequate Examples': 4, 'Lack of Alternative Solutions/Documentation': 5, 'Requesting (Additional) Documentation/Examples': 6}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'mmukh/SOBertBase', 'transformers_version': '4.38.1', 'model_type': 'megatron-bert', 'tokenizer_type': 'SentencePieceTokenizer', 'vocab_size': 50048, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 2048, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Mar06_14-50-20_cd2c3b1980c7', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
|
| 91 |
+
2024-03-06 14:50:49,740 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
| 92 |
+
2024-03-06 14:50:49,740 INFO MainThread:34 [wandb_init.py:_pause_backend():437] pausing backend
|
| 93 |
+
2024-03-06 14:51:08,770 INFO MainThread:34 [wandb_init.py:_resume_backend():442] resuming backend
|
| 94 |
+
2024-03-06 14:51:08,772 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
| 95 |
+
2024-03-06 14:51:08,773 INFO MainThread:34 [wandb_init.py:_pause_backend():437] pausing backend
|
| 96 |
+
2024-03-06 14:51:12,599 INFO MainThread:34 [wandb_init.py:_resume_backend():442] resuming backend
|
| 97 |
+
2024-03-06 14:51:13,748 INFO MainThread:34 [wandb_run.py:_config_callback():1343] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': None, 'finetuning_task': None, 'id2label': {0: 'Documentation Ambiguity', 1: 'Documentation Completeness', 2: 'Documentation Replicability', 3: 'Documentation Replication on Other Examples', 4: 'Inadequate Examples', 5: 'Lack of Alternative Solutions/Documentation', 6: 'Requesting (Additional) Documentation/Examples'}, 'label2id': {'Documentation Ambiguity': 0, 'Documentation Completeness': 1, 'Documentation Replicability': 2, 'Documentation Replication on Other Examples': 3, 'Inadequate Examples': 4, 'Lack of Alternative Solutions/Documentation': 5, 'Requesting (Additional) Documentation/Examples': 6}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'mmukh/SOBertBase', 'transformers_version': '4.38.1', 'model_type': 'megatron-bert', 'tokenizer_type': 'SentencePieceTokenizer', 'vocab_size': 50048, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 2048, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Mar06_14-51-12_cd2c3b1980c7', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
|
wandb/run-20240306_142408-og3d0ld1/run-og3d0ld1.wandb
CHANGED
|
Binary files a/wandb/run-20240306_142408-og3d0ld1/run-og3d0ld1.wandb and b/wandb/run-20240306_142408-og3d0ld1/run-og3d0ld1.wandb differ
|
|
|