Training in progress, epoch 1

Browse files

Files changed (12) hide show

runs/Mar06_14-45-27_cd2c3b1980c7/events.out.tfevents.1709736327.cd2c3b1980c7.34.4 +3 -0
runs/Mar06_14-50-20_cd2c3b1980c7/events.out.tfevents.1709736621.cd2c3b1980c7.34.5 +3 -0
runs/Mar06_14-51-12_cd2c3b1980c7/events.out.tfevents.1709736673.cd2c3b1980c7.34.6 +3 -0
training_args.bin +1 -1
wandb/debug-internal.log +343 -0
wandb/debug.log +21 -0
wandb/run-20240306_142408-og3d0ld1/files/config.yaml +1 -1
wandb/run-20240306_142408-og3d0ld1/files/output.log +7 -0
wandb/run-20240306_142408-og3d0ld1/files/wandb-summary.json +1 -1
wandb/run-20240306_142408-og3d0ld1/logs/debug-internal.log +343 -0
wandb/run-20240306_142408-og3d0ld1/logs/debug.log +21 -0
wandb/run-20240306_142408-og3d0ld1/run-og3d0ld1.wandb +0 -0

runs/Mar06_14-45-27_cd2c3b1980c7/events.out.tfevents.1709736327.cd2c3b1980c7.34.4 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f645083ede524cda645409ba956fa9ad914ea27655e9c752b11826e464d69c84
+size 5374

runs/Mar06_14-50-20_cd2c3b1980c7/events.out.tfevents.1709736621.cd2c3b1980c7.34.5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a301590a4808a5fa5fcfa1db4131ac3207c9712745d495cc156aacd42dbd2116
+size 5374

runs/Mar06_14-51-12_cd2c3b1980c7/events.out.tfevents.1709736673.cd2c3b1980c7.34.6 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:773e13429b1bf17993993809acd0e6c51078afe5d27a291d147d444234f8251c
+size 6185

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5dc70d1f390dffc6ea61c4aff646ef9dc475c8030276d3b0440a503b96593826
 size 4856

 version https://git-lfs.github.com/spec/v1
+oid sha256:170b6b5c6913afd023aae993ae4a337517f868260c0c93323166526cc988705b
 size 4856

wandb/debug-internal.log CHANGED Viewed

@@ -874,3 +874,346 @@
 2024-03-06 14:44:13,758 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
 2024-03-06 14:44:14,301 DEBUG   SystemMonitor:133 [system_monitor.py:_start():172] Starting system metrics aggregation loop
 2024-03-06 14:44:14,303 DEBUG   SenderThread:133 [sender.py:send():382] send: stats

 2024-03-06 14:44:13,758 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
 2024-03-06 14:44:14,301 DEBUG   SystemMonitor:133 [system_monitor.py:_start():172] Starting system metrics aggregation loop
 2024-03-06 14:44:14,303 DEBUG   SenderThread:133 [sender.py:send():382] send: stats
+2024-03-06 14:44:15,387 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: pause
+2024-03-06 14:44:15,387 INFO    HandlerThread:133 [handler.py:handle_request_pause():708] stopping system metrics thread
+2024-03-06 14:44:15,387 INFO    HandlerThread:133 [system_monitor.py:finish():203] Stopping system monitor
+2024-03-06 14:44:15,387 DEBUG   SystemMonitor:133 [system_monitor.py:_start():179] Finished system metrics aggregation loop
+2024-03-06 14:44:15,387 DEBUG   SystemMonitor:133 [system_monitor.py:_start():183] Publishing last batch of metrics
+2024-03-06 14:44:15,388 INFO    HandlerThread:133 [interfaces.py:finish():202] Joined cpu monitor
+2024-03-06 14:44:15,389 INFO    HandlerThread:133 [interfaces.py:finish():202] Joined disk monitor
+2024-03-06 14:44:15,394 INFO    HandlerThread:133 [interfaces.py:finish():202] Joined gpu monitor
+2024-03-06 14:44:15,395 INFO    HandlerThread:133 [interfaces.py:finish():202] Joined memory monitor
+2024-03-06 14:44:15,395 INFO    HandlerThread:133 [interfaces.py:finish():202] Joined network monitor
+2024-03-06 14:44:15,395 DEBUG   SenderThread:133 [sender.py:send():382] send: stats
+2024-03-06 14:44:15,630 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:44:19,396 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:44:20,631 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:44:24,398 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:44:25,633 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:44:29,399 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:44:30,634 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:44:34,400 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:44:35,635 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:44:39,401 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:44:40,636 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:44:44,403 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:44:45,637 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:44:49,404 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:44:50,639 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:44:54,405 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:44:55,640 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:44:59,407 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:45:00,641 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:45:04,407 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:45:05,642 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:45:09,409 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:45:09,984 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: resume
+2024-03-06 14:45:09,984 INFO    HandlerThread:133 [handler.py:handle_request_resume():699] starting system metrics thread
+2024-03-06 14:45:09,984 INFO    HandlerThread:133 [system_monitor.py:start():194] Starting system monitor
+2024-03-06 14:45:09,984 INFO    SystemMonitor:133 [system_monitor.py:_start():158] Starting system asset monitoring threads
+2024-03-06 14:45:09,984 INFO    SystemMonitor:133 [interfaces.py:start():190] Started cpu monitoring
+2024-03-06 14:45:09,985 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: pause
+2024-03-06 14:45:09,986 INFO    HandlerThread:133 [handler.py:handle_request_pause():708] stopping system metrics thread
+2024-03-06 14:45:09,986 INFO    HandlerThread:133 [system_monitor.py:finish():203] Stopping system monitor
+2024-03-06 14:45:09,986 INFO    SystemMonitor:133 [interfaces.py:start():190] Started disk monitoring
+2024-03-06 14:45:09,986 DEBUG   SystemMonitor:133 [system_monitor.py:_start():172] Starting system metrics aggregation loop
+2024-03-06 14:45:09,986 DEBUG   SystemMonitor:133 [system_monitor.py:_start():179] Finished system metrics aggregation loop
+2024-03-06 14:45:09,986 INFO    HandlerThread:133 [interfaces.py:finish():202] Joined cpu monitor
+2024-03-06 14:45:09,987 DEBUG   SystemMonitor:133 [system_monitor.py:_start():183] Publishing last batch of metrics
+2024-03-06 14:45:09,989 INFO    HandlerThread:133 [interfaces.py:finish():202] Joined disk monitor
+2024-03-06 14:45:09,989 DEBUG   SenderThread:133 [sender.py:send():382] send: stats
+2024-03-06 14:45:10,643 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:45:14,990 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:45:15,645 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:45:19,992 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:45:20,646 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:45:24,993 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:45:25,647 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:45:26,835 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: resume
+2024-03-06 14:45:26,836 INFO    HandlerThread:133 [handler.py:handle_request_resume():699] starting system metrics thread
+2024-03-06 14:45:26,836 INFO    HandlerThread:133 [system_monitor.py:start():194] Starting system monitor
+2024-03-06 14:45:26,836 INFO    SystemMonitor:133 [system_monitor.py:_start():158] Starting system asset monitoring threads
+2024-03-06 14:45:26,837 INFO    SystemMonitor:133 [interfaces.py:start():190] Started cpu monitoring
+2024-03-06 14:45:26,837 INFO    SystemMonitor:133 [interfaces.py:start():190] Started disk monitoring
+2024-03-06 14:45:26,839 INFO    SystemMonitor:133 [interfaces.py:start():190] Started gpu monitoring
+2024-03-06 14:45:26,840 INFO    SystemMonitor:133 [interfaces.py:start():190] Started memory monitoring
+2024-03-06 14:45:26,840 INFO    SystemMonitor:133 [interfaces.py:start():190] Started network monitoring
+2024-03-06 14:45:27,960 DEBUG   SenderThread:133 [sender.py:send():382] send: config
+2024-03-06 14:45:27,962 DEBUG   SenderThread:133 [sender.py:send():382] send: metric
+2024-03-06 14:45:27,962 DEBUG   SenderThread:133 [sender.py:send():382] send: metric
+2024-03-06 14:45:27,962 WARNING SenderThread:133 [sender.py:send_metric():1354] Seen metric with glob (shouldn't happen)
+2024-03-06 14:45:29,624 INFO    Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/output.log
+2024-03-06 14:45:30,774 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:45:30,963 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:45:35,775 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:45:35,964 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:45:40,777 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:45:40,965 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:45:45,784 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:45:45,966 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:45:50,785 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:45:50,972 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:45:51,633 INFO    Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/config.yaml
+2024-03-06 14:45:54,625 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: partial_history
+2024-03-06 14:45:54,626 DEBUG   SenderThread:133 [sender.py:send():382] send: history
+2024-03-06 14:45:54,626 DEBUG   SenderThread:133 [sender.py:send_request():409] send_request: summary_record
+2024-03-06 14:45:54,627 INFO    SenderThread:133 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end
+2024-03-06 14:45:54,634 INFO    Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/wandb-summary.json
+2024-03-06 14:45:55,786 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:45:56,513 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: pause
+2024-03-06 14:45:56,513 INFO    HandlerThread:133 [handler.py:handle_request_pause():708] stopping system metrics thread
+2024-03-06 14:45:56,513 INFO    HandlerThread:133 [system_monitor.py:finish():203] Stopping system monitor
+2024-03-06 14:45:56,514 DEBUG   SystemMonitor:133 [system_monitor.py:_start():172] Starting system metrics aggregation loop
+2024-03-06 14:45:56,514 DEBUG   SystemMonitor:133 [system_monitor.py:_start():179] Finished system metrics aggregation loop
+2024-03-06 14:45:56,514 DEBUG   SystemMonitor:133 [system_monitor.py:_start():183] Publishing last batch of metrics
+2024-03-06 14:45:56,514 INFO    HandlerThread:133 [interfaces.py:finish():202] Joined cpu monitor
+2024-03-06 14:45:56,515 INFO    HandlerThread:133 [interfaces.py:finish():202] Joined disk monitor
+2024-03-06 14:45:56,521 INFO    HandlerThread:133 [interfaces.py:finish():202] Joined gpu monitor
+2024-03-06 14:45:56,521 INFO    HandlerThread:133 [interfaces.py:finish():202] Joined memory monitor
+2024-03-06 14:45:56,521 INFO    HandlerThread:133 [interfaces.py:finish():202] Joined network monitor
+2024-03-06 14:45:56,522 DEBUG   SenderThread:133 [sender.py:send():382] send: stats
+2024-03-06 14:45:56,522 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:46:00,787 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:46:01,523 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:46:05,788 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:46:06,525 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:46:10,789 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:46:11,526 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:46:15,790 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:46:16,527 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:46:20,792 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:46:21,528 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:46:25,793 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:46:26,529 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:46:30,794 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:46:31,530 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:46:35,795 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:46:36,531 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:46:40,797 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:46:41,533 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:46:45,798 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:46:46,534 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:46:50,799 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:46:51,535 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:46:55,800 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:46:56,536 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:47:00,801 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:47:01,537 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:47:05,802 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:47:06,538 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:47:10,804 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:47:11,540 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:47:15,805 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:47:16,541 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:47:20,806 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:47:21,542 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:47:25,807 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:47:26,543 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:47:30,808 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:47:31,544 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:47:35,809 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:47:36,546 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:47:40,810 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:47:41,547 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:47:45,811 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:47:46,548 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:47:50,813 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:47:51,549 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:47:55,814 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:47:56,550 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:48:00,815 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:48:01,551 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:48:05,816 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:48:06,552 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:48:10,817 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:48:11,554 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:48:15,818 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:48:16,555 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:48:20,819 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:48:21,556 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:48:25,820 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:48:26,557 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:48:30,821 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:48:31,559 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:48:35,822 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:48:36,560 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:48:40,823 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:48:41,561 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:48:45,824 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:48:46,562 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:48:50,825 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:48:51,563 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:48:55,827 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:48:56,564 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:49:00,828 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:49:01,565 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:49:05,829 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:49:06,566 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:49:10,830 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:49:11,567 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:49:15,831 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:49:16,568 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:49:20,832 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:49:21,569 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:49:25,834 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:49:26,570 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:49:30,835 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:49:31,571 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:49:35,836 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:49:36,572 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:49:40,837 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:49:41,573 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:49:45,838 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:49:46,574 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:49:50,839 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:49:51,575 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:49:55,841 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:49:56,576 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:50:00,842 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:50:01,577 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:50:05,843 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:50:06,578 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:50:10,844 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:50:11,580 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:50:11,638 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: resume
+2024-03-06 14:50:11,639 INFO    HandlerThread:133 [handler.py:handle_request_resume():699] starting system metrics thread
+2024-03-06 14:50:11,639 INFO    HandlerThread:133 [system_monitor.py:start():194] Starting system monitor
+2024-03-06 14:50:11,639 INFO    SystemMonitor:133 [system_monitor.py:_start():158] Starting system asset monitoring threads
+2024-03-06 14:50:11,639 INFO    SystemMonitor:133 [interfaces.py:start():190] Started cpu monitoring
+2024-03-06 14:50:11,640 INFO    SystemMonitor:133 [interfaces.py:start():190] Started disk monitoring
+2024-03-06 14:50:11,641 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: pause
+2024-03-06 14:50:11,641 INFO    HandlerThread:133 [handler.py:handle_request_pause():708] stopping system metrics thread
+2024-03-06 14:50:11,641 INFO    HandlerThread:133 [system_monitor.py:finish():203] Stopping system monitor
+2024-03-06 14:50:11,642 INFO    HandlerThread:133 [interfaces.py:finish():202] Joined cpu monitor
+2024-03-06 14:50:11,642 INFO    SystemMonitor:133 [interfaces.py:start():190] Started gpu monitoring
+2024-03-06 14:50:11,642 DEBUG   SystemMonitor:133 [system_monitor.py:_start():172] Starting system metrics aggregation loop
+2024-03-06 14:50:11,642 DEBUG   SystemMonitor:133 [system_monitor.py:_start():179] Finished system metrics aggregation loop
+2024-03-06 14:50:11,643 DEBUG   SystemMonitor:133 [system_monitor.py:_start():183] Publishing last batch of metrics
+2024-03-06 14:50:11,645 INFO    HandlerThread:133 [interfaces.py:finish():202] Joined disk monitor
+2024-03-06 14:50:11,653 INFO    HandlerThread:133 [interfaces.py:finish():202] Joined gpu monitor
+2024-03-06 14:50:11,654 DEBUG   SenderThread:133 [sender.py:send():382] send: stats
+2024-03-06 14:50:15,846 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:50:16,655 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:50:19,879 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: resume
+2024-03-06 14:50:19,879 INFO    HandlerThread:133 [handler.py:handle_request_resume():699] starting system metrics thread
+2024-03-06 14:50:19,879 INFO    HandlerThread:133 [system_monitor.py:start():194] Starting system monitor
+2024-03-06 14:50:19,879 INFO    SystemMonitor:133 [system_monitor.py:_start():158] Starting system asset monitoring threads
+2024-03-06 14:50:19,880 INFO    SystemMonitor:133 [interfaces.py:start():190] Started cpu monitoring
+2024-03-06 14:50:19,881 INFO    SystemMonitor:133 [interfaces.py:start():190] Started disk monitoring
+2024-03-06 14:50:19,882 INFO    SystemMonitor:133 [interfaces.py:start():190] Started gpu monitoring
+2024-03-06 14:50:19,883 INFO    SystemMonitor:133 [interfaces.py:start():190] Started memory monitoring
+2024-03-06 14:50:19,885 INFO    SystemMonitor:133 [interfaces.py:start():190] Started network monitoring
+2024-03-06 14:50:20,901 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:50:21,115 DEBUG   SenderThread:133 [sender.py:send():382] send: config
+2024-03-06 14:50:21,116 DEBUG   SenderThread:133 [sender.py:send():382] send: metric
+2024-03-06 14:50:21,117 DEBUG   SenderThread:133 [sender.py:send():382] send: metric
+2024-03-06 14:50:21,117 WARNING SenderThread:133 [sender.py:send_metric():1354] Seen metric with glob (shouldn't happen)
+2024-03-06 14:50:21,743 INFO    Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/output.log
+2024-03-06 14:50:22,122 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:50:22,744 INFO    Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/config.yaml
+2024-03-06 14:50:25,904 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:50:27,366 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:50:30,905 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:50:32,367 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:50:35,907 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:50:37,368 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:50:40,908 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:50:42,369 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:50:45,909 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:50:47,370 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:50:47,806 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: partial_history
+2024-03-06 14:50:47,807 DEBUG   SenderThread:133 [sender.py:send():382] send: history
+2024-03-06 14:50:47,807 DEBUG   SenderThread:133 [sender.py:send_request():409] send_request: summary_record
+2024-03-06 14:50:47,810 INFO    SenderThread:133 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end
+2024-03-06 14:50:48,754 INFO    Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/wandb-summary.json
+2024-03-06 14:50:49,741 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: pause
+2024-03-06 14:50:49,741 INFO    HandlerThread:133 [handler.py:handle_request_pause():708] stopping system metrics thread
+2024-03-06 14:50:49,741 INFO    HandlerThread:133 [system_monitor.py:finish():203] Stopping system monitor
+2024-03-06 14:50:49,742 DEBUG   SystemMonitor:133 [system_monitor.py:_start():172] Starting system metrics aggregation loop
+2024-03-06 14:50:49,742 DEBUG   SystemMonitor:133 [system_monitor.py:_start():179] Finished system metrics aggregation loop
+2024-03-06 14:50:49,742 DEBUG   SystemMonitor:133 [system_monitor.py:_start():183] Publishing last batch of metrics
+2024-03-06 14:50:49,742 INFO    HandlerThread:133 [interfaces.py:finish():202] Joined cpu monitor
+2024-03-06 14:50:49,743 INFO    HandlerThread:133 [interfaces.py:finish():202] Joined disk monitor
+2024-03-06 14:50:49,749 INFO    HandlerThread:133 [interfaces.py:finish():202] Joined gpu monitor
+2024-03-06 14:50:49,749 INFO    HandlerThread:133 [interfaces.py:finish():202] Joined memory monitor
+2024-03-06 14:50:49,749 INFO    HandlerThread:133 [interfaces.py:finish():202] Joined network monitor
+2024-03-06 14:50:49,750 DEBUG   SenderThread:133 [sender.py:send():382] send: stats
+2024-03-06 14:50:50,910 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:50:52,751 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:50:55,911 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:50:57,752 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:51:00,913 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:51:02,754 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:51:05,914 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:51:07,755 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:51:08,770 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: resume
+2024-03-06 14:51:08,770 INFO    HandlerThread:133 [handler.py:handle_request_resume():699] starting system metrics thread
+2024-03-06 14:51:08,771 INFO    HandlerThread:133 [system_monitor.py:start():194] Starting system monitor
+2024-03-06 14:51:08,771 INFO    SystemMonitor:133 [system_monitor.py:_start():158] Starting system asset monitoring threads
+2024-03-06 14:51:08,771 INFO    SystemMonitor:133 [interfaces.py:start():190] Started cpu monitoring
+2024-03-06 14:51:08,772 INFO    SystemMonitor:133 [interfaces.py:start():190] Started disk monitoring
+2024-03-06 14:51:08,773 INFO    SystemMonitor:133 [interfaces.py:start():190] Started gpu monitoring
+2024-03-06 14:51:08,775 INFO    SystemMonitor:133 [interfaces.py:start():190] Started memory monitoring
+2024-03-06 14:51:08,777 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: pause
+2024-03-06 14:51:08,779 INFO    HandlerThread:133 [handler.py:handle_request_pause():708] stopping system metrics thread
+2024-03-06 14:51:08,779 INFO    HandlerThread:133 [system_monitor.py:finish():203] Stopping system monitor
+2024-03-06 14:51:08,779 INFO    SystemMonitor:133 [interfaces.py:start():190] Started network monitoring
+2024-03-06 14:51:08,779 DEBUG   SystemMonitor:133 [system_monitor.py:_start():172] Starting system metrics aggregation loop
+2024-03-06 14:51:08,779 DEBUG   SystemMonitor:133 [system_monitor.py:_start():179] Finished system metrics aggregation loop
+2024-03-06 14:51:08,780 DEBUG   SystemMonitor:133 [system_monitor.py:_start():183] Publishing last batch of metrics
+2024-03-06 14:51:08,780 INFO    HandlerThread:133 [interfaces.py:finish():202] Joined cpu monitor
+2024-03-06 14:51:08,781 INFO    HandlerThread:133 [interfaces.py:finish():202] Joined disk monitor
+2024-03-06 14:51:08,791 INFO    HandlerThread:133 [interfaces.py:finish():202] Joined gpu monitor
+2024-03-06 14:51:08,791 INFO    HandlerThread:133 [interfaces.py:finish():202] Joined memory monitor
+2024-03-06 14:51:08,791 INFO    HandlerThread:133 [interfaces.py:finish():202] Joined network monitor
+2024-03-06 14:51:08,792 DEBUG   SenderThread:133 [sender.py:send():382] send: stats
+2024-03-06 14:51:10,915 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:51:12,600 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: resume
+2024-03-06 14:51:12,600 INFO    HandlerThread:133 [handler.py:handle_request_resume():699] starting system metrics thread
+2024-03-06 14:51:12,600 INFO    HandlerThread:133 [system_monitor.py:start():194] Starting system monitor
+2024-03-06 14:51:12,600 INFO    SystemMonitor:133 [system_monitor.py:_start():158] Starting system asset monitoring threads
+2024-03-06 14:51:12,601 INFO    SystemMonitor:133 [interfaces.py:start():190] Started cpu monitoring
+2024-03-06 14:51:12,602 INFO    SystemMonitor:133 [interfaces.py:start():190] Started disk monitoring
+2024-03-06 14:51:12,605 INFO    SystemMonitor:133 [interfaces.py:start():190] Started gpu monitoring
+2024-03-06 14:51:12,605 INFO    SystemMonitor:133 [interfaces.py:start():190] Started memory monitoring
+2024-03-06 14:51:12,606 INFO    SystemMonitor:133 [interfaces.py:start():190] Started network monitoring
+2024-03-06 14:51:12,793 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:51:13,753 DEBUG   SenderThread:133 [sender.py:send():382] send: config
+2024-03-06 14:51:13,755 DEBUG   SenderThread:133 [sender.py:send():382] send: metric
+2024-03-06 14:51:13,755 DEBUG   SenderThread:133 [sender.py:send():382] send: metric
+2024-03-06 14:51:13,755 WARNING SenderThread:133 [sender.py:send_metric():1354] Seen metric with glob (shouldn't happen)
+2024-03-06 14:51:13,763 INFO    Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/output.log
+2024-03-06 14:51:15,919 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:51:18,756 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:51:20,922 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:51:23,762 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:51:24,767 INFO    Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/config.yaml
+2024-03-06 14:51:25,923 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:51:29,020 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:51:30,925 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:51:34,021 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:51:35,928 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:51:39,022 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:51:40,429 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: partial_history
+2024-03-06 14:51:40,430 DEBUG   SenderThread:133 [sender.py:send():382] send: history
+2024-03-06 14:51:40,431 DEBUG   SenderThread:133 [sender.py:send_request():409] send_request: summary_record
+2024-03-06 14:51:40,432 INFO    SenderThread:133 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end
+2024-03-06 14:51:40,773 INFO    Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/wandb-summary.json
+2024-03-06 14:51:40,942 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:51:42,098 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: partial_history
+2024-03-06 14:51:42,101 DEBUG   SenderThread:133 [sender.py:send():382] send: metric
+2024-03-06 14:51:42,101 DEBUG   SenderThread:133 [sender.py:send():382] send: metric
+2024-03-06 14:51:42,102 DEBUG   SenderThread:133 [sender.py:send():382] send: metric
+2024-03-06 14:51:42,102 DEBUG   SenderThread:133 [sender.py:send():382] send: metric
+2024-03-06 14:51:42,102 DEBUG   SenderThread:133 [sender.py:send():382] send: history
+2024-03-06 14:51:42,102 DEBUG   SenderThread:133 [sender.py:send_request():409] send_request: summary_record
+2024-03-06 14:51:42,103 INFO    SenderThread:133 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end
+2024-03-06 14:51:42,774 INFO    Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/wandb-summary.json
+2024-03-06 14:51:43,775 INFO    Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/output.log
+2024-03-06 14:51:44,104 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:51:46,440 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: partial_history
+2024-03-06 14:51:46,441 DEBUG   SenderThread:133 [sender.py:send():382] send: history
+2024-03-06 14:51:46,442 DEBUG   SenderThread:133 [sender.py:send_request():409] send_request: summary_record
+2024-03-06 14:51:46,442 INFO    SenderThread:133 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end
+2024-03-06 14:51:46,678 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:51:46,776 INFO    Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/wandb-summary.json

wandb/debug.log CHANGED Viewed

@@ -74,3 +74,24 @@ config: {}
 2024-03-06 14:40:45,818 INFO    MainThread:34 [wandb_init.py:_pause_backend():437] pausing backend
 2024-03-06 14:43:14,295 INFO    MainThread:34 [wandb_init.py:_resume_backend():442] resuming backend
 2024-03-06 14:43:15,473 INFO    MainThread:34 [wandb_run.py:_config_callback():1343] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': None, 'finetuning_task': None, 'id2label': {0: 'Documentation Ambiguity', 1: 'Documentation Completeness', 2: 'Documentation Replicability', 3: 'Documentation Replication on Other Examples', 4: 'Inadequate Examples', 5: 'Lack of Alternative Solutions/Documentation', 6: 'Requesting (Additional) Documentation/Examples'}, 'label2id': {'Documentation Ambiguity': 0, 'Documentation Completeness': 1, 'Documentation Replicability': 2, 'Documentation Replication on Other Examples': 3, 'Inadequate Examples': 4, 'Lack of Alternative Solutions/Documentation': 5, 'Requesting (Additional) Documentation/Examples': 6}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'mmukh/SOBertBase', 'transformers_version': '4.38.1', 'model_type': 'megatron-bert', 'tokenizer_type': 'SentencePieceTokenizer', 'vocab_size': 50048, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 2048, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Mar06_14-43-14_cd2c3b1980c7', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}

 2024-03-06 14:40:45,818 INFO    MainThread:34 [wandb_init.py:_pause_backend():437] pausing backend
 2024-03-06 14:43:14,295 INFO    MainThread:34 [wandb_init.py:_resume_backend():442] resuming backend
 2024-03-06 14:43:15,473 INFO    MainThread:34 [wandb_run.py:_config_callback():1343] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': None, 'finetuning_task': None, 'id2label': {0: 'Documentation Ambiguity', 1: 'Documentation Completeness', 2: 'Documentation Replicability', 3: 'Documentation Replication on Other Examples', 4: 'Inadequate Examples', 5: 'Lack of Alternative Solutions/Documentation', 6: 'Requesting (Additional) Documentation/Examples'}, 'label2id': {'Documentation Ambiguity': 0, 'Documentation Completeness': 1, 'Documentation Replicability': 2, 'Documentation Replication on Other Examples': 3, 'Inadequate Examples': 4, 'Lack of Alternative Solutions/Documentation': 5, 'Requesting (Additional) Documentation/Examples': 6}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'mmukh/SOBertBase', 'transformers_version': '4.38.1', 'model_type': 'megatron-bert', 'tokenizer_type': 'SentencePieceTokenizer', 'vocab_size': 50048, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 2048, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Mar06_14-43-14_cd2c3b1980c7', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
+2024-03-06 14:44:15,386 INFO    MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+2024-03-06 14:44:15,386 INFO    MainThread:34 [wandb_init.py:_pause_backend():437] pausing backend
+2024-03-06 14:45:09,983 INFO    MainThread:34 [wandb_init.py:_resume_backend():442] resuming backend
+2024-03-06 14:45:09,985 INFO    MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+2024-03-06 14:45:09,985 INFO    MainThread:34 [wandb_init.py:_pause_backend():437] pausing backend
+2024-03-06 14:45:26,835 INFO    MainThread:34 [wandb_init.py:_resume_backend():442] resuming backend
+2024-03-06 14:45:27,956 INFO    MainThread:34 [wandb_run.py:_config_callback():1343] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': None, 'finetuning_task': None, 'id2label': {0: 'Documentation Ambiguity', 1: 'Documentation Completeness', 2: 'Documentation Replicability', 3: 'Documentation Replication on Other Examples', 4: 'Inadequate Examples', 5: 'Lack of Alternative Solutions/Documentation', 6: 'Requesting (Additional) Documentation/Examples'}, 'label2id': {'Documentation Ambiguity': 0, 'Documentation Completeness': 1, 'Documentation Replicability': 2, 'Documentation Replication on Other Examples': 3, 'Inadequate Examples': 4, 'Lack of Alternative Solutions/Documentation': 5, 'Requesting (Additional) Documentation/Examples': 6}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'mmukh/SOBertBase', 'transformers_version': '4.38.1', 'model_type': 'megatron-bert', 'tokenizer_type': 'SentencePieceTokenizer', 'vocab_size': 50048, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 2048, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Mar06_14-45-27_cd2c3b1980c7', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
+2024-03-06 14:45:56,512 INFO    MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+2024-03-06 14:45:56,512 INFO    MainThread:34 [wandb_init.py:_pause_backend():437] pausing backend
+2024-03-06 14:50:11,637 INFO    MainThread:34 [wandb_init.py:_resume_backend():442] resuming backend
+2024-03-06 14:50:11,640 INFO    MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+2024-03-06 14:50:11,640 INFO    MainThread:34 [wandb_init.py:_pause_backend():437] pausing backend
+2024-03-06 14:50:19,878 INFO    MainThread:34 [wandb_init.py:_resume_backend():442] resuming backend
+2024-03-06 14:50:21,110 INFO    MainThread:34 [wandb_run.py:_config_callback():1343] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': None, 'finetuning_task': None, 'id2label': {0: 'Documentation Ambiguity', 1: 'Documentation Completeness', 2: 'Documentation Replicability', 3: 'Documentation Replication on Other Examples', 4: 'Inadequate Examples', 5: 'Lack of Alternative Solutions/Documentation', 6: 'Requesting (Additional) Documentation/Examples'}, 'label2id': {'Documentation Ambiguity': 0, 'Documentation Completeness': 1, 'Documentation Replicability': 2, 'Documentation Replication on Other Examples': 3, 'Inadequate Examples': 4, 'Lack of Alternative Solutions/Documentation': 5, 'Requesting (Additional) Documentation/Examples': 6}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'mmukh/SOBertBase', 'transformers_version': '4.38.1', 'model_type': 'megatron-bert', 'tokenizer_type': 'SentencePieceTokenizer', 'vocab_size': 50048, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 2048, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Mar06_14-50-20_cd2c3b1980c7', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
+2024-03-06 14:50:49,740 INFO    MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+2024-03-06 14:50:49,740 INFO    MainThread:34 [wandb_init.py:_pause_backend():437] pausing backend
+2024-03-06 14:51:08,770 INFO    MainThread:34 [wandb_init.py:_resume_backend():442] resuming backend
+2024-03-06 14:51:08,772 INFO    MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+2024-03-06 14:51:08,773 INFO    MainThread:34 [wandb_init.py:_pause_backend():437] pausing backend
+2024-03-06 14:51:12,599 INFO    MainThread:34 [wandb_init.py:_resume_backend():442] resuming backend
+2024-03-06 14:51:13,748 INFO    MainThread:34 [wandb_run.py:_config_callback():1343] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': None, 'finetuning_task': None, 'id2label': {0: 'Documentation Ambiguity', 1: 'Documentation Completeness', 2: 'Documentation Replicability', 3: 'Documentation Replication on Other Examples', 4: 'Inadequate Examples', 5: 'Lack of Alternative Solutions/Documentation', 6: 'Requesting (Additional) Documentation/Examples'}, 'label2id': {'Documentation Ambiguity': 0, 'Documentation Completeness': 1, 'Documentation Replicability': 2, 'Documentation Replication on Other Examples': 3, 'Inadequate Examples': 4, 'Lack of Alternative Solutions/Documentation': 5, 'Requesting (Additional) Documentation/Examples': 6}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'mmukh/SOBertBase', 'transformers_version': '4.38.1', 'model_type': 'megatron-bert', 'tokenizer_type': 'SentencePieceTokenizer', 'vocab_size': 50048, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 2048, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Mar06_14-51-12_cd2c3b1980c7', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}

wandb/run-20240306_142408-og3d0ld1/files/config.yaml CHANGED Viewed

@@ -422,7 +422,7 @@ log_on_each_node:
   value: true
 logging_dir:
   desc: null
-  value: /kaggle/working/runs/Mar06_14-43-14_cd2c3b1980c7
 logging_strategy:
   desc: null
   value: epoch

   value: true
 logging_dir:
   desc: null
+  value: /kaggle/working/runs/Mar06_14-51-12_cd2c3b1980c7
 logging_strategy:
   desc: null
   value: epoch

wandb/run-20240306_142408-og3d0ld1/files/output.log CHANGED Viewed

@@ -29,3 +29,10 @@ Class Weights: tensor([0.8491, 0.9698, 0.7866, 0.6530, 0.9310, 0.8578, 0.9526],
 Some weights of MegatronBertForSequenceClassification were not initialized from the model checkpoint at mmukh/SOBertBase and are newly initialized: ['bert.embeddings.token_type_embeddings.weight', 'bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
 You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
 Some weights of MegatronBertForSequenceClassification were not initialized from the model checkpoint at mmukh/SOBertBase and are newly initialized: ['bert.embeddings.token_type_embeddings.weight', 'bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']

 Some weights of MegatronBertForSequenceClassification were not initialized from the model checkpoint at mmukh/SOBertBase and are newly initialized: ['bert.embeddings.token_type_embeddings.weight', 'bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
 You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
 Some weights of MegatronBertForSequenceClassification were not initialized from the model checkpoint at mmukh/SOBertBase and are newly initialized: ['bert.embeddings.token_type_embeddings.weight', 'bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
+You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
+Some weights of MegatronBertForSequenceClassification were not initialized from the model checkpoint at mmukh/SOBertBase and are newly initialized: ['bert.embeddings.token_type_embeddings.weight', 'bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
+You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
+Some weights of MegatronBertForSequenceClassification were not initialized from the model checkpoint at mmukh/SOBertBase and are newly initialized: ['bert.embeddings.token_type_embeddings.weight', 'bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
+You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
+Some weights of MegatronBertForSequenceClassification were not initialized from the model checkpoint at mmukh/SOBertBase and are newly initialized: ['bert.embeddings.token_type_embeddings.weight', 'bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
+You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.

wandb/run-20240306_142408-og3d0ld1/files/wandb-summary.json CHANGED Viewed

@@ -1 +1 @@

- {"train/loss": 1.7916, "train/grad_norm": 7.054403781890869, "train/learning_rate": 3.4482758620689656e-07, "train/epoch": 1.0, "train/global_step": 58, "_timestamp": ~~1709736226~~.~~5010476~~, "_runtime": ~~1177~~.~~8634026050568~~, "_step": 5, "eval/loss": 1.811458706855774, "eval/runtime": 1.~~6208~~, "eval/samples_per_second": 50.~~593~~, "eval/steps_per_second": 6.~~787~~, "train/train_runtime": 31.~~0436~~, "train/train_samples_per_second": 14.~~947~~, "train/train_steps_per_second": 1.~~868~~, "train/total_flos": 122089010380800.0, "train/train_loss": 1.791607692323882}

+ {"train/loss": 1.7916, "train/grad_norm": 7.054403781890869, "train/learning_rate": 3.4482758620689656e-07, "train/epoch": 1.0, "train/global_step": 58, "_timestamp": 1709736706.4396355, "_runtime": 1657.8019905090332, "_step": 10, "eval/loss": 1.811458706855774, "eval/runtime": 1.6651, "eval/samples_per_second": 49.246, "eval/steps_per_second": 6.606, "train/train_runtime": 32.7083, "train/train_samples_per_second": 14.186, "train/train_steps_per_second": 1.773, "train/total_flos": 122089010380800.0, "train/train_loss": 1.791607692323882, "eval/accuracy": 0.32926829268292684, "eval/precision": 0.15702844661708878, "eval/recall": 0.32926829268292684, "eval/f1": 0.20646817673609674}

wandb/run-20240306_142408-og3d0ld1/logs/debug-internal.log CHANGED Viewed

@@ -874,3 +874,346 @@
 2024-03-06 14:44:13,758 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
 2024-03-06 14:44:14,301 DEBUG   SystemMonitor:133 [system_monitor.py:_start():172] Starting system metrics aggregation loop
 2024-03-06 14:44:14,303 DEBUG   SenderThread:133 [sender.py:send():382] send: stats

 2024-03-06 14:44:13,758 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
 2024-03-06 14:44:14,301 DEBUG   SystemMonitor:133 [system_monitor.py:_start():172] Starting system metrics aggregation loop
 2024-03-06 14:44:14,303 DEBUG   SenderThread:133 [sender.py:send():382] send: stats
+2024-03-06 14:44:15,387 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: pause
+2024-03-06 14:44:15,387 INFO    HandlerThread:133 [handler.py:handle_request_pause():708] stopping system metrics thread
+2024-03-06 14:44:15,387 INFO    HandlerThread:133 [system_monitor.py:finish():203] Stopping system monitor
+2024-03-06 14:44:15,387 DEBUG   SystemMonitor:133 [system_monitor.py:_start():179] Finished system metrics aggregation loop
+2024-03-06 14:44:15,387 DEBUG   SystemMonitor:133 [system_monitor.py:_start():183] Publishing last batch of metrics
+2024-03-06 14:44:15,388 INFO    HandlerThread:133 [interfaces.py:finish():202] Joined cpu monitor
+2024-03-06 14:44:15,389 INFO    HandlerThread:133 [interfaces.py:finish():202] Joined disk monitor
+2024-03-06 14:44:15,394 INFO    HandlerThread:133 [interfaces.py:finish():202] Joined gpu monitor
+2024-03-06 14:44:15,395 INFO    HandlerThread:133 [interfaces.py:finish():202] Joined memory monitor
+2024-03-06 14:44:15,395 INFO    HandlerThread:133 [interfaces.py:finish():202] Joined network monitor
+2024-03-06 14:44:15,395 DEBUG   SenderThread:133 [sender.py:send():382] send: stats
+2024-03-06 14:44:15,630 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:44:19,396 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:44:20,631 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:44:24,398 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:44:25,633 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:44:29,399 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:44:30,634 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:44:34,400 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:44:35,635 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:44:39,401 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:44:40,636 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:44:44,403 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:44:45,637 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:44:49,404 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:44:50,639 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:44:54,405 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:44:55,640 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:44:59,407 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:45:00,641 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:45:04,407 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:45:05,642 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:45:09,409 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:45:09,984 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: resume
+2024-03-06 14:45:09,984 INFO    HandlerThread:133 [handler.py:handle_request_resume():699] starting system metrics thread
+2024-03-06 14:45:09,984 INFO    HandlerThread:133 [system_monitor.py:start():194] Starting system monitor
+2024-03-06 14:45:09,984 INFO    SystemMonitor:133 [system_monitor.py:_start():158] Starting system asset monitoring threads
+2024-03-06 14:45:09,984 INFO    SystemMonitor:133 [interfaces.py:start():190] Started cpu monitoring
+2024-03-06 14:45:09,985 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: pause
+2024-03-06 14:45:09,986 INFO    HandlerThread:133 [handler.py:handle_request_pause():708] stopping system metrics thread
+2024-03-06 14:45:09,986 INFO    HandlerThread:133 [system_monitor.py:finish():203] Stopping system monitor
+2024-03-06 14:45:09,986 INFO    SystemMonitor:133 [interfaces.py:start():190] Started disk monitoring
+2024-03-06 14:45:09,986 DEBUG   SystemMonitor:133 [system_monitor.py:_start():172] Starting system metrics aggregation loop
+2024-03-06 14:45:09,986 DEBUG   SystemMonitor:133 [system_monitor.py:_start():179] Finished system metrics aggregation loop
+2024-03-06 14:45:09,986 INFO    HandlerThread:133 [interfaces.py:finish():202] Joined cpu monitor
+2024-03-06 14:45:09,987 DEBUG   SystemMonitor:133 [system_monitor.py:_start():183] Publishing last batch of metrics
+2024-03-06 14:45:09,989 INFO    HandlerThread:133 [interfaces.py:finish():202] Joined disk monitor
+2024-03-06 14:45:09,989 DEBUG   SenderThread:133 [sender.py:send():382] send: stats
+2024-03-06 14:45:10,643 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:45:14,990 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:45:15,645 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:45:19,992 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:45:20,646 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:45:24,993 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:45:25,647 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:45:26,835 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: resume
+2024-03-06 14:45:26,836 INFO    HandlerThread:133 [handler.py:handle_request_resume():699] starting system metrics thread
+2024-03-06 14:45:26,836 INFO    HandlerThread:133 [system_monitor.py:start():194] Starting system monitor
+2024-03-06 14:45:26,836 INFO    SystemMonitor:133 [system_monitor.py:_start():158] Starting system asset monitoring threads
+2024-03-06 14:45:26,837 INFO    SystemMonitor:133 [interfaces.py:start():190] Started cpu monitoring
+2024-03-06 14:45:26,837 INFO    SystemMonitor:133 [interfaces.py:start():190] Started disk monitoring
+2024-03-06 14:45:26,839 INFO    SystemMonitor:133 [interfaces.py:start():190] Started gpu monitoring
+2024-03-06 14:45:26,840 INFO    SystemMonitor:133 [interfaces.py:start():190] Started memory monitoring
+2024-03-06 14:45:26,840 INFO    SystemMonitor:133 [interfaces.py:start():190] Started network monitoring
+2024-03-06 14:45:27,960 DEBUG   SenderThread:133 [sender.py:send():382] send: config
+2024-03-06 14:45:27,962 DEBUG   SenderThread:133 [sender.py:send():382] send: metric
+2024-03-06 14:45:27,962 DEBUG   SenderThread:133 [sender.py:send():382] send: metric
+2024-03-06 14:45:27,962 WARNING SenderThread:133 [sender.py:send_metric():1354] Seen metric with glob (shouldn't happen)
+2024-03-06 14:45:29,624 INFO    Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/output.log
+2024-03-06 14:45:30,774 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:45:30,963 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:45:35,775 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:45:35,964 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:45:40,777 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:45:40,965 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:45:45,784 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:45:45,966 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:45:50,785 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:45:50,972 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:45:51,633 INFO    Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/config.yaml
+2024-03-06 14:45:54,625 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: partial_history
+2024-03-06 14:45:54,626 DEBUG   SenderThread:133 [sender.py:send():382] send: history
+2024-03-06 14:45:54,626 DEBUG   SenderThread:133 [sender.py:send_request():409] send_request: summary_record
+2024-03-06 14:45:54,627 INFO    SenderThread:133 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end
+2024-03-06 14:45:54,634 INFO    Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/wandb-summary.json
+2024-03-06 14:45:55,786 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:45:56,513 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: pause
+2024-03-06 14:45:56,513 INFO    HandlerThread:133 [handler.py:handle_request_pause():708] stopping system metrics thread
+2024-03-06 14:45:56,513 INFO    HandlerThread:133 [system_monitor.py:finish():203] Stopping system monitor
+2024-03-06 14:45:56,514 DEBUG   SystemMonitor:133 [system_monitor.py:_start():172] Starting system metrics aggregation loop
+2024-03-06 14:45:56,514 DEBUG   SystemMonitor:133 [system_monitor.py:_start():179] Finished system metrics aggregation loop
+2024-03-06 14:45:56,514 DEBUG   SystemMonitor:133 [system_monitor.py:_start():183] Publishing last batch of metrics
+2024-03-06 14:45:56,514 INFO    HandlerThread:133 [interfaces.py:finish():202] Joined cpu monitor
+2024-03-06 14:45:56,515 INFO    HandlerThread:133 [interfaces.py:finish():202] Joined disk monitor
+2024-03-06 14:45:56,521 INFO    HandlerThread:133 [interfaces.py:finish():202] Joined gpu monitor
+2024-03-06 14:45:56,521 INFO    HandlerThread:133 [interfaces.py:finish():202] Joined memory monitor
+2024-03-06 14:45:56,521 INFO    HandlerThread:133 [interfaces.py:finish():202] Joined network monitor
+2024-03-06 14:45:56,522 DEBUG   SenderThread:133 [sender.py:send():382] send: stats
+2024-03-06 14:45:56,522 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:46:00,787 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:46:01,523 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:46:05,788 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:46:06,525 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:46:10,789 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:46:11,526 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:46:15,790 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:46:16,527 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:46:20,792 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:46:21,528 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:46:25,793 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:46:26,529 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:46:30,794 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:46:31,530 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:46:35,795 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:46:36,531 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:46:40,797 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:46:41,533 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:46:45,798 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:46:46,534 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:46:50,799 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:46:51,535 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:46:55,800 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:46:56,536 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:47:00,801 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:47:01,537 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:47:05,802 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:47:06,538 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:47:10,804 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:47:11,540 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:47:15,805 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:47:16,541 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:47:20,806 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:47:21,542 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:47:25,807 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:47:26,543 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:47:30,808 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:47:31,544 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:47:35,809 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:47:36,546 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:47:40,810 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:47:41,547 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:47:45,811 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:47:46,548 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:47:50,813 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:47:51,549 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:47:55,814 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:47:56,550 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:48:00,815 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:48:01,551 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:48:05,816 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:48:06,552 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:48:10,817 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:48:11,554 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:48:15,818 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:48:16,555 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:48:20,819 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:48:21,556 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:48:25,820 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:48:26,557 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:48:30,821 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:48:31,559 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:48:35,822 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:48:36,560 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:48:40,823 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:48:41,561 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:48:45,824 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:48:46,562 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:48:50,825 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:48:51,563 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:48:55,827 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:48:56,564 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:49:00,828 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:49:01,565 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:49:05,829 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:49:06,566 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:49:10,830 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:49:11,567 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:49:15,831 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:49:16,568 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:49:20,832 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:49:21,569 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:49:25,834 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:49:26,570 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:49:30,835 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:49:31,571 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:49:35,836 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:49:36,572 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:49:40,837 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:49:41,573 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:49:45,838 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:49:46,574 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:49:50,839 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:49:51,575 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:49:55,841 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:49:56,576 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:50:00,842 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:50:01,577 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:50:05,843 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:50:06,578 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:50:10,844 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:50:11,580 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:50:11,638 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: resume
+2024-03-06 14:50:11,639 INFO    HandlerThread:133 [handler.py:handle_request_resume():699] starting system metrics thread
+2024-03-06 14:50:11,639 INFO    HandlerThread:133 [system_monitor.py:start():194] Starting system monitor
+2024-03-06 14:50:11,639 INFO    SystemMonitor:133 [system_monitor.py:_start():158] Starting system asset monitoring threads
+2024-03-06 14:50:11,639 INFO    SystemMonitor:133 [interfaces.py:start():190] Started cpu monitoring
+2024-03-06 14:50:11,640 INFO    SystemMonitor:133 [interfaces.py:start():190] Started disk monitoring
+2024-03-06 14:50:11,641 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: pause
+2024-03-06 14:50:11,641 INFO    HandlerThread:133 [handler.py:handle_request_pause():708] stopping system metrics thread
+2024-03-06 14:50:11,641 INFO    HandlerThread:133 [system_monitor.py:finish():203] Stopping system monitor
+2024-03-06 14:50:11,642 INFO    HandlerThread:133 [interfaces.py:finish():202] Joined cpu monitor
+2024-03-06 14:50:11,642 INFO    SystemMonitor:133 [interfaces.py:start():190] Started gpu monitoring
+2024-03-06 14:50:11,642 DEBUG   SystemMonitor:133 [system_monitor.py:_start():172] Starting system metrics aggregation loop
+2024-03-06 14:50:11,642 DEBUG   SystemMonitor:133 [system_monitor.py:_start():179] Finished system metrics aggregation loop
+2024-03-06 14:50:11,643 DEBUG   SystemMonitor:133 [system_monitor.py:_start():183] Publishing last batch of metrics
+2024-03-06 14:50:11,645 INFO    HandlerThread:133 [interfaces.py:finish():202] Joined disk monitor
+2024-03-06 14:50:11,653 INFO    HandlerThread:133 [interfaces.py:finish():202] Joined gpu monitor
+2024-03-06 14:50:11,654 DEBUG   SenderThread:133 [sender.py:send():382] send: stats
+2024-03-06 14:50:15,846 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:50:16,655 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:50:19,879 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: resume
+2024-03-06 14:50:19,879 INFO    HandlerThread:133 [handler.py:handle_request_resume():699] starting system metrics thread
+2024-03-06 14:50:19,879 INFO    HandlerThread:133 [system_monitor.py:start():194] Starting system monitor
+2024-03-06 14:50:19,879 INFO    SystemMonitor:133 [system_monitor.py:_start():158] Starting system asset monitoring threads
+2024-03-06 14:50:19,880 INFO    SystemMonitor:133 [interfaces.py:start():190] Started cpu monitoring
+2024-03-06 14:50:19,881 INFO    SystemMonitor:133 [interfaces.py:start():190] Started disk monitoring
+2024-03-06 14:50:19,882 INFO    SystemMonitor:133 [interfaces.py:start():190] Started gpu monitoring
+2024-03-06 14:50:19,883 INFO    SystemMonitor:133 [interfaces.py:start():190] Started memory monitoring
+2024-03-06 14:50:19,885 INFO    SystemMonitor:133 [interfaces.py:start():190] Started network monitoring
+2024-03-06 14:50:20,901 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:50:21,115 DEBUG   SenderThread:133 [sender.py:send():382] send: config
+2024-03-06 14:50:21,116 DEBUG   SenderThread:133 [sender.py:send():382] send: metric
+2024-03-06 14:50:21,117 DEBUG   SenderThread:133 [sender.py:send():382] send: metric
+2024-03-06 14:50:21,117 WARNING SenderThread:133 [sender.py:send_metric():1354] Seen metric with glob (shouldn't happen)
+2024-03-06 14:50:21,743 INFO    Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/output.log
+2024-03-06 14:50:22,122 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:50:22,744 INFO    Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/config.yaml
+2024-03-06 14:50:25,904 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:50:27,366 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:50:30,905 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:50:32,367 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:50:35,907 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:50:37,368 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:50:40,908 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:50:42,369 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:50:45,909 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:50:47,370 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:50:47,806 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: partial_history
+2024-03-06 14:50:47,807 DEBUG   SenderThread:133 [sender.py:send():382] send: history
+2024-03-06 14:50:47,807 DEBUG   SenderThread:133 [sender.py:send_request():409] send_request: summary_record
+2024-03-06 14:50:47,810 INFO    SenderThread:133 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end
+2024-03-06 14:50:48,754 INFO    Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/wandb-summary.json
+2024-03-06 14:50:49,741 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: pause
+2024-03-06 14:50:49,741 INFO    HandlerThread:133 [handler.py:handle_request_pause():708] stopping system metrics thread
+2024-03-06 14:50:49,741 INFO    HandlerThread:133 [system_monitor.py:finish():203] Stopping system monitor
+2024-03-06 14:50:49,742 DEBUG   SystemMonitor:133 [system_monitor.py:_start():172] Starting system metrics aggregation loop
+2024-03-06 14:50:49,742 DEBUG   SystemMonitor:133 [system_monitor.py:_start():179] Finished system metrics aggregation loop
+2024-03-06 14:50:49,742 DEBUG   SystemMonitor:133 [system_monitor.py:_start():183] Publishing last batch of metrics
+2024-03-06 14:50:49,742 INFO    HandlerThread:133 [interfaces.py:finish():202] Joined cpu monitor
+2024-03-06 14:50:49,743 INFO    HandlerThread:133 [interfaces.py:finish():202] Joined disk monitor
+2024-03-06 14:50:49,749 INFO    HandlerThread:133 [interfaces.py:finish():202] Joined gpu monitor
+2024-03-06 14:50:49,749 INFO    HandlerThread:133 [interfaces.py:finish():202] Joined memory monitor
+2024-03-06 14:50:49,749 INFO    HandlerThread:133 [interfaces.py:finish():202] Joined network monitor
+2024-03-06 14:50:49,750 DEBUG   SenderThread:133 [sender.py:send():382] send: stats
+2024-03-06 14:50:50,910 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:50:52,751 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:50:55,911 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:50:57,752 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:51:00,913 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:51:02,754 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:51:05,914 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:51:07,755 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:51:08,770 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: resume
+2024-03-06 14:51:08,770 INFO    HandlerThread:133 [handler.py:handle_request_resume():699] starting system metrics thread
+2024-03-06 14:51:08,771 INFO    HandlerThread:133 [system_monitor.py:start():194] Starting system monitor
+2024-03-06 14:51:08,771 INFO    SystemMonitor:133 [system_monitor.py:_start():158] Starting system asset monitoring threads
+2024-03-06 14:51:08,771 INFO    SystemMonitor:133 [interfaces.py:start():190] Started cpu monitoring
+2024-03-06 14:51:08,772 INFO    SystemMonitor:133 [interfaces.py:start():190] Started disk monitoring
+2024-03-06 14:51:08,773 INFO    SystemMonitor:133 [interfaces.py:start():190] Started gpu monitoring
+2024-03-06 14:51:08,775 INFO    SystemMonitor:133 [interfaces.py:start():190] Started memory monitoring
+2024-03-06 14:51:08,777 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: pause
+2024-03-06 14:51:08,779 INFO    HandlerThread:133 [handler.py:handle_request_pause():708] stopping system metrics thread
+2024-03-06 14:51:08,779 INFO    HandlerThread:133 [system_monitor.py:finish():203] Stopping system monitor
+2024-03-06 14:51:08,779 INFO    SystemMonitor:133 [interfaces.py:start():190] Started network monitoring
+2024-03-06 14:51:08,779 DEBUG   SystemMonitor:133 [system_monitor.py:_start():172] Starting system metrics aggregation loop
+2024-03-06 14:51:08,779 DEBUG   SystemMonitor:133 [system_monitor.py:_start():179] Finished system metrics aggregation loop
+2024-03-06 14:51:08,780 DEBUG   SystemMonitor:133 [system_monitor.py:_start():183] Publishing last batch of metrics
+2024-03-06 14:51:08,780 INFO    HandlerThread:133 [interfaces.py:finish():202] Joined cpu monitor
+2024-03-06 14:51:08,781 INFO    HandlerThread:133 [interfaces.py:finish():202] Joined disk monitor
+2024-03-06 14:51:08,791 INFO    HandlerThread:133 [interfaces.py:finish():202] Joined gpu monitor
+2024-03-06 14:51:08,791 INFO    HandlerThread:133 [interfaces.py:finish():202] Joined memory monitor
+2024-03-06 14:51:08,791 INFO    HandlerThread:133 [interfaces.py:finish():202] Joined network monitor
+2024-03-06 14:51:08,792 DEBUG   SenderThread:133 [sender.py:send():382] send: stats
+2024-03-06 14:51:10,915 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:51:12,600 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: resume
+2024-03-06 14:51:12,600 INFO    HandlerThread:133 [handler.py:handle_request_resume():699] starting system metrics thread
+2024-03-06 14:51:12,600 INFO    HandlerThread:133 [system_monitor.py:start():194] Starting system monitor
+2024-03-06 14:51:12,600 INFO    SystemMonitor:133 [system_monitor.py:_start():158] Starting system asset monitoring threads
+2024-03-06 14:51:12,601 INFO    SystemMonitor:133 [interfaces.py:start():190] Started cpu monitoring
+2024-03-06 14:51:12,602 INFO    SystemMonitor:133 [interfaces.py:start():190] Started disk monitoring
+2024-03-06 14:51:12,605 INFO    SystemMonitor:133 [interfaces.py:start():190] Started gpu monitoring
+2024-03-06 14:51:12,605 INFO    SystemMonitor:133 [interfaces.py:start():190] Started memory monitoring
+2024-03-06 14:51:12,606 INFO    SystemMonitor:133 [interfaces.py:start():190] Started network monitoring
+2024-03-06 14:51:12,793 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:51:13,753 DEBUG   SenderThread:133 [sender.py:send():382] send: config
+2024-03-06 14:51:13,755 DEBUG   SenderThread:133 [sender.py:send():382] send: metric
+2024-03-06 14:51:13,755 DEBUG   SenderThread:133 [sender.py:send():382] send: metric
+2024-03-06 14:51:13,755 WARNING SenderThread:133 [sender.py:send_metric():1354] Seen metric with glob (shouldn't happen)
+2024-03-06 14:51:13,763 INFO    Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/output.log
+2024-03-06 14:51:15,919 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:51:18,756 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:51:20,922 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:51:23,762 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:51:24,767 INFO    Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/config.yaml
+2024-03-06 14:51:25,923 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:51:29,020 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:51:30,925 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:51:34,021 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:51:35,928 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:51:39,022 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:51:40,429 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: partial_history
+2024-03-06 14:51:40,430 DEBUG   SenderThread:133 [sender.py:send():382] send: history
+2024-03-06 14:51:40,431 DEBUG   SenderThread:133 [sender.py:send_request():409] send_request: summary_record
+2024-03-06 14:51:40,432 INFO    SenderThread:133 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end
+2024-03-06 14:51:40,773 INFO    Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/wandb-summary.json
+2024-03-06 14:51:40,942 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:51:42,098 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: partial_history
+2024-03-06 14:51:42,101 DEBUG   SenderThread:133 [sender.py:send():382] send: metric
+2024-03-06 14:51:42,101 DEBUG   SenderThread:133 [sender.py:send():382] send: metric
+2024-03-06 14:51:42,102 DEBUG   SenderThread:133 [sender.py:send():382] send: metric
+2024-03-06 14:51:42,102 DEBUG   SenderThread:133 [sender.py:send():382] send: metric
+2024-03-06 14:51:42,102 DEBUG   SenderThread:133 [sender.py:send():382] send: history
+2024-03-06 14:51:42,102 DEBUG   SenderThread:133 [sender.py:send_request():409] send_request: summary_record
+2024-03-06 14:51:42,103 INFO    SenderThread:133 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end
+2024-03-06 14:51:42,774 INFO    Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/wandb-summary.json
+2024-03-06 14:51:43,775 INFO    Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/output.log
+2024-03-06 14:51:44,104 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: status_report
+2024-03-06 14:51:46,440 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: partial_history
+2024-03-06 14:51:46,441 DEBUG   SenderThread:133 [sender.py:send():382] send: history
+2024-03-06 14:51:46,442 DEBUG   SenderThread:133 [sender.py:send_request():409] send_request: summary_record
+2024-03-06 14:51:46,442 INFO    SenderThread:133 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end
+2024-03-06 14:51:46,678 DEBUG   HandlerThread:133 [handler.py:handle_request():146] handle_request: keepalive
+2024-03-06 14:51:46,776 INFO    Thread-12 :133 [dir_watcher.py:_on_file_modified():288] file/dir modified: /kaggle/working/wandb/run-20240306_142408-og3d0ld1/files/wandb-summary.json

wandb/run-20240306_142408-og3d0ld1/logs/debug.log CHANGED Viewed

@@ -74,3 +74,24 @@ config: {}
 2024-03-06 14:40:45,818 INFO    MainThread:34 [wandb_init.py:_pause_backend():437] pausing backend
 2024-03-06 14:43:14,295 INFO    MainThread:34 [wandb_init.py:_resume_backend():442] resuming backend
 2024-03-06 14:43:15,473 INFO    MainThread:34 [wandb_run.py:_config_callback():1343] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': None, 'finetuning_task': None, 'id2label': {0: 'Documentation Ambiguity', 1: 'Documentation Completeness', 2: 'Documentation Replicability', 3: 'Documentation Replication on Other Examples', 4: 'Inadequate Examples', 5: 'Lack of Alternative Solutions/Documentation', 6: 'Requesting (Additional) Documentation/Examples'}, 'label2id': {'Documentation Ambiguity': 0, 'Documentation Completeness': 1, 'Documentation Replicability': 2, 'Documentation Replication on Other Examples': 3, 'Inadequate Examples': 4, 'Lack of Alternative Solutions/Documentation': 5, 'Requesting (Additional) Documentation/Examples': 6}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'mmukh/SOBertBase', 'transformers_version': '4.38.1', 'model_type': 'megatron-bert', 'tokenizer_type': 'SentencePieceTokenizer', 'vocab_size': 50048, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 2048, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Mar06_14-43-14_cd2c3b1980c7', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}

 2024-03-06 14:40:45,818 INFO    MainThread:34 [wandb_init.py:_pause_backend():437] pausing backend
 2024-03-06 14:43:14,295 INFO    MainThread:34 [wandb_init.py:_resume_backend():442] resuming backend
 2024-03-06 14:43:15,473 INFO    MainThread:34 [wandb_run.py:_config_callback():1343] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': None, 'finetuning_task': None, 'id2label': {0: 'Documentation Ambiguity', 1: 'Documentation Completeness', 2: 'Documentation Replicability', 3: 'Documentation Replication on Other Examples', 4: 'Inadequate Examples', 5: 'Lack of Alternative Solutions/Documentation', 6: 'Requesting (Additional) Documentation/Examples'}, 'label2id': {'Documentation Ambiguity': 0, 'Documentation Completeness': 1, 'Documentation Replicability': 2, 'Documentation Replication on Other Examples': 3, 'Inadequate Examples': 4, 'Lack of Alternative Solutions/Documentation': 5, 'Requesting (Additional) Documentation/Examples': 6}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'mmukh/SOBertBase', 'transformers_version': '4.38.1', 'model_type': 'megatron-bert', 'tokenizer_type': 'SentencePieceTokenizer', 'vocab_size': 50048, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 2048, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Mar06_14-43-14_cd2c3b1980c7', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
+2024-03-06 14:44:15,386 INFO    MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+2024-03-06 14:44:15,386 INFO    MainThread:34 [wandb_init.py:_pause_backend():437] pausing backend
+2024-03-06 14:45:09,983 INFO    MainThread:34 [wandb_init.py:_resume_backend():442] resuming backend
+2024-03-06 14:45:09,985 INFO    MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+2024-03-06 14:45:09,985 INFO    MainThread:34 [wandb_init.py:_pause_backend():437] pausing backend
+2024-03-06 14:45:26,835 INFO    MainThread:34 [wandb_init.py:_resume_backend():442] resuming backend
+2024-03-06 14:45:27,956 INFO    MainThread:34 [wandb_run.py:_config_callback():1343] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': None, 'finetuning_task': None, 'id2label': {0: 'Documentation Ambiguity', 1: 'Documentation Completeness', 2: 'Documentation Replicability', 3: 'Documentation Replication on Other Examples', 4: 'Inadequate Examples', 5: 'Lack of Alternative Solutions/Documentation', 6: 'Requesting (Additional) Documentation/Examples'}, 'label2id': {'Documentation Ambiguity': 0, 'Documentation Completeness': 1, 'Documentation Replicability': 2, 'Documentation Replication on Other Examples': 3, 'Inadequate Examples': 4, 'Lack of Alternative Solutions/Documentation': 5, 'Requesting (Additional) Documentation/Examples': 6}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'mmukh/SOBertBase', 'transformers_version': '4.38.1', 'model_type': 'megatron-bert', 'tokenizer_type': 'SentencePieceTokenizer', 'vocab_size': 50048, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 2048, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Mar06_14-45-27_cd2c3b1980c7', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
+2024-03-06 14:45:56,512 INFO    MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+2024-03-06 14:45:56,512 INFO    MainThread:34 [wandb_init.py:_pause_backend():437] pausing backend
+2024-03-06 14:50:11,637 INFO    MainThread:34 [wandb_init.py:_resume_backend():442] resuming backend
+2024-03-06 14:50:11,640 INFO    MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+2024-03-06 14:50:11,640 INFO    MainThread:34 [wandb_init.py:_pause_backend():437] pausing backend
+2024-03-06 14:50:19,878 INFO    MainThread:34 [wandb_init.py:_resume_backend():442] resuming backend
+2024-03-06 14:50:21,110 INFO    MainThread:34 [wandb_run.py:_config_callback():1343] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': None, 'finetuning_task': None, 'id2label': {0: 'Documentation Ambiguity', 1: 'Documentation Completeness', 2: 'Documentation Replicability', 3: 'Documentation Replication on Other Examples', 4: 'Inadequate Examples', 5: 'Lack of Alternative Solutions/Documentation', 6: 'Requesting (Additional) Documentation/Examples'}, 'label2id': {'Documentation Ambiguity': 0, 'Documentation Completeness': 1, 'Documentation Replicability': 2, 'Documentation Replication on Other Examples': 3, 'Inadequate Examples': 4, 'Lack of Alternative Solutions/Documentation': 5, 'Requesting (Additional) Documentation/Examples': 6}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'mmukh/SOBertBase', 'transformers_version': '4.38.1', 'model_type': 'megatron-bert', 'tokenizer_type': 'SentencePieceTokenizer', 'vocab_size': 50048, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 2048, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Mar06_14-50-20_cd2c3b1980c7', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}
+2024-03-06 14:50:49,740 INFO    MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+2024-03-06 14:50:49,740 INFO    MainThread:34 [wandb_init.py:_pause_backend():437] pausing backend
+2024-03-06 14:51:08,770 INFO    MainThread:34 [wandb_init.py:_resume_backend():442] resuming backend
+2024-03-06 14:51:08,772 INFO    MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
+2024-03-06 14:51:08,773 INFO    MainThread:34 [wandb_init.py:_pause_backend():437] pausing backend
+2024-03-06 14:51:12,599 INFO    MainThread:34 [wandb_init.py:_resume_backend():442] resuming backend
+2024-03-06 14:51:13,748 INFO    MainThread:34 [wandb_run.py:_config_callback():1343] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': None, 'finetuning_task': None, 'id2label': {0: 'Documentation Ambiguity', 1: 'Documentation Completeness', 2: 'Documentation Replicability', 3: 'Documentation Replication on Other Examples', 4: 'Inadequate Examples', 5: 'Lack of Alternative Solutions/Documentation', 6: 'Requesting (Additional) Documentation/Examples'}, 'label2id': {'Documentation Ambiguity': 0, 'Documentation Completeness': 1, 'Documentation Replicability': 2, 'Documentation Replication on Other Examples': 3, 'Inadequate Examples': 4, 'Lack of Alternative Solutions/Documentation': 5, 'Requesting (Additional) Documentation/Examples': 6}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'mmukh/SOBertBase', 'transformers_version': '4.38.1', 'model_type': 'megatron-bert', 'tokenizer_type': 'SentencePieceTokenizer', 'vocab_size': 50048, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 2048, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Mar06_14-51-12_cd2c3b1980c7', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None}

wandb/run-20240306_142408-og3d0ld1/run-og3d0ld1.wandb CHANGED Viewed

Binary files a/wandb/run-20240306_142408-og3d0ld1/run-og3d0ld1.wandb and b/wandb/run-20240306_142408-og3d0ld1/run-og3d0ld1.wandb differ