isikz commited on
Commit
d436953
·
verified ·
1 Parent(s): 79dffb2

Delete wandb

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. wandb/debug-internal.log +0 -21
  2. wandb/debug.log +0 -27
  3. wandb/run-20250504_132610-pxg645u5/files/config.yaml +0 -44
  4. wandb/run-20250504_132610-pxg645u5/files/output.log +0 -37
  5. wandb/run-20250504_132610-pxg645u5/files/requirements.txt +0 -541
  6. wandb/run-20250504_132610-pxg645u5/files/wandb-metadata.json +0 -77
  7. wandb/run-20250504_132610-pxg645u5/files/wandb-summary.json +0 -1
  8. wandb/run-20250504_132610-pxg645u5/logs/debug-core.log +0 -14
  9. wandb/run-20250504_132610-pxg645u5/logs/debug-internal.log +0 -19
  10. wandb/run-20250504_132610-pxg645u5/logs/debug.log +0 -26
  11. wandb/run-20250504_132610-pxg645u5/run-pxg645u5.wandb +0 -0
  12. wandb/run-20250504_132912-1agsw1y8/files/config.yaml +0 -374
  13. wandb/run-20250504_132912-1agsw1y8/files/output.log +0 -87
  14. wandb/run-20250504_132912-1agsw1y8/files/requirements.txt +0 -541
  15. wandb/run-20250504_132912-1agsw1y8/files/wandb-metadata.json +0 -77
  16. wandb/run-20250504_132912-1agsw1y8/files/wandb-summary.json +0 -1
  17. wandb/run-20250504_132912-1agsw1y8/logs/debug-core.log +0 -14
  18. wandb/run-20250504_132912-1agsw1y8/logs/debug-internal.log +0 -19
  19. wandb/run-20250504_132912-1agsw1y8/logs/debug.log +0 -27
  20. wandb/run-20250504_132912-1agsw1y8/run-1agsw1y8.wandb +0 -3
  21. wandb/run-20250504_160615-f65jh2lv/files/output.log +0 -8
  22. wandb/run-20250504_160615-f65jh2lv/files/requirements.txt +0 -541
  23. wandb/run-20250504_160615-f65jh2lv/files/wandb-metadata.json +0 -77
  24. wandb/run-20250504_160615-f65jh2lv/logs/debug-core.log +0 -7
  25. wandb/run-20250504_160615-f65jh2lv/logs/debug-internal.log +0 -8
  26. wandb/run-20250504_160615-f65jh2lv/logs/debug.log +0 -26
  27. wandb/run-20250504_160615-f65jh2lv/run-f65jh2lv.wandb +0 -0
  28. wandb/run-20250504_160955-rqk2hbkf/files/config.yaml +0 -44
  29. wandb/run-20250504_160955-rqk2hbkf/files/output.log +0 -24
  30. wandb/run-20250504_160955-rqk2hbkf/files/requirements.txt +0 -541
  31. wandb/run-20250504_160955-rqk2hbkf/files/wandb-metadata.json +0 -77
  32. wandb/run-20250504_160955-rqk2hbkf/files/wandb-summary.json +0 -1
  33. wandb/run-20250504_160955-rqk2hbkf/logs/debug-core.log +0 -14
  34. wandb/run-20250504_160955-rqk2hbkf/logs/debug-internal.log +0 -19
  35. wandb/run-20250504_160955-rqk2hbkf/logs/debug.log +0 -26
  36. wandb/run-20250504_160955-rqk2hbkf/run-rqk2hbkf.wandb +0 -0
  37. wandb/run-20250504_161246-rdbtc2pz/files/config.yaml +0 -357
  38. wandb/run-20250504_161246-rdbtc2pz/files/output.log +0 -27
  39. wandb/run-20250504_161246-rdbtc2pz/files/requirements.txt +0 -541
  40. wandb/run-20250504_161246-rdbtc2pz/files/wandb-metadata.json +0 -77
  41. wandb/run-20250504_161246-rdbtc2pz/files/wandb-summary.json +0 -1
  42. wandb/run-20250504_161246-rdbtc2pz/logs/debug-core.log +0 -14
  43. wandb/run-20250504_161246-rdbtc2pz/logs/debug-internal.log +0 -19
  44. wandb/run-20250504_161246-rdbtc2pz/logs/debug.log +0 -27
  45. wandb/run-20250504_161246-rdbtc2pz/run-rdbtc2pz.wandb +0 -0
  46. wandb/run-20250504_162343-cp870jym/files/config.yaml +0 -357
  47. wandb/run-20250504_162343-cp870jym/files/output.log +0 -27
  48. wandb/run-20250504_162343-cp870jym/files/requirements.txt +0 -541
  49. wandb/run-20250504_162343-cp870jym/files/wandb-metadata.json +0 -77
  50. wandb/run-20250504_162343-cp870jym/files/wandb-summary.json +0 -1
wandb/debug-internal.log DELETED
@@ -1,21 +0,0 @@
1
- {"time":"2025-05-04T17:25:03.375857654+03:00","level":"INFO","msg":"using version","core version":"0.18.7"}
2
- {"time":"2025-05-04T17:25:03.375905253+03:00","level":"INFO","msg":"created symlink","path":"/arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_172503-0ictlmwf/logs/debug-core.log"}
3
- {"time":"2025-05-04T17:25:03.501241143+03:00","level":"INFO","msg":"created new stream","id":"0ictlmwf"}
4
- {"time":"2025-05-04T17:25:03.501294637+03:00","level":"INFO","msg":"stream: started","id":"0ictlmwf"}
5
- {"time":"2025-05-04T17:25:03.501448652+03:00","level":"INFO","msg":"writer: Do: started","stream_id":"0ictlmwf"}
6
- {"time":"2025-05-04T17:25:03.501451145+03:00","level":"INFO","msg":"handler: started","stream_id":"0ictlmwf"}
7
- {"time":"2025-05-04T17:25:03.501574427+03:00","level":"INFO","msg":"sender: started","stream_id":"0ictlmwf"}
8
- {"time":"2025-05-04T17:25:03.865922055+03:00","level":"INFO","msg":"Starting system monitor"}
9
- {"time":"2025-05-04T22:47:43.191425732+03:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/isikz/finetuning-bc-protT5/0ictlmwf/file_stream\": dial tcp 35.186.228.49:443: connect: connection timed out"}
10
- {"time":"2025-05-05T00:01:47.351449692+03:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/isikz/finetuning-bc-protT5/0ictlmwf/file_stream\": dial tcp 35.186.228.49:443: connect: connection timed out"}
11
- {"time":"2025-05-05T00:49:32.57779148+03:00","level":"INFO","msg":"stream: closing","id":"0ictlmwf"}
12
- {"time":"2025-05-05T00:49:32.577842715+03:00","level":"INFO","msg":"Stopping system monitor"}
13
- {"time":"2025-05-05T00:49:32.578849729+03:00","level":"INFO","msg":"Stopped system monitor"}
14
- {"time":"2025-05-05T00:49:32.781968337+03:00","level":"WARN","msg":"No job ingredients found, not creating job artifact"}
15
- {"time":"2025-05-05T00:49:32.781997123+03:00","level":"WARN","msg":"No source type found, not creating job artifact"}
16
- {"time":"2025-05-05T00:49:32.782008311+03:00","level":"INFO","msg":"sender: sendDefer: no job artifact to save"}
17
- {"time":"2025-05-05T00:49:33.357099059+03:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
18
- {"time":"2025-05-05T00:49:33.741524339+03:00","level":"INFO","msg":"handler: closed","stream_id":"0ictlmwf"}
19
- {"time":"2025-05-05T00:49:33.741583153+03:00","level":"INFO","msg":"writer: Close: closed","stream_id":"0ictlmwf"}
20
- {"time":"2025-05-05T00:49:33.741593811+03:00","level":"INFO","msg":"sender: closed","stream_id":"0ictlmwf"}
21
- {"time":"2025-05-05T00:49:33.741652369+03:00","level":"INFO","msg":"stream: closed","id":"0ictlmwf"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/debug.log DELETED
@@ -1,27 +0,0 @@
1
- 2025-05-04 17:25:03,365 INFO MainThread:3189710 [wandb_setup.py:_flush():79] Current SDK version is 0.18.7
2
- 2025-05-04 17:25:03,365 INFO MainThread:3189710 [wandb_setup.py:_flush():79] Configure stats pid to 3189710
3
- 2025-05-04 17:25:03,365 INFO MainThread:3189710 [wandb_setup.py:_flush():79] Loading settings from /arf/home/zisik/.config/wandb/settings
4
- 2025-05-04 17:25:03,365 INFO MainThread:3189710 [wandb_setup.py:_flush():79] Loading settings from /arf/scratch/zisik/prott5_bc_ft/wandb/settings
5
- 2025-05-04 17:25:03,365 INFO MainThread:3189710 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
6
- 2025-05-04 17:25:03,365 INFO MainThread:3189710 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'finetuning_bc_prott5.py', 'program_abspath': '/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py', 'program': '/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py'}
7
- 2025-05-04 17:25:03,366 INFO MainThread:3189710 [wandb_setup.py:_flush():79] Applying login settings: {}
8
- 2025-05-04 17:25:03,366 INFO MainThread:3189710 [wandb_setup.py:_flush():79] Applying login settings: {}
9
- 2025-05-04 17:25:03,366 INFO MainThread:3189710 [wandb_init.py:_log_setup():533] Logging user logs to /arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_172503-0ictlmwf/logs/debug.log
10
- 2025-05-04 17:25:03,366 INFO MainThread:3189710 [wandb_init.py:_log_setup():534] Logging internal logs to /arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_172503-0ictlmwf/logs/debug-internal.log
11
- 2025-05-04 17:25:03,366 INFO MainThread:3189710 [wandb_init.py:init():619] calling init triggers
12
- 2025-05-04 17:25:03,366 INFO MainThread:3189710 [wandb_init.py:init():626] wandb.init called with sweep_config: {}
13
- config: {}
14
- 2025-05-04 17:25:03,366 INFO MainThread:3189710 [wandb_init.py:init():669] starting backend
15
- 2025-05-04 17:25:03,366 INFO MainThread:3189710 [wandb_init.py:init():673] sending inform_init request
16
- 2025-05-04 17:25:03,371 INFO MainThread:3189710 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
17
- 2025-05-04 17:25:03,371 INFO MainThread:3189710 [wandb_init.py:init():686] backend started and connected
18
- 2025-05-04 17:25:03,379 INFO MainThread:3189710 [wandb_init.py:init():781] updated telemetry
19
- 2025-05-04 17:25:03,382 INFO MainThread:3189710 [wandb_init.py:init():814] communicating run to backend with 90.0 second timeout
20
- 2025-05-04 17:25:03,852 INFO MainThread:3189710 [wandb_init.py:init():867] starting run threads in backend
21
- 2025-05-04 17:25:05,277 INFO MainThread:3189710 [wandb_run.py:_console_start():2456] atexit reg
22
- 2025-05-04 17:25:05,278 INFO MainThread:3189710 [wandb_run.py:_redirect():2305] redirect: wrap_raw
23
- 2025-05-04 17:25:05,278 INFO MainThread:3189710 [wandb_run.py:_redirect():2370] Wrapping output streams.
24
- 2025-05-04 17:25:05,278 INFO MainThread:3189710 [wandb_run.py:_redirect():2395] Redirects installed.
25
- 2025-05-04 17:25:05,283 INFO MainThread:3189710 [wandb_init.py:init():911] run started, returning control to user process
26
- 2025-05-04 17:25:53,069 INFO MainThread:3189710 [wandb_run.py:_config_callback():1387] config_cb None None {'output_dir': 't5-bc-out', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 't5-bc-out/runs/May04_17-25-43_kolyoz1', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': False, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 't5-bc-out', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'loss', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'epoch', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False}
27
- 2025-05-05 00:49:32,578 WARNING MsgRouterThr:3189710 [router.py:message_loop():75] message_loop has been closed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250504_132610-pxg645u5/files/config.yaml DELETED
@@ -1,44 +0,0 @@
1
- _wandb:
2
- value:
3
- cli_version: 0.18.7
4
- m: []
5
- python_version: 3.10.15
6
- t:
7
- "1":
8
- - 1
9
- - 2
10
- - 3
11
- - 5
12
- - 11
13
- - 12
14
- - 49
15
- - 51
16
- - 53
17
- - 55
18
- - 71
19
- - 98
20
- - 105
21
- "2":
22
- - 1
23
- - 2
24
- - 3
25
- - 5
26
- - 11
27
- - 12
28
- - 49
29
- - 51
30
- - 53
31
- - 55
32
- - 71
33
- - 98
34
- - 105
35
- "3":
36
- - 23
37
- - 55
38
- "4": 3.10.15
39
- "5": 0.18.7
40
- "6": 4.45.2
41
- "8":
42
- - 5
43
- "12": 0.18.7
44
- "13": linux-x86_64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250504_132610-pxg645u5/files/output.log DELETED
@@ -1,37 +0,0 @@
1
- You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
2
- Traceback (most recent call last):
3
- File "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py", line 45, in <module>
4
- train_ds = load_dataset("json", data_files={"train": "-"},
5
- File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/load.py", line 2132, in load_dataset
6
- builder_instance = load_dataset_builder(
7
- File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/load.py", line 1853, in load_dataset_builder
8
- dataset_module = dataset_module_factory(
9
- File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/load.py", line 1562, in dataset_module_factory
10
- ).get_module()
11
- File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/load.py", line 942, in get_module
12
- data_files = DataFilesDict.from_patterns(
13
- File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/data_files.py", line 721, in from_patterns
14
- else DataFilesList.from_patterns(
15
- File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/data_files.py", line 624, in from_patterns
16
- resolve_pattern(
17
- File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/data_files.py", line 411, in resolve_pattern
18
- raise FileNotFoundError(error_msg)
19
- FileNotFoundError: Unable to find '/arf/scratch/zisik/prott5_bc_ft/-'
20
- Traceback (most recent call last):
21
- File "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py", line 45, in <module>
22
- train_ds = load_dataset("json", data_files={"train": "-"},
23
- File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/load.py", line 2132, in load_dataset
24
- builder_instance = load_dataset_builder(
25
- File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/load.py", line 1853, in load_dataset_builder
26
- dataset_module = dataset_module_factory(
27
- File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/load.py", line 1562, in dataset_module_factory
28
- ).get_module()
29
- File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/load.py", line 942, in get_module
30
- data_files = DataFilesDict.from_patterns(
31
- File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/data_files.py", line 721, in from_patterns
32
- else DataFilesList.from_patterns(
33
- File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/data_files.py", line 624, in from_patterns
34
- resolve_pattern(
35
- File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/data_files.py", line 411, in resolve_pattern
36
- raise FileNotFoundError(error_msg)
37
- FileNotFoundError: Unable to find '/arf/scratch/zisik/prott5_bc_ft/-'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250504_132610-pxg645u5/files/requirements.txt DELETED
@@ -1,541 +0,0 @@
1
- nvidia-cuda-cupti-cu12==12.4.127
2
- nvidia-cuda-nvrtc-cu12==12.4.127
3
- pyg-lib==0.4.0+pt20cu117
4
- biopython==1.85
5
- iniconfig==2.0.0
6
- tokenizers==0.20.0
7
- accelerate==1.3.0
8
- torch==2.6.0
9
- nvidia-nccl-cu12==2.21.5
10
- transformers==4.45.2
11
- nvidia-cusparse-cu12==12.3.1.170
12
- torch-scatter==2.1.2+pt20cu117
13
- nvidia-cusparselt-cu12==0.6.2
14
- nvidia-nvtx-cu12==12.4.127
15
- zstd==1.5.6.6
16
- fair-esm==2.0.0
17
- omegaconf==2.3.0
18
- pluggy==1.5.0
19
- pytest==8.3.5
20
- nvidia-curand-cu12==10.3.5.147
21
- nvidia-cufft-cu12==11.2.1.3
22
- torch-cluster==1.6.3+pt20cu117
23
- regex==2024.9.11
24
- nvidia-cudnn-cu12==9.1.0.70
25
- torch-spline-conv==1.2.2+pt20cu117
26
- nvidia-cusolver-cu12==11.6.1.9
27
- antlr4-python3-runtime==4.9.3
28
- msgpack-numpy==0.4.8
29
- nlp==0.2.0
30
- einops==0.8.1
31
- nvidia-cublas-cu12==12.4.5.8
32
- triton==3.2.0
33
- ninja==1.11.1.3
34
- hydra-core==1.3.2
35
- nvidia-nvjitlink-cu12==12.4.127
36
- biotite==0.41.2
37
- torch-sparse==0.6.18+pt20cu117
38
- esm==3.1.4
39
- sympy==1.13.1
40
- nvidia-cuda-runtime-cu12==12.4.127
41
- jupyter-lsp==2.2.5
42
- jupyter-events==0.10.0
43
- ipykernel==6.29.5
44
- Mako==1.3.5
45
- proto-plus==1.25.0
46
- fst-pso==1.8.1
47
- gensim==4.3.3
48
- htmlmin==0.1.12
49
- tokenizers==0.13.3
50
- timm==1.0.11
51
- MarkupSafe==3.0.2
52
- safetensors==0.4.5
53
- requests==2.32.3
54
- gast==0.5.5
55
- cuml==24.12.0a33
56
- jaxlib==0.4.23.dev20240214
57
- spacy-loggers==1.0.5
58
- pytz==2024.1
59
- idna==3.10
60
- python-dateutil==2.9.0
61
- mdurl==0.1.2
62
- blis==0.7.10
63
- jupyter==1.1.1
64
- pyerfa==2.0.1.5
65
- comm==0.2.2
66
- pygraphviz==1.14
67
- dill==0.3.8
68
- paramiko==3.5.0
69
- llama-index==0.8.36
70
- mdit-py-plugins==0.4.2
71
- Werkzeug==3.1.3
72
- pyu2f==0.1.5
73
- dask-glm==0.2.0
74
- httpx==0.27.2
75
- typeguard==4.4.1
76
- mypy-extensions==1.0.0
77
- kmodes==0.12.2
78
- keras==2.15.0
79
- ydata-profiling==0.0.dev0
80
- regex==2024.11.6
81
- xarray==2024.11.0
82
- setuptools==75.3.0
83
- charset-normalizer==3.4.0
84
- jupyterlab_nvdashboard==0.11.0
85
- pylibraft==24.12.0a36
86
- spacy==3.7.6
87
- mlflow-skinny==2.17.2
88
- nvtx==0.2.10
89
- multimethod==1.12
90
- pexpect==4.9.0
91
- torch==2.1.0.post301
92
- flatbuffers==24.3.25
93
- python-json-logger==2.0.7
94
- PyJWT==2.9.0
95
- multiprocess==0.70.16
96
- colorlover==0.3.0
97
- yarl==1.16.0
98
- locket==1.0.0
99
- patsy==1.0.0
100
- rapids-dask-dependency==24.12.0a0
101
- stanza==1.9.2
102
- debugpy==1.8.8
103
- jupyterlab_pygments==0.3.0
104
- pylibcudf==24.12.0a337
105
- lz4==4.3.3
106
- pandas==2.2.3
107
- tifffile==2024.9.20
108
- pynvml==11.4.1
109
- cufflinks==0.17.3
110
- ipywidgets==8.1.5
111
- requests-oauthlib==2.0.0
112
- google-auth-oauthlib==1.2.1
113
- rsa==4.9
114
- webcolors==24.8.0
115
- jsonschema-specifications==2024.10.1
116
- scikit-learn==1.5.2
117
- langchain-text-splitters==0.3.2
118
- pandas-datareader==0.10.0
119
- tomli==2.0.2
120
- tzdata==2024.2
121
- scikit-image==0.24.0
122
- tensorboard_data_server==0.7.0
123
- kiwisolver==1.4.7
124
- cloudpathlib==0.20.0
125
- isodate==0.6.1
126
- adversarial-robustness-toolbox==1.19.1
127
- SQLAlchemy==2.0.36
128
- pytest-runner==6.0.0
129
- pycairo==1.27.0
130
- treelite==4.3.0
131
- jiter==0.7.0
132
- threadpoolctl==3.5.0
133
- pandocfilters==1.5.0
134
- loguru==0.7.2
135
- smart_open==7.0.5
136
- shellingham==1.5.4
137
- deepspeed==0.15.4
138
- prompt_toolkit==3.0.48
139
- databricks-sdk==0.34.0
140
- langchain-core==0.3.15
141
- imageio==2.36.0
142
- openapi-schema-pydantic==1.2.4
143
- zict==3.0.0
144
- cachetools==5.5.0
145
- colorful==0.5.6
146
- mpmath==1.3.0
147
- nest_asyncio==1.6.0
148
- pyFUME==0.2.25
149
- opencv-python-headless==4.9.0
150
- fastai==2.7.18
151
- importlib_resources==6.4.5
152
- binaryornot==0.4.4
153
- evaluate==0.4.1
154
- matplotlib-inline==0.1.7
155
- wasabi==1.1.2
156
- pycparser==2.22
157
- GitPython==3.1.43
158
- pluggy==1.5.0
159
- async-lru==2.0.4
160
- pgmpy==0.1.24
161
- anyio==4.4.0
162
- executing==2.1.0
163
- orjson==3.10.11
164
- humanfriendly==10.0
165
- tornado==6.4.1
166
- gmpy2==2.1.5
167
- rlPyCairo==0.2.0
168
- distributed==2024.11.0
169
- FuzzyTM==2.0.5
170
- torchtext==0.15.2a0+5ce3163
171
- pytest==8.3.5
172
- pyod==2.0.2
173
- ImageHash==4.3.1
174
- soupsieve==2.5
175
- tblib==3.0.0
176
- emoji==2.14.0
177
- aiohappyeyeballs==2.4.3
178
- uri-template==1.3.0
179
- tensorflow_estimator==2.15.0
180
- babel==2.16.0
181
- dask-cuda==24.12.0a12
182
- overrides==7.7.0
183
- opencensus==0.11.3
184
- openai==0.28.1
185
- language_data==1.2.0
186
- jedi==0.19.2
187
- cookiecutter==2.6.0
188
- entrypoints==0.4
189
- exceptiongroup==1.2.2
190
- marisa-trie==1.2.0
191
- uvloop==0.20.0
192
- aiosignal==1.3.1
193
- Flask==3.0.3
194
- tensorboard==2.15.2
195
- cffi==1.17.1
196
- tf_keras==2.15.0
197
- absl-py==2.1.0
198
- blinker==1.9.0
199
- types-python-dateutil==2.9.0.20241003
200
- opencv-python==4.9.0
201
- frozendict==2.4.6
202
- aiohttp-cors==0.7.0
203
- statsmodels==0.14.4
204
- tinycss2==1.4.0
205
- terminado==0.18.1
206
- pycaret==2.2.3
207
- aiohttp==3.10.10
208
- distributed-ucxx==0.41.0
209
- prometheus_client==0.21.0
210
- fastdownload==0.0.7
211
- grpcio==1.59.3
212
- google-api-core==2.22.0
213
- jupyterlab_widgets==3.0.13
214
- appdirs==1.4.4
215
- littleutils==0.0.0
216
- ray==2.24.0
217
- kaggle==1.6.17
218
- jsonschema==4.23.0
219
- google-auth==2.36.0
220
- scikit-base==0.11.0
221
- visions==0.7.6
222
- pyarrow==15.0.0
223
- transformers==4.33.0
224
- prometheus_flask_exporter==0.23.1
225
- dm-tree==0.1.8
226
- colorama==0.4.6
227
- requests-toolbelt==1.0.0
228
- cached-property==1.5.2
229
- cymem==2.0.8
230
- PyNaCl==1.5.0
231
- PyWavelets==1.7.0
232
- httptools==0.6.1
233
- typing-utils==0.1.0
234
- email_validator==2.2.0
235
- marshmallow==3.23.1
236
- Deprecated==1.2.14
237
- virtualenv==20.4.7
238
- optuna==3.6.1
239
- jupyter_server==2.14.2
240
- termcolor==2.5.0
241
- mpi4py==4.0.1
242
- torchdata==0.7.1+8cea82f
243
- dataclasses==0.8
244
- cloudpickle==3.1.0
245
- tree_sitter_languages==1.10.2
246
- tabulate==0.9.0
247
- ipython==8.29.0
248
- lightgbm==4.3.0
249
- captum==0.6.0
250
- confuse==2.0.1
251
- torchvision==0.16.1+adc3221
252
- lxml==4.9.4
253
- fastapi==0.115.4
254
- python-multipart==0.0.17
255
- dnspython==2.7.0
256
- jupyter-console==6.6.3
257
- preshed==3.0.9
258
- py-cpuinfo==9.0.0
259
- Send2Trash==1.8.3
260
- murmurhash==1.0.10
261
- sniffio==1.3.1
262
- websockets==13.1
263
- h11==0.14.0
264
- smmap==5.0.0
265
- textual==0.85.2
266
- jsonpatch==1.33
267
- opencensus-context==0.1.3
268
- nbconvert==7.16.4
269
- sentry-sdk==2.19.0
270
- opentelemetry-semantic-conventions==0.37b0
271
- pandas-profiling==2.8.0
272
- pillow==10.3.0
273
- peft==0.13.2
274
- rpds-py==0.21.0
275
- bokeh==3.6.1
276
- distro==1.9.0
277
- itsdangerous==2.2.0
278
- wandb==0.18.7
279
- jsonpointer==3.0.0
280
- astropy-iers-data==0.2024.11.11.0.32.38
281
- horovod==0.28.1
282
- graphviz==0.20.3
283
- vtk==9.3.1
284
- bleach==6.2.0
285
- numexpr==2.8.7
286
- pydantic_core==2.23.4
287
- Jinja2==3.1.4
288
- widgetsnbextension==4.0.13
289
- filelock==3.16.1
290
- catboost==1.2.7
291
- raft-dask==24.12.0a36
292
- async-timeout==4.0.3
293
- datefinder==0.7.3
294
- coloredlogs==15.0.1
295
- platformdirs==4.3.6
296
- spacy-legacy==3.0.12
297
- chardet==5.2.0
298
- jupyter_client==8.6.3
299
- importlib_metadata==8.5.0
300
- rfc3986-validator==0.1.1
301
- huggingface_hub==0.26.2
302
- PySocks==1.7.1
303
- mlxtend==0.23.2
304
- outdated==0.2.2
305
- partd==1.4.2
306
- thinc==8.2.5
307
- astropy==6.1.6
308
- rdflib==6.3.2
309
- h2==4.1.0
310
- typer==0.13.0
311
- xyzservices==2024.9.0
312
- toolz==0.12.1
313
- frozenlist==1.5.0
314
- rdkit==2024.9.2
315
- pyasn1==0.6.1
316
- jupyter_server_terminals==0.5.3
317
- ucx-py==0.41.0a11
318
- astunparse==1.6.3
319
- simpful==2.12.0
320
- notebook_shim==0.2.4
321
- scipy==1.13.1
322
- colorlog==6.9.0
323
- tiktoken==0.3.3
324
- plotly==5.24.1
325
- fastrlock==0.8.2
326
- chart-studio==1.1.0
327
- stack-data==0.6.2
328
- google-pasta==0.2.0
329
- sktime==0.34.0
330
- PyYAML==6.0.2
331
- sympy==1.13.3
332
- multidict==6.1.0
333
- ml-dtypes==0.2.0
334
- tensorboardX==2.6.2.2
335
- decorator==5.1.1
336
- cytoolz==1.0.0
337
- ase==3.23.0
338
- isoduration==20.11.0
339
- html5lib==1.1
340
- langsmith==0.1.142
341
- future==1.0.0
342
- onnx2torch==1.5.15
343
- multipledispatch==0.6.0
344
- protobuf==4.24.4
345
- ucxx==0.41.0
346
- pandas_flavor==0.6.0
347
- msgpack==1.1.0
348
- pyasn1_modules==0.4.1
349
- imagecodecs==2024.1.1
350
- mlflow==2.17.2
351
- watchfiles==0.24.0
352
- dm-sonnet==2.0.2
353
- langcodes==3.4.1
354
- freetype-py==2.3.0
355
- argon2-cffi-bindings==21.2.0
356
- trimesh==4.5.2
357
- opt_einsum==3.4.0
358
- tenacity==8.5.0
359
- h5py==3.12.1
360
- fastapi-cli==0.0.5
361
- oauthlib==3.2.2
362
- parso==0.8.4
363
- weasel==0.4.1
364
- yfinance==0.2.49
365
- networkx==2.8.8
366
- bitsandbytes==0.44.1
367
- lazy_loader==0.4
368
- querystring_parser==1.2.4
369
- contourpy==1.3.0
370
- unicodedata2==15.1.0
371
- bcrypt==4.2.0
372
- munkres==1.1.4
373
- langchain==0.0.298
374
- hpack==4.0.0
375
- cryptography==43.0.3
376
- umap-learn==0.5.7
377
- arrow==1.3.0
378
- docker==7.1.0
379
- certifi==2025.1.31
380
- fastjsonschema==2.20.0
381
- tensorflow==2.15.0
382
- googleapis-common-protos==1.65.0
383
- iniconfig==2.0.0
384
- Markdown==3.6
385
- llvmlite==0.43.0
386
- wslink==2.3.2
387
- attrs==24.2.0
388
- rich==13.9.4
389
- cupy==13.3.0
390
- uc-micro-py==1.0.3
391
- alembic==1.14.0
392
- joblib==1.4.2
393
- reportlab==4.2.5
394
- miniful==0.0.6
395
- jupyter_core==5.7.2
396
- wheel==0.45.0
397
- phik==0.12.3
398
- mistune==3.0.2
399
- wcwidth==0.2.13
400
- dacite==1.8.1
401
- accelerate==0.22.0
402
- sacremoses==0.0.53
403
- revtok==0.0.3
404
- python-slugify==8.0.4
405
- tangled-up-in-unicode==0.2.0
406
- dask==2024.11.0
407
- markdown-it-py==3.0.0
408
- sentencepiece==0.1.99
409
- beautifulsoup4==4.12.3
410
- six==1.16.0
411
- numba-cuda==0.0.17
412
- argon2-cffi==23.1.0
413
- xxhash==3.5.0
414
- hjson==3.1.0
415
- fonttools==4.54.1
416
- graphql-core==3.2.5
417
- pyparsing==3.2.0
418
- pure_eval==0.2.3
419
- distlib==0.3.9
420
- lightning==2.4.0
421
- wordcloud==0.0.0
422
- catalogue==2.0.10
423
- jax==0.4.27
424
- tree-sitter==0.23.2
425
- notebook==7.2.2
426
- dataclasses-json==0.6.7
427
- propcache==0.2.0
428
- numba==0.60.0
429
- dask-expr==1.1.17
430
- pydantic==2.9.2
431
- gunicorn==22.0.0
432
- missingno==0.5.2
433
- pyOpenSSL==24.2.1
434
- openpyxl==3.1.5
435
- packaging==24.1
436
- python-dotenv==1.0.1
437
- cycler==0.12.1
438
- types-pytz==2024.2.0.20241003
439
- yellowbrick==1.5
440
- referencing==0.35.1
441
- pyLDAvis==3.4.1
442
- lazypredict==0.2.16
443
- fqdn==1.5.1
444
- websocket-client==1.8.0
445
- fastcore==1.7.19
446
- pynvjitlink-cu12==0.3.0
447
- pingouin==0.5.5
448
- numpy==1.26.4
449
- typing-inspect==0.9.0
450
- nltk==3.9.1
451
- onnxruntime==1.19.2
452
- tensorflow-probability==0.23.0
453
- datasets==3.0.2
454
- pickleshare==0.7.5
455
- peewee==3.17.7
456
- torch-geometric==2.6.1
457
- ptyprocess==0.7.0
458
- greenlet==3.1.1
459
- graphql-relay==3.2.0
460
- graphene==3.4.3
461
- et_xmlfile==2.0.0
462
- webencodings==0.5.1
463
- hyperframe==6.0.1
464
- multitasking==0.0.9
465
- typer-slim==0.13.0
466
- onnx==1.15.0
467
- uvicorn==0.32.0
468
- memray==1.13.4
469
- xgboost==2.1.2
470
- Brotli==1.1.0
471
- zipp==3.21.0
472
- nbformat==5.10.4
473
- responses==0.18.0
474
- funcy==2.0
475
- Pygments==2.18.0
476
- tqdm==4.67.0
477
- linkify-it-py==2.0.3
478
- srsly==2.4.8
479
- cuda-python==12.6.0
480
- lightning-utilities==0.11.8
481
- cudf==24.12.0a337
482
- dask-ml==2024.4.4
483
- docker-pycreds==0.4.0
484
- pkgutil_resolve_name==1.3.10
485
- opentelemetry-api==1.16.0
486
- fsspec==2024.9.0
487
- nbclient==0.10.0
488
- psutil==5.9.8
489
- pytorch-lightning==2.4.0
490
- sortedcontainers==2.4.0
491
- matplotlib==3.9.2
492
- defusedxml==0.7.1
493
- urllib3==1.26.19
494
- jupyterlab_server==2.27.3
495
- retrying==1.3.3
496
- dask-cudf==24.12.0a337
497
- sqlparse==0.5.1
498
- text-unidecode==1.3
499
- seaborn==0.13.2
500
- typing_extensions==4.12.2
501
- pyzmq==26.2.0
502
- rfc3339-validator==0.1.4
503
- pynndescent==0.5.13
504
- pip==24.3.1
505
- confection==0.1.4
506
- wrapt==1.14.1
507
- fastprogress==1.0.3
508
- traitlets==5.14.3
509
- asttokens==2.4.1
510
- json5==0.9.28
511
- pandas-stubs==2.2.3.241126
512
- torchmetrics==1.2.1
513
- gitdb==4.0.11
514
- annotated-types==0.7.0
515
- ipython-autotime==0.1
516
- httpcore==1.0.6
517
- click==8.1.7
518
- setproctitle==1.3.3
519
- starlette==0.41.2
520
- jupyterlab==4.2.5
521
- rmm==24.12.0a27
522
- opentelemetry-sdk==1.16.0
523
- textblob==0.15.3
524
- imbalanced-learn==0.12.4
525
- typeguard==4.3.0
526
- more-itertools==10.3.0
527
- zipp==3.19.2
528
- autocommand==2.2.2
529
- jaraco.context==5.3.0
530
- packaging==24.1
531
- importlib_metadata==8.0.0
532
- platformdirs==4.2.2
533
- jaraco.functools==4.0.1
534
- importlib_resources==6.4.0
535
- tomli==2.0.1
536
- jaraco.text==3.12.1
537
- wheel==0.43.0
538
- jaraco.collections==5.1.0
539
- typing_extensions==4.12.2
540
- inflect==7.3.1
541
- backports.tarfile==1.2.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250504_132610-pxg645u5/files/wandb-metadata.json DELETED
@@ -1,77 +0,0 @@
1
- {
2
- "os": "Linux-5.14.0-427.13.1.el9_4.x86_64-x86_64-with-glibc2.34",
3
- "python": "3.10.15",
4
- "startedAt": "2025-05-04T10:26:10.053836Z",
5
- "program": "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py",
6
- "codePath": "finetuning_bc_prott5.py",
7
- "email": "zeynep.isik1@sabanciuniv.edu",
8
- "root": "/arf/scratch/zisik/prott5_bc_ft",
9
- "host": "kolyoz1",
10
- "username": "zisik",
11
- "executable": "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/bin/python3",
12
- "codePathLocal": "finetuning_bc_prott5.py",
13
- "cpu_count": 64,
14
- "cpu_count_logical": 64,
15
- "gpu": "NVIDIA H100 80GB HBM3",
16
- "gpu_count": 1,
17
- "disk": {
18
- "/": {
19
- "total": "7643995308032",
20
- "used": "274767593472"
21
- }
22
- },
23
- "memory": {
24
- "total": "1081373220864"
25
- },
26
- "cpu": {
27
- "count": 64,
28
- "countLogical": 64
29
- },
30
- "gpu_nvidia": [
31
- {
32
- "name": "NVIDIA H100 80GB HBM3",
33
- "memoryTotal": "85520809984",
34
- "cudaCores": 16896,
35
- "architecture": "Hopper"
36
- }
37
- ],
38
- "slurm": {
39
- "cluster_name": "cuda",
40
- "conf": "/etc/slurm/slurm.conf",
41
- "cpus_on_node": "16",
42
- "cpus_per_task": "16",
43
- "gpus_on_node": "1",
44
- "gtids": "0",
45
- "job_account": "tbag154",
46
- "job_cpus_per_node": "16",
47
- "job_end_time": "1746613538",
48
- "job_gid": "11636",
49
- "job_gpus": "1",
50
- "job_id": "1027932",
51
- "job_name": "msa_ph_pt",
52
- "job_nodelist": "kolyoz1",
53
- "job_num_nodes": "1",
54
- "job_partition": "kolyoz-cuda",
55
- "job_qos": "tbag",
56
- "job_start_time": "1746354338",
57
- "job_uid": "11636",
58
- "job_user": "zisik",
59
- "jobid": "1027932",
60
- "localid": "0",
61
- "mem_per_cpu": "14000",
62
- "nnodes": "1",
63
- "node_aliases": "(null)",
64
- "nodeid": "0",
65
- "nodelist": "kolyoz1",
66
- "prio_process": "0",
67
- "procid": "0",
68
- "submit_dir": "/arf/scratch/zisik",
69
- "submit_host": "cuda-ui",
70
- "task_pid": "3156950",
71
- "tasks_per_node": "1",
72
- "topology_addr": "kolyoz1",
73
- "topology_addr_pattern": "node",
74
- "working_cluster": "cuda:slurmcontroller3.ib:6800:9984:109"
75
- },
76
- "cudaVersion": "12.6"
77
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250504_132610-pxg645u5/files/wandb-summary.json DELETED
@@ -1 +0,0 @@
1
- {"_wandb":{"runtime":6}}
 
 
wandb/run-20250504_132610-pxg645u5/logs/debug-core.log DELETED
@@ -1,14 +0,0 @@
1
- {"time":"2025-05-04T13:26:09.392354119+03:00","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmppack6571/port-3156976.txt","pid":3156976,"debug":false,"disable-analytics":false}
2
- {"time":"2025-05-04T13:26:09.392402628+03:00","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false}
3
- {"time":"2025-05-04T13:26:09.393200765+03:00","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":36685,"Zone":""}}
4
- {"time":"2025-05-04T13:26:09.393299078+03:00","level":"INFO","msg":"Will exit if parent process dies.","ppid":3156976}
5
- {"time":"2025-05-04T13:26:09.570123715+03:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:37852"}
6
- {"time":"2025-05-04T13:26:10.055349971+03:00","level":"INFO","msg":"handleInformInit: received","streamId":"pxg645u5","id":"127.0.0.1:37852"}
7
- {"time":"2025-05-04T13:26:10.180212249+03:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"pxg645u5","id":"127.0.0.1:37852"}
8
- {"time":"2025-05-04T13:26:16.993053475+03:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:37852"}
9
- {"time":"2025-05-04T13:26:16.994546738+03:00","level":"INFO","msg":"server is shutting down"}
10
- {"time":"2025-05-04T13:26:16.993862146+03:00","level":"INFO","msg":"connection: Close: initiating connection closure","id":"127.0.0.1:37852"}
11
- {"time":"2025-05-04T13:26:16.994899765+03:00","level":"INFO","msg":"connection: Close: connection successfully closed","id":"127.0.0.1:37852"}
12
- {"time":"2025-05-04T13:26:17.953982632+03:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:37852"}
13
- {"time":"2025-05-04T13:26:17.954000039+03:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:37852"}
14
- {"time":"2025-05-04T13:26:17.954015604+03:00","level":"INFO","msg":"server is closed"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250504_132610-pxg645u5/logs/debug-internal.log DELETED
@@ -1,19 +0,0 @@
1
- {"time":"2025-05-04T13:26:10.056874799+03:00","level":"INFO","msg":"using version","core version":"0.18.7"}
2
- {"time":"2025-05-04T13:26:10.056920353+03:00","level":"INFO","msg":"created symlink","path":"/arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_132610-pxg645u5/logs/debug-core.log"}
3
- {"time":"2025-05-04T13:26:10.180146537+03:00","level":"INFO","msg":"created new stream","id":"pxg645u5"}
4
- {"time":"2025-05-04T13:26:10.180200098+03:00","level":"INFO","msg":"stream: started","id":"pxg645u5"}
5
- {"time":"2025-05-04T13:26:10.180372555+03:00","level":"INFO","msg":"writer: Do: started","stream_id":"pxg645u5"}
6
- {"time":"2025-05-04T13:26:10.180478207+03:00","level":"INFO","msg":"sender: started","stream_id":"pxg645u5"}
7
- {"time":"2025-05-04T13:26:10.18057531+03:00","level":"INFO","msg":"handler: started","stream_id":"pxg645u5"}
8
- {"time":"2025-05-04T13:26:10.587540794+03:00","level":"INFO","msg":"Starting system monitor"}
9
- {"time":"2025-05-04T13:26:16.993666261+03:00","level":"INFO","msg":"stream: closing","id":"pxg645u5"}
10
- {"time":"2025-05-04T13:26:16.993748173+03:00","level":"INFO","msg":"Stopping system monitor"}
11
- {"time":"2025-05-04T13:26:16.995793958+03:00","level":"INFO","msg":"Stopped system monitor"}
12
- {"time":"2025-05-04T13:26:17.198876326+03:00","level":"WARN","msg":"No job ingredients found, not creating job artifact"}
13
- {"time":"2025-05-04T13:26:17.198909473+03:00","level":"WARN","msg":"No source type found, not creating job artifact"}
14
- {"time":"2025-05-04T13:26:17.198920913+03:00","level":"INFO","msg":"sender: sendDefer: no job artifact to save"}
15
- {"time":"2025-05-04T13:26:17.694743818+03:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
16
- {"time":"2025-05-04T13:26:17.953755664+03:00","level":"INFO","msg":"handler: closed","stream_id":"pxg645u5"}
17
- {"time":"2025-05-04T13:26:17.953802728+03:00","level":"INFO","msg":"writer: Close: closed","stream_id":"pxg645u5"}
18
- {"time":"2025-05-04T13:26:17.953828101+03:00","level":"INFO","msg":"sender: closed","stream_id":"pxg645u5"}
19
- {"time":"2025-05-04T13:26:17.953904675+03:00","level":"INFO","msg":"stream: closed","id":"pxg645u5"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250504_132610-pxg645u5/logs/debug.log DELETED
@@ -1,26 +0,0 @@
1
- 2025-05-04 13:26:10,046 INFO MainThread:3156976 [wandb_setup.py:_flush():79] Current SDK version is 0.18.7
2
- 2025-05-04 13:26:10,046 INFO MainThread:3156976 [wandb_setup.py:_flush():79] Configure stats pid to 3156976
3
- 2025-05-04 13:26:10,046 INFO MainThread:3156976 [wandb_setup.py:_flush():79] Loading settings from /arf/home/zisik/.config/wandb/settings
4
- 2025-05-04 13:26:10,046 INFO MainThread:3156976 [wandb_setup.py:_flush():79] Loading settings from /arf/scratch/zisik/prott5_bc_ft/wandb/settings
5
- 2025-05-04 13:26:10,046 INFO MainThread:3156976 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
6
- 2025-05-04 13:26:10,046 INFO MainThread:3156976 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'finetuning_bc_prott5.py', 'program_abspath': '/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py', 'program': '/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py'}
7
- 2025-05-04 13:26:10,046 INFO MainThread:3156976 [wandb_setup.py:_flush():79] Applying login settings: {}
8
- 2025-05-04 13:26:10,046 INFO MainThread:3156976 [wandb_setup.py:_flush():79] Applying login settings: {}
9
- 2025-05-04 13:26:10,046 INFO MainThread:3156976 [wandb_init.py:_log_setup():533] Logging user logs to /arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_132610-pxg645u5/logs/debug.log
10
- 2025-05-04 13:26:10,047 INFO MainThread:3156976 [wandb_init.py:_log_setup():534] Logging internal logs to /arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_132610-pxg645u5/logs/debug-internal.log
11
- 2025-05-04 13:26:10,047 INFO MainThread:3156976 [wandb_init.py:init():619] calling init triggers
12
- 2025-05-04 13:26:10,047 INFO MainThread:3156976 [wandb_init.py:init():626] wandb.init called with sweep_config: {}
13
- config: {}
14
- 2025-05-04 13:26:10,047 INFO MainThread:3156976 [wandb_init.py:init():669] starting backend
15
- 2025-05-04 13:26:10,047 INFO MainThread:3156976 [wandb_init.py:init():673] sending inform_init request
16
- 2025-05-04 13:26:10,052 INFO MainThread:3156976 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
17
- 2025-05-04 13:26:10,053 INFO MainThread:3156976 [wandb_init.py:init():686] backend started and connected
18
- 2025-05-04 13:26:10,061 INFO MainThread:3156976 [wandb_init.py:init():781] updated telemetry
19
- 2025-05-04 13:26:10,064 INFO MainThread:3156976 [wandb_init.py:init():814] communicating run to backend with 90.0 second timeout
20
- 2025-05-04 13:26:10,574 INFO MainThread:3156976 [wandb_init.py:init():867] starting run threads in backend
21
- 2025-05-04 13:26:12,208 INFO MainThread:3156976 [wandb_run.py:_console_start():2456] atexit reg
22
- 2025-05-04 13:26:12,209 INFO MainThread:3156976 [wandb_run.py:_redirect():2305] redirect: wrap_raw
23
- 2025-05-04 13:26:12,209 INFO MainThread:3156976 [wandb_run.py:_redirect():2370] Wrapping output streams.
24
- 2025-05-04 13:26:12,209 INFO MainThread:3156976 [wandb_run.py:_redirect():2395] Redirects installed.
25
- 2025-05-04 13:26:12,220 INFO MainThread:3156976 [wandb_init.py:init():911] run started, returning control to user process
26
- 2025-05-04 13:26:16,995 WARNING MsgRouterThr:3156976 [router.py:message_loop():75] message_loop has been closed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250504_132610-pxg645u5/run-pxg645u5.wandb DELETED
Binary file (5.5 kB)
 
wandb/run-20250504_132912-1agsw1y8/files/config.yaml DELETED
@@ -1,374 +0,0 @@
1
- _wandb:
2
- value:
3
- cli_version: 0.18.7
4
- m:
5
- - "1": train/epoch
6
- "5": 2
7
- "6":
8
- - 1
9
- - 3
10
- "7": []
11
- - "1": train/global_step
12
- "6":
13
- - 3
14
- "7": []
15
- - "1": eval/runtime
16
- "5": 2
17
- "6":
18
- - 1
19
- - 3
20
- "7": []
21
- - "1": train/loss
22
- "5": 2
23
- "6":
24
- - 1
25
- - 3
26
- "7": []
27
- - "1": train/grad_norm
28
- "5": 2
29
- "6":
30
- - 1
31
- - 3
32
- "7": []
33
- - "1": train/learning_rate
34
- "5": 2
35
- "6":
36
- - 1
37
- - 3
38
- "7": []
39
- - "1": eval/loss
40
- "5": 2
41
- "6":
42
- - 1
43
- - 3
44
- "7": []
45
- - "1": eval/samples_per_second
46
- "5": 2
47
- "6":
48
- - 1
49
- - 3
50
- "7": []
51
- - "1": eval/steps_per_second
52
- "5": 2
53
- "6":
54
- - 1
55
- - 3
56
- "7": []
57
- - "1": eval/accuracy
58
- "5": 2
59
- "6":
60
- - 1
61
- - 3
62
- "7": []
63
- python_version: 3.10.15
64
- t:
65
- "1":
66
- - 1
67
- - 2
68
- - 3
69
- - 5
70
- - 11
71
- - 12
72
- - 49
73
- - 51
74
- - 53
75
- - 55
76
- - 71
77
- - 98
78
- - 105
79
- "2":
80
- - 1
81
- - 2
82
- - 3
83
- - 5
84
- - 6
85
- - 11
86
- - 12
87
- - 49
88
- - 51
89
- - 53
90
- - 55
91
- - 71
92
- - 98
93
- - 105
94
- "3":
95
- - 7
96
- - 23
97
- - 55
98
- - 66
99
- "4": 3.10.15
100
- "5": 0.18.7
101
- "6": 4.45.2
102
- "8":
103
- - 5
104
- "9":
105
- "1": transformers_trainer
106
- "12": 0.18.7
107
- "13": linux-x86_64
108
- accelerator_config:
109
- value:
110
- dispatch_batches: null
111
- even_batches: true
112
- gradient_accumulation_kwargs: null
113
- non_blocking: false
114
- split_batches: false
115
- use_seedable_sampler: true
116
- adafactor:
117
- value: false
118
- adam_beta1:
119
- value: 0.9
120
- adam_beta2:
121
- value: 0.999
122
- adam_epsilon:
123
- value: 1e-08
124
- auto_find_batch_size:
125
- value: false
126
- batch_eval_metrics:
127
- value: false
128
- bf16:
129
- value: false
130
- bf16_full_eval:
131
- value: false
132
- data_seed:
133
- value: null
134
- dataloader_drop_last:
135
- value: false
136
- dataloader_num_workers:
137
- value: 0
138
- dataloader_persistent_workers:
139
- value: false
140
- dataloader_pin_memory:
141
- value: true
142
- dataloader_prefetch_factor:
143
- value: null
144
- ddp_backend:
145
- value: null
146
- ddp_broadcast_buffers:
147
- value: null
148
- ddp_bucket_cap_mb:
149
- value: null
150
- ddp_find_unused_parameters:
151
- value: null
152
- ddp_timeout:
153
- value: 1800
154
- debug:
155
- value: []
156
- deepspeed:
157
- value: null
158
- disable_tqdm:
159
- value: false
160
- dispatch_batches:
161
- value: null
162
- do_eval:
163
- value: true
164
- do_predict:
165
- value: false
166
- do_train:
167
- value: false
168
- eval_accumulation_steps:
169
- value: null
170
- eval_delay:
171
- value: 0
172
- eval_do_concat_batches:
173
- value: true
174
- eval_on_start:
175
- value: false
176
- eval_steps:
177
- value: null
178
- eval_strategy:
179
- value: epoch
180
- eval_use_gather_object:
181
- value: false
182
- evaluation_strategy:
183
- value: epoch
184
- fp16:
185
- value: true
186
- fp16_backend:
187
- value: auto
188
- fp16_full_eval:
189
- value: false
190
- fp16_opt_level:
191
- value: O1
192
- fsdp:
193
- value: []
194
- fsdp_config:
195
- value:
196
- min_num_params: 0
197
- xla: false
198
- xla_fsdp_grad_ckpt: false
199
- xla_fsdp_v2: false
200
- fsdp_min_num_params:
201
- value: 0
202
- fsdp_transformer_layer_cls_to_wrap:
203
- value: null
204
- full_determinism:
205
- value: false
206
- gradient_accumulation_steps:
207
- value: 4
208
- gradient_checkpointing:
209
- value: false
210
- gradient_checkpointing_kwargs:
211
- value: null
212
- greater_is_better:
213
- value: false
214
- group_by_length:
215
- value: false
216
- half_precision_backend:
217
- value: auto
218
- hub_always_push:
219
- value: false
220
- hub_model_id:
221
- value: null
222
- hub_private_repo:
223
- value: false
224
- hub_strategy:
225
- value: every_save
226
- hub_token:
227
- value: <HUB_TOKEN>
228
- ignore_data_skip:
229
- value: false
230
- include_inputs_for_metrics:
231
- value: false
232
- include_num_input_tokens_seen:
233
- value: false
234
- include_tokens_per_second:
235
- value: false
236
- jit_mode_eval:
237
- value: false
238
- label_names:
239
- value: null
240
- label_smoothing_factor:
241
- value: 0
242
- learning_rate:
243
- value: 5e-05
244
- length_column_name:
245
- value: length
246
- load_best_model_at_end:
247
- value: true
248
- local_rank:
249
- value: 0
250
- log_level:
251
- value: passive
252
- log_level_replica:
253
- value: warning
254
- log_on_each_node:
255
- value: true
256
- logging_dir:
257
- value: t5-bc-out/runs/May04_13-33-08_kolyoz1
258
- logging_first_step:
259
- value: false
260
- logging_nan_inf_filter:
261
- value: true
262
- logging_steps:
263
- value: 500
264
- logging_strategy:
265
- value: steps
266
- lr_scheduler_type:
267
- value: linear
268
- max_grad_norm:
269
- value: 1
270
- max_steps:
271
- value: -1
272
- metric_for_best_model:
273
- value: loss
274
- mp_parameters:
275
- value: ""
276
- neftune_noise_alpha:
277
- value: null
278
- no_cuda:
279
- value: false
280
- num_train_epochs:
281
- value: 3
282
- optim:
283
- value: adamw_torch
284
- optim_args:
285
- value: null
286
- optim_target_modules:
287
- value: null
288
- output_dir:
289
- value: t5-bc-out
290
- overwrite_output_dir:
291
- value: false
292
- past_index:
293
- value: -1
294
- per_device_eval_batch_size:
295
- value: 8
296
- per_device_train_batch_size:
297
- value: 8
298
- per_gpu_eval_batch_size:
299
- value: null
300
- per_gpu_train_batch_size:
301
- value: null
302
- prediction_loss_only:
303
- value: false
304
- push_to_hub:
305
- value: false
306
- push_to_hub_model_id:
307
- value: null
308
- push_to_hub_organization:
309
- value: null
310
- push_to_hub_token:
311
- value: <PUSH_TO_HUB_TOKEN>
312
- ray_scope:
313
- value: last
314
- remove_unused_columns:
315
- value: true
316
- report_to:
317
- value:
318
- - wandb
319
- restore_callback_states_from_checkpoint:
320
- value: false
321
- resume_from_checkpoint:
322
- value: null
323
- run_name:
324
- value: t5-bc-out
325
- save_on_each_node:
326
- value: false
327
- save_only_model:
328
- value: false
329
- save_safetensors:
330
- value: true
331
- save_steps:
332
- value: 500
333
- save_strategy:
334
- value: epoch
335
- save_total_limit:
336
- value: null
337
- seed:
338
- value: 42
339
- skip_memory_metrics:
340
- value: true
341
- split_batches:
342
- value: null
343
- tf32:
344
- value: null
345
- torch_compile:
346
- value: false
347
- torch_compile_backend:
348
- value: null
349
- torch_compile_mode:
350
- value: null
351
- torch_empty_cache_steps:
352
- value: null
353
- torchdynamo:
354
- value: null
355
- tpu_metrics_debug:
356
- value: false
357
- tpu_num_cores:
358
- value: null
359
- use_cpu:
360
- value: false
361
- use_ipex:
362
- value: false
363
- use_legacy_prediction_loop:
364
- value: false
365
- use_liger_kernel:
366
- value: false
367
- use_mps_device:
368
- value: false
369
- warmup_ratio:
370
- value: 0
371
- warmup_steps:
372
- value: 0
373
- weight_decay:
374
- value: 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250504_132912-1agsw1y8/files/output.log DELETED
@@ -1,87 +0,0 @@
1
- You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
2
- Map: 100%|██████████| 511104/511104 [00:20<00:00, 25525.81 examples/s]
3
- Map: 100%|██████████| 109522/109522 [00:04<00:00, 26956.64 examples/s]
4
- /arf/home/zisik/.local/lib/python3.10/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead
5
- warnings.warn(
6
- [2025-05-04 13:33:14,758] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect)
7
- wandb: WARNING The `run_name` is currently set to the same value as `TrainingArguments.output_dir`. If this was not intended, please specify a different run name by setting the `TrainingArguments.run_name` parameter.
8
- 33%|███▎ | 15972/47916 [2:22:01<4:54:49, 1.81it/s]
9
- {'loss': 0.6947, 'grad_norm': 0.09912440180778503, 'learning_rate': 4.947825361048502e-05, 'epoch': 0.03}
10
- {'loss': 0.6939, 'grad_norm': 0.23786939680576324, 'learning_rate': 4.8956507220970036e-05, 'epoch': 0.06}
11
- {'loss': 0.6936, 'grad_norm': 0.10555226355791092, 'learning_rate': 4.843476083145505e-05, 'epoch': 0.09}
12
- {'loss': 0.6935, 'grad_norm': 0.28058305382728577, 'learning_rate': 4.791301444194006e-05, 'epoch': 0.13}
13
- {'loss': 0.6937, 'grad_norm': 0.13599741458892822, 'learning_rate': 4.739126805242508e-05, 'epoch': 0.16}
14
- {'loss': 0.6935, 'grad_norm': 0.13076388835906982, 'learning_rate': 4.6869521662910095e-05, 'epoch': 0.19}
15
- {'loss': 0.6934, 'grad_norm': 0.1778457760810852, 'learning_rate': 4.634777527339511e-05, 'epoch': 0.22}
16
- {'loss': 0.6935, 'grad_norm': 0.4112167954444885, 'learning_rate': 4.582602888388012e-05, 'epoch': 0.25}
17
- {'loss': 0.6934, 'grad_norm': 0.1330016702413559, 'learning_rate': 4.530428249436514e-05, 'epoch': 0.28}
18
- {'loss': 0.6935, 'grad_norm': 0.09426847100257874, 'learning_rate': 4.478253610485016e-05, 'epoch': 0.31}
19
- {'loss': 0.6933, 'grad_norm': 0.3686296343803406, 'learning_rate': 4.426078971533517e-05, 'epoch': 0.34}
20
- {'loss': 0.6933, 'grad_norm': 0.21278153359889984, 'learning_rate': 4.373904332582019e-05, 'epoch': 0.38}
21
- {'loss': 0.6935, 'grad_norm': 0.23074378073215485, 'learning_rate': 4.321834042908423e-05, 'epoch': 0.41}
22
- {'loss': 0.6932, 'grad_norm': 0.5192509293556213, 'learning_rate': 4.269659403956925e-05, 'epoch': 0.44}
23
- {'loss': 0.6932, 'grad_norm': 0.07643919438123703, 'learning_rate': 4.217484765005426e-05, 'epoch': 0.47}
24
- {'loss': 0.6935, 'grad_norm': 0.09435634315013885, 'learning_rate': 4.1653101260539276e-05, 'epoch': 0.5}
25
- {'loss': 0.6932, 'grad_norm': 0.3456329107284546, 'learning_rate': 4.113239836380333e-05, 'epoch': 0.53}
26
- {'loss': 0.6934, 'grad_norm': 0.11689063161611557, 'learning_rate': 4.061065197428834e-05, 'epoch': 0.56}
27
- {'loss': 0.6934, 'grad_norm': 0.25019219517707825, 'learning_rate': 4.0088905584773355e-05, 'epoch': 0.59}
28
- {'loss': 0.6933, 'grad_norm': 0.12248441576957703, 'learning_rate': 3.956715919525837e-05, 'epoch': 0.63}
29
- {'loss': 0.6933, 'grad_norm': 0.11549345403909683, 'learning_rate': 3.9046456298522416e-05, 'epoch': 0.66}
30
- {'loss': 0.6934, 'grad_norm': 0.27383607625961304, 'learning_rate': 3.852470990900743e-05, 'epoch': 0.69}
31
- {'loss': 0.6935, 'grad_norm': 0.21311810612678528, 'learning_rate': 3.800296351949245e-05, 'epoch': 0.72}
32
- {'loss': 0.6933, 'grad_norm': 0.25916823744773865, 'learning_rate': 3.7481217129977466e-05, 'epoch': 0.75}
33
- {'loss': 0.6934, 'grad_norm': 0.13208124041557312, 'learning_rate': 3.6960514233241504e-05, 'epoch': 0.78}
34
- {'loss': 0.6934, 'grad_norm': 0.4182877242565155, 'learning_rate': 3.643876784372652e-05, 'epoch': 0.81}
35
- {'loss': 0.6933, 'grad_norm': 0.19375275075435638, 'learning_rate': 3.5917021454211544e-05, 'epoch': 0.85}
36
- {'loss': 0.6933, 'grad_norm': 0.1647150218486786, 'learning_rate': 3.5395275064696554e-05, 'epoch': 0.88}
37
- {'loss': 0.6933, 'grad_norm': 0.458692729473114, 'learning_rate': 3.48745721679606e-05, 'epoch': 0.91}
38
- {'loss': 0.6933, 'grad_norm': 0.24417555332183838, 'learning_rate': 3.4352825778445616e-05, 'epoch': 0.94}
39
- {'loss': 0.6932, 'grad_norm': 0.10788150876760483, 'learning_rate': 3.383107938893063e-05, 'epoch': 0.97}
40
- File "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py", line 125, in <module>
41
- {'eval_loss': 0.6931192278862, 'eval_accuracy': 0.4992604225635032, 'eval_runtime': 182.4166, 'eval_samples_per_second': 600.395, 'eval_steps_per_second': 75.053, 'epoch': 1.0}
42
- trainer.train()
43
- File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 2052, in train
44
- return inner_training_loop(
45
- File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 2487, in _inner_training_loop
46
- self._maybe_log_save_evaluate(tr_loss, grad_norm, model, trial, epoch, ignore_keys_for_eval)
47
- File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 2918, in _maybe_log_save_evaluate
48
- self._save_checkpoint(model, trial, metrics=metrics)
49
- File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 3008, in _save_checkpoint
50
- self.save_model(output_dir, _internal_call=True)
51
- File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 3623, in save_model
52
- self._save(output_dir)
53
- File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 3721, in _save
54
- safetensors.torch.save_file(
55
- File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/safetensors/torch.py", line 286, in save_file
56
- serialize_file(_flatten(tensors), filename, metadata=metadata)
57
- File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/safetensors/torch.py", line 488, in _flatten
58
- raise RuntimeError(
59
- RuntimeError:
60
- Some tensors share memory, this will lead to duplicate memory on disk and potential differences when loading them again: [{'encoder.encoder.embed_tokens.weight', 'encoder.shared.weight'}].
61
- A potential way to correctly save your model is to use `save_model`.
62
- More information at https://huggingface.co/docs/safetensors/torch_shared_tensors
63
-
64
- Traceback (most recent call last):
65
- File "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py", line 125, in <module>
66
- trainer.train()
67
- File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 2052, in train
68
- return inner_training_loop(
69
- File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 2487, in _inner_training_loop
70
- self._maybe_log_save_evaluate(tr_loss, grad_norm, model, trial, epoch, ignore_keys_for_eval)
71
- File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 2918, in _maybe_log_save_evaluate
72
- self._save_checkpoint(model, trial, metrics=metrics)
73
- File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 3008, in _save_checkpoint
74
- self.save_model(output_dir, _internal_call=True)
75
- File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 3623, in save_model
76
- self._save(output_dir)
77
- File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 3721, in _save
78
- safetensors.torch.save_file(
79
- File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/safetensors/torch.py", line 286, in save_file
80
- serialize_file(_flatten(tensors), filename, metadata=metadata)
81
- File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/safetensors/torch.py", line 488, in _flatten
82
- raise RuntimeError(
83
- RuntimeError:
84
- Some tensors share memory, this will lead to duplicate memory on disk and potential differences when loading them again: [{'encoder.encoder.embed_tokens.weight', 'encoder.shared.weight'}].
85
- A potential way to correctly save your model is to use `save_model`.
86
- More information at https://huggingface.co/docs/safetensors/torch_shared_tensors
87
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250504_132912-1agsw1y8/files/requirements.txt DELETED
@@ -1,541 +0,0 @@
1
- nvidia-cuda-cupti-cu12==12.4.127
2
- nvidia-cuda-nvrtc-cu12==12.4.127
3
- pyg-lib==0.4.0+pt20cu117
4
- biopython==1.85
5
- iniconfig==2.0.0
6
- tokenizers==0.20.0
7
- accelerate==1.3.0
8
- torch==2.6.0
9
- nvidia-nccl-cu12==2.21.5
10
- transformers==4.45.2
11
- nvidia-cusparse-cu12==12.3.1.170
12
- torch-scatter==2.1.2+pt20cu117
13
- nvidia-cusparselt-cu12==0.6.2
14
- nvidia-nvtx-cu12==12.4.127
15
- zstd==1.5.6.6
16
- fair-esm==2.0.0
17
- omegaconf==2.3.0
18
- pluggy==1.5.0
19
- pytest==8.3.5
20
- nvidia-curand-cu12==10.3.5.147
21
- nvidia-cufft-cu12==11.2.1.3
22
- torch-cluster==1.6.3+pt20cu117
23
- regex==2024.9.11
24
- nvidia-cudnn-cu12==9.1.0.70
25
- torch-spline-conv==1.2.2+pt20cu117
26
- nvidia-cusolver-cu12==11.6.1.9
27
- antlr4-python3-runtime==4.9.3
28
- msgpack-numpy==0.4.8
29
- nlp==0.2.0
30
- einops==0.8.1
31
- nvidia-cublas-cu12==12.4.5.8
32
- triton==3.2.0
33
- ninja==1.11.1.3
34
- hydra-core==1.3.2
35
- nvidia-nvjitlink-cu12==12.4.127
36
- biotite==0.41.2
37
- torch-sparse==0.6.18+pt20cu117
38
- esm==3.1.4
39
- sympy==1.13.1
40
- nvidia-cuda-runtime-cu12==12.4.127
41
- jupyter-lsp==2.2.5
42
- jupyter-events==0.10.0
43
- ipykernel==6.29.5
44
- Mako==1.3.5
45
- proto-plus==1.25.0
46
- fst-pso==1.8.1
47
- gensim==4.3.3
48
- htmlmin==0.1.12
49
- tokenizers==0.13.3
50
- timm==1.0.11
51
- MarkupSafe==3.0.2
52
- safetensors==0.4.5
53
- requests==2.32.3
54
- gast==0.5.5
55
- cuml==24.12.0a33
56
- jaxlib==0.4.23.dev20240214
57
- spacy-loggers==1.0.5
58
- pytz==2024.1
59
- idna==3.10
60
- python-dateutil==2.9.0
61
- mdurl==0.1.2
62
- blis==0.7.10
63
- jupyter==1.1.1
64
- pyerfa==2.0.1.5
65
- comm==0.2.2
66
- pygraphviz==1.14
67
- dill==0.3.8
68
- paramiko==3.5.0
69
- llama-index==0.8.36
70
- mdit-py-plugins==0.4.2
71
- Werkzeug==3.1.3
72
- pyu2f==0.1.5
73
- dask-glm==0.2.0
74
- httpx==0.27.2
75
- typeguard==4.4.1
76
- mypy-extensions==1.0.0
77
- kmodes==0.12.2
78
- keras==2.15.0
79
- ydata-profiling==0.0.dev0
80
- regex==2024.11.6
81
- xarray==2024.11.0
82
- setuptools==75.3.0
83
- charset-normalizer==3.4.0
84
- jupyterlab_nvdashboard==0.11.0
85
- pylibraft==24.12.0a36
86
- spacy==3.7.6
87
- mlflow-skinny==2.17.2
88
- nvtx==0.2.10
89
- multimethod==1.12
90
- pexpect==4.9.0
91
- torch==2.1.0.post301
92
- flatbuffers==24.3.25
93
- python-json-logger==2.0.7
94
- PyJWT==2.9.0
95
- multiprocess==0.70.16
96
- colorlover==0.3.0
97
- yarl==1.16.0
98
- locket==1.0.0
99
- patsy==1.0.0
100
- rapids-dask-dependency==24.12.0a0
101
- stanza==1.9.2
102
- debugpy==1.8.8
103
- jupyterlab_pygments==0.3.0
104
- pylibcudf==24.12.0a337
105
- lz4==4.3.3
106
- pandas==2.2.3
107
- tifffile==2024.9.20
108
- pynvml==11.4.1
109
- cufflinks==0.17.3
110
- ipywidgets==8.1.5
111
- requests-oauthlib==2.0.0
112
- google-auth-oauthlib==1.2.1
113
- rsa==4.9
114
- webcolors==24.8.0
115
- jsonschema-specifications==2024.10.1
116
- scikit-learn==1.5.2
117
- langchain-text-splitters==0.3.2
118
- pandas-datareader==0.10.0
119
- tomli==2.0.2
120
- tzdata==2024.2
121
- scikit-image==0.24.0
122
- tensorboard_data_server==0.7.0
123
- kiwisolver==1.4.7
124
- cloudpathlib==0.20.0
125
- isodate==0.6.1
126
- adversarial-robustness-toolbox==1.19.1
127
- SQLAlchemy==2.0.36
128
- pytest-runner==6.0.0
129
- pycairo==1.27.0
130
- treelite==4.3.0
131
- jiter==0.7.0
132
- threadpoolctl==3.5.0
133
- pandocfilters==1.5.0
134
- loguru==0.7.2
135
- smart_open==7.0.5
136
- shellingham==1.5.4
137
- deepspeed==0.15.4
138
- prompt_toolkit==3.0.48
139
- databricks-sdk==0.34.0
140
- langchain-core==0.3.15
141
- imageio==2.36.0
142
- openapi-schema-pydantic==1.2.4
143
- zict==3.0.0
144
- cachetools==5.5.0
145
- colorful==0.5.6
146
- mpmath==1.3.0
147
- nest_asyncio==1.6.0
148
- pyFUME==0.2.25
149
- opencv-python-headless==4.9.0
150
- fastai==2.7.18
151
- importlib_resources==6.4.5
152
- binaryornot==0.4.4
153
- evaluate==0.4.1
154
- matplotlib-inline==0.1.7
155
- wasabi==1.1.2
156
- pycparser==2.22
157
- GitPython==3.1.43
158
- pluggy==1.5.0
159
- async-lru==2.0.4
160
- pgmpy==0.1.24
161
- anyio==4.4.0
162
- executing==2.1.0
163
- orjson==3.10.11
164
- humanfriendly==10.0
165
- tornado==6.4.1
166
- gmpy2==2.1.5
167
- rlPyCairo==0.2.0
168
- distributed==2024.11.0
169
- FuzzyTM==2.0.5
170
- torchtext==0.15.2a0+5ce3163
171
- pytest==8.3.5
172
- pyod==2.0.2
173
- ImageHash==4.3.1
174
- soupsieve==2.5
175
- tblib==3.0.0
176
- emoji==2.14.0
177
- aiohappyeyeballs==2.4.3
178
- uri-template==1.3.0
179
- tensorflow_estimator==2.15.0
180
- babel==2.16.0
181
- dask-cuda==24.12.0a12
182
- overrides==7.7.0
183
- opencensus==0.11.3
184
- openai==0.28.1
185
- language_data==1.2.0
186
- jedi==0.19.2
187
- cookiecutter==2.6.0
188
- entrypoints==0.4
189
- exceptiongroup==1.2.2
190
- marisa-trie==1.2.0
191
- uvloop==0.20.0
192
- aiosignal==1.3.1
193
- Flask==3.0.3
194
- tensorboard==2.15.2
195
- cffi==1.17.1
196
- tf_keras==2.15.0
197
- absl-py==2.1.0
198
- blinker==1.9.0
199
- types-python-dateutil==2.9.0.20241003
200
- opencv-python==4.9.0
201
- frozendict==2.4.6
202
- aiohttp-cors==0.7.0
203
- statsmodels==0.14.4
204
- tinycss2==1.4.0
205
- terminado==0.18.1
206
- pycaret==2.2.3
207
- aiohttp==3.10.10
208
- distributed-ucxx==0.41.0
209
- prometheus_client==0.21.0
210
- fastdownload==0.0.7
211
- grpcio==1.59.3
212
- google-api-core==2.22.0
213
- jupyterlab_widgets==3.0.13
214
- appdirs==1.4.4
215
- littleutils==0.0.0
216
- ray==2.24.0
217
- kaggle==1.6.17
218
- jsonschema==4.23.0
219
- google-auth==2.36.0
220
- scikit-base==0.11.0
221
- visions==0.7.6
222
- pyarrow==15.0.0
223
- transformers==4.33.0
224
- prometheus_flask_exporter==0.23.1
225
- dm-tree==0.1.8
226
- colorama==0.4.6
227
- requests-toolbelt==1.0.0
228
- cached-property==1.5.2
229
- cymem==2.0.8
230
- PyNaCl==1.5.0
231
- PyWavelets==1.7.0
232
- httptools==0.6.1
233
- typing-utils==0.1.0
234
- email_validator==2.2.0
235
- marshmallow==3.23.1
236
- Deprecated==1.2.14
237
- virtualenv==20.4.7
238
- optuna==3.6.1
239
- jupyter_server==2.14.2
240
- termcolor==2.5.0
241
- mpi4py==4.0.1
242
- torchdata==0.7.1+8cea82f
243
- dataclasses==0.8
244
- cloudpickle==3.1.0
245
- tree_sitter_languages==1.10.2
246
- tabulate==0.9.0
247
- ipython==8.29.0
248
- lightgbm==4.3.0
249
- captum==0.6.0
250
- confuse==2.0.1
251
- torchvision==0.16.1+adc3221
252
- lxml==4.9.4
253
- fastapi==0.115.4
254
- python-multipart==0.0.17
255
- dnspython==2.7.0
256
- jupyter-console==6.6.3
257
- preshed==3.0.9
258
- py-cpuinfo==9.0.0
259
- Send2Trash==1.8.3
260
- murmurhash==1.0.10
261
- sniffio==1.3.1
262
- websockets==13.1
263
- h11==0.14.0
264
- smmap==5.0.0
265
- textual==0.85.2
266
- jsonpatch==1.33
267
- opencensus-context==0.1.3
268
- nbconvert==7.16.4
269
- sentry-sdk==2.19.0
270
- opentelemetry-semantic-conventions==0.37b0
271
- pandas-profiling==2.8.0
272
- pillow==10.3.0
273
- peft==0.13.2
274
- rpds-py==0.21.0
275
- bokeh==3.6.1
276
- distro==1.9.0
277
- itsdangerous==2.2.0
278
- wandb==0.18.7
279
- jsonpointer==3.0.0
280
- astropy-iers-data==0.2024.11.11.0.32.38
281
- horovod==0.28.1
282
- graphviz==0.20.3
283
- vtk==9.3.1
284
- bleach==6.2.0
285
- numexpr==2.8.7
286
- pydantic_core==2.23.4
287
- Jinja2==3.1.4
288
- widgetsnbextension==4.0.13
289
- filelock==3.16.1
290
- catboost==1.2.7
291
- raft-dask==24.12.0a36
292
- async-timeout==4.0.3
293
- datefinder==0.7.3
294
- coloredlogs==15.0.1
295
- platformdirs==4.3.6
296
- spacy-legacy==3.0.12
297
- chardet==5.2.0
298
- jupyter_client==8.6.3
299
- importlib_metadata==8.5.0
300
- rfc3986-validator==0.1.1
301
- huggingface_hub==0.26.2
302
- PySocks==1.7.1
303
- mlxtend==0.23.2
304
- outdated==0.2.2
305
- partd==1.4.2
306
- thinc==8.2.5
307
- astropy==6.1.6
308
- rdflib==6.3.2
309
- h2==4.1.0
310
- typer==0.13.0
311
- xyzservices==2024.9.0
312
- toolz==0.12.1
313
- frozenlist==1.5.0
314
- rdkit==2024.9.2
315
- pyasn1==0.6.1
316
- jupyter_server_terminals==0.5.3
317
- ucx-py==0.41.0a11
318
- astunparse==1.6.3
319
- simpful==2.12.0
320
- notebook_shim==0.2.4
321
- scipy==1.13.1
322
- colorlog==6.9.0
323
- tiktoken==0.3.3
324
- plotly==5.24.1
325
- fastrlock==0.8.2
326
- chart-studio==1.1.0
327
- stack-data==0.6.2
328
- google-pasta==0.2.0
329
- sktime==0.34.0
330
- PyYAML==6.0.2
331
- sympy==1.13.3
332
- multidict==6.1.0
333
- ml-dtypes==0.2.0
334
- tensorboardX==2.6.2.2
335
- decorator==5.1.1
336
- cytoolz==1.0.0
337
- ase==3.23.0
338
- isoduration==20.11.0
339
- html5lib==1.1
340
- langsmith==0.1.142
341
- future==1.0.0
342
- onnx2torch==1.5.15
343
- multipledispatch==0.6.0
344
- protobuf==4.24.4
345
- ucxx==0.41.0
346
- pandas_flavor==0.6.0
347
- msgpack==1.1.0
348
- pyasn1_modules==0.4.1
349
- imagecodecs==2024.1.1
350
- mlflow==2.17.2
351
- watchfiles==0.24.0
352
- dm-sonnet==2.0.2
353
- langcodes==3.4.1
354
- freetype-py==2.3.0
355
- argon2-cffi-bindings==21.2.0
356
- trimesh==4.5.2
357
- opt_einsum==3.4.0
358
- tenacity==8.5.0
359
- h5py==3.12.1
360
- fastapi-cli==0.0.5
361
- oauthlib==3.2.2
362
- parso==0.8.4
363
- weasel==0.4.1
364
- yfinance==0.2.49
365
- networkx==2.8.8
366
- bitsandbytes==0.44.1
367
- lazy_loader==0.4
368
- querystring_parser==1.2.4
369
- contourpy==1.3.0
370
- unicodedata2==15.1.0
371
- bcrypt==4.2.0
372
- munkres==1.1.4
373
- langchain==0.0.298
374
- hpack==4.0.0
375
- cryptography==43.0.3
376
- umap-learn==0.5.7
377
- arrow==1.3.0
378
- docker==7.1.0
379
- certifi==2025.1.31
380
- fastjsonschema==2.20.0
381
- tensorflow==2.15.0
382
- googleapis-common-protos==1.65.0
383
- iniconfig==2.0.0
384
- Markdown==3.6
385
- llvmlite==0.43.0
386
- wslink==2.3.2
387
- attrs==24.2.0
388
- rich==13.9.4
389
- cupy==13.3.0
390
- uc-micro-py==1.0.3
391
- alembic==1.14.0
392
- joblib==1.4.2
393
- reportlab==4.2.5
394
- miniful==0.0.6
395
- jupyter_core==5.7.2
396
- wheel==0.45.0
397
- phik==0.12.3
398
- mistune==3.0.2
399
- wcwidth==0.2.13
400
- dacite==1.8.1
401
- accelerate==0.22.0
402
- sacremoses==0.0.53
403
- revtok==0.0.3
404
- python-slugify==8.0.4
405
- tangled-up-in-unicode==0.2.0
406
- dask==2024.11.0
407
- markdown-it-py==3.0.0
408
- sentencepiece==0.1.99
409
- beautifulsoup4==4.12.3
410
- six==1.16.0
411
- numba-cuda==0.0.17
412
- argon2-cffi==23.1.0
413
- xxhash==3.5.0
414
- hjson==3.1.0
415
- fonttools==4.54.1
416
- graphql-core==3.2.5
417
- pyparsing==3.2.0
418
- pure_eval==0.2.3
419
- distlib==0.3.9
420
- lightning==2.4.0
421
- wordcloud==0.0.0
422
- catalogue==2.0.10
423
- jax==0.4.27
424
- tree-sitter==0.23.2
425
- notebook==7.2.2
426
- dataclasses-json==0.6.7
427
- propcache==0.2.0
428
- numba==0.60.0
429
- dask-expr==1.1.17
430
- pydantic==2.9.2
431
- gunicorn==22.0.0
432
- missingno==0.5.2
433
- pyOpenSSL==24.2.1
434
- openpyxl==3.1.5
435
- packaging==24.1
436
- python-dotenv==1.0.1
437
- cycler==0.12.1
438
- types-pytz==2024.2.0.20241003
439
- yellowbrick==1.5
440
- referencing==0.35.1
441
- pyLDAvis==3.4.1
442
- lazypredict==0.2.16
443
- fqdn==1.5.1
444
- websocket-client==1.8.0
445
- fastcore==1.7.19
446
- pynvjitlink-cu12==0.3.0
447
- pingouin==0.5.5
448
- numpy==1.26.4
449
- typing-inspect==0.9.0
450
- nltk==3.9.1
451
- onnxruntime==1.19.2
452
- tensorflow-probability==0.23.0
453
- datasets==3.0.2
454
- pickleshare==0.7.5
455
- peewee==3.17.7
456
- torch-geometric==2.6.1
457
- ptyprocess==0.7.0
458
- greenlet==3.1.1
459
- graphql-relay==3.2.0
460
- graphene==3.4.3
461
- et_xmlfile==2.0.0
462
- webencodings==0.5.1
463
- hyperframe==6.0.1
464
- multitasking==0.0.9
465
- typer-slim==0.13.0
466
- onnx==1.15.0
467
- uvicorn==0.32.0
468
- memray==1.13.4
469
- xgboost==2.1.2
470
- Brotli==1.1.0
471
- zipp==3.21.0
472
- nbformat==5.10.4
473
- responses==0.18.0
474
- funcy==2.0
475
- Pygments==2.18.0
476
- tqdm==4.67.0
477
- linkify-it-py==2.0.3
478
- srsly==2.4.8
479
- cuda-python==12.6.0
480
- lightning-utilities==0.11.8
481
- cudf==24.12.0a337
482
- dask-ml==2024.4.4
483
- docker-pycreds==0.4.0
484
- pkgutil_resolve_name==1.3.10
485
- opentelemetry-api==1.16.0
486
- fsspec==2024.9.0
487
- nbclient==0.10.0
488
- psutil==5.9.8
489
- pytorch-lightning==2.4.0
490
- sortedcontainers==2.4.0
491
- matplotlib==3.9.2
492
- defusedxml==0.7.1
493
- urllib3==1.26.19
494
- jupyterlab_server==2.27.3
495
- retrying==1.3.3
496
- dask-cudf==24.12.0a337
497
- sqlparse==0.5.1
498
- text-unidecode==1.3
499
- seaborn==0.13.2
500
- typing_extensions==4.12.2
501
- pyzmq==26.2.0
502
- rfc3339-validator==0.1.4
503
- pynndescent==0.5.13
504
- pip==24.3.1
505
- confection==0.1.4
506
- wrapt==1.14.1
507
- fastprogress==1.0.3
508
- traitlets==5.14.3
509
- asttokens==2.4.1
510
- json5==0.9.28
511
- pandas-stubs==2.2.3.241126
512
- torchmetrics==1.2.1
513
- gitdb==4.0.11
514
- annotated-types==0.7.0
515
- ipython-autotime==0.1
516
- httpcore==1.0.6
517
- click==8.1.7
518
- setproctitle==1.3.3
519
- starlette==0.41.2
520
- jupyterlab==4.2.5
521
- rmm==24.12.0a27
522
- opentelemetry-sdk==1.16.0
523
- textblob==0.15.3
524
- imbalanced-learn==0.12.4
525
- typeguard==4.3.0
526
- more-itertools==10.3.0
527
- zipp==3.19.2
528
- autocommand==2.2.2
529
- jaraco.context==5.3.0
530
- packaging==24.1
531
- importlib_metadata==8.0.0
532
- platformdirs==4.2.2
533
- jaraco.functools==4.0.1
534
- importlib_resources==6.4.0
535
- tomli==2.0.1
536
- jaraco.text==3.12.1
537
- wheel==0.43.0
538
- jaraco.collections==5.1.0
539
- typing_extensions==4.12.2
540
- inflect==7.3.1
541
- backports.tarfile==1.2.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250504_132912-1agsw1y8/files/wandb-metadata.json DELETED
@@ -1,77 +0,0 @@
1
- {
2
- "os": "Linux-5.14.0-427.13.1.el9_4.x86_64-x86_64-with-glibc2.34",
3
- "python": "3.10.15",
4
- "startedAt": "2025-05-04T10:29:13.019628Z",
5
- "program": "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py",
6
- "codePath": "finetuning_bc_prott5.py",
7
- "email": "zeynep.isik1@sabanciuniv.edu",
8
- "root": "/arf/scratch/zisik/prott5_bc_ft",
9
- "host": "kolyoz1",
10
- "username": "zisik",
11
- "executable": "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/bin/python3",
12
- "codePathLocal": "finetuning_bc_prott5.py",
13
- "cpu_count": 64,
14
- "cpu_count_logical": 64,
15
- "gpu": "NVIDIA H100 80GB HBM3",
16
- "gpu_count": 1,
17
- "disk": {
18
- "/": {
19
- "total": "7643995308032",
20
- "used": "274768302080"
21
- }
22
- },
23
- "memory": {
24
- "total": "1081373220864"
25
- },
26
- "cpu": {
27
- "count": 64,
28
- "countLogical": 64
29
- },
30
- "gpu_nvidia": [
31
- {
32
- "name": "NVIDIA H100 80GB HBM3",
33
- "memoryTotal": "85520809984",
34
- "cudaCores": 16896,
35
- "architecture": "Hopper"
36
- }
37
- ],
38
- "slurm": {
39
- "cluster_name": "cuda",
40
- "conf": "/etc/slurm/slurm.conf",
41
- "cpus_on_node": "16",
42
- "cpus_per_task": "16",
43
- "gpus_on_node": "1",
44
- "gtids": "0",
45
- "job_account": "tbag154",
46
- "job_cpus_per_node": "16",
47
- "job_end_time": "1746613727",
48
- "job_gid": "11636",
49
- "job_gpus": "1",
50
- "job_id": "1027934",
51
- "job_name": "msa_ph_pt",
52
- "job_nodelist": "kolyoz1",
53
- "job_num_nodes": "1",
54
- "job_partition": "kolyoz-cuda",
55
- "job_qos": "tbag",
56
- "job_start_time": "1746354527",
57
- "job_uid": "11636",
58
- "job_user": "zisik",
59
- "jobid": "1027934",
60
- "localid": "0",
61
- "mem_per_cpu": "14000",
62
- "nnodes": "1",
63
- "node_aliases": "(null)",
64
- "nodeid": "0",
65
- "nodelist": "kolyoz1",
66
- "prio_process": "0",
67
- "procid": "0",
68
- "submit_dir": "/arf/scratch/zisik",
69
- "submit_host": "cuda-ui",
70
- "task_pid": "3157550",
71
- "tasks_per_node": "1",
72
- "topology_addr": "kolyoz1",
73
- "topology_addr_pattern": "node",
74
- "working_cluster": "cuda:slurmcontroller3.ib:6800:9984:109"
75
- },
76
- "cudaVersion": "12.6"
77
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250504_132912-1agsw1y8/files/wandb-summary.json DELETED
@@ -1 +0,0 @@
1
- {"train/learning_rate":3.383107938893063e-05,"train/global_step":15972,"eval/steps_per_second":75.053,"_timestamp":1.7463635035359182e+09,"eval/accuracy":0.4992604225635032,"_step":31,"eval/loss":0.6931192278862,"train/grad_norm":0.10788150876760483,"train/epoch":1,"_wandb":{"runtime":8950},"_runtime":8950.516897928,"train/loss":0.6932,"eval/runtime":182.4166,"eval/samples_per_second":600.395}
 
 
wandb/run-20250504_132912-1agsw1y8/logs/debug-core.log DELETED
@@ -1,14 +0,0 @@
1
- {"time":"2025-05-04T13:29:12.35887463+03:00","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmp1u83hfoi/port-3157577.txt","pid":3157577,"debug":false,"disable-analytics":false}
2
- {"time":"2025-05-04T13:29:12.358923345+03:00","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false}
3
- {"time":"2025-05-04T13:29:12.35977753+03:00","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":45947,"Zone":""}}
4
- {"time":"2025-05-04T13:29:12.359879073+03:00","level":"INFO","msg":"Will exit if parent process dies.","ppid":3157577}
5
- {"time":"2025-05-04T13:29:12.546636547+03:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:34718"}
6
- {"time":"2025-05-04T13:29:13.02161239+03:00","level":"INFO","msg":"handleInformInit: received","streamId":"1agsw1y8","id":"127.0.0.1:34718"}
7
- {"time":"2025-05-04T13:29:13.145638422+03:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"1agsw1y8","id":"127.0.0.1:34718"}
8
- {"time":"2025-05-04T15:58:23.607250248+03:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:34718"}
9
- {"time":"2025-05-04T15:58:23.607435128+03:00","level":"INFO","msg":"server is shutting down"}
10
- {"time":"2025-05-04T15:58:23.607401252+03:00","level":"INFO","msg":"connection: Close: initiating connection closure","id":"127.0.0.1:34718"}
11
- {"time":"2025-05-04T15:58:23.607720003+03:00","level":"INFO","msg":"connection: Close: connection successfully closed","id":"127.0.0.1:34718"}
12
- {"time":"2025-05-04T15:58:24.801882716+03:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:34718"}
13
- {"time":"2025-05-04T15:58:24.801915389+03:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:34718"}
14
- {"time":"2025-05-04T15:58:24.801937893+03:00","level":"INFO","msg":"server is closed"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250504_132912-1agsw1y8/logs/debug-internal.log DELETED
@@ -1,19 +0,0 @@
1
- {"time":"2025-05-04T13:29:13.023253759+03:00","level":"INFO","msg":"using version","core version":"0.18.7"}
2
- {"time":"2025-05-04T13:29:13.023302807+03:00","level":"INFO","msg":"created symlink","path":"/arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_132912-1agsw1y8/logs/debug-core.log"}
3
- {"time":"2025-05-04T13:29:13.145570529+03:00","level":"INFO","msg":"created new stream","id":"1agsw1y8"}
4
- {"time":"2025-05-04T13:29:13.145625833+03:00","level":"INFO","msg":"stream: started","id":"1agsw1y8"}
5
- {"time":"2025-05-04T13:29:13.145806528+03:00","level":"INFO","msg":"writer: Do: started","stream_id":"1agsw1y8"}
6
- {"time":"2025-05-04T13:29:13.145923955+03:00","level":"INFO","msg":"handler: started","stream_id":"1agsw1y8"}
7
- {"time":"2025-05-04T13:29:13.146011145+03:00","level":"INFO","msg":"sender: started","stream_id":"1agsw1y8"}
8
- {"time":"2025-05-04T13:29:13.51656923+03:00","level":"INFO","msg":"Starting system monitor"}
9
- {"time":"2025-05-04T15:58:23.607363166+03:00","level":"INFO","msg":"stream: closing","id":"1agsw1y8"}
10
- {"time":"2025-05-04T15:58:23.607412721+03:00","level":"INFO","msg":"Stopping system monitor"}
11
- {"time":"2025-05-04T15:58:23.608736938+03:00","level":"INFO","msg":"Stopped system monitor"}
12
- {"time":"2025-05-04T15:58:23.995834762+03:00","level":"WARN","msg":"No job ingredients found, not creating job artifact"}
13
- {"time":"2025-05-04T15:58:23.995863601+03:00","level":"WARN","msg":"No source type found, not creating job artifact"}
14
- {"time":"2025-05-04T15:58:23.995874256+03:00","level":"INFO","msg":"sender: sendDefer: no job artifact to save"}
15
- {"time":"2025-05-04T15:58:24.53730388+03:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
16
- {"time":"2025-05-04T15:58:24.801427373+03:00","level":"INFO","msg":"handler: closed","stream_id":"1agsw1y8"}
17
- {"time":"2025-05-04T15:58:24.801476891+03:00","level":"INFO","msg":"writer: Close: closed","stream_id":"1agsw1y8"}
18
- {"time":"2025-05-04T15:58:24.801525233+03:00","level":"INFO","msg":"sender: closed","stream_id":"1agsw1y8"}
19
- {"time":"2025-05-04T15:58:24.801589463+03:00","level":"INFO","msg":"stream: closed","id":"1agsw1y8"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250504_132912-1agsw1y8/logs/debug.log DELETED
@@ -1,27 +0,0 @@
1
- 2025-05-04 13:29:13,013 INFO MainThread:3157577 [wandb_setup.py:_flush():79] Current SDK version is 0.18.7
2
- 2025-05-04 13:29:13,013 INFO MainThread:3157577 [wandb_setup.py:_flush():79] Configure stats pid to 3157577
3
- 2025-05-04 13:29:13,013 INFO MainThread:3157577 [wandb_setup.py:_flush():79] Loading settings from /arf/home/zisik/.config/wandb/settings
4
- 2025-05-04 13:29:13,013 INFO MainThread:3157577 [wandb_setup.py:_flush():79] Loading settings from /arf/scratch/zisik/prott5_bc_ft/wandb/settings
5
- 2025-05-04 13:29:13,013 INFO MainThread:3157577 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
6
- 2025-05-04 13:29:13,013 INFO MainThread:3157577 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'finetuning_bc_prott5.py', 'program_abspath': '/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py', 'program': '/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py'}
7
- 2025-05-04 13:29:13,013 INFO MainThread:3157577 [wandb_setup.py:_flush():79] Applying login settings: {}
8
- 2025-05-04 13:29:13,013 INFO MainThread:3157577 [wandb_setup.py:_flush():79] Applying login settings: {}
9
- 2025-05-04 13:29:13,013 INFO MainThread:3157577 [wandb_init.py:_log_setup():533] Logging user logs to /arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_132912-1agsw1y8/logs/debug.log
10
- 2025-05-04 13:29:13,014 INFO MainThread:3157577 [wandb_init.py:_log_setup():534] Logging internal logs to /arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_132912-1agsw1y8/logs/debug-internal.log
11
- 2025-05-04 13:29:13,014 INFO MainThread:3157577 [wandb_init.py:init():619] calling init triggers
12
- 2025-05-04 13:29:13,014 INFO MainThread:3157577 [wandb_init.py:init():626] wandb.init called with sweep_config: {}
13
- config: {}
14
- 2025-05-04 13:29:13,014 INFO MainThread:3157577 [wandb_init.py:init():669] starting backend
15
- 2025-05-04 13:29:13,014 INFO MainThread:3157577 [wandb_init.py:init():673] sending inform_init request
16
- 2025-05-04 13:29:13,018 INFO MainThread:3157577 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
17
- 2025-05-04 13:29:13,019 INFO MainThread:3157577 [wandb_init.py:init():686] backend started and connected
18
- 2025-05-04 13:29:13,026 INFO MainThread:3157577 [wandb_init.py:init():781] updated telemetry
19
- 2025-05-04 13:29:13,030 INFO MainThread:3157577 [wandb_init.py:init():814] communicating run to backend with 90.0 second timeout
20
- 2025-05-04 13:29:13,503 INFO MainThread:3157577 [wandb_init.py:init():867] starting run threads in backend
21
- 2025-05-04 13:29:14,946 INFO MainThread:3157577 [wandb_run.py:_console_start():2456] atexit reg
22
- 2025-05-04 13:29:14,946 INFO MainThread:3157577 [wandb_run.py:_redirect():2305] redirect: wrap_raw
23
- 2025-05-04 13:29:14,946 INFO MainThread:3157577 [wandb_run.py:_redirect():2370] Wrapping output streams.
24
- 2025-05-04 13:29:14,946 INFO MainThread:3157577 [wandb_run.py:_redirect():2395] Redirects installed.
25
- 2025-05-04 13:29:14,954 INFO MainThread:3157577 [wandb_init.py:init():911] run started, returning control to user process
26
- 2025-05-04 13:33:19,417 INFO MainThread:3157577 [wandb_run.py:_config_callback():1387] config_cb None None {'output_dir': 't5-bc-out', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 't5-bc-out/runs/May04_13-33-08_kolyoz1', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 't5-bc-out', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'loss', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'epoch', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False}
27
- 2025-05-04 15:58:23,607 WARNING MsgRouterThr:3157577 [router.py:message_loop():75] message_loop has been closed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250504_132912-1agsw1y8/run-1agsw1y8.wandb DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:71cf2569d2e508f45833ce35b1904bcc5325f9369eef0a76ea074fad88d8621d
3
- size 5615901
 
 
 
 
wandb/run-20250504_160615-f65jh2lv/files/output.log DELETED
@@ -1,8 +0,0 @@
1
- You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
2
- Map: 100%|██████████| 511104/511104 [00:20<00:00, 25304.42 examples/s]
3
- Map: 100%|██████████| 109522/109522 [00:02<00:00, 36704.44 examples/s]
4
- /arf/home/zisik/.local/lib/python3.10/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead
5
- warnings.warn(
6
- [2025-05-04 16:06:52,248] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect)
7
- wandb: WARNING The `run_name` is currently set to the same value as `TrainingArguments.output_dir`. If this was not intended, please specify a different run name by setting the `TrainingArguments.run_name` parameter.
8
- 1%| | 246/47916 [02:12<7:08:44, 1.85it/s]
 
 
 
 
 
 
 
 
 
wandb/run-20250504_160615-f65jh2lv/files/requirements.txt DELETED
@@ -1,541 +0,0 @@
1
- nvidia-cuda-cupti-cu12==12.4.127
2
- nvidia-cuda-nvrtc-cu12==12.4.127
3
- pyg-lib==0.4.0+pt20cu117
4
- biopython==1.85
5
- iniconfig==2.0.0
6
- tokenizers==0.20.0
7
- accelerate==1.3.0
8
- torch==2.6.0
9
- nvidia-nccl-cu12==2.21.5
10
- transformers==4.45.2
11
- nvidia-cusparse-cu12==12.3.1.170
12
- torch-scatter==2.1.2+pt20cu117
13
- nvidia-cusparselt-cu12==0.6.2
14
- nvidia-nvtx-cu12==12.4.127
15
- zstd==1.5.6.6
16
- fair-esm==2.0.0
17
- omegaconf==2.3.0
18
- pluggy==1.5.0
19
- pytest==8.3.5
20
- nvidia-curand-cu12==10.3.5.147
21
- nvidia-cufft-cu12==11.2.1.3
22
- torch-cluster==1.6.3+pt20cu117
23
- regex==2024.9.11
24
- nvidia-cudnn-cu12==9.1.0.70
25
- torch-spline-conv==1.2.2+pt20cu117
26
- nvidia-cusolver-cu12==11.6.1.9
27
- antlr4-python3-runtime==4.9.3
28
- msgpack-numpy==0.4.8
29
- nlp==0.2.0
30
- einops==0.8.1
31
- nvidia-cublas-cu12==12.4.5.8
32
- triton==3.2.0
33
- ninja==1.11.1.3
34
- hydra-core==1.3.2
35
- nvidia-nvjitlink-cu12==12.4.127
36
- biotite==0.41.2
37
- torch-sparse==0.6.18+pt20cu117
38
- esm==3.1.4
39
- sympy==1.13.1
40
- nvidia-cuda-runtime-cu12==12.4.127
41
- jupyter-lsp==2.2.5
42
- jupyter-events==0.10.0
43
- ipykernel==6.29.5
44
- Mako==1.3.5
45
- proto-plus==1.25.0
46
- fst-pso==1.8.1
47
- gensim==4.3.3
48
- htmlmin==0.1.12
49
- tokenizers==0.13.3
50
- timm==1.0.11
51
- MarkupSafe==3.0.2
52
- safetensors==0.4.5
53
- requests==2.32.3
54
- gast==0.5.5
55
- cuml==24.12.0a33
56
- jaxlib==0.4.23.dev20240214
57
- spacy-loggers==1.0.5
58
- pytz==2024.1
59
- idna==3.10
60
- python-dateutil==2.9.0
61
- mdurl==0.1.2
62
- blis==0.7.10
63
- jupyter==1.1.1
64
- pyerfa==2.0.1.5
65
- comm==0.2.2
66
- pygraphviz==1.14
67
- dill==0.3.8
68
- paramiko==3.5.0
69
- llama-index==0.8.36
70
- mdit-py-plugins==0.4.2
71
- Werkzeug==3.1.3
72
- pyu2f==0.1.5
73
- dask-glm==0.2.0
74
- httpx==0.27.2
75
- typeguard==4.4.1
76
- mypy-extensions==1.0.0
77
- kmodes==0.12.2
78
- keras==2.15.0
79
- ydata-profiling==0.0.dev0
80
- regex==2024.11.6
81
- xarray==2024.11.0
82
- setuptools==75.3.0
83
- charset-normalizer==3.4.0
84
- jupyterlab_nvdashboard==0.11.0
85
- pylibraft==24.12.0a36
86
- spacy==3.7.6
87
- mlflow-skinny==2.17.2
88
- nvtx==0.2.10
89
- multimethod==1.12
90
- pexpect==4.9.0
91
- torch==2.1.0.post301
92
- flatbuffers==24.3.25
93
- python-json-logger==2.0.7
94
- PyJWT==2.9.0
95
- multiprocess==0.70.16
96
- colorlover==0.3.0
97
- yarl==1.16.0
98
- locket==1.0.0
99
- patsy==1.0.0
100
- rapids-dask-dependency==24.12.0a0
101
- stanza==1.9.2
102
- debugpy==1.8.8
103
- jupyterlab_pygments==0.3.0
104
- pylibcudf==24.12.0a337
105
- lz4==4.3.3
106
- pandas==2.2.3
107
- tifffile==2024.9.20
108
- pynvml==11.4.1
109
- cufflinks==0.17.3
110
- ipywidgets==8.1.5
111
- requests-oauthlib==2.0.0
112
- google-auth-oauthlib==1.2.1
113
- rsa==4.9
114
- webcolors==24.8.0
115
- jsonschema-specifications==2024.10.1
116
- scikit-learn==1.5.2
117
- langchain-text-splitters==0.3.2
118
- pandas-datareader==0.10.0
119
- tomli==2.0.2
120
- tzdata==2024.2
121
- scikit-image==0.24.0
122
- tensorboard_data_server==0.7.0
123
- kiwisolver==1.4.7
124
- cloudpathlib==0.20.0
125
- isodate==0.6.1
126
- adversarial-robustness-toolbox==1.19.1
127
- SQLAlchemy==2.0.36
128
- pytest-runner==6.0.0
129
- pycairo==1.27.0
130
- treelite==4.3.0
131
- jiter==0.7.0
132
- threadpoolctl==3.5.0
133
- pandocfilters==1.5.0
134
- loguru==0.7.2
135
- smart_open==7.0.5
136
- shellingham==1.5.4
137
- deepspeed==0.15.4
138
- prompt_toolkit==3.0.48
139
- databricks-sdk==0.34.0
140
- langchain-core==0.3.15
141
- imageio==2.36.0
142
- openapi-schema-pydantic==1.2.4
143
- zict==3.0.0
144
- cachetools==5.5.0
145
- colorful==0.5.6
146
- mpmath==1.3.0
147
- nest_asyncio==1.6.0
148
- pyFUME==0.2.25
149
- opencv-python-headless==4.9.0
150
- fastai==2.7.18
151
- importlib_resources==6.4.5
152
- binaryornot==0.4.4
153
- evaluate==0.4.1
154
- matplotlib-inline==0.1.7
155
- wasabi==1.1.2
156
- pycparser==2.22
157
- GitPython==3.1.43
158
- pluggy==1.5.0
159
- async-lru==2.0.4
160
- pgmpy==0.1.24
161
- anyio==4.4.0
162
- executing==2.1.0
163
- orjson==3.10.11
164
- humanfriendly==10.0
165
- tornado==6.4.1
166
- gmpy2==2.1.5
167
- rlPyCairo==0.2.0
168
- distributed==2024.11.0
169
- FuzzyTM==2.0.5
170
- torchtext==0.15.2a0+5ce3163
171
- pytest==8.3.5
172
- pyod==2.0.2
173
- ImageHash==4.3.1
174
- soupsieve==2.5
175
- tblib==3.0.0
176
- emoji==2.14.0
177
- aiohappyeyeballs==2.4.3
178
- uri-template==1.3.0
179
- tensorflow_estimator==2.15.0
180
- babel==2.16.0
181
- dask-cuda==24.12.0a12
182
- overrides==7.7.0
183
- opencensus==0.11.3
184
- openai==0.28.1
185
- language_data==1.2.0
186
- jedi==0.19.2
187
- cookiecutter==2.6.0
188
- entrypoints==0.4
189
- exceptiongroup==1.2.2
190
- marisa-trie==1.2.0
191
- uvloop==0.20.0
192
- aiosignal==1.3.1
193
- Flask==3.0.3
194
- tensorboard==2.15.2
195
- cffi==1.17.1
196
- tf_keras==2.15.0
197
- absl-py==2.1.0
198
- blinker==1.9.0
199
- types-python-dateutil==2.9.0.20241003
200
- opencv-python==4.9.0
201
- frozendict==2.4.6
202
- aiohttp-cors==0.7.0
203
- statsmodels==0.14.4
204
- tinycss2==1.4.0
205
- terminado==0.18.1
206
- pycaret==2.2.3
207
- aiohttp==3.10.10
208
- distributed-ucxx==0.41.0
209
- prometheus_client==0.21.0
210
- fastdownload==0.0.7
211
- grpcio==1.59.3
212
- google-api-core==2.22.0
213
- jupyterlab_widgets==3.0.13
214
- appdirs==1.4.4
215
- littleutils==0.0.0
216
- ray==2.24.0
217
- kaggle==1.6.17
218
- jsonschema==4.23.0
219
- google-auth==2.36.0
220
- scikit-base==0.11.0
221
- visions==0.7.6
222
- pyarrow==15.0.0
223
- transformers==4.33.0
224
- prometheus_flask_exporter==0.23.1
225
- dm-tree==0.1.8
226
- colorama==0.4.6
227
- requests-toolbelt==1.0.0
228
- cached-property==1.5.2
229
- cymem==2.0.8
230
- PyNaCl==1.5.0
231
- PyWavelets==1.7.0
232
- httptools==0.6.1
233
- typing-utils==0.1.0
234
- email_validator==2.2.0
235
- marshmallow==3.23.1
236
- Deprecated==1.2.14
237
- virtualenv==20.4.7
238
- optuna==3.6.1
239
- jupyter_server==2.14.2
240
- termcolor==2.5.0
241
- mpi4py==4.0.1
242
- torchdata==0.7.1+8cea82f
243
- dataclasses==0.8
244
- cloudpickle==3.1.0
245
- tree_sitter_languages==1.10.2
246
- tabulate==0.9.0
247
- ipython==8.29.0
248
- lightgbm==4.3.0
249
- captum==0.6.0
250
- confuse==2.0.1
251
- torchvision==0.16.1+adc3221
252
- lxml==4.9.4
253
- fastapi==0.115.4
254
- python-multipart==0.0.17
255
- dnspython==2.7.0
256
- jupyter-console==6.6.3
257
- preshed==3.0.9
258
- py-cpuinfo==9.0.0
259
- Send2Trash==1.8.3
260
- murmurhash==1.0.10
261
- sniffio==1.3.1
262
- websockets==13.1
263
- h11==0.14.0
264
- smmap==5.0.0
265
- textual==0.85.2
266
- jsonpatch==1.33
267
- opencensus-context==0.1.3
268
- nbconvert==7.16.4
269
- sentry-sdk==2.19.0
270
- opentelemetry-semantic-conventions==0.37b0
271
- pandas-profiling==2.8.0
272
- pillow==10.3.0
273
- peft==0.13.2
274
- rpds-py==0.21.0
275
- bokeh==3.6.1
276
- distro==1.9.0
277
- itsdangerous==2.2.0
278
- wandb==0.18.7
279
- jsonpointer==3.0.0
280
- astropy-iers-data==0.2024.11.11.0.32.38
281
- horovod==0.28.1
282
- graphviz==0.20.3
283
- vtk==9.3.1
284
- bleach==6.2.0
285
- numexpr==2.8.7
286
- pydantic_core==2.23.4
287
- Jinja2==3.1.4
288
- widgetsnbextension==4.0.13
289
- filelock==3.16.1
290
- catboost==1.2.7
291
- raft-dask==24.12.0a36
292
- async-timeout==4.0.3
293
- datefinder==0.7.3
294
- coloredlogs==15.0.1
295
- platformdirs==4.3.6
296
- spacy-legacy==3.0.12
297
- chardet==5.2.0
298
- jupyter_client==8.6.3
299
- importlib_metadata==8.5.0
300
- rfc3986-validator==0.1.1
301
- huggingface_hub==0.26.2
302
- PySocks==1.7.1
303
- mlxtend==0.23.2
304
- outdated==0.2.2
305
- partd==1.4.2
306
- thinc==8.2.5
307
- astropy==6.1.6
308
- rdflib==6.3.2
309
- h2==4.1.0
310
- typer==0.13.0
311
- xyzservices==2024.9.0
312
- toolz==0.12.1
313
- frozenlist==1.5.0
314
- rdkit==2024.9.2
315
- pyasn1==0.6.1
316
- jupyter_server_terminals==0.5.3
317
- ucx-py==0.41.0a11
318
- astunparse==1.6.3
319
- simpful==2.12.0
320
- notebook_shim==0.2.4
321
- scipy==1.13.1
322
- colorlog==6.9.0
323
- tiktoken==0.3.3
324
- plotly==5.24.1
325
- fastrlock==0.8.2
326
- chart-studio==1.1.0
327
- stack-data==0.6.2
328
- google-pasta==0.2.0
329
- sktime==0.34.0
330
- PyYAML==6.0.2
331
- sympy==1.13.3
332
- multidict==6.1.0
333
- ml-dtypes==0.2.0
334
- tensorboardX==2.6.2.2
335
- decorator==5.1.1
336
- cytoolz==1.0.0
337
- ase==3.23.0
338
- isoduration==20.11.0
339
- html5lib==1.1
340
- langsmith==0.1.142
341
- future==1.0.0
342
- onnx2torch==1.5.15
343
- multipledispatch==0.6.0
344
- protobuf==4.24.4
345
- ucxx==0.41.0
346
- pandas_flavor==0.6.0
347
- msgpack==1.1.0
348
- pyasn1_modules==0.4.1
349
- imagecodecs==2024.1.1
350
- mlflow==2.17.2
351
- watchfiles==0.24.0
352
- dm-sonnet==2.0.2
353
- langcodes==3.4.1
354
- freetype-py==2.3.0
355
- argon2-cffi-bindings==21.2.0
356
- trimesh==4.5.2
357
- opt_einsum==3.4.0
358
- tenacity==8.5.0
359
- h5py==3.12.1
360
- fastapi-cli==0.0.5
361
- oauthlib==3.2.2
362
- parso==0.8.4
363
- weasel==0.4.1
364
- yfinance==0.2.49
365
- networkx==2.8.8
366
- bitsandbytes==0.44.1
367
- lazy_loader==0.4
368
- querystring_parser==1.2.4
369
- contourpy==1.3.0
370
- unicodedata2==15.1.0
371
- bcrypt==4.2.0
372
- munkres==1.1.4
373
- langchain==0.0.298
374
- hpack==4.0.0
375
- cryptography==43.0.3
376
- umap-learn==0.5.7
377
- arrow==1.3.0
378
- docker==7.1.0
379
- certifi==2025.1.31
380
- fastjsonschema==2.20.0
381
- tensorflow==2.15.0
382
- googleapis-common-protos==1.65.0
383
- iniconfig==2.0.0
384
- Markdown==3.6
385
- llvmlite==0.43.0
386
- wslink==2.3.2
387
- attrs==24.2.0
388
- rich==13.9.4
389
- cupy==13.3.0
390
- uc-micro-py==1.0.3
391
- alembic==1.14.0
392
- joblib==1.4.2
393
- reportlab==4.2.5
394
- miniful==0.0.6
395
- jupyter_core==5.7.2
396
- wheel==0.45.0
397
- phik==0.12.3
398
- mistune==3.0.2
399
- wcwidth==0.2.13
400
- dacite==1.8.1
401
- accelerate==0.22.0
402
- sacremoses==0.0.53
403
- revtok==0.0.3
404
- python-slugify==8.0.4
405
- tangled-up-in-unicode==0.2.0
406
- dask==2024.11.0
407
- markdown-it-py==3.0.0
408
- sentencepiece==0.1.99
409
- beautifulsoup4==4.12.3
410
- six==1.16.0
411
- numba-cuda==0.0.17
412
- argon2-cffi==23.1.0
413
- xxhash==3.5.0
414
- hjson==3.1.0
415
- fonttools==4.54.1
416
- graphql-core==3.2.5
417
- pyparsing==3.2.0
418
- pure_eval==0.2.3
419
- distlib==0.3.9
420
- lightning==2.4.0
421
- wordcloud==0.0.0
422
- catalogue==2.0.10
423
- jax==0.4.27
424
- tree-sitter==0.23.2
425
- notebook==7.2.2
426
- dataclasses-json==0.6.7
427
- propcache==0.2.0
428
- numba==0.60.0
429
- dask-expr==1.1.17
430
- pydantic==2.9.2
431
- gunicorn==22.0.0
432
- missingno==0.5.2
433
- pyOpenSSL==24.2.1
434
- openpyxl==3.1.5
435
- packaging==24.1
436
- python-dotenv==1.0.1
437
- cycler==0.12.1
438
- types-pytz==2024.2.0.20241003
439
- yellowbrick==1.5
440
- referencing==0.35.1
441
- pyLDAvis==3.4.1
442
- lazypredict==0.2.16
443
- fqdn==1.5.1
444
- websocket-client==1.8.0
445
- fastcore==1.7.19
446
- pynvjitlink-cu12==0.3.0
447
- pingouin==0.5.5
448
- numpy==1.26.4
449
- typing-inspect==0.9.0
450
- nltk==3.9.1
451
- onnxruntime==1.19.2
452
- tensorflow-probability==0.23.0
453
- datasets==3.0.2
454
- pickleshare==0.7.5
455
- peewee==3.17.7
456
- torch-geometric==2.6.1
457
- ptyprocess==0.7.0
458
- greenlet==3.1.1
459
- graphql-relay==3.2.0
460
- graphene==3.4.3
461
- et_xmlfile==2.0.0
462
- webencodings==0.5.1
463
- hyperframe==6.0.1
464
- multitasking==0.0.9
465
- typer-slim==0.13.0
466
- onnx==1.15.0
467
- uvicorn==0.32.0
468
- memray==1.13.4
469
- xgboost==2.1.2
470
- Brotli==1.1.0
471
- zipp==3.21.0
472
- nbformat==5.10.4
473
- responses==0.18.0
474
- funcy==2.0
475
- Pygments==2.18.0
476
- tqdm==4.67.0
477
- linkify-it-py==2.0.3
478
- srsly==2.4.8
479
- cuda-python==12.6.0
480
- lightning-utilities==0.11.8
481
- cudf==24.12.0a337
482
- dask-ml==2024.4.4
483
- docker-pycreds==0.4.0
484
- pkgutil_resolve_name==1.3.10
485
- opentelemetry-api==1.16.0
486
- fsspec==2024.9.0
487
- nbclient==0.10.0
488
- psutil==5.9.8
489
- pytorch-lightning==2.4.0
490
- sortedcontainers==2.4.0
491
- matplotlib==3.9.2
492
- defusedxml==0.7.1
493
- urllib3==1.26.19
494
- jupyterlab_server==2.27.3
495
- retrying==1.3.3
496
- dask-cudf==24.12.0a337
497
- sqlparse==0.5.1
498
- text-unidecode==1.3
499
- seaborn==0.13.2
500
- typing_extensions==4.12.2
501
- pyzmq==26.2.0
502
- rfc3339-validator==0.1.4
503
- pynndescent==0.5.13
504
- pip==24.3.1
505
- confection==0.1.4
506
- wrapt==1.14.1
507
- fastprogress==1.0.3
508
- traitlets==5.14.3
509
- asttokens==2.4.1
510
- json5==0.9.28
511
- pandas-stubs==2.2.3.241126
512
- torchmetrics==1.2.1
513
- gitdb==4.0.11
514
- annotated-types==0.7.0
515
- ipython-autotime==0.1
516
- httpcore==1.0.6
517
- click==8.1.7
518
- setproctitle==1.3.3
519
- starlette==0.41.2
520
- jupyterlab==4.2.5
521
- rmm==24.12.0a27
522
- opentelemetry-sdk==1.16.0
523
- textblob==0.15.3
524
- imbalanced-learn==0.12.4
525
- typeguard==4.3.0
526
- more-itertools==10.3.0
527
- zipp==3.19.2
528
- autocommand==2.2.2
529
- jaraco.context==5.3.0
530
- packaging==24.1
531
- importlib_metadata==8.0.0
532
- platformdirs==4.2.2
533
- jaraco.functools==4.0.1
534
- importlib_resources==6.4.0
535
- tomli==2.0.1
536
- jaraco.text==3.12.1
537
- wheel==0.43.0
538
- jaraco.collections==5.1.0
539
- typing_extensions==4.12.2
540
- inflect==7.3.1
541
- backports.tarfile==1.2.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250504_160615-f65jh2lv/files/wandb-metadata.json DELETED
@@ -1,77 +0,0 @@
1
- {
2
- "os": "Linux-5.14.0-427.13.1.el9_4.x86_64-x86_64-with-glibc2.34",
3
- "python": "3.10.15",
4
- "startedAt": "2025-05-04T13:06:15.895027Z",
5
- "program": "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py",
6
- "codePath": "finetuning_bc_prott5.py",
7
- "email": "zeynep.isik1@sabanciuniv.edu",
8
- "root": "/arf/scratch/zisik/prott5_bc_ft",
9
- "host": "kolyoz1",
10
- "username": "zisik",
11
- "executable": "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/bin/python3",
12
- "codePathLocal": "finetuning_bc_prott5.py",
13
- "cpu_count": 64,
14
- "cpu_count_logical": 64,
15
- "gpu": "NVIDIA H100 80GB HBM3",
16
- "gpu_count": 1,
17
- "disk": {
18
- "/": {
19
- "total": "7643995308032",
20
- "used": "274886729728"
21
- }
22
- },
23
- "memory": {
24
- "total": "1081373220864"
25
- },
26
- "cpu": {
27
- "count": 64,
28
- "countLogical": 64
29
- },
30
- "gpu_nvidia": [
31
- {
32
- "name": "NVIDIA H100 80GB HBM3",
33
- "memoryTotal": "85520809984",
34
- "cudaCores": 16896,
35
- "architecture": "Hopper"
36
- }
37
- ],
38
- "slurm": {
39
- "cluster_name": "cuda",
40
- "conf": "/etc/slurm/slurm.conf",
41
- "cpus_on_node": "16",
42
- "cpus_per_task": "16",
43
- "gpus_on_node": "1",
44
- "gtids": "0",
45
- "job_account": "tbag154",
46
- "job_cpus_per_node": "16",
47
- "job_end_time": "1746623147",
48
- "job_gid": "11636",
49
- "job_gpus": "1",
50
- "job_id": "1027945",
51
- "job_name": "msa_ph_pt",
52
- "job_nodelist": "kolyoz1",
53
- "job_num_nodes": "1",
54
- "job_partition": "kolyoz-cuda",
55
- "job_qos": "tbag",
56
- "job_start_time": "1746363947",
57
- "job_uid": "11636",
58
- "job_user": "zisik",
59
- "jobid": "1027945",
60
- "localid": "0",
61
- "mem_per_cpu": "14000",
62
- "nnodes": "1",
63
- "node_aliases": "(null)",
64
- "nodeid": "0",
65
- "nodelist": "kolyoz1",
66
- "prio_process": "0",
67
- "procid": "0",
68
- "submit_dir": "/arf/scratch/zisik",
69
- "submit_host": "cuda-ui",
70
- "task_pid": "3178532",
71
- "tasks_per_node": "1",
72
- "topology_addr": "kolyoz1",
73
- "topology_addr_pattern": "node",
74
- "working_cluster": "cuda:slurmcontroller3.ib:6800:9984:109"
75
- },
76
- "cudaVersion": "12.6"
77
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250504_160615-f65jh2lv/logs/debug-core.log DELETED
@@ -1,7 +0,0 @@
1
- {"time":"2025-05-04T16:06:15.269316376+03:00","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmp6sywt0mb/port-3178556.txt","pid":3178556,"debug":false,"disable-analytics":false}
2
- {"time":"2025-05-04T16:06:15.269366219+03:00","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false}
3
- {"time":"2025-05-04T16:06:15.2702663+03:00","level":"INFO","msg":"Will exit if parent process dies.","ppid":3178556}
4
- {"time":"2025-05-04T16:06:15.270143057+03:00","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":37579,"Zone":""}}
5
- {"time":"2025-05-04T16:06:15.448913658+03:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:49916"}
6
- {"time":"2025-05-04T16:06:15.898453126+03:00","level":"INFO","msg":"handleInformInit: received","streamId":"f65jh2lv","id":"127.0.0.1:49916"}
7
- {"time":"2025-05-04T16:06:16.021719647+03:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"f65jh2lv","id":"127.0.0.1:49916"}
 
 
 
 
 
 
 
 
wandb/run-20250504_160615-f65jh2lv/logs/debug-internal.log DELETED
@@ -1,8 +0,0 @@
1
- {"time":"2025-05-04T16:06:15.899998659+03:00","level":"INFO","msg":"using version","core version":"0.18.7"}
2
- {"time":"2025-05-04T16:06:15.900045512+03:00","level":"INFO","msg":"created symlink","path":"/arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_160615-f65jh2lv/logs/debug-core.log"}
3
- {"time":"2025-05-04T16:06:16.021644692+03:00","level":"INFO","msg":"created new stream","id":"f65jh2lv"}
4
- {"time":"2025-05-04T16:06:16.021706945+03:00","level":"INFO","msg":"stream: started","id":"f65jh2lv"}
5
- {"time":"2025-05-04T16:06:16.021839756+03:00","level":"INFO","msg":"writer: Do: started","stream_id":"f65jh2lv"}
6
- {"time":"2025-05-04T16:06:16.02194891+03:00","level":"INFO","msg":"handler: started","stream_id":"f65jh2lv"}
7
- {"time":"2025-05-04T16:06:16.022034888+03:00","level":"INFO","msg":"sender: started","stream_id":"f65jh2lv"}
8
- {"time":"2025-05-04T16:06:16.421916148+03:00","level":"INFO","msg":"Starting system monitor"}
 
 
 
 
 
 
 
 
 
wandb/run-20250504_160615-f65jh2lv/logs/debug.log DELETED
@@ -1,26 +0,0 @@
1
- 2025-05-04 16:06:15,888 INFO MainThread:3178556 [wandb_setup.py:_flush():79] Current SDK version is 0.18.7
2
- 2025-05-04 16:06:15,888 INFO MainThread:3178556 [wandb_setup.py:_flush():79] Configure stats pid to 3178556
3
- 2025-05-04 16:06:15,888 INFO MainThread:3178556 [wandb_setup.py:_flush():79] Loading settings from /arf/home/zisik/.config/wandb/settings
4
- 2025-05-04 16:06:15,888 INFO MainThread:3178556 [wandb_setup.py:_flush():79] Loading settings from /arf/scratch/zisik/prott5_bc_ft/wandb/settings
5
- 2025-05-04 16:06:15,888 INFO MainThread:3178556 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
6
- 2025-05-04 16:06:15,888 INFO MainThread:3178556 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'finetuning_bc_prott5.py', 'program_abspath': '/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py', 'program': '/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py'}
7
- 2025-05-04 16:06:15,888 INFO MainThread:3178556 [wandb_setup.py:_flush():79] Applying login settings: {}
8
- 2025-05-04 16:06:15,888 INFO MainThread:3178556 [wandb_setup.py:_flush():79] Applying login settings: {}
9
- 2025-05-04 16:06:15,888 INFO MainThread:3178556 [wandb_init.py:_log_setup():533] Logging user logs to /arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_160615-f65jh2lv/logs/debug.log
10
- 2025-05-04 16:06:15,889 INFO MainThread:3178556 [wandb_init.py:_log_setup():534] Logging internal logs to /arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_160615-f65jh2lv/logs/debug-internal.log
11
- 2025-05-04 16:06:15,889 INFO MainThread:3178556 [wandb_init.py:init():619] calling init triggers
12
- 2025-05-04 16:06:15,889 INFO MainThread:3178556 [wandb_init.py:init():626] wandb.init called with sweep_config: {}
13
- config: {}
14
- 2025-05-04 16:06:15,889 INFO MainThread:3178556 [wandb_init.py:init():669] starting backend
15
- 2025-05-04 16:06:15,889 INFO MainThread:3178556 [wandb_init.py:init():673] sending inform_init request
16
- 2025-05-04 16:06:15,893 INFO MainThread:3178556 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
17
- 2025-05-04 16:06:15,894 INFO MainThread:3178556 [wandb_init.py:init():686] backend started and connected
18
- 2025-05-04 16:06:15,902 INFO MainThread:3178556 [wandb_init.py:init():781] updated telemetry
19
- 2025-05-04 16:06:15,905 INFO MainThread:3178556 [wandb_init.py:init():814] communicating run to backend with 90.0 second timeout
20
- 2025-05-04 16:06:16,414 INFO MainThread:3178556 [wandb_init.py:init():867] starting run threads in backend
21
- 2025-05-04 16:06:17,992 INFO MainThread:3178556 [wandb_run.py:_console_start():2456] atexit reg
22
- 2025-05-04 16:06:17,993 INFO MainThread:3178556 [wandb_run.py:_redirect():2305] redirect: wrap_raw
23
- 2025-05-04 16:06:17,993 INFO MainThread:3178556 [wandb_run.py:_redirect():2370] Wrapping output streams.
24
- 2025-05-04 16:06:17,993 INFO MainThread:3178556 [wandb_run.py:_redirect():2395] Redirects installed.
25
- 2025-05-04 16:06:18,004 INFO MainThread:3178556 [wandb_init.py:init():911] run started, returning control to user process
26
- 2025-05-04 16:06:56,772 INFO MainThread:3178556 [wandb_run.py:_config_callback():1387] config_cb None None {'output_dir': 't5-bc-out', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 't5-bc-out/runs/May04_16-06-46_kolyoz1', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': False, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 't5-bc-out', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'loss', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'epoch', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250504_160615-f65jh2lv/run-f65jh2lv.wandb DELETED
Binary file (98.3 kB)
 
wandb/run-20250504_160955-rqk2hbkf/files/config.yaml DELETED
@@ -1,44 +0,0 @@
1
- _wandb:
2
- value:
3
- cli_version: 0.18.7
4
- m: []
5
- python_version: 3.10.15
6
- t:
7
- "1":
8
- - 1
9
- - 2
10
- - 3
11
- - 5
12
- - 11
13
- - 12
14
- - 49
15
- - 51
16
- - 53
17
- - 55
18
- - 71
19
- - 98
20
- - 105
21
- "2":
22
- - 1
23
- - 2
24
- - 3
25
- - 5
26
- - 11
27
- - 12
28
- - 49
29
- - 51
30
- - 53
31
- - 55
32
- - 71
33
- - 98
34
- - 105
35
- "3":
36
- - 23
37
- - 55
38
- "4": 3.10.15
39
- "5": 0.18.7
40
- "6": 4.45.2
41
- "8":
42
- - 5
43
- "12": 0.18.7
44
- "13": linux-x86_64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250504_160955-rqk2hbkf/files/output.log DELETED
@@ -1,24 +0,0 @@
1
- Traceback (most recent call last):
2
- File "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py", line 33, in <module>
3
- X_train, X_temp, y_train, y_temp = train_test_split(prep_texts, labels, test_size=0.30, random_state=42)
4
- File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/sklearn/utils/_param_validation.py", line 213, in wrapper
5
- return func(*args, **kwargs)
6
- File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/sklearn/model_selection/_split.py", line 2782, in train_test_split
7
- arrays = indexable(*arrays)
8
- File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/sklearn/utils/validation.py", line 514, in indexable
9
- check_consistent_length(*result)
10
- File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/sklearn/utils/validation.py", line 457, in check_consistent_length
11
- raise ValueError(
12
- ValueError: Found input variables with inconsistent numbers of samples: [10, 730149]
13
- Traceback (most recent call last):
14
- File "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py", line 33, in <module>
15
- X_train, X_temp, y_train, y_temp = train_test_split(prep_texts, labels, test_size=0.30, random_state=42)
16
- File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/sklearn/utils/_param_validation.py", line 213, in wrapper
17
- return func(*args, **kwargs)
18
- File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/sklearn/model_selection/_split.py", line 2782, in train_test_split
19
- arrays = indexable(*arrays)
20
- File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/sklearn/utils/validation.py", line 514, in indexable
21
- check_consistent_length(*result)
22
- File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/sklearn/utils/validation.py", line 457, in check_consistent_length
23
- raise ValueError(
24
- ValueError: Found input variables with inconsistent numbers of samples: [10, 730149]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250504_160955-rqk2hbkf/files/requirements.txt DELETED
@@ -1,541 +0,0 @@
1
- nvidia-cuda-cupti-cu12==12.4.127
2
- nvidia-cuda-nvrtc-cu12==12.4.127
3
- pyg-lib==0.4.0+pt20cu117
4
- biopython==1.85
5
- iniconfig==2.0.0
6
- tokenizers==0.20.0
7
- accelerate==1.3.0
8
- torch==2.6.0
9
- nvidia-nccl-cu12==2.21.5
10
- transformers==4.45.2
11
- nvidia-cusparse-cu12==12.3.1.170
12
- torch-scatter==2.1.2+pt20cu117
13
- nvidia-cusparselt-cu12==0.6.2
14
- nvidia-nvtx-cu12==12.4.127
15
- zstd==1.5.6.6
16
- fair-esm==2.0.0
17
- omegaconf==2.3.0
18
- pluggy==1.5.0
19
- pytest==8.3.5
20
- nvidia-curand-cu12==10.3.5.147
21
- nvidia-cufft-cu12==11.2.1.3
22
- torch-cluster==1.6.3+pt20cu117
23
- regex==2024.9.11
24
- nvidia-cudnn-cu12==9.1.0.70
25
- torch-spline-conv==1.2.2+pt20cu117
26
- nvidia-cusolver-cu12==11.6.1.9
27
- antlr4-python3-runtime==4.9.3
28
- msgpack-numpy==0.4.8
29
- nlp==0.2.0
30
- einops==0.8.1
31
- nvidia-cublas-cu12==12.4.5.8
32
- triton==3.2.0
33
- ninja==1.11.1.3
34
- hydra-core==1.3.2
35
- nvidia-nvjitlink-cu12==12.4.127
36
- biotite==0.41.2
37
- torch-sparse==0.6.18+pt20cu117
38
- esm==3.1.4
39
- sympy==1.13.1
40
- nvidia-cuda-runtime-cu12==12.4.127
41
- jupyter-lsp==2.2.5
42
- jupyter-events==0.10.0
43
- ipykernel==6.29.5
44
- Mako==1.3.5
45
- proto-plus==1.25.0
46
- fst-pso==1.8.1
47
- gensim==4.3.3
48
- htmlmin==0.1.12
49
- tokenizers==0.13.3
50
- timm==1.0.11
51
- MarkupSafe==3.0.2
52
- safetensors==0.4.5
53
- requests==2.32.3
54
- gast==0.5.5
55
- cuml==24.12.0a33
56
- jaxlib==0.4.23.dev20240214
57
- spacy-loggers==1.0.5
58
- pytz==2024.1
59
- idna==3.10
60
- python-dateutil==2.9.0
61
- mdurl==0.1.2
62
- blis==0.7.10
63
- jupyter==1.1.1
64
- pyerfa==2.0.1.5
65
- comm==0.2.2
66
- pygraphviz==1.14
67
- dill==0.3.8
68
- paramiko==3.5.0
69
- llama-index==0.8.36
70
- mdit-py-plugins==0.4.2
71
- Werkzeug==3.1.3
72
- pyu2f==0.1.5
73
- dask-glm==0.2.0
74
- httpx==0.27.2
75
- typeguard==4.4.1
76
- mypy-extensions==1.0.0
77
- kmodes==0.12.2
78
- keras==2.15.0
79
- ydata-profiling==0.0.dev0
80
- regex==2024.11.6
81
- xarray==2024.11.0
82
- setuptools==75.3.0
83
- charset-normalizer==3.4.0
84
- jupyterlab_nvdashboard==0.11.0
85
- pylibraft==24.12.0a36
86
- spacy==3.7.6
87
- mlflow-skinny==2.17.2
88
- nvtx==0.2.10
89
- multimethod==1.12
90
- pexpect==4.9.0
91
- torch==2.1.0.post301
92
- flatbuffers==24.3.25
93
- python-json-logger==2.0.7
94
- PyJWT==2.9.0
95
- multiprocess==0.70.16
96
- colorlover==0.3.0
97
- yarl==1.16.0
98
- locket==1.0.0
99
- patsy==1.0.0
100
- rapids-dask-dependency==24.12.0a0
101
- stanza==1.9.2
102
- debugpy==1.8.8
103
- jupyterlab_pygments==0.3.0
104
- pylibcudf==24.12.0a337
105
- lz4==4.3.3
106
- pandas==2.2.3
107
- tifffile==2024.9.20
108
- pynvml==11.4.1
109
- cufflinks==0.17.3
110
- ipywidgets==8.1.5
111
- requests-oauthlib==2.0.0
112
- google-auth-oauthlib==1.2.1
113
- rsa==4.9
114
- webcolors==24.8.0
115
- jsonschema-specifications==2024.10.1
116
- scikit-learn==1.5.2
117
- langchain-text-splitters==0.3.2
118
- pandas-datareader==0.10.0
119
- tomli==2.0.2
120
- tzdata==2024.2
121
- scikit-image==0.24.0
122
- tensorboard_data_server==0.7.0
123
- kiwisolver==1.4.7
124
- cloudpathlib==0.20.0
125
- isodate==0.6.1
126
- adversarial-robustness-toolbox==1.19.1
127
- SQLAlchemy==2.0.36
128
- pytest-runner==6.0.0
129
- pycairo==1.27.0
130
- treelite==4.3.0
131
- jiter==0.7.0
132
- threadpoolctl==3.5.0
133
- pandocfilters==1.5.0
134
- loguru==0.7.2
135
- smart_open==7.0.5
136
- shellingham==1.5.4
137
- deepspeed==0.15.4
138
- prompt_toolkit==3.0.48
139
- databricks-sdk==0.34.0
140
- langchain-core==0.3.15
141
- imageio==2.36.0
142
- openapi-schema-pydantic==1.2.4
143
- zict==3.0.0
144
- cachetools==5.5.0
145
- colorful==0.5.6
146
- mpmath==1.3.0
147
- nest_asyncio==1.6.0
148
- pyFUME==0.2.25
149
- opencv-python-headless==4.9.0
150
- fastai==2.7.18
151
- importlib_resources==6.4.5
152
- binaryornot==0.4.4
153
- evaluate==0.4.1
154
- matplotlib-inline==0.1.7
155
- wasabi==1.1.2
156
- pycparser==2.22
157
- GitPython==3.1.43
158
- pluggy==1.5.0
159
- async-lru==2.0.4
160
- pgmpy==0.1.24
161
- anyio==4.4.0
162
- executing==2.1.0
163
- orjson==3.10.11
164
- humanfriendly==10.0
165
- tornado==6.4.1
166
- gmpy2==2.1.5
167
- rlPyCairo==0.2.0
168
- distributed==2024.11.0
169
- FuzzyTM==2.0.5
170
- torchtext==0.15.2a0+5ce3163
171
- pytest==8.3.5
172
- pyod==2.0.2
173
- ImageHash==4.3.1
174
- soupsieve==2.5
175
- tblib==3.0.0
176
- emoji==2.14.0
177
- aiohappyeyeballs==2.4.3
178
- uri-template==1.3.0
179
- tensorflow_estimator==2.15.0
180
- babel==2.16.0
181
- dask-cuda==24.12.0a12
182
- overrides==7.7.0
183
- opencensus==0.11.3
184
- openai==0.28.1
185
- language_data==1.2.0
186
- jedi==0.19.2
187
- cookiecutter==2.6.0
188
- entrypoints==0.4
189
- exceptiongroup==1.2.2
190
- marisa-trie==1.2.0
191
- uvloop==0.20.0
192
- aiosignal==1.3.1
193
- Flask==3.0.3
194
- tensorboard==2.15.2
195
- cffi==1.17.1
196
- tf_keras==2.15.0
197
- absl-py==2.1.0
198
- blinker==1.9.0
199
- types-python-dateutil==2.9.0.20241003
200
- opencv-python==4.9.0
201
- frozendict==2.4.6
202
- aiohttp-cors==0.7.0
203
- statsmodels==0.14.4
204
- tinycss2==1.4.0
205
- terminado==0.18.1
206
- pycaret==2.2.3
207
- aiohttp==3.10.10
208
- distributed-ucxx==0.41.0
209
- prometheus_client==0.21.0
210
- fastdownload==0.0.7
211
- grpcio==1.59.3
212
- google-api-core==2.22.0
213
- jupyterlab_widgets==3.0.13
214
- appdirs==1.4.4
215
- littleutils==0.0.0
216
- ray==2.24.0
217
- kaggle==1.6.17
218
- jsonschema==4.23.0
219
- google-auth==2.36.0
220
- scikit-base==0.11.0
221
- visions==0.7.6
222
- pyarrow==15.0.0
223
- transformers==4.33.0
224
- prometheus_flask_exporter==0.23.1
225
- dm-tree==0.1.8
226
- colorama==0.4.6
227
- requests-toolbelt==1.0.0
228
- cached-property==1.5.2
229
- cymem==2.0.8
230
- PyNaCl==1.5.0
231
- PyWavelets==1.7.0
232
- httptools==0.6.1
233
- typing-utils==0.1.0
234
- email_validator==2.2.0
235
- marshmallow==3.23.1
236
- Deprecated==1.2.14
237
- virtualenv==20.4.7
238
- optuna==3.6.1
239
- jupyter_server==2.14.2
240
- termcolor==2.5.0
241
- mpi4py==4.0.1
242
- torchdata==0.7.1+8cea82f
243
- dataclasses==0.8
244
- cloudpickle==3.1.0
245
- tree_sitter_languages==1.10.2
246
- tabulate==0.9.0
247
- ipython==8.29.0
248
- lightgbm==4.3.0
249
- captum==0.6.0
250
- confuse==2.0.1
251
- torchvision==0.16.1+adc3221
252
- lxml==4.9.4
253
- fastapi==0.115.4
254
- python-multipart==0.0.17
255
- dnspython==2.7.0
256
- jupyter-console==6.6.3
257
- preshed==3.0.9
258
- py-cpuinfo==9.0.0
259
- Send2Trash==1.8.3
260
- murmurhash==1.0.10
261
- sniffio==1.3.1
262
- websockets==13.1
263
- h11==0.14.0
264
- smmap==5.0.0
265
- textual==0.85.2
266
- jsonpatch==1.33
267
- opencensus-context==0.1.3
268
- nbconvert==7.16.4
269
- sentry-sdk==2.19.0
270
- opentelemetry-semantic-conventions==0.37b0
271
- pandas-profiling==2.8.0
272
- pillow==10.3.0
273
- peft==0.13.2
274
- rpds-py==0.21.0
275
- bokeh==3.6.1
276
- distro==1.9.0
277
- itsdangerous==2.2.0
278
- wandb==0.18.7
279
- jsonpointer==3.0.0
280
- astropy-iers-data==0.2024.11.11.0.32.38
281
- horovod==0.28.1
282
- graphviz==0.20.3
283
- vtk==9.3.1
284
- bleach==6.2.0
285
- numexpr==2.8.7
286
- pydantic_core==2.23.4
287
- Jinja2==3.1.4
288
- widgetsnbextension==4.0.13
289
- filelock==3.16.1
290
- catboost==1.2.7
291
- raft-dask==24.12.0a36
292
- async-timeout==4.0.3
293
- datefinder==0.7.3
294
- coloredlogs==15.0.1
295
- platformdirs==4.3.6
296
- spacy-legacy==3.0.12
297
- chardet==5.2.0
298
- jupyter_client==8.6.3
299
- importlib_metadata==8.5.0
300
- rfc3986-validator==0.1.1
301
- huggingface_hub==0.26.2
302
- PySocks==1.7.1
303
- mlxtend==0.23.2
304
- outdated==0.2.2
305
- partd==1.4.2
306
- thinc==8.2.5
307
- astropy==6.1.6
308
- rdflib==6.3.2
309
- h2==4.1.0
310
- typer==0.13.0
311
- xyzservices==2024.9.0
312
- toolz==0.12.1
313
- frozenlist==1.5.0
314
- rdkit==2024.9.2
315
- pyasn1==0.6.1
316
- jupyter_server_terminals==0.5.3
317
- ucx-py==0.41.0a11
318
- astunparse==1.6.3
319
- simpful==2.12.0
320
- notebook_shim==0.2.4
321
- scipy==1.13.1
322
- colorlog==6.9.0
323
- tiktoken==0.3.3
324
- plotly==5.24.1
325
- fastrlock==0.8.2
326
- chart-studio==1.1.0
327
- stack-data==0.6.2
328
- google-pasta==0.2.0
329
- sktime==0.34.0
330
- PyYAML==6.0.2
331
- sympy==1.13.3
332
- multidict==6.1.0
333
- ml-dtypes==0.2.0
334
- tensorboardX==2.6.2.2
335
- decorator==5.1.1
336
- cytoolz==1.0.0
337
- ase==3.23.0
338
- isoduration==20.11.0
339
- html5lib==1.1
340
- langsmith==0.1.142
341
- future==1.0.0
342
- onnx2torch==1.5.15
343
- multipledispatch==0.6.0
344
- protobuf==4.24.4
345
- ucxx==0.41.0
346
- pandas_flavor==0.6.0
347
- msgpack==1.1.0
348
- pyasn1_modules==0.4.1
349
- imagecodecs==2024.1.1
350
- mlflow==2.17.2
351
- watchfiles==0.24.0
352
- dm-sonnet==2.0.2
353
- langcodes==3.4.1
354
- freetype-py==2.3.0
355
- argon2-cffi-bindings==21.2.0
356
- trimesh==4.5.2
357
- opt_einsum==3.4.0
358
- tenacity==8.5.0
359
- h5py==3.12.1
360
- fastapi-cli==0.0.5
361
- oauthlib==3.2.2
362
- parso==0.8.4
363
- weasel==0.4.1
364
- yfinance==0.2.49
365
- networkx==2.8.8
366
- bitsandbytes==0.44.1
367
- lazy_loader==0.4
368
- querystring_parser==1.2.4
369
- contourpy==1.3.0
370
- unicodedata2==15.1.0
371
- bcrypt==4.2.0
372
- munkres==1.1.4
373
- langchain==0.0.298
374
- hpack==4.0.0
375
- cryptography==43.0.3
376
- umap-learn==0.5.7
377
- arrow==1.3.0
378
- docker==7.1.0
379
- certifi==2025.1.31
380
- fastjsonschema==2.20.0
381
- tensorflow==2.15.0
382
- googleapis-common-protos==1.65.0
383
- iniconfig==2.0.0
384
- Markdown==3.6
385
- llvmlite==0.43.0
386
- wslink==2.3.2
387
- attrs==24.2.0
388
- rich==13.9.4
389
- cupy==13.3.0
390
- uc-micro-py==1.0.3
391
- alembic==1.14.0
392
- joblib==1.4.2
393
- reportlab==4.2.5
394
- miniful==0.0.6
395
- jupyter_core==5.7.2
396
- wheel==0.45.0
397
- phik==0.12.3
398
- mistune==3.0.2
399
- wcwidth==0.2.13
400
- dacite==1.8.1
401
- accelerate==0.22.0
402
- sacremoses==0.0.53
403
- revtok==0.0.3
404
- python-slugify==8.0.4
405
- tangled-up-in-unicode==0.2.0
406
- dask==2024.11.0
407
- markdown-it-py==3.0.0
408
- sentencepiece==0.1.99
409
- beautifulsoup4==4.12.3
410
- six==1.16.0
411
- numba-cuda==0.0.17
412
- argon2-cffi==23.1.0
413
- xxhash==3.5.0
414
- hjson==3.1.0
415
- fonttools==4.54.1
416
- graphql-core==3.2.5
417
- pyparsing==3.2.0
418
- pure_eval==0.2.3
419
- distlib==0.3.9
420
- lightning==2.4.0
421
- wordcloud==0.0.0
422
- catalogue==2.0.10
423
- jax==0.4.27
424
- tree-sitter==0.23.2
425
- notebook==7.2.2
426
- dataclasses-json==0.6.7
427
- propcache==0.2.0
428
- numba==0.60.0
429
- dask-expr==1.1.17
430
- pydantic==2.9.2
431
- gunicorn==22.0.0
432
- missingno==0.5.2
433
- pyOpenSSL==24.2.1
434
- openpyxl==3.1.5
435
- packaging==24.1
436
- python-dotenv==1.0.1
437
- cycler==0.12.1
438
- types-pytz==2024.2.0.20241003
439
- yellowbrick==1.5
440
- referencing==0.35.1
441
- pyLDAvis==3.4.1
442
- lazypredict==0.2.16
443
- fqdn==1.5.1
444
- websocket-client==1.8.0
445
- fastcore==1.7.19
446
- pynvjitlink-cu12==0.3.0
447
- pingouin==0.5.5
448
- numpy==1.26.4
449
- typing-inspect==0.9.0
450
- nltk==3.9.1
451
- onnxruntime==1.19.2
452
- tensorflow-probability==0.23.0
453
- datasets==3.0.2
454
- pickleshare==0.7.5
455
- peewee==3.17.7
456
- torch-geometric==2.6.1
457
- ptyprocess==0.7.0
458
- greenlet==3.1.1
459
- graphql-relay==3.2.0
460
- graphene==3.4.3
461
- et_xmlfile==2.0.0
462
- webencodings==0.5.1
463
- hyperframe==6.0.1
464
- multitasking==0.0.9
465
- typer-slim==0.13.0
466
- onnx==1.15.0
467
- uvicorn==0.32.0
468
- memray==1.13.4
469
- xgboost==2.1.2
470
- Brotli==1.1.0
471
- zipp==3.21.0
472
- nbformat==5.10.4
473
- responses==0.18.0
474
- funcy==2.0
475
- Pygments==2.18.0
476
- tqdm==4.67.0
477
- linkify-it-py==2.0.3
478
- srsly==2.4.8
479
- cuda-python==12.6.0
480
- lightning-utilities==0.11.8
481
- cudf==24.12.0a337
482
- dask-ml==2024.4.4
483
- docker-pycreds==0.4.0
484
- pkgutil_resolve_name==1.3.10
485
- opentelemetry-api==1.16.0
486
- fsspec==2024.9.0
487
- nbclient==0.10.0
488
- psutil==5.9.8
489
- pytorch-lightning==2.4.0
490
- sortedcontainers==2.4.0
491
- matplotlib==3.9.2
492
- defusedxml==0.7.1
493
- urllib3==1.26.19
494
- jupyterlab_server==2.27.3
495
- retrying==1.3.3
496
- dask-cudf==24.12.0a337
497
- sqlparse==0.5.1
498
- text-unidecode==1.3
499
- seaborn==0.13.2
500
- typing_extensions==4.12.2
501
- pyzmq==26.2.0
502
- rfc3339-validator==0.1.4
503
- pynndescent==0.5.13
504
- pip==24.3.1
505
- confection==0.1.4
506
- wrapt==1.14.1
507
- fastprogress==1.0.3
508
- traitlets==5.14.3
509
- asttokens==2.4.1
510
- json5==0.9.28
511
- pandas-stubs==2.2.3.241126
512
- torchmetrics==1.2.1
513
- gitdb==4.0.11
514
- annotated-types==0.7.0
515
- ipython-autotime==0.1
516
- httpcore==1.0.6
517
- click==8.1.7
518
- setproctitle==1.3.3
519
- starlette==0.41.2
520
- jupyterlab==4.2.5
521
- rmm==24.12.0a27
522
- opentelemetry-sdk==1.16.0
523
- textblob==0.15.3
524
- imbalanced-learn==0.12.4
525
- typeguard==4.3.0
526
- more-itertools==10.3.0
527
- zipp==3.19.2
528
- autocommand==2.2.2
529
- jaraco.context==5.3.0
530
- packaging==24.1
531
- importlib_metadata==8.0.0
532
- platformdirs==4.2.2
533
- jaraco.functools==4.0.1
534
- importlib_resources==6.4.0
535
- tomli==2.0.1
536
- jaraco.text==3.12.1
537
- wheel==0.43.0
538
- jaraco.collections==5.1.0
539
- typing_extensions==4.12.2
540
- inflect==7.3.1
541
- backports.tarfile==1.2.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250504_160955-rqk2hbkf/files/wandb-metadata.json DELETED
@@ -1,77 +0,0 @@
1
- {
2
- "os": "Linux-5.14.0-427.13.1.el9_4.x86_64-x86_64-with-glibc2.34",
3
- "python": "3.10.15",
4
- "startedAt": "2025-05-04T13:09:55.928947Z",
5
- "program": "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py",
6
- "codePath": "finetuning_bc_prott5.py",
7
- "email": "zeynep.isik1@sabanciuniv.edu",
8
- "root": "/arf/scratch/zisik/prott5_bc_ft",
9
- "host": "kolyoz1",
10
- "username": "zisik",
11
- "executable": "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/bin/python3",
12
- "codePathLocal": "finetuning_bc_prott5.py",
13
- "cpu_count": 64,
14
- "cpu_count_logical": 64,
15
- "gpu": "NVIDIA H100 80GB HBM3",
16
- "gpu_count": 1,
17
- "disk": {
18
- "/": {
19
- "total": "7643995308032",
20
- "used": "272740364288"
21
- }
22
- },
23
- "memory": {
24
- "total": "1081373220864"
25
- },
26
- "cpu": {
27
- "count": 64,
28
- "countLogical": 64
29
- },
30
- "gpu_nvidia": [
31
- {
32
- "name": "NVIDIA H100 80GB HBM3",
33
- "memoryTotal": "85520809984",
34
- "cudaCores": 16896,
35
- "architecture": "Hopper"
36
- }
37
- ],
38
- "slurm": {
39
- "cluster_name": "cuda",
40
- "conf": "/etc/slurm/slurm.conf",
41
- "cpus_on_node": "16",
42
- "cpus_per_task": "16",
43
- "gpus_on_node": "1",
44
- "gtids": "0",
45
- "job_account": "tbag154",
46
- "job_cpus_per_node": "16",
47
- "job_end_time": "1746623370",
48
- "job_gid": "11636",
49
- "job_gpus": "1",
50
- "job_id": "1027946",
51
- "job_name": "msa_ph_pt",
52
- "job_nodelist": "kolyoz1",
53
- "job_num_nodes": "1",
54
- "job_partition": "kolyoz-cuda",
55
- "job_qos": "tbag",
56
- "job_start_time": "1746364170",
57
- "job_uid": "11636",
58
- "job_user": "zisik",
59
- "jobid": "1027946",
60
- "localid": "0",
61
- "mem_per_cpu": "14000",
62
- "nnodes": "1",
63
- "node_aliases": "(null)",
64
- "nodeid": "0",
65
- "nodelist": "kolyoz1",
66
- "prio_process": "0",
67
- "procid": "0",
68
- "submit_dir": "/arf/scratch/zisik",
69
- "submit_host": "cuda-ui",
70
- "task_pid": "3179106",
71
- "tasks_per_node": "1",
72
- "topology_addr": "kolyoz1",
73
- "topology_addr_pattern": "node",
74
- "working_cluster": "cuda:slurmcontroller3.ib:6800:9984:109"
75
- },
76
- "cudaVersion": "12.6"
77
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250504_160955-rqk2hbkf/files/wandb-summary.json DELETED
@@ -1 +0,0 @@
1
- {"_wandb":{"runtime":2}}
 
 
wandb/run-20250504_160955-rqk2hbkf/logs/debug-core.log DELETED
@@ -1,14 +0,0 @@
1
- {"time":"2025-05-04T16:09:55.241065297+03:00","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmplpbc9pnb/port-3179132.txt","pid":3179132,"debug":false,"disable-analytics":false}
2
- {"time":"2025-05-04T16:09:55.241124751+03:00","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false}
3
- {"time":"2025-05-04T16:09:55.241864+03:00","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":37981,"Zone":""}}
4
- {"time":"2025-05-04T16:09:55.241967868+03:00","level":"INFO","msg":"Will exit if parent process dies.","ppid":3179132}
5
- {"time":"2025-05-04T16:09:55.428960455+03:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:40950"}
6
- {"time":"2025-05-04T16:09:55.928508592+03:00","level":"INFO","msg":"handleInformInit: received","streamId":"rqk2hbkf","id":"127.0.0.1:40950"}
7
- {"time":"2025-05-04T16:09:56.056026556+03:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"rqk2hbkf","id":"127.0.0.1:40950"}
8
- {"time":"2025-05-04T16:09:58.597503038+03:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:40950"}
9
- {"time":"2025-05-04T16:09:58.597631333+03:00","level":"INFO","msg":"server is shutting down"}
10
- {"time":"2025-05-04T16:09:58.597601675+03:00","level":"INFO","msg":"connection: Close: initiating connection closure","id":"127.0.0.1:40950"}
11
- {"time":"2025-05-04T16:09:58.597793186+03:00","level":"INFO","msg":"connection: Close: connection successfully closed","id":"127.0.0.1:40950"}
12
- {"time":"2025-05-04T16:09:59.528863432+03:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:40950"}
13
- {"time":"2025-05-04T16:09:59.528880642+03:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:40950"}
14
- {"time":"2025-05-04T16:09:59.528893164+03:00","level":"INFO","msg":"server is closed"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250504_160955-rqk2hbkf/logs/debug-internal.log DELETED
@@ -1,19 +0,0 @@
1
- {"time":"2025-05-04T16:09:55.930352223+03:00","level":"INFO","msg":"using version","core version":"0.18.7"}
2
- {"time":"2025-05-04T16:09:55.930398642+03:00","level":"INFO","msg":"created symlink","path":"/arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_160955-rqk2hbkf/logs/debug-core.log"}
3
- {"time":"2025-05-04T16:09:56.055953645+03:00","level":"INFO","msg":"created new stream","id":"rqk2hbkf"}
4
- {"time":"2025-05-04T16:09:56.056013829+03:00","level":"INFO","msg":"stream: started","id":"rqk2hbkf"}
5
- {"time":"2025-05-04T16:09:56.056183059+03:00","level":"INFO","msg":"writer: Do: started","stream_id":"rqk2hbkf"}
6
- {"time":"2025-05-04T16:09:56.056291373+03:00","level":"INFO","msg":"sender: started","stream_id":"rqk2hbkf"}
7
- {"time":"2025-05-04T16:09:56.056498843+03:00","level":"INFO","msg":"handler: started","stream_id":"rqk2hbkf"}
8
- {"time":"2025-05-04T16:09:56.455842701+03:00","level":"INFO","msg":"Starting system monitor"}
9
- {"time":"2025-05-04T16:09:58.597599181+03:00","level":"INFO","msg":"stream: closing","id":"rqk2hbkf"}
10
- {"time":"2025-05-04T16:09:58.597716873+03:00","level":"INFO","msg":"Stopping system monitor"}
11
- {"time":"2025-05-04T16:09:58.598825235+03:00","level":"INFO","msg":"Stopped system monitor"}
12
- {"time":"2025-05-04T16:09:58.792882763+03:00","level":"WARN","msg":"No job ingredients found, not creating job artifact"}
13
- {"time":"2025-05-04T16:09:58.792915401+03:00","level":"WARN","msg":"No source type found, not creating job artifact"}
14
- {"time":"2025-05-04T16:09:58.792926694+03:00","level":"INFO","msg":"sender: sendDefer: no job artifact to save"}
15
- {"time":"2025-05-04T16:09:59.286977407+03:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
16
- {"time":"2025-05-04T16:09:59.528666057+03:00","level":"INFO","msg":"handler: closed","stream_id":"rqk2hbkf"}
17
- {"time":"2025-05-04T16:09:59.528710573+03:00","level":"INFO","msg":"writer: Close: closed","stream_id":"rqk2hbkf"}
18
- {"time":"2025-05-04T16:09:59.528726369+03:00","level":"INFO","msg":"sender: closed","stream_id":"rqk2hbkf"}
19
- {"time":"2025-05-04T16:09:59.528792264+03:00","level":"INFO","msg":"stream: closed","id":"rqk2hbkf"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250504_160955-rqk2hbkf/logs/debug.log DELETED
@@ -1,26 +0,0 @@
1
- 2025-05-04 16:09:55,914 INFO MainThread:3179132 [wandb_setup.py:_flush():79] Current SDK version is 0.18.7
2
- 2025-05-04 16:09:55,915 INFO MainThread:3179132 [wandb_setup.py:_flush():79] Configure stats pid to 3179132
3
- 2025-05-04 16:09:55,915 INFO MainThread:3179132 [wandb_setup.py:_flush():79] Loading settings from /arf/home/zisik/.config/wandb/settings
4
- 2025-05-04 16:09:55,915 INFO MainThread:3179132 [wandb_setup.py:_flush():79] Loading settings from /arf/scratch/zisik/prott5_bc_ft/wandb/settings
5
- 2025-05-04 16:09:55,915 INFO MainThread:3179132 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
6
- 2025-05-04 16:09:55,915 INFO MainThread:3179132 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'finetuning_bc_prott5.py', 'program_abspath': '/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py', 'program': '/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py'}
7
- 2025-05-04 16:09:55,915 INFO MainThread:3179132 [wandb_setup.py:_flush():79] Applying login settings: {}
8
- 2025-05-04 16:09:55,916 INFO MainThread:3179132 [wandb_setup.py:_flush():79] Applying login settings: {}
9
- 2025-05-04 16:09:55,916 INFO MainThread:3179132 [wandb_init.py:_log_setup():533] Logging user logs to /arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_160955-rqk2hbkf/logs/debug.log
10
- 2025-05-04 16:09:55,916 INFO MainThread:3179132 [wandb_init.py:_log_setup():534] Logging internal logs to /arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_160955-rqk2hbkf/logs/debug-internal.log
11
- 2025-05-04 16:09:55,917 INFO MainThread:3179132 [wandb_init.py:init():619] calling init triggers
12
- 2025-05-04 16:09:55,917 INFO MainThread:3179132 [wandb_init.py:init():626] wandb.init called with sweep_config: {}
13
- config: {}
14
- 2025-05-04 16:09:55,917 INFO MainThread:3179132 [wandb_init.py:init():669] starting backend
15
- 2025-05-04 16:09:55,917 INFO MainThread:3179132 [wandb_init.py:init():673] sending inform_init request
16
- 2025-05-04 16:09:55,925 INFO MainThread:3179132 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
17
- 2025-05-04 16:09:55,927 INFO MainThread:3179132 [wandb_init.py:init():686] backend started and connected
18
- 2025-05-04 16:09:55,965 INFO MainThread:3179132 [wandb_init.py:init():781] updated telemetry
19
- 2025-05-04 16:09:55,969 INFO MainThread:3179132 [wandb_init.py:init():814] communicating run to backend with 90.0 second timeout
20
- 2025-05-04 16:09:56,441 INFO MainThread:3179132 [wandb_init.py:init():867] starting run threads in backend
21
- 2025-05-04 16:09:57,857 INFO MainThread:3179132 [wandb_run.py:_console_start():2456] atexit reg
22
- 2025-05-04 16:09:57,858 INFO MainThread:3179132 [wandb_run.py:_redirect():2305] redirect: wrap_raw
23
- 2025-05-04 16:09:57,859 INFO MainThread:3179132 [wandb_run.py:_redirect():2370] Wrapping output streams.
24
- 2025-05-04 16:09:57,859 INFO MainThread:3179132 [wandb_run.py:_redirect():2395] Redirects installed.
25
- 2025-05-04 16:09:57,874 INFO MainThread:3179132 [wandb_init.py:init():911] run started, returning control to user process
26
- 2025-05-04 16:09:58,598 WARNING MsgRouterThr:3179132 [router.py:message_loop():75] message_loop has been closed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250504_160955-rqk2hbkf/run-rqk2hbkf.wandb DELETED
Binary file (3.69 kB)
 
wandb/run-20250504_161246-rdbtc2pz/files/config.yaml DELETED
@@ -1,357 +0,0 @@
1
- _wandb:
2
- value:
3
- cli_version: 0.18.7
4
- m:
5
- - "1": eval/loss
6
- "5": 2
7
- "6":
8
- - 1
9
- - 3
10
- "7": []
11
- - "1": train/global_step
12
- "6":
13
- - 3
14
- "7": []
15
- - "1": eval/runtime
16
- "5": 2
17
- "6":
18
- - 1
19
- - 3
20
- "7": []
21
- - "1": eval/samples_per_second
22
- "5": 2
23
- "6":
24
- - 1
25
- - 3
26
- "7": []
27
- - "1": eval/steps_per_second
28
- "5": 2
29
- "6":
30
- - 1
31
- - 3
32
- "7": []
33
- - "1": eval/accuracy
34
- "5": 2
35
- "6":
36
- - 1
37
- - 3
38
- "7": []
39
- - "1": train/epoch
40
- "5": 2
41
- "6":
42
- - 1
43
- - 3
44
- "7": []
45
- python_version: 3.10.15
46
- t:
47
- "1":
48
- - 1
49
- - 2
50
- - 3
51
- - 5
52
- - 11
53
- - 12
54
- - 49
55
- - 51
56
- - 53
57
- - 55
58
- - 71
59
- - 98
60
- - 105
61
- "2":
62
- - 1
63
- - 2
64
- - 3
65
- - 5
66
- - 6
67
- - 11
68
- - 12
69
- - 49
70
- - 51
71
- - 53
72
- - 55
73
- - 71
74
- - 98
75
- - 105
76
- "3":
77
- - 7
78
- - 23
79
- - 55
80
- - 62
81
- - 66
82
- "4": 3.10.15
83
- "5": 0.18.7
84
- "6": 4.45.2
85
- "8":
86
- - 5
87
- "9":
88
- "1": transformers_trainer
89
- "12": 0.18.7
90
- "13": linux-x86_64
91
- accelerator_config:
92
- value:
93
- dispatch_batches: null
94
- even_batches: true
95
- gradient_accumulation_kwargs: null
96
- non_blocking: false
97
- split_batches: false
98
- use_seedable_sampler: true
99
- adafactor:
100
- value: false
101
- adam_beta1:
102
- value: 0.9
103
- adam_beta2:
104
- value: 0.999
105
- adam_epsilon:
106
- value: 1e-08
107
- auto_find_batch_size:
108
- value: false
109
- batch_eval_metrics:
110
- value: false
111
- bf16:
112
- value: false
113
- bf16_full_eval:
114
- value: false
115
- data_seed:
116
- value: null
117
- dataloader_drop_last:
118
- value: false
119
- dataloader_num_workers:
120
- value: 0
121
- dataloader_persistent_workers:
122
- value: false
123
- dataloader_pin_memory:
124
- value: true
125
- dataloader_prefetch_factor:
126
- value: null
127
- ddp_backend:
128
- value: null
129
- ddp_broadcast_buffers:
130
- value: null
131
- ddp_bucket_cap_mb:
132
- value: null
133
- ddp_find_unused_parameters:
134
- value: null
135
- ddp_timeout:
136
- value: 1800
137
- debug:
138
- value: []
139
- deepspeed:
140
- value: null
141
- disable_tqdm:
142
- value: false
143
- dispatch_batches:
144
- value: null
145
- do_eval:
146
- value: true
147
- do_predict:
148
- value: false
149
- do_train:
150
- value: false
151
- eval_accumulation_steps:
152
- value: null
153
- eval_delay:
154
- value: 0
155
- eval_do_concat_batches:
156
- value: true
157
- eval_on_start:
158
- value: false
159
- eval_steps:
160
- value: null
161
- eval_strategy:
162
- value: epoch
163
- eval_use_gather_object:
164
- value: false
165
- evaluation_strategy:
166
- value: epoch
167
- fp16:
168
- value: true
169
- fp16_backend:
170
- value: auto
171
- fp16_full_eval:
172
- value: false
173
- fp16_opt_level:
174
- value: O1
175
- fsdp:
176
- value: []
177
- fsdp_config:
178
- value:
179
- min_num_params: 0
180
- xla: false
181
- xla_fsdp_grad_ckpt: false
182
- xla_fsdp_v2: false
183
- fsdp_min_num_params:
184
- value: 0
185
- fsdp_transformer_layer_cls_to_wrap:
186
- value: null
187
- full_determinism:
188
- value: false
189
- gradient_accumulation_steps:
190
- value: 4
191
- gradient_checkpointing:
192
- value: false
193
- gradient_checkpointing_kwargs:
194
- value: null
195
- greater_is_better:
196
- value: false
197
- group_by_length:
198
- value: false
199
- half_precision_backend:
200
- value: auto
201
- hub_always_push:
202
- value: false
203
- hub_model_id:
204
- value: null
205
- hub_private_repo:
206
- value: false
207
- hub_strategy:
208
- value: every_save
209
- hub_token:
210
- value: <HUB_TOKEN>
211
- ignore_data_skip:
212
- value: false
213
- include_inputs_for_metrics:
214
- value: false
215
- include_num_input_tokens_seen:
216
- value: false
217
- include_tokens_per_second:
218
- value: false
219
- jit_mode_eval:
220
- value: false
221
- label_names:
222
- value: null
223
- label_smoothing_factor:
224
- value: 0
225
- learning_rate:
226
- value: 5e-05
227
- length_column_name:
228
- value: length
229
- load_best_model_at_end:
230
- value: true
231
- local_rank:
232
- value: 0
233
- log_level:
234
- value: passive
235
- log_level_replica:
236
- value: warning
237
- log_on_each_node:
238
- value: true
239
- logging_dir:
240
- value: t5-bc-out/runs/May04_16-12-52_kolyoz1
241
- logging_first_step:
242
- value: false
243
- logging_nan_inf_filter:
244
- value: true
245
- logging_steps:
246
- value: 500
247
- logging_strategy:
248
- value: steps
249
- lr_scheduler_type:
250
- value: linear
251
- max_grad_norm:
252
- value: 1
253
- max_steps:
254
- value: -1
255
- metric_for_best_model:
256
- value: loss
257
- mp_parameters:
258
- value: ""
259
- neftune_noise_alpha:
260
- value: null
261
- no_cuda:
262
- value: false
263
- num_train_epochs:
264
- value: 3
265
- optim:
266
- value: adamw_torch
267
- optim_args:
268
- value: null
269
- optim_target_modules:
270
- value: null
271
- output_dir:
272
- value: t5-bc-out
273
- overwrite_output_dir:
274
- value: false
275
- past_index:
276
- value: -1
277
- per_device_eval_batch_size:
278
- value: 8
279
- per_device_train_batch_size:
280
- value: 8
281
- per_gpu_eval_batch_size:
282
- value: null
283
- per_gpu_train_batch_size:
284
- value: null
285
- prediction_loss_only:
286
- value: false
287
- push_to_hub:
288
- value: false
289
- push_to_hub_model_id:
290
- value: null
291
- push_to_hub_organization:
292
- value: null
293
- push_to_hub_token:
294
- value: <PUSH_TO_HUB_TOKEN>
295
- ray_scope:
296
- value: last
297
- remove_unused_columns:
298
- value: true
299
- report_to:
300
- value:
301
- - wandb
302
- restore_callback_states_from_checkpoint:
303
- value: false
304
- resume_from_checkpoint:
305
- value: null
306
- run_name:
307
- value: t5-bc-out
308
- save_on_each_node:
309
- value: false
310
- save_only_model:
311
- value: false
312
- save_safetensors:
313
- value: false
314
- save_steps:
315
- value: 500
316
- save_strategy:
317
- value: epoch
318
- save_total_limit:
319
- value: null
320
- seed:
321
- value: 42
322
- skip_memory_metrics:
323
- value: true
324
- split_batches:
325
- value: null
326
- tf32:
327
- value: null
328
- torch_compile:
329
- value: false
330
- torch_compile_backend:
331
- value: null
332
- torch_compile_mode:
333
- value: null
334
- torch_empty_cache_steps:
335
- value: null
336
- torchdynamo:
337
- value: null
338
- tpu_metrics_debug:
339
- value: false
340
- tpu_num_cores:
341
- value: null
342
- use_cpu:
343
- value: false
344
- use_ipex:
345
- value: false
346
- use_legacy_prediction_loop:
347
- value: false
348
- use_liger_kernel:
349
- value: false
350
- use_mps_device:
351
- value: false
352
- warmup_ratio:
353
- value: 0
354
- warmup_steps:
355
- value: 0
356
- weight_decay:
357
- value: 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250504_161246-rdbtc2pz/files/output.log DELETED
@@ -1,27 +0,0 @@
1
- You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
2
- Map: 100%|██████████| 70/70 [00:00<00:00, 4499.50 examples/s]
3
- Map: 100%|██████████| 15/15 [00:00<00:00, 2515.68 examples/s]
4
- /arf/home/zisik/.local/lib/python3.10/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead
5
- warnings.warn(
6
- [2025-05-04 16:12:57,595] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect)
7
- wandb: WARNING The `run_name` is currently set to the same value as `TrainingArguments.output_dir`. If this was not intended, please specify a different run name by setting the `TrainingArguments.run_name` parameter.
8
- 100%|██████████| 6/6 [01:04<00:00, 10.71s/it]
9
- Map: 100%|██████████| 15/15 [00:00<00:00, 3408.53 examples/s]
10
- {'eval_loss': 0.2836913764476776, 'eval_accuracy': 1.0, 'eval_runtime': 0.0837, 'eval_samples_per_second': 179.205, 'eval_steps_per_second': 23.894, 'epoch': 0.89}
11
- {'eval_loss': 0.10505779087543488, 'eval_accuracy': 1.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 172.624, 'eval_steps_per_second': 23.017, 'epoch': 1.78}
12
- {'eval_loss': 0.05776570364832878, 'eval_accuracy': 1.0, 'eval_runtime': 0.1, 'eval_samples_per_second': 149.979, 'eval_steps_per_second': 19.997, 'epoch': 2.67}
13
- {'train_runtime': 64.2466, 'train_samples_per_second': 3.269, 'train_steps_per_second': 0.093, 'train_loss': 0.3210471471150716, 'epoch': 2.67}
14
- 100%|██████████| 2/2 [00:00<00:00, 77.74it/s]
15
- {'eval_loss': 0.05800781771540642, 'eval_accuracy': 1.0, 'eval_runtime': 0.0642, 'eval_samples_per_second': 233.689, 'eval_steps_per_second': 31.158, 'epoch': 2.6666666666666665}
16
- Traceback (most recent call last):
17
- File "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py", line 141, in <module>
18
- model.push_to_hub("isikz/prot_t5_binary_classifier")
19
- File "/arf/home/zisik/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1928, in __getattr__
20
- raise AttributeError(
21
- AttributeError: 'T5BinaryClassifier' object has no attribute 'push_to_hub'
22
- Traceback (most recent call last):
23
- File "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py", line 141, in <module>
24
- model.push_to_hub("isikz/prot_t5_binary_classifier")
25
- File "/arf/home/zisik/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1928, in __getattr__
26
- raise AttributeError(
27
- AttributeError: 'T5BinaryClassifier' object has no attribute 'push_to_hub'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250504_161246-rdbtc2pz/files/requirements.txt DELETED
@@ -1,541 +0,0 @@
1
- nvidia-cuda-cupti-cu12==12.4.127
2
- nvidia-cuda-nvrtc-cu12==12.4.127
3
- pyg-lib==0.4.0+pt20cu117
4
- biopython==1.85
5
- iniconfig==2.0.0
6
- tokenizers==0.20.0
7
- accelerate==1.3.0
8
- torch==2.6.0
9
- nvidia-nccl-cu12==2.21.5
10
- transformers==4.45.2
11
- nvidia-cusparse-cu12==12.3.1.170
12
- torch-scatter==2.1.2+pt20cu117
13
- nvidia-cusparselt-cu12==0.6.2
14
- nvidia-nvtx-cu12==12.4.127
15
- zstd==1.5.6.6
16
- fair-esm==2.0.0
17
- omegaconf==2.3.0
18
- pluggy==1.5.0
19
- pytest==8.3.5
20
- nvidia-curand-cu12==10.3.5.147
21
- nvidia-cufft-cu12==11.2.1.3
22
- torch-cluster==1.6.3+pt20cu117
23
- regex==2024.9.11
24
- nvidia-cudnn-cu12==9.1.0.70
25
- torch-spline-conv==1.2.2+pt20cu117
26
- nvidia-cusolver-cu12==11.6.1.9
27
- antlr4-python3-runtime==4.9.3
28
- msgpack-numpy==0.4.8
29
- nlp==0.2.0
30
- einops==0.8.1
31
- nvidia-cublas-cu12==12.4.5.8
32
- triton==3.2.0
33
- ninja==1.11.1.3
34
- hydra-core==1.3.2
35
- nvidia-nvjitlink-cu12==12.4.127
36
- biotite==0.41.2
37
- torch-sparse==0.6.18+pt20cu117
38
- esm==3.1.4
39
- sympy==1.13.1
40
- nvidia-cuda-runtime-cu12==12.4.127
41
- jupyter-lsp==2.2.5
42
- jupyter-events==0.10.0
43
- ipykernel==6.29.5
44
- Mako==1.3.5
45
- proto-plus==1.25.0
46
- fst-pso==1.8.1
47
- gensim==4.3.3
48
- htmlmin==0.1.12
49
- tokenizers==0.13.3
50
- timm==1.0.11
51
- MarkupSafe==3.0.2
52
- safetensors==0.4.5
53
- requests==2.32.3
54
- gast==0.5.5
55
- cuml==24.12.0a33
56
- jaxlib==0.4.23.dev20240214
57
- spacy-loggers==1.0.5
58
- pytz==2024.1
59
- idna==3.10
60
- python-dateutil==2.9.0
61
- mdurl==0.1.2
62
- blis==0.7.10
63
- jupyter==1.1.1
64
- pyerfa==2.0.1.5
65
- comm==0.2.2
66
- pygraphviz==1.14
67
- dill==0.3.8
68
- paramiko==3.5.0
69
- llama-index==0.8.36
70
- mdit-py-plugins==0.4.2
71
- Werkzeug==3.1.3
72
- pyu2f==0.1.5
73
- dask-glm==0.2.0
74
- httpx==0.27.2
75
- typeguard==4.4.1
76
- mypy-extensions==1.0.0
77
- kmodes==0.12.2
78
- keras==2.15.0
79
- ydata-profiling==0.0.dev0
80
- regex==2024.11.6
81
- xarray==2024.11.0
82
- setuptools==75.3.0
83
- charset-normalizer==3.4.0
84
- jupyterlab_nvdashboard==0.11.0
85
- pylibraft==24.12.0a36
86
- spacy==3.7.6
87
- mlflow-skinny==2.17.2
88
- nvtx==0.2.10
89
- multimethod==1.12
90
- pexpect==4.9.0
91
- torch==2.1.0.post301
92
- flatbuffers==24.3.25
93
- python-json-logger==2.0.7
94
- PyJWT==2.9.0
95
- multiprocess==0.70.16
96
- colorlover==0.3.0
97
- yarl==1.16.0
98
- locket==1.0.0
99
- patsy==1.0.0
100
- rapids-dask-dependency==24.12.0a0
101
- stanza==1.9.2
102
- debugpy==1.8.8
103
- jupyterlab_pygments==0.3.0
104
- pylibcudf==24.12.0a337
105
- lz4==4.3.3
106
- pandas==2.2.3
107
- tifffile==2024.9.20
108
- pynvml==11.4.1
109
- cufflinks==0.17.3
110
- ipywidgets==8.1.5
111
- requests-oauthlib==2.0.0
112
- google-auth-oauthlib==1.2.1
113
- rsa==4.9
114
- webcolors==24.8.0
115
- jsonschema-specifications==2024.10.1
116
- scikit-learn==1.5.2
117
- langchain-text-splitters==0.3.2
118
- pandas-datareader==0.10.0
119
- tomli==2.0.2
120
- tzdata==2024.2
121
- scikit-image==0.24.0
122
- tensorboard_data_server==0.7.0
123
- kiwisolver==1.4.7
124
- cloudpathlib==0.20.0
125
- isodate==0.6.1
126
- adversarial-robustness-toolbox==1.19.1
127
- SQLAlchemy==2.0.36
128
- pytest-runner==6.0.0
129
- pycairo==1.27.0
130
- treelite==4.3.0
131
- jiter==0.7.0
132
- threadpoolctl==3.5.0
133
- pandocfilters==1.5.0
134
- loguru==0.7.2
135
- smart_open==7.0.5
136
- shellingham==1.5.4
137
- deepspeed==0.15.4
138
- prompt_toolkit==3.0.48
139
- databricks-sdk==0.34.0
140
- langchain-core==0.3.15
141
- imageio==2.36.0
142
- openapi-schema-pydantic==1.2.4
143
- zict==3.0.0
144
- cachetools==5.5.0
145
- colorful==0.5.6
146
- mpmath==1.3.0
147
- nest_asyncio==1.6.0
148
- pyFUME==0.2.25
149
- opencv-python-headless==4.9.0
150
- fastai==2.7.18
151
- importlib_resources==6.4.5
152
- binaryornot==0.4.4
153
- evaluate==0.4.1
154
- matplotlib-inline==0.1.7
155
- wasabi==1.1.2
156
- pycparser==2.22
157
- GitPython==3.1.43
158
- pluggy==1.5.0
159
- async-lru==2.0.4
160
- pgmpy==0.1.24
161
- anyio==4.4.0
162
- executing==2.1.0
163
- orjson==3.10.11
164
- humanfriendly==10.0
165
- tornado==6.4.1
166
- gmpy2==2.1.5
167
- rlPyCairo==0.2.0
168
- distributed==2024.11.0
169
- FuzzyTM==2.0.5
170
- torchtext==0.15.2a0+5ce3163
171
- pytest==8.3.5
172
- pyod==2.0.2
173
- ImageHash==4.3.1
174
- soupsieve==2.5
175
- tblib==3.0.0
176
- emoji==2.14.0
177
- aiohappyeyeballs==2.4.3
178
- uri-template==1.3.0
179
- tensorflow_estimator==2.15.0
180
- babel==2.16.0
181
- dask-cuda==24.12.0a12
182
- overrides==7.7.0
183
- opencensus==0.11.3
184
- openai==0.28.1
185
- language_data==1.2.0
186
- jedi==0.19.2
187
- cookiecutter==2.6.0
188
- entrypoints==0.4
189
- exceptiongroup==1.2.2
190
- marisa-trie==1.2.0
191
- uvloop==0.20.0
192
- aiosignal==1.3.1
193
- Flask==3.0.3
194
- tensorboard==2.15.2
195
- cffi==1.17.1
196
- tf_keras==2.15.0
197
- absl-py==2.1.0
198
- blinker==1.9.0
199
- types-python-dateutil==2.9.0.20241003
200
- opencv-python==4.9.0
201
- frozendict==2.4.6
202
- aiohttp-cors==0.7.0
203
- statsmodels==0.14.4
204
- tinycss2==1.4.0
205
- terminado==0.18.1
206
- pycaret==2.2.3
207
- aiohttp==3.10.10
208
- distributed-ucxx==0.41.0
209
- prometheus_client==0.21.0
210
- fastdownload==0.0.7
211
- grpcio==1.59.3
212
- google-api-core==2.22.0
213
- jupyterlab_widgets==3.0.13
214
- appdirs==1.4.4
215
- littleutils==0.0.0
216
- ray==2.24.0
217
- kaggle==1.6.17
218
- jsonschema==4.23.0
219
- google-auth==2.36.0
220
- scikit-base==0.11.0
221
- visions==0.7.6
222
- pyarrow==15.0.0
223
- transformers==4.33.0
224
- prometheus_flask_exporter==0.23.1
225
- dm-tree==0.1.8
226
- colorama==0.4.6
227
- requests-toolbelt==1.0.0
228
- cached-property==1.5.2
229
- cymem==2.0.8
230
- PyNaCl==1.5.0
231
- PyWavelets==1.7.0
232
- httptools==0.6.1
233
- typing-utils==0.1.0
234
- email_validator==2.2.0
235
- marshmallow==3.23.1
236
- Deprecated==1.2.14
237
- virtualenv==20.4.7
238
- optuna==3.6.1
239
- jupyter_server==2.14.2
240
- termcolor==2.5.0
241
- mpi4py==4.0.1
242
- torchdata==0.7.1+8cea82f
243
- dataclasses==0.8
244
- cloudpickle==3.1.0
245
- tree_sitter_languages==1.10.2
246
- tabulate==0.9.0
247
- ipython==8.29.0
248
- lightgbm==4.3.0
249
- captum==0.6.0
250
- confuse==2.0.1
251
- torchvision==0.16.1+adc3221
252
- lxml==4.9.4
253
- fastapi==0.115.4
254
- python-multipart==0.0.17
255
- dnspython==2.7.0
256
- jupyter-console==6.6.3
257
- preshed==3.0.9
258
- py-cpuinfo==9.0.0
259
- Send2Trash==1.8.3
260
- murmurhash==1.0.10
261
- sniffio==1.3.1
262
- websockets==13.1
263
- h11==0.14.0
264
- smmap==5.0.0
265
- textual==0.85.2
266
- jsonpatch==1.33
267
- opencensus-context==0.1.3
268
- nbconvert==7.16.4
269
- sentry-sdk==2.19.0
270
- opentelemetry-semantic-conventions==0.37b0
271
- pandas-profiling==2.8.0
272
- pillow==10.3.0
273
- peft==0.13.2
274
- rpds-py==0.21.0
275
- bokeh==3.6.1
276
- distro==1.9.0
277
- itsdangerous==2.2.0
278
- wandb==0.18.7
279
- jsonpointer==3.0.0
280
- astropy-iers-data==0.2024.11.11.0.32.38
281
- horovod==0.28.1
282
- graphviz==0.20.3
283
- vtk==9.3.1
284
- bleach==6.2.0
285
- numexpr==2.8.7
286
- pydantic_core==2.23.4
287
- Jinja2==3.1.4
288
- widgetsnbextension==4.0.13
289
- filelock==3.16.1
290
- catboost==1.2.7
291
- raft-dask==24.12.0a36
292
- async-timeout==4.0.3
293
- datefinder==0.7.3
294
- coloredlogs==15.0.1
295
- platformdirs==4.3.6
296
- spacy-legacy==3.0.12
297
- chardet==5.2.0
298
- jupyter_client==8.6.3
299
- importlib_metadata==8.5.0
300
- rfc3986-validator==0.1.1
301
- huggingface_hub==0.26.2
302
- PySocks==1.7.1
303
- mlxtend==0.23.2
304
- outdated==0.2.2
305
- partd==1.4.2
306
- thinc==8.2.5
307
- astropy==6.1.6
308
- rdflib==6.3.2
309
- h2==4.1.0
310
- typer==0.13.0
311
- xyzservices==2024.9.0
312
- toolz==0.12.1
313
- frozenlist==1.5.0
314
- rdkit==2024.9.2
315
- pyasn1==0.6.1
316
- jupyter_server_terminals==0.5.3
317
- ucx-py==0.41.0a11
318
- astunparse==1.6.3
319
- simpful==2.12.0
320
- notebook_shim==0.2.4
321
- scipy==1.13.1
322
- colorlog==6.9.0
323
- tiktoken==0.3.3
324
- plotly==5.24.1
325
- fastrlock==0.8.2
326
- chart-studio==1.1.0
327
- stack-data==0.6.2
328
- google-pasta==0.2.0
329
- sktime==0.34.0
330
- PyYAML==6.0.2
331
- sympy==1.13.3
332
- multidict==6.1.0
333
- ml-dtypes==0.2.0
334
- tensorboardX==2.6.2.2
335
- decorator==5.1.1
336
- cytoolz==1.0.0
337
- ase==3.23.0
338
- isoduration==20.11.0
339
- html5lib==1.1
340
- langsmith==0.1.142
341
- future==1.0.0
342
- onnx2torch==1.5.15
343
- multipledispatch==0.6.0
344
- protobuf==4.24.4
345
- ucxx==0.41.0
346
- pandas_flavor==0.6.0
347
- msgpack==1.1.0
348
- pyasn1_modules==0.4.1
349
- imagecodecs==2024.1.1
350
- mlflow==2.17.2
351
- watchfiles==0.24.0
352
- dm-sonnet==2.0.2
353
- langcodes==3.4.1
354
- freetype-py==2.3.0
355
- argon2-cffi-bindings==21.2.0
356
- trimesh==4.5.2
357
- opt_einsum==3.4.0
358
- tenacity==8.5.0
359
- h5py==3.12.1
360
- fastapi-cli==0.0.5
361
- oauthlib==3.2.2
362
- parso==0.8.4
363
- weasel==0.4.1
364
- yfinance==0.2.49
365
- networkx==2.8.8
366
- bitsandbytes==0.44.1
367
- lazy_loader==0.4
368
- querystring_parser==1.2.4
369
- contourpy==1.3.0
370
- unicodedata2==15.1.0
371
- bcrypt==4.2.0
372
- munkres==1.1.4
373
- langchain==0.0.298
374
- hpack==4.0.0
375
- cryptography==43.0.3
376
- umap-learn==0.5.7
377
- arrow==1.3.0
378
- docker==7.1.0
379
- certifi==2025.1.31
380
- fastjsonschema==2.20.0
381
- tensorflow==2.15.0
382
- googleapis-common-protos==1.65.0
383
- iniconfig==2.0.0
384
- Markdown==3.6
385
- llvmlite==0.43.0
386
- wslink==2.3.2
387
- attrs==24.2.0
388
- rich==13.9.4
389
- cupy==13.3.0
390
- uc-micro-py==1.0.3
391
- alembic==1.14.0
392
- joblib==1.4.2
393
- reportlab==4.2.5
394
- miniful==0.0.6
395
- jupyter_core==5.7.2
396
- wheel==0.45.0
397
- phik==0.12.3
398
- mistune==3.0.2
399
- wcwidth==0.2.13
400
- dacite==1.8.1
401
- accelerate==0.22.0
402
- sacremoses==0.0.53
403
- revtok==0.0.3
404
- python-slugify==8.0.4
405
- tangled-up-in-unicode==0.2.0
406
- dask==2024.11.0
407
- markdown-it-py==3.0.0
408
- sentencepiece==0.1.99
409
- beautifulsoup4==4.12.3
410
- six==1.16.0
411
- numba-cuda==0.0.17
412
- argon2-cffi==23.1.0
413
- xxhash==3.5.0
414
- hjson==3.1.0
415
- fonttools==4.54.1
416
- graphql-core==3.2.5
417
- pyparsing==3.2.0
418
- pure_eval==0.2.3
419
- distlib==0.3.9
420
- lightning==2.4.0
421
- wordcloud==0.0.0
422
- catalogue==2.0.10
423
- jax==0.4.27
424
- tree-sitter==0.23.2
425
- notebook==7.2.2
426
- dataclasses-json==0.6.7
427
- propcache==0.2.0
428
- numba==0.60.0
429
- dask-expr==1.1.17
430
- pydantic==2.9.2
431
- gunicorn==22.0.0
432
- missingno==0.5.2
433
- pyOpenSSL==24.2.1
434
- openpyxl==3.1.5
435
- packaging==24.1
436
- python-dotenv==1.0.1
437
- cycler==0.12.1
438
- types-pytz==2024.2.0.20241003
439
- yellowbrick==1.5
440
- referencing==0.35.1
441
- pyLDAvis==3.4.1
442
- lazypredict==0.2.16
443
- fqdn==1.5.1
444
- websocket-client==1.8.0
445
- fastcore==1.7.19
446
- pynvjitlink-cu12==0.3.0
447
- pingouin==0.5.5
448
- numpy==1.26.4
449
- typing-inspect==0.9.0
450
- nltk==3.9.1
451
- onnxruntime==1.19.2
452
- tensorflow-probability==0.23.0
453
- datasets==3.0.2
454
- pickleshare==0.7.5
455
- peewee==3.17.7
456
- torch-geometric==2.6.1
457
- ptyprocess==0.7.0
458
- greenlet==3.1.1
459
- graphql-relay==3.2.0
460
- graphene==3.4.3
461
- et_xmlfile==2.0.0
462
- webencodings==0.5.1
463
- hyperframe==6.0.1
464
- multitasking==0.0.9
465
- typer-slim==0.13.0
466
- onnx==1.15.0
467
- uvicorn==0.32.0
468
- memray==1.13.4
469
- xgboost==2.1.2
470
- Brotli==1.1.0
471
- zipp==3.21.0
472
- nbformat==5.10.4
473
- responses==0.18.0
474
- funcy==2.0
475
- Pygments==2.18.0
476
- tqdm==4.67.0
477
- linkify-it-py==2.0.3
478
- srsly==2.4.8
479
- cuda-python==12.6.0
480
- lightning-utilities==0.11.8
481
- cudf==24.12.0a337
482
- dask-ml==2024.4.4
483
- docker-pycreds==0.4.0
484
- pkgutil_resolve_name==1.3.10
485
- opentelemetry-api==1.16.0
486
- fsspec==2024.9.0
487
- nbclient==0.10.0
488
- psutil==5.9.8
489
- pytorch-lightning==2.4.0
490
- sortedcontainers==2.4.0
491
- matplotlib==3.9.2
492
- defusedxml==0.7.1
493
- urllib3==1.26.19
494
- jupyterlab_server==2.27.3
495
- retrying==1.3.3
496
- dask-cudf==24.12.0a337
497
- sqlparse==0.5.1
498
- text-unidecode==1.3
499
- seaborn==0.13.2
500
- typing_extensions==4.12.2
501
- pyzmq==26.2.0
502
- rfc3339-validator==0.1.4
503
- pynndescent==0.5.13
504
- pip==24.3.1
505
- confection==0.1.4
506
- wrapt==1.14.1
507
- fastprogress==1.0.3
508
- traitlets==5.14.3
509
- asttokens==2.4.1
510
- json5==0.9.28
511
- pandas-stubs==2.2.3.241126
512
- torchmetrics==1.2.1
513
- gitdb==4.0.11
514
- annotated-types==0.7.0
515
- ipython-autotime==0.1
516
- httpcore==1.0.6
517
- click==8.1.7
518
- setproctitle==1.3.3
519
- starlette==0.41.2
520
- jupyterlab==4.2.5
521
- rmm==24.12.0a27
522
- opentelemetry-sdk==1.16.0
523
- textblob==0.15.3
524
- imbalanced-learn==0.12.4
525
- typeguard==4.3.0
526
- more-itertools==10.3.0
527
- zipp==3.19.2
528
- autocommand==2.2.2
529
- jaraco.context==5.3.0
530
- packaging==24.1
531
- importlib_metadata==8.0.0
532
- platformdirs==4.2.2
533
- jaraco.functools==4.0.1
534
- importlib_resources==6.4.0
535
- tomli==2.0.1
536
- jaraco.text==3.12.1
537
- wheel==0.43.0
538
- jaraco.collections==5.1.0
539
- typing_extensions==4.12.2
540
- inflect==7.3.1
541
- backports.tarfile==1.2.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250504_161246-rdbtc2pz/files/wandb-metadata.json DELETED
@@ -1,77 +0,0 @@
1
- {
2
- "os": "Linux-5.14.0-427.13.1.el9_4.x86_64-x86_64-with-glibc2.34",
3
- "python": "3.10.15",
4
- "startedAt": "2025-05-04T13:12:46.058889Z",
5
- "program": "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py",
6
- "codePath": "finetuning_bc_prott5.py",
7
- "email": "zeynep.isik1@sabanciuniv.edu",
8
- "root": "/arf/scratch/zisik/prott5_bc_ft",
9
- "host": "kolyoz1",
10
- "username": "zisik",
11
- "executable": "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/bin/python3",
12
- "codePathLocal": "finetuning_bc_prott5.py",
13
- "cpu_count": 64,
14
- "cpu_count_logical": 64,
15
- "gpu": "NVIDIA H100 80GB HBM3",
16
- "gpu_count": 1,
17
- "disk": {
18
- "/": {
19
- "total": "7643995308032",
20
- "used": "274907410432"
21
- }
22
- },
23
- "memory": {
24
- "total": "1081373220864"
25
- },
26
- "cpu": {
27
- "count": 64,
28
- "countLogical": 64
29
- },
30
- "gpu_nvidia": [
31
- {
32
- "name": "NVIDIA H100 80GB HBM3",
33
- "memoryTotal": "85520809984",
34
- "cudaCores": 16896,
35
- "architecture": "Hopper"
36
- }
37
- ],
38
- "slurm": {
39
- "cluster_name": "cuda",
40
- "conf": "/etc/slurm/slurm.conf",
41
- "cpus_on_node": "16",
42
- "cpus_per_task": "16",
43
- "gpus_on_node": "1",
44
- "gtids": "0",
45
- "job_account": "tbag154",
46
- "job_cpus_per_node": "16",
47
- "job_end_time": "1746623540",
48
- "job_gid": "11636",
49
- "job_gpus": "1",
50
- "job_id": "1027947",
51
- "job_name": "msa_ph_pt",
52
- "job_nodelist": "kolyoz1",
53
- "job_num_nodes": "1",
54
- "job_partition": "kolyoz-cuda",
55
- "job_qos": "tbag",
56
- "job_start_time": "1746364340",
57
- "job_uid": "11636",
58
- "job_user": "zisik",
59
- "jobid": "1027947",
60
- "localid": "0",
61
- "mem_per_cpu": "14000",
62
- "nnodes": "1",
63
- "node_aliases": "(null)",
64
- "nodeid": "0",
65
- "nodelist": "kolyoz1",
66
- "prio_process": "0",
67
- "procid": "0",
68
- "submit_dir": "/arf/scratch/zisik",
69
- "submit_host": "cuda-ui",
70
- "task_pid": "3179500",
71
- "tasks_per_node": "1",
72
- "topology_addr": "kolyoz1",
73
- "topology_addr_pattern": "node",
74
- "working_cluster": "cuda:slurmcontroller3.ib:6800:9984:109"
75
- },
76
- "cudaVersion": "12.6"
77
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250504_161246-rdbtc2pz/files/wandb-summary.json DELETED
@@ -1 +0,0 @@
1
- {"train_loss":0.3210471471150716,"_runtime":80.142129451,"train_runtime":64.2466,"eval/loss":0.05800781771540642,"eval/steps_per_second":31.158,"total_flos":0,"eval/samples_per_second":233.689,"train/global_step":6,"_timestamp":1.746364446200474e+09,"train_samples_per_second":3.269,"_wandb":{"runtime":80},"eval/runtime":0.0642,"train_steps_per_second":0.093,"train/epoch":2.6666666666666665,"eval/accuracy":1,"_step":4}
 
 
wandb/run-20250504_161246-rdbtc2pz/logs/debug-core.log DELETED
@@ -1,14 +0,0 @@
1
- {"time":"2025-05-04T16:12:45.059197409+03:00","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmphflqkva1/port-3179526.txt","pid":3179526,"debug":false,"disable-analytics":false}
2
- {"time":"2025-05-04T16:12:45.059250836+03:00","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false}
3
- {"time":"2025-05-04T16:12:45.060076988+03:00","level":"INFO","msg":"Will exit if parent process dies.","ppid":3179526}
4
- {"time":"2025-05-04T16:12:45.059982306+03:00","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":45921,"Zone":""}}
5
- {"time":"2025-05-04T16:12:45.246915089+03:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:33132"}
6
- {"time":"2025-05-04T16:12:46.063164622+03:00","level":"INFO","msg":"handleInformInit: received","streamId":"rdbtc2pz","id":"127.0.0.1:33132"}
7
- {"time":"2025-05-04T16:12:46.187062148+03:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"rdbtc2pz","id":"127.0.0.1:33132"}
8
- {"time":"2025-05-04T16:14:06.269673416+03:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:33132"}
9
- {"time":"2025-05-04T16:14:06.269788395+03:00","level":"INFO","msg":"connection: Close: initiating connection closure","id":"127.0.0.1:33132"}
10
- {"time":"2025-05-04T16:14:06.26984398+03:00","level":"INFO","msg":"server is shutting down"}
11
- {"time":"2025-05-04T16:14:06.269980058+03:00","level":"INFO","msg":"connection: Close: connection successfully closed","id":"127.0.0.1:33132"}
12
- {"time":"2025-05-04T16:14:07.608460726+03:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:33132"}
13
- {"time":"2025-05-04T16:14:07.608482723+03:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:33132"}
14
- {"time":"2025-05-04T16:14:07.60849804+03:00","level":"INFO","msg":"server is closed"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250504_161246-rdbtc2pz/logs/debug-internal.log DELETED
@@ -1,19 +0,0 @@
1
- {"time":"2025-05-04T16:12:46.065859772+03:00","level":"INFO","msg":"using version","core version":"0.18.7"}
2
- {"time":"2025-05-04T16:12:46.065909143+03:00","level":"INFO","msg":"created symlink","path":"/arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_161246-rdbtc2pz/logs/debug-core.log"}
3
- {"time":"2025-05-04T16:12:46.186999454+03:00","level":"INFO","msg":"created new stream","id":"rdbtc2pz"}
4
- {"time":"2025-05-04T16:12:46.187050012+03:00","level":"INFO","msg":"stream: started","id":"rdbtc2pz"}
5
- {"time":"2025-05-04T16:12:46.187228889+03:00","level":"INFO","msg":"writer: Do: started","stream_id":"rdbtc2pz"}
6
- {"time":"2025-05-04T16:12:46.187328701+03:00","level":"INFO","msg":"handler: started","stream_id":"rdbtc2pz"}
7
- {"time":"2025-05-04T16:12:46.187417103+03:00","level":"INFO","msg":"sender: started","stream_id":"rdbtc2pz"}
8
- {"time":"2025-05-04T16:12:46.598141294+03:00","level":"INFO","msg":"Starting system monitor"}
9
- {"time":"2025-05-04T16:14:06.269782406+03:00","level":"INFO","msg":"stream: closing","id":"rdbtc2pz"}
10
- {"time":"2025-05-04T16:14:06.269825637+03:00","level":"INFO","msg":"Stopping system monitor"}
11
- {"time":"2025-05-04T16:14:06.270879471+03:00","level":"INFO","msg":"Stopped system monitor"}
12
- {"time":"2025-05-04T16:14:06.55541099+03:00","level":"WARN","msg":"No job ingredients found, not creating job artifact"}
13
- {"time":"2025-05-04T16:14:06.555433954+03:00","level":"WARN","msg":"No source type found, not creating job artifact"}
14
- {"time":"2025-05-04T16:14:06.555445965+03:00","level":"INFO","msg":"sender: sendDefer: no job artifact to save"}
15
- {"time":"2025-05-04T16:14:07.09767572+03:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
16
- {"time":"2025-05-04T16:14:07.607443104+03:00","level":"INFO","msg":"handler: closed","stream_id":"rdbtc2pz"}
17
- {"time":"2025-05-04T16:14:07.607487355+03:00","level":"INFO","msg":"writer: Close: closed","stream_id":"rdbtc2pz"}
18
- {"time":"2025-05-04T16:14:07.607532609+03:00","level":"INFO","msg":"sender: closed","stream_id":"rdbtc2pz"}
19
- {"time":"2025-05-04T16:14:07.607587557+03:00","level":"INFO","msg":"stream: closed","id":"rdbtc2pz"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250504_161246-rdbtc2pz/logs/debug.log DELETED
@@ -1,27 +0,0 @@
1
- 2025-05-04 16:12:46,051 INFO MainThread:3179526 [wandb_setup.py:_flush():79] Current SDK version is 0.18.7
2
- 2025-05-04 16:12:46,052 INFO MainThread:3179526 [wandb_setup.py:_flush():79] Configure stats pid to 3179526
3
- 2025-05-04 16:12:46,052 INFO MainThread:3179526 [wandb_setup.py:_flush():79] Loading settings from /arf/home/zisik/.config/wandb/settings
4
- 2025-05-04 16:12:46,052 INFO MainThread:3179526 [wandb_setup.py:_flush():79] Loading settings from /arf/scratch/zisik/prott5_bc_ft/wandb/settings
5
- 2025-05-04 16:12:46,052 INFO MainThread:3179526 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
6
- 2025-05-04 16:12:46,052 INFO MainThread:3179526 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'finetuning_bc_prott5.py', 'program_abspath': '/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py', 'program': '/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py'}
7
- 2025-05-04 16:12:46,052 INFO MainThread:3179526 [wandb_setup.py:_flush():79] Applying login settings: {}
8
- 2025-05-04 16:12:46,052 INFO MainThread:3179526 [wandb_setup.py:_flush():79] Applying login settings: {}
9
- 2025-05-04 16:12:46,052 INFO MainThread:3179526 [wandb_init.py:_log_setup():533] Logging user logs to /arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_161246-rdbtc2pz/logs/debug.log
10
- 2025-05-04 16:12:46,053 INFO MainThread:3179526 [wandb_init.py:_log_setup():534] Logging internal logs to /arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_161246-rdbtc2pz/logs/debug-internal.log
11
- 2025-05-04 16:12:46,053 INFO MainThread:3179526 [wandb_init.py:init():619] calling init triggers
12
- 2025-05-04 16:12:46,053 INFO MainThread:3179526 [wandb_init.py:init():626] wandb.init called with sweep_config: {}
13
- config: {}
14
- 2025-05-04 16:12:46,053 INFO MainThread:3179526 [wandb_init.py:init():669] starting backend
15
- 2025-05-04 16:12:46,053 INFO MainThread:3179526 [wandb_init.py:init():673] sending inform_init request
16
- 2025-05-04 16:12:46,057 INFO MainThread:3179526 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
17
- 2025-05-04 16:12:46,058 INFO MainThread:3179526 [wandb_init.py:init():686] backend started and connected
18
- 2025-05-04 16:12:46,064 INFO MainThread:3179526 [wandb_init.py:init():781] updated telemetry
19
- 2025-05-04 16:12:46,067 INFO MainThread:3179526 [wandb_init.py:init():814] communicating run to backend with 90.0 second timeout
20
- 2025-05-04 16:12:46,584 INFO MainThread:3179526 [wandb_init.py:init():867] starting run threads in backend
21
- 2025-05-04 16:12:47,966 INFO MainThread:3179526 [wandb_run.py:_console_start():2456] atexit reg
22
- 2025-05-04 16:12:47,966 INFO MainThread:3179526 [wandb_run.py:_redirect():2305] redirect: wrap_raw
23
- 2025-05-04 16:12:47,966 INFO MainThread:3179526 [wandb_run.py:_redirect():2370] Wrapping output streams.
24
- 2025-05-04 16:12:47,966 INFO MainThread:3179526 [wandb_run.py:_redirect():2395] Redirects installed.
25
- 2025-05-04 16:12:47,974 INFO MainThread:3179526 [wandb_init.py:init():911] run started, returning control to user process
26
- 2025-05-04 16:13:01,857 INFO MainThread:3179526 [wandb_run.py:_config_callback():1387] config_cb None None {'output_dir': 't5-bc-out', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 't5-bc-out/runs/May04_16-12-52_kolyoz1', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': False, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 't5-bc-out', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'loss', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'epoch', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False}
27
- 2025-05-04 16:14:06,270 WARNING MsgRouterThr:3179526 [router.py:message_loop():75] message_loop has been closed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250504_161246-rdbtc2pz/run-rdbtc2pz.wandb DELETED
Binary file (45.3 kB)
 
wandb/run-20250504_162343-cp870jym/files/config.yaml DELETED
@@ -1,357 +0,0 @@
1
- _wandb:
2
- value:
3
- cli_version: 0.18.7
4
- m:
5
- - "1": eval/steps_per_second
6
- "5": 2
7
- "6":
8
- - 1
9
- - 3
10
- "7": []
11
- - "1": train/global_step
12
- "6":
13
- - 3
14
- "7": []
15
- - "1": eval/loss
16
- "5": 2
17
- "6":
18
- - 1
19
- - 3
20
- "7": []
21
- - "1": train/epoch
22
- "5": 2
23
- "6":
24
- - 1
25
- - 3
26
- "7": []
27
- - "1": eval/accuracy
28
- "5": 2
29
- "6":
30
- - 1
31
- - 3
32
- "7": []
33
- - "1": eval/runtime
34
- "5": 2
35
- "6":
36
- - 1
37
- - 3
38
- "7": []
39
- - "1": eval/samples_per_second
40
- "5": 2
41
- "6":
42
- - 1
43
- - 3
44
- "7": []
45
- python_version: 3.10.15
46
- t:
47
- "1":
48
- - 1
49
- - 2
50
- - 3
51
- - 5
52
- - 11
53
- - 12
54
- - 49
55
- - 51
56
- - 53
57
- - 55
58
- - 71
59
- - 98
60
- - 105
61
- "2":
62
- - 1
63
- - 2
64
- - 3
65
- - 5
66
- - 6
67
- - 11
68
- - 12
69
- - 49
70
- - 51
71
- - 53
72
- - 55
73
- - 71
74
- - 98
75
- - 105
76
- "3":
77
- - 7
78
- - 23
79
- - 55
80
- - 62
81
- - 66
82
- "4": 3.10.15
83
- "5": 0.18.7
84
- "6": 4.45.2
85
- "8":
86
- - 5
87
- "9":
88
- "1": transformers_trainer
89
- "12": 0.18.7
90
- "13": linux-x86_64
91
- accelerator_config:
92
- value:
93
- dispatch_batches: null
94
- even_batches: true
95
- gradient_accumulation_kwargs: null
96
- non_blocking: false
97
- split_batches: false
98
- use_seedable_sampler: true
99
- adafactor:
100
- value: false
101
- adam_beta1:
102
- value: 0.9
103
- adam_beta2:
104
- value: 0.999
105
- adam_epsilon:
106
- value: 1e-08
107
- auto_find_batch_size:
108
- value: false
109
- batch_eval_metrics:
110
- value: false
111
- bf16:
112
- value: false
113
- bf16_full_eval:
114
- value: false
115
- data_seed:
116
- value: null
117
- dataloader_drop_last:
118
- value: false
119
- dataloader_num_workers:
120
- value: 0
121
- dataloader_persistent_workers:
122
- value: false
123
- dataloader_pin_memory:
124
- value: true
125
- dataloader_prefetch_factor:
126
- value: null
127
- ddp_backend:
128
- value: null
129
- ddp_broadcast_buffers:
130
- value: null
131
- ddp_bucket_cap_mb:
132
- value: null
133
- ddp_find_unused_parameters:
134
- value: null
135
- ddp_timeout:
136
- value: 1800
137
- debug:
138
- value: []
139
- deepspeed:
140
- value: null
141
- disable_tqdm:
142
- value: false
143
- dispatch_batches:
144
- value: null
145
- do_eval:
146
- value: true
147
- do_predict:
148
- value: false
149
- do_train:
150
- value: false
151
- eval_accumulation_steps:
152
- value: null
153
- eval_delay:
154
- value: 0
155
- eval_do_concat_batches:
156
- value: true
157
- eval_on_start:
158
- value: false
159
- eval_steps:
160
- value: null
161
- eval_strategy:
162
- value: epoch
163
- eval_use_gather_object:
164
- value: false
165
- evaluation_strategy:
166
- value: epoch
167
- fp16:
168
- value: true
169
- fp16_backend:
170
- value: auto
171
- fp16_full_eval:
172
- value: false
173
- fp16_opt_level:
174
- value: O1
175
- fsdp:
176
- value: []
177
- fsdp_config:
178
- value:
179
- min_num_params: 0
180
- xla: false
181
- xla_fsdp_grad_ckpt: false
182
- xla_fsdp_v2: false
183
- fsdp_min_num_params:
184
- value: 0
185
- fsdp_transformer_layer_cls_to_wrap:
186
- value: null
187
- full_determinism:
188
- value: false
189
- gradient_accumulation_steps:
190
- value: 4
191
- gradient_checkpointing:
192
- value: false
193
- gradient_checkpointing_kwargs:
194
- value: null
195
- greater_is_better:
196
- value: false
197
- group_by_length:
198
- value: false
199
- half_precision_backend:
200
- value: auto
201
- hub_always_push:
202
- value: false
203
- hub_model_id:
204
- value: null
205
- hub_private_repo:
206
- value: false
207
- hub_strategy:
208
- value: every_save
209
- hub_token:
210
- value: <HUB_TOKEN>
211
- ignore_data_skip:
212
- value: false
213
- include_inputs_for_metrics:
214
- value: false
215
- include_num_input_tokens_seen:
216
- value: false
217
- include_tokens_per_second:
218
- value: false
219
- jit_mode_eval:
220
- value: false
221
- label_names:
222
- value: null
223
- label_smoothing_factor:
224
- value: 0
225
- learning_rate:
226
- value: 5e-05
227
- length_column_name:
228
- value: length
229
- load_best_model_at_end:
230
- value: true
231
- local_rank:
232
- value: 0
233
- log_level:
234
- value: passive
235
- log_level_replica:
236
- value: warning
237
- log_on_each_node:
238
- value: true
239
- logging_dir:
240
- value: t5-bc-out/runs/May04_16-23-49_kolyoz1
241
- logging_first_step:
242
- value: false
243
- logging_nan_inf_filter:
244
- value: true
245
- logging_steps:
246
- value: 500
247
- logging_strategy:
248
- value: steps
249
- lr_scheduler_type:
250
- value: linear
251
- max_grad_norm:
252
- value: 1
253
- max_steps:
254
- value: -1
255
- metric_for_best_model:
256
- value: loss
257
- mp_parameters:
258
- value: ""
259
- neftune_noise_alpha:
260
- value: null
261
- no_cuda:
262
- value: false
263
- num_train_epochs:
264
- value: 3
265
- optim:
266
- value: adamw_torch
267
- optim_args:
268
- value: null
269
- optim_target_modules:
270
- value: null
271
- output_dir:
272
- value: t5-bc-out
273
- overwrite_output_dir:
274
- value: false
275
- past_index:
276
- value: -1
277
- per_device_eval_batch_size:
278
- value: 8
279
- per_device_train_batch_size:
280
- value: 8
281
- per_gpu_eval_batch_size:
282
- value: null
283
- per_gpu_train_batch_size:
284
- value: null
285
- prediction_loss_only:
286
- value: false
287
- push_to_hub:
288
- value: false
289
- push_to_hub_model_id:
290
- value: null
291
- push_to_hub_organization:
292
- value: null
293
- push_to_hub_token:
294
- value: <PUSH_TO_HUB_TOKEN>
295
- ray_scope:
296
- value: last
297
- remove_unused_columns:
298
- value: true
299
- report_to:
300
- value:
301
- - wandb
302
- restore_callback_states_from_checkpoint:
303
- value: false
304
- resume_from_checkpoint:
305
- value: null
306
- run_name:
307
- value: t5-bc-out
308
- save_on_each_node:
309
- value: false
310
- save_only_model:
311
- value: false
312
- save_safetensors:
313
- value: false
314
- save_steps:
315
- value: 500
316
- save_strategy:
317
- value: epoch
318
- save_total_limit:
319
- value: null
320
- seed:
321
- value: 42
322
- skip_memory_metrics:
323
- value: true
324
- split_batches:
325
- value: null
326
- tf32:
327
- value: null
328
- torch_compile:
329
- value: false
330
- torch_compile_backend:
331
- value: null
332
- torch_compile_mode:
333
- value: null
334
- torch_empty_cache_steps:
335
- value: null
336
- torchdynamo:
337
- value: null
338
- tpu_metrics_debug:
339
- value: false
340
- tpu_num_cores:
341
- value: null
342
- use_cpu:
343
- value: false
344
- use_ipex:
345
- value: false
346
- use_legacy_prediction_loop:
347
- value: false
348
- use_liger_kernel:
349
- value: false
350
- use_mps_device:
351
- value: false
352
- warmup_ratio:
353
- value: 0
354
- warmup_steps:
355
- value: 0
356
- weight_decay:
357
- value: 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250504_162343-cp870jym/files/output.log DELETED
@@ -1,27 +0,0 @@
1
- You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
2
- Map: 100%|██████████| 70/70 [00:00<00:00, 4479.59 examples/s]
3
- Map: 100%|██████████| 15/15 [00:00<00:00, 2556.26 examples/s]
4
- /arf/home/zisik/.local/lib/python3.10/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead
5
- warnings.warn(
6
- [2025-05-04 16:23:55,053] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect)
7
- wandb: WARNING The `run_name` is currently set to the same value as `TrainingArguments.output_dir`. If this was not intended, please specify a different run name by setting the `TrainingArguments.run_name` parameter.
8
- 100%|██████████| 6/6 [01:08<00:00, 11.47s/it]
9
- Map: 100%|██████████| 15/15 [00:00<00:00, 3414.44 examples/s]
10
- {'eval_loss': 0.32496747374534607, 'eval_accuracy': 1.0, 'eval_runtime': 0.0946, 'eval_samples_per_second': 158.536, 'eval_steps_per_second': 21.138, 'epoch': 0.89}
11
- {'eval_loss': 0.14126792550086975, 'eval_accuracy': 1.0, 'eval_runtime': 0.0935, 'eval_samples_per_second': 160.347, 'eval_steps_per_second': 21.38, 'epoch': 1.78}
12
- {'eval_loss': 0.08305665105581284, 'eval_accuracy': 1.0, 'eval_runtime': 0.0868, 'eval_samples_per_second': 172.874, 'eval_steps_per_second': 23.05, 'epoch': 2.67}
13
- {'train_runtime': 68.815, 'train_samples_per_second': 3.052, 'train_steps_per_second': 0.087, 'train_loss': 0.34361688296000165, 'epoch': 2.67}
14
- 100%|██████████| 2/2 [00:00<00:00, 93.00it/s]
15
- {'eval_loss': 0.07820229977369308, 'eval_accuracy': 1.0, 'eval_runtime': 0.0516, 'eval_samples_per_second': 290.667, 'eval_steps_per_second': 38.756, 'epoch': 2.6666666666666665}
16
- Traceback (most recent call last):
17
- File "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py", line 141, in <module>
18
- model.save_pretrained(
19
- File "/arf/home/zisik/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1928, in __getattr__
20
- raise AttributeError(
21
- AttributeError: 'T5BinaryClassifier' object has no attribute 'save_pretrained'
22
- Traceback (most recent call last):
23
- File "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py", line 141, in <module>
24
- model.save_pretrained(
25
- File "/arf/home/zisik/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1928, in __getattr__
26
- raise AttributeError(
27
- AttributeError: 'T5BinaryClassifier' object has no attribute 'save_pretrained'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250504_162343-cp870jym/files/requirements.txt DELETED
@@ -1,541 +0,0 @@
1
- nvidia-cuda-cupti-cu12==12.4.127
2
- nvidia-cuda-nvrtc-cu12==12.4.127
3
- pyg-lib==0.4.0+pt20cu117
4
- biopython==1.85
5
- iniconfig==2.0.0
6
- tokenizers==0.20.0
7
- accelerate==1.3.0
8
- torch==2.6.0
9
- nvidia-nccl-cu12==2.21.5
10
- transformers==4.45.2
11
- nvidia-cusparse-cu12==12.3.1.170
12
- torch-scatter==2.1.2+pt20cu117
13
- nvidia-cusparselt-cu12==0.6.2
14
- nvidia-nvtx-cu12==12.4.127
15
- zstd==1.5.6.6
16
- fair-esm==2.0.0
17
- omegaconf==2.3.0
18
- pluggy==1.5.0
19
- pytest==8.3.5
20
- nvidia-curand-cu12==10.3.5.147
21
- nvidia-cufft-cu12==11.2.1.3
22
- torch-cluster==1.6.3+pt20cu117
23
- regex==2024.9.11
24
- nvidia-cudnn-cu12==9.1.0.70
25
- torch-spline-conv==1.2.2+pt20cu117
26
- nvidia-cusolver-cu12==11.6.1.9
27
- antlr4-python3-runtime==4.9.3
28
- msgpack-numpy==0.4.8
29
- nlp==0.2.0
30
- einops==0.8.1
31
- nvidia-cublas-cu12==12.4.5.8
32
- triton==3.2.0
33
- ninja==1.11.1.3
34
- hydra-core==1.3.2
35
- nvidia-nvjitlink-cu12==12.4.127
36
- biotite==0.41.2
37
- torch-sparse==0.6.18+pt20cu117
38
- esm==3.1.4
39
- sympy==1.13.1
40
- nvidia-cuda-runtime-cu12==12.4.127
41
- jupyter-lsp==2.2.5
42
- jupyter-events==0.10.0
43
- ipykernel==6.29.5
44
- Mako==1.3.5
45
- proto-plus==1.25.0
46
- fst-pso==1.8.1
47
- gensim==4.3.3
48
- htmlmin==0.1.12
49
- tokenizers==0.13.3
50
- timm==1.0.11
51
- MarkupSafe==3.0.2
52
- safetensors==0.4.5
53
- requests==2.32.3
54
- gast==0.5.5
55
- cuml==24.12.0a33
56
- jaxlib==0.4.23.dev20240214
57
- spacy-loggers==1.0.5
58
- pytz==2024.1
59
- idna==3.10
60
- python-dateutil==2.9.0
61
- mdurl==0.1.2
62
- blis==0.7.10
63
- jupyter==1.1.1
64
- pyerfa==2.0.1.5
65
- comm==0.2.2
66
- pygraphviz==1.14
67
- dill==0.3.8
68
- paramiko==3.5.0
69
- llama-index==0.8.36
70
- mdit-py-plugins==0.4.2
71
- Werkzeug==3.1.3
72
- pyu2f==0.1.5
73
- dask-glm==0.2.0
74
- httpx==0.27.2
75
- typeguard==4.4.1
76
- mypy-extensions==1.0.0
77
- kmodes==0.12.2
78
- keras==2.15.0
79
- ydata-profiling==0.0.dev0
80
- regex==2024.11.6
81
- xarray==2024.11.0
82
- setuptools==75.3.0
83
- charset-normalizer==3.4.0
84
- jupyterlab_nvdashboard==0.11.0
85
- pylibraft==24.12.0a36
86
- spacy==3.7.6
87
- mlflow-skinny==2.17.2
88
- nvtx==0.2.10
89
- multimethod==1.12
90
- pexpect==4.9.0
91
- torch==2.1.0.post301
92
- flatbuffers==24.3.25
93
- python-json-logger==2.0.7
94
- PyJWT==2.9.0
95
- multiprocess==0.70.16
96
- colorlover==0.3.0
97
- yarl==1.16.0
98
- locket==1.0.0
99
- patsy==1.0.0
100
- rapids-dask-dependency==24.12.0a0
101
- stanza==1.9.2
102
- debugpy==1.8.8
103
- jupyterlab_pygments==0.3.0
104
- pylibcudf==24.12.0a337
105
- lz4==4.3.3
106
- pandas==2.2.3
107
- tifffile==2024.9.20
108
- pynvml==11.4.1
109
- cufflinks==0.17.3
110
- ipywidgets==8.1.5
111
- requests-oauthlib==2.0.0
112
- google-auth-oauthlib==1.2.1
113
- rsa==4.9
114
- webcolors==24.8.0
115
- jsonschema-specifications==2024.10.1
116
- scikit-learn==1.5.2
117
- langchain-text-splitters==0.3.2
118
- pandas-datareader==0.10.0
119
- tomli==2.0.2
120
- tzdata==2024.2
121
- scikit-image==0.24.0
122
- tensorboard_data_server==0.7.0
123
- kiwisolver==1.4.7
124
- cloudpathlib==0.20.0
125
- isodate==0.6.1
126
- adversarial-robustness-toolbox==1.19.1
127
- SQLAlchemy==2.0.36
128
- pytest-runner==6.0.0
129
- pycairo==1.27.0
130
- treelite==4.3.0
131
- jiter==0.7.0
132
- threadpoolctl==3.5.0
133
- pandocfilters==1.5.0
134
- loguru==0.7.2
135
- smart_open==7.0.5
136
- shellingham==1.5.4
137
- deepspeed==0.15.4
138
- prompt_toolkit==3.0.48
139
- databricks-sdk==0.34.0
140
- langchain-core==0.3.15
141
- imageio==2.36.0
142
- openapi-schema-pydantic==1.2.4
143
- zict==3.0.0
144
- cachetools==5.5.0
145
- colorful==0.5.6
146
- mpmath==1.3.0
147
- nest_asyncio==1.6.0
148
- pyFUME==0.2.25
149
- opencv-python-headless==4.9.0
150
- fastai==2.7.18
151
- importlib_resources==6.4.5
152
- binaryornot==0.4.4
153
- evaluate==0.4.1
154
- matplotlib-inline==0.1.7
155
- wasabi==1.1.2
156
- pycparser==2.22
157
- GitPython==3.1.43
158
- pluggy==1.5.0
159
- async-lru==2.0.4
160
- pgmpy==0.1.24
161
- anyio==4.4.0
162
- executing==2.1.0
163
- orjson==3.10.11
164
- humanfriendly==10.0
165
- tornado==6.4.1
166
- gmpy2==2.1.5
167
- rlPyCairo==0.2.0
168
- distributed==2024.11.0
169
- FuzzyTM==2.0.5
170
- torchtext==0.15.2a0+5ce3163
171
- pytest==8.3.5
172
- pyod==2.0.2
173
- ImageHash==4.3.1
174
- soupsieve==2.5
175
- tblib==3.0.0
176
- emoji==2.14.0
177
- aiohappyeyeballs==2.4.3
178
- uri-template==1.3.0
179
- tensorflow_estimator==2.15.0
180
- babel==2.16.0
181
- dask-cuda==24.12.0a12
182
- overrides==7.7.0
183
- opencensus==0.11.3
184
- openai==0.28.1
185
- language_data==1.2.0
186
- jedi==0.19.2
187
- cookiecutter==2.6.0
188
- entrypoints==0.4
189
- exceptiongroup==1.2.2
190
- marisa-trie==1.2.0
191
- uvloop==0.20.0
192
- aiosignal==1.3.1
193
- Flask==3.0.3
194
- tensorboard==2.15.2
195
- cffi==1.17.1
196
- tf_keras==2.15.0
197
- absl-py==2.1.0
198
- blinker==1.9.0
199
- types-python-dateutil==2.9.0.20241003
200
- opencv-python==4.9.0
201
- frozendict==2.4.6
202
- aiohttp-cors==0.7.0
203
- statsmodels==0.14.4
204
- tinycss2==1.4.0
205
- terminado==0.18.1
206
- pycaret==2.2.3
207
- aiohttp==3.10.10
208
- distributed-ucxx==0.41.0
209
- prometheus_client==0.21.0
210
- fastdownload==0.0.7
211
- grpcio==1.59.3
212
- google-api-core==2.22.0
213
- jupyterlab_widgets==3.0.13
214
- appdirs==1.4.4
215
- littleutils==0.0.0
216
- ray==2.24.0
217
- kaggle==1.6.17
218
- jsonschema==4.23.0
219
- google-auth==2.36.0
220
- scikit-base==0.11.0
221
- visions==0.7.6
222
- pyarrow==15.0.0
223
- transformers==4.33.0
224
- prometheus_flask_exporter==0.23.1
225
- dm-tree==0.1.8
226
- colorama==0.4.6
227
- requests-toolbelt==1.0.0
228
- cached-property==1.5.2
229
- cymem==2.0.8
230
- PyNaCl==1.5.0
231
- PyWavelets==1.7.0
232
- httptools==0.6.1
233
- typing-utils==0.1.0
234
- email_validator==2.2.0
235
- marshmallow==3.23.1
236
- Deprecated==1.2.14
237
- virtualenv==20.4.7
238
- optuna==3.6.1
239
- jupyter_server==2.14.2
240
- termcolor==2.5.0
241
- mpi4py==4.0.1
242
- torchdata==0.7.1+8cea82f
243
- dataclasses==0.8
244
- cloudpickle==3.1.0
245
- tree_sitter_languages==1.10.2
246
- tabulate==0.9.0
247
- ipython==8.29.0
248
- lightgbm==4.3.0
249
- captum==0.6.0
250
- confuse==2.0.1
251
- torchvision==0.16.1+adc3221
252
- lxml==4.9.4
253
- fastapi==0.115.4
254
- python-multipart==0.0.17
255
- dnspython==2.7.0
256
- jupyter-console==6.6.3
257
- preshed==3.0.9
258
- py-cpuinfo==9.0.0
259
- Send2Trash==1.8.3
260
- murmurhash==1.0.10
261
- sniffio==1.3.1
262
- websockets==13.1
263
- h11==0.14.0
264
- smmap==5.0.0
265
- textual==0.85.2
266
- jsonpatch==1.33
267
- opencensus-context==0.1.3
268
- nbconvert==7.16.4
269
- sentry-sdk==2.19.0
270
- opentelemetry-semantic-conventions==0.37b0
271
- pandas-profiling==2.8.0
272
- pillow==10.3.0
273
- peft==0.13.2
274
- rpds-py==0.21.0
275
- bokeh==3.6.1
276
- distro==1.9.0
277
- itsdangerous==2.2.0
278
- wandb==0.18.7
279
- jsonpointer==3.0.0
280
- astropy-iers-data==0.2024.11.11.0.32.38
281
- horovod==0.28.1
282
- graphviz==0.20.3
283
- vtk==9.3.1
284
- bleach==6.2.0
285
- numexpr==2.8.7
286
- pydantic_core==2.23.4
287
- Jinja2==3.1.4
288
- widgetsnbextension==4.0.13
289
- filelock==3.16.1
290
- catboost==1.2.7
291
- raft-dask==24.12.0a36
292
- async-timeout==4.0.3
293
- datefinder==0.7.3
294
- coloredlogs==15.0.1
295
- platformdirs==4.3.6
296
- spacy-legacy==3.0.12
297
- chardet==5.2.0
298
- jupyter_client==8.6.3
299
- importlib_metadata==8.5.0
300
- rfc3986-validator==0.1.1
301
- huggingface_hub==0.26.2
302
- PySocks==1.7.1
303
- mlxtend==0.23.2
304
- outdated==0.2.2
305
- partd==1.4.2
306
- thinc==8.2.5
307
- astropy==6.1.6
308
- rdflib==6.3.2
309
- h2==4.1.0
310
- typer==0.13.0
311
- xyzservices==2024.9.0
312
- toolz==0.12.1
313
- frozenlist==1.5.0
314
- rdkit==2024.9.2
315
- pyasn1==0.6.1
316
- jupyter_server_terminals==0.5.3
317
- ucx-py==0.41.0a11
318
- astunparse==1.6.3
319
- simpful==2.12.0
320
- notebook_shim==0.2.4
321
- scipy==1.13.1
322
- colorlog==6.9.0
323
- tiktoken==0.3.3
324
- plotly==5.24.1
325
- fastrlock==0.8.2
326
- chart-studio==1.1.0
327
- stack-data==0.6.2
328
- google-pasta==0.2.0
329
- sktime==0.34.0
330
- PyYAML==6.0.2
331
- sympy==1.13.3
332
- multidict==6.1.0
333
- ml-dtypes==0.2.0
334
- tensorboardX==2.6.2.2
335
- decorator==5.1.1
336
- cytoolz==1.0.0
337
- ase==3.23.0
338
- isoduration==20.11.0
339
- html5lib==1.1
340
- langsmith==0.1.142
341
- future==1.0.0
342
- onnx2torch==1.5.15
343
- multipledispatch==0.6.0
344
- protobuf==4.24.4
345
- ucxx==0.41.0
346
- pandas_flavor==0.6.0
347
- msgpack==1.1.0
348
- pyasn1_modules==0.4.1
349
- imagecodecs==2024.1.1
350
- mlflow==2.17.2
351
- watchfiles==0.24.0
352
- dm-sonnet==2.0.2
353
- langcodes==3.4.1
354
- freetype-py==2.3.0
355
- argon2-cffi-bindings==21.2.0
356
- trimesh==4.5.2
357
- opt_einsum==3.4.0
358
- tenacity==8.5.0
359
- h5py==3.12.1
360
- fastapi-cli==0.0.5
361
- oauthlib==3.2.2
362
- parso==0.8.4
363
- weasel==0.4.1
364
- yfinance==0.2.49
365
- networkx==2.8.8
366
- bitsandbytes==0.44.1
367
- lazy_loader==0.4
368
- querystring_parser==1.2.4
369
- contourpy==1.3.0
370
- unicodedata2==15.1.0
371
- bcrypt==4.2.0
372
- munkres==1.1.4
373
- langchain==0.0.298
374
- hpack==4.0.0
375
- cryptography==43.0.3
376
- umap-learn==0.5.7
377
- arrow==1.3.0
378
- docker==7.1.0
379
- certifi==2025.1.31
380
- fastjsonschema==2.20.0
381
- tensorflow==2.15.0
382
- googleapis-common-protos==1.65.0
383
- iniconfig==2.0.0
384
- Markdown==3.6
385
- llvmlite==0.43.0
386
- wslink==2.3.2
387
- attrs==24.2.0
388
- rich==13.9.4
389
- cupy==13.3.0
390
- uc-micro-py==1.0.3
391
- alembic==1.14.0
392
- joblib==1.4.2
393
- reportlab==4.2.5
394
- miniful==0.0.6
395
- jupyter_core==5.7.2
396
- wheel==0.45.0
397
- phik==0.12.3
398
- mistune==3.0.2
399
- wcwidth==0.2.13
400
- dacite==1.8.1
401
- accelerate==0.22.0
402
- sacremoses==0.0.53
403
- revtok==0.0.3
404
- python-slugify==8.0.4
405
- tangled-up-in-unicode==0.2.0
406
- dask==2024.11.0
407
- markdown-it-py==3.0.0
408
- sentencepiece==0.1.99
409
- beautifulsoup4==4.12.3
410
- six==1.16.0
411
- numba-cuda==0.0.17
412
- argon2-cffi==23.1.0
413
- xxhash==3.5.0
414
- hjson==3.1.0
415
- fonttools==4.54.1
416
- graphql-core==3.2.5
417
- pyparsing==3.2.0
418
- pure_eval==0.2.3
419
- distlib==0.3.9
420
- lightning==2.4.0
421
- wordcloud==0.0.0
422
- catalogue==2.0.10
423
- jax==0.4.27
424
- tree-sitter==0.23.2
425
- notebook==7.2.2
426
- dataclasses-json==0.6.7
427
- propcache==0.2.0
428
- numba==0.60.0
429
- dask-expr==1.1.17
430
- pydantic==2.9.2
431
- gunicorn==22.0.0
432
- missingno==0.5.2
433
- pyOpenSSL==24.2.1
434
- openpyxl==3.1.5
435
- packaging==24.1
436
- python-dotenv==1.0.1
437
- cycler==0.12.1
438
- types-pytz==2024.2.0.20241003
439
- yellowbrick==1.5
440
- referencing==0.35.1
441
- pyLDAvis==3.4.1
442
- lazypredict==0.2.16
443
- fqdn==1.5.1
444
- websocket-client==1.8.0
445
- fastcore==1.7.19
446
- pynvjitlink-cu12==0.3.0
447
- pingouin==0.5.5
448
- numpy==1.26.4
449
- typing-inspect==0.9.0
450
- nltk==3.9.1
451
- onnxruntime==1.19.2
452
- tensorflow-probability==0.23.0
453
- datasets==3.0.2
454
- pickleshare==0.7.5
455
- peewee==3.17.7
456
- torch-geometric==2.6.1
457
- ptyprocess==0.7.0
458
- greenlet==3.1.1
459
- graphql-relay==3.2.0
460
- graphene==3.4.3
461
- et_xmlfile==2.0.0
462
- webencodings==0.5.1
463
- hyperframe==6.0.1
464
- multitasking==0.0.9
465
- typer-slim==0.13.0
466
- onnx==1.15.0
467
- uvicorn==0.32.0
468
- memray==1.13.4
469
- xgboost==2.1.2
470
- Brotli==1.1.0
471
- zipp==3.21.0
472
- nbformat==5.10.4
473
- responses==0.18.0
474
- funcy==2.0
475
- Pygments==2.18.0
476
- tqdm==4.67.0
477
- linkify-it-py==2.0.3
478
- srsly==2.4.8
479
- cuda-python==12.6.0
480
- lightning-utilities==0.11.8
481
- cudf==24.12.0a337
482
- dask-ml==2024.4.4
483
- docker-pycreds==0.4.0
484
- pkgutil_resolve_name==1.3.10
485
- opentelemetry-api==1.16.0
486
- fsspec==2024.9.0
487
- nbclient==0.10.0
488
- psutil==5.9.8
489
- pytorch-lightning==2.4.0
490
- sortedcontainers==2.4.0
491
- matplotlib==3.9.2
492
- defusedxml==0.7.1
493
- urllib3==1.26.19
494
- jupyterlab_server==2.27.3
495
- retrying==1.3.3
496
- dask-cudf==24.12.0a337
497
- sqlparse==0.5.1
498
- text-unidecode==1.3
499
- seaborn==0.13.2
500
- typing_extensions==4.12.2
501
- pyzmq==26.2.0
502
- rfc3339-validator==0.1.4
503
- pynndescent==0.5.13
504
- pip==24.3.1
505
- confection==0.1.4
506
- wrapt==1.14.1
507
- fastprogress==1.0.3
508
- traitlets==5.14.3
509
- asttokens==2.4.1
510
- json5==0.9.28
511
- pandas-stubs==2.2.3.241126
512
- torchmetrics==1.2.1
513
- gitdb==4.0.11
514
- annotated-types==0.7.0
515
- ipython-autotime==0.1
516
- httpcore==1.0.6
517
- click==8.1.7
518
- setproctitle==1.3.3
519
- starlette==0.41.2
520
- jupyterlab==4.2.5
521
- rmm==24.12.0a27
522
- opentelemetry-sdk==1.16.0
523
- textblob==0.15.3
524
- imbalanced-learn==0.12.4
525
- typeguard==4.3.0
526
- more-itertools==10.3.0
527
- zipp==3.19.2
528
- autocommand==2.2.2
529
- jaraco.context==5.3.0
530
- packaging==24.1
531
- importlib_metadata==8.0.0
532
- platformdirs==4.2.2
533
- jaraco.functools==4.0.1
534
- importlib_resources==6.4.0
535
- tomli==2.0.1
536
- jaraco.text==3.12.1
537
- wheel==0.43.0
538
- jaraco.collections==5.1.0
539
- typing_extensions==4.12.2
540
- inflect==7.3.1
541
- backports.tarfile==1.2.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250504_162343-cp870jym/files/wandb-metadata.json DELETED
@@ -1,77 +0,0 @@
1
- {
2
- "os": "Linux-5.14.0-427.13.1.el9_4.x86_64-x86_64-with-glibc2.34",
3
- "python": "3.10.15",
4
- "startedAt": "2025-05-04T13:23:43.746737Z",
5
- "program": "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py",
6
- "codePath": "finetuning_bc_prott5.py",
7
- "email": "zeynep.isik1@sabanciuniv.edu",
8
- "root": "/arf/scratch/zisik/prott5_bc_ft",
9
- "host": "kolyoz1",
10
- "username": "zisik",
11
- "executable": "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/bin/python3",
12
- "codePathLocal": "finetuning_bc_prott5.py",
13
- "cpu_count": 64,
14
- "cpu_count_logical": 64,
15
- "gpu": "NVIDIA H100 80GB HBM3",
16
- "gpu_count": 1,
17
- "disk": {
18
- "/": {
19
- "total": "7643995308032",
20
- "used": "274884100096"
21
- }
22
- },
23
- "memory": {
24
- "total": "1081373220864"
25
- },
26
- "cpu": {
27
- "count": 64,
28
- "countLogical": 64
29
- },
30
- "gpu_nvidia": [
31
- {
32
- "name": "NVIDIA H100 80GB HBM3",
33
- "memoryTotal": "85520809984",
34
- "cudaCores": 16896,
35
- "architecture": "Hopper"
36
- }
37
- ],
38
- "slurm": {
39
- "cluster_name": "cuda",
40
- "conf": "/etc/slurm/slurm.conf",
41
- "cpus_on_node": "16",
42
- "cpus_per_task": "16",
43
- "gpus_on_node": "1",
44
- "gtids": "0",
45
- "job_account": "tbag154",
46
- "job_cpus_per_node": "16",
47
- "job_end_time": "1746624198",
48
- "job_gid": "11636",
49
- "job_gpus": "1",
50
- "job_id": "1027950",
51
- "job_name": "msa_ph_pt",
52
- "job_nodelist": "kolyoz1",
53
- "job_num_nodes": "1",
54
- "job_partition": "kolyoz-cuda",
55
- "job_qos": "tbag",
56
- "job_start_time": "1746364998",
57
- "job_uid": "11636",
58
- "job_user": "zisik",
59
- "jobid": "1027950",
60
- "localid": "0",
61
- "mem_per_cpu": "14000",
62
- "nnodes": "1",
63
- "node_aliases": "(null)",
64
- "nodeid": "0",
65
- "nodelist": "kolyoz1",
66
- "prio_process": "0",
67
- "procid": "0",
68
- "submit_dir": "/arf/scratch/zisik",
69
- "submit_host": "cuda-ui",
70
- "task_pid": "3180708",
71
- "tasks_per_node": "1",
72
- "topology_addr": "kolyoz1",
73
- "topology_addr_pattern": "node",
74
- "working_cluster": "cuda:slurmcontroller3.ib:6800:9984:109"
75
- },
76
- "cudaVersion": "12.6"
77
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250504_162343-cp870jym/files/wandb-summary.json DELETED
@@ -1 +0,0 @@
1
- {"_step":4,"_runtime":84.708140457,"train_runtime":68.815,"eval/runtime":0.0516,"_wandb":{"runtime":84},"train_samples_per_second":3.052,"train/epoch":2.6666666666666665,"eval/loss":0.07820229977369308,"train_loss":0.34361688296000165,"total_flos":0,"_timestamp":1.7463651084544086e+09,"eval/samples_per_second":290.667,"eval/accuracy":1,"train_steps_per_second":0.087,"train/global_step":6,"eval/steps_per_second":38.756}