diff --git a/.gitattributes b/.gitattributes index a66db3e51f9ac2908391de9d50b408cd1edaa309..20119c20db2815e6e83ce38839d4835bac6e58ae 100644 --- a/.gitattributes +++ b/.gitattributes @@ -103,3 +103,10 @@ wandb/run-20241101_202058-jijqbvs1/run-jijqbvs1.wandb filter=lfs diff=lfs merge= wandb/run-20241129_083813-gsvlu1z8/run-gsvlu1z8.wandb filter=lfs diff=lfs merge=lfs -text wandb/run-20241030_231835-o1t74f3e/run-o1t74f3e.wandb filter=lfs diff=lfs merge=lfs -text wandb/run-20241030_233740-98qje3cr/run-98qje3cr.wandb filter=lfs diff=lfs merge=lfs -text +wandb/run-20241106_234348-zan8h57j/run-zan8h57j.wandb filter=lfs diff=lfs merge=lfs -text +wandb/run-20241105_163248-rhhc1g6i/run-rhhc1g6i.wandb filter=lfs diff=lfs merge=lfs -text +wandb/run-20241105_163029-2rkpz70q/run-2rkpz70q.wandb filter=lfs diff=lfs merge=lfs -text +wandb/run-20241105_163248-thalxhcd/run-thalxhcd.wandb filter=lfs diff=lfs merge=lfs -text +wandb/run-20241031_002020-q6ot1vz6/run-q6ot1vz6.wandb filter=lfs diff=lfs merge=lfs -text +wandb/run-20241106_234348-l3eig11b/run-l3eig11b.wandb filter=lfs diff=lfs merge=lfs -text +wandb/run-20241031_122114-2k9672ya/run-2k9672ya.wandb filter=lfs diff=lfs merge=lfs -text diff --git a/wandb/run-20241030_010305-y2ohxj86/logs/debug-internal.log b/wandb/run-20241030_010305-y2ohxj86/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..2f300e806d47492b091185b4206c57ef6db0004b --- /dev/null +++ b/wandb/run-20241030_010305-y2ohxj86/logs/debug-internal.log @@ -0,0 +1,16 @@ +{"time":"2024-10-30T01:03:05.8822364-04:00","level":"INFO","msg":"using version","core version":"0.18.5"} +{"time":"2024-10-30T01:03:05.88225013-04:00","level":"INFO","msg":"created symlink","path":"/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/run-20241030_010305-y2ohxj86/logs/debug-core.log"} +{"time":"2024-10-30T01:03:05.989239396-04:00","level":"INFO","msg":"created new stream","id":"y2ohxj86"} +{"time":"2024-10-30T01:03:05.989281006-04:00","level":"INFO","msg":"stream: started","id":"y2ohxj86"} +{"time":"2024-10-30T01:03:05.989342226-04:00","level":"INFO","msg":"sender: started","stream_id":"y2ohxj86"} +{"time":"2024-10-30T01:03:05.989341646-04:00","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"y2ohxj86"}} +{"time":"2024-10-30T01:03:05.989369606-04:00","level":"INFO","msg":"handler: started","stream_id":{"value":"y2ohxj86"}} +{"time":"2024-10-30T01:03:06.155395708-04:00","level":"INFO","msg":"Starting system monitor"} +{"time":"2024-10-30T01:03:06.261798859-04:00","level":"INFO","msg":"stream: closing","id":"y2ohxj86"} +{"time":"2024-10-30T01:03:06.26182351-04:00","level":"INFO","msg":"Stopping system monitor"} +{"time":"2024-10-30T01:03:06.262278953-04:00","level":"INFO","msg":"Stopped system monitor"} +{"time":"2024-10-30T01:03:07.554925721-04:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2024-10-30T01:03:07.677085611-04:00","level":"INFO","msg":"handler: closed","stream_id":{"value":"y2ohxj86"}} +{"time":"2024-10-30T01:03:07.677137671-04:00","level":"INFO","msg":"writer: Close: closed","stream_id":{"value":"y2ohxj86"}} +{"time":"2024-10-30T01:03:07.677156552-04:00","level":"INFO","msg":"sender: closed","stream_id":"y2ohxj86"} +{"time":"2024-10-30T01:03:07.677223022-04:00","level":"INFO","msg":"stream: closed","id":"y2ohxj86"} diff --git a/wandb/run-20241030_010641-fp8gbo2l/files/config.yaml b/wandb/run-20241030_010641-fp8gbo2l/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f8546482f67cbb9041af9d756594322c485d67a9 --- /dev/null +++ b/wandb/run-20241030_010641-fp8gbo2l/files/config.yaml @@ -0,0 +1,47 @@ +_wandb: + value: + cli_version: 0.18.5 + m: [] + python_version: 3.9.19 + t: + "1": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 55 + - 71 + - 98 + "2": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 55 + - 71 + - 98 + "3": + - 13 + - 23 + - 55 + "4": 3.9.19 + "5": 0.18.5 + "6": 4.45.1 + "8": + - 5 + "12": 0.18.5 + "13": linux-x86_64 +batch_size: + value: 3 +epoch: + value: 7 +perturbation: + value: reverse_control +seed: + value: 0 +train_set: + value: 10M diff --git a/wandb/run-20241030_010641-fp8gbo2l/files/output.log b/wandb/run-20241030_010641-fp8gbo2l/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..1777f063b107f651dddc063d1d3a3fa80015bf9e --- /dev/null +++ b/wandb/run-20241030_010641-fp8gbo2l/files/output.log @@ -0,0 +1,4 @@ +Traceback (most recent call last): + File "/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py", line 162, in + dataset_name = f"babylm_{args.perturbation}_{args.train_zset}_seed{args.seed}" +AttributeError: 'Namespace' object has no attribute 'train_zset' diff --git a/wandb/run-20241030_010641-fp8gbo2l/files/wandb-metadata.json b/wandb/run-20241030_010641-fp8gbo2l/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..6cdb4e7f753298f0342103bac3b78f7d621c2163 --- /dev/null +++ b/wandb/run-20241030_010641-fp8gbo2l/files/wandb-metadata.json @@ -0,0 +1,29 @@ +{ + "os": "Linux-5.4.0-162-generic-x86_64-with-glibc2.31", + "python": "3.9.19", + "startedAt": "2024-10-30T05:06:41.433074Z", + "args": [ + "--perturbation", + "reverse_control", + "--train_set", + "10M", + "--batch_size", + "3", + "--epoch", + "7", + "--seed", + "0" + ], + "program": "/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py", + "codePath": "train/train_deep_wandb.py", + "git": { + "remote": "git@hf.co:Yaning1001/Impossible_llm.git", + "commit": "ed716cdcfcdea02b67f7ed0f3504c2b1c8b737c4" + }, + "email": "yaning1001@gmail.com", + "root": "/mnt/ssd3/chunhui/yaning/project/impossible_llm/train", + "host": "mms-large-2", + "username": "chunhui", + "executable": "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/bin/python", + "codePathLocal": "train_deep_wandb.py" +} \ No newline at end of file diff --git a/wandb/run-20241030_010641-fp8gbo2l/files/wandb-summary.json b/wandb/run-20241030_010641-fp8gbo2l/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..4e355fc8e9915c58fba97556eba40fd65c826d6a --- /dev/null +++ b/wandb/run-20241030_010641-fp8gbo2l/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb":{"runtime":1}} \ No newline at end of file diff --git a/wandb/run-20241030_010641-fp8gbo2l/logs/debug-internal.log b/wandb/run-20241030_010641-fp8gbo2l/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..1e6333daf50a6c98ba96f7a5351439afe85ac38d --- /dev/null +++ b/wandb/run-20241030_010641-fp8gbo2l/logs/debug-internal.log @@ -0,0 +1,16 @@ +{"time":"2024-10-30T01:06:41.435404628-04:00","level":"INFO","msg":"using version","core version":"0.18.5"} +{"time":"2024-10-30T01:06:41.435416958-04:00","level":"INFO","msg":"created symlink","path":"/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/run-20241030_010641-fp8gbo2l/logs/debug-core.log"} +{"time":"2024-10-30T01:06:41.543037882-04:00","level":"INFO","msg":"created new stream","id":"fp8gbo2l"} +{"time":"2024-10-30T01:06:41.543083813-04:00","level":"INFO","msg":"stream: started","id":"fp8gbo2l"} +{"time":"2024-10-30T01:06:41.543120933-04:00","level":"INFO","msg":"sender: started","stream_id":"fp8gbo2l"} +{"time":"2024-10-30T01:06:41.543122113-04:00","level":"INFO","msg":"handler: started","stream_id":{"value":"fp8gbo2l"}} +{"time":"2024-10-30T01:06:41.543101113-04:00","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"fp8gbo2l"}} +{"time":"2024-10-30T01:06:43.031925671-04:00","level":"INFO","msg":"Starting system monitor"} +{"time":"2024-10-30T01:06:43.129140054-04:00","level":"INFO","msg":"stream: closing","id":"fp8gbo2l"} +{"time":"2024-10-30T01:06:43.129196235-04:00","level":"INFO","msg":"Stopping system monitor"} +{"time":"2024-10-30T01:06:43.222777423-04:00","level":"INFO","msg":"Stopped system monitor"} +{"time":"2024-10-30T01:06:43.624872197-04:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2024-10-30T01:06:43.737064212-04:00","level":"INFO","msg":"handler: closed","stream_id":{"value":"fp8gbo2l"}} +{"time":"2024-10-30T01:06:43.737102682-04:00","level":"INFO","msg":"writer: Close: closed","stream_id":{"value":"fp8gbo2l"}} +{"time":"2024-10-30T01:06:43.737133182-04:00","level":"INFO","msg":"sender: closed","stream_id":"fp8gbo2l"} +{"time":"2024-10-30T01:06:43.737155973-04:00","level":"INFO","msg":"stream: closed","id":"fp8gbo2l"} diff --git a/wandb/run-20241030_010641-fp8gbo2l/logs/debug.log b/wandb/run-20241030_010641-fp8gbo2l/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..c1ca9ceb9880a84ebcad94b263565c8c8916af28 --- /dev/null +++ b/wandb/run-20241030_010641-fp8gbo2l/logs/debug.log @@ -0,0 +1,27 @@ +2024-10-30 01:06:41,431 INFO MainThread:321594 [wandb_setup.py:_flush():79] Current SDK version is 0.18.5 +2024-10-30 01:06:41,431 INFO MainThread:321594 [wandb_setup.py:_flush():79] Configure stats pid to 321594 +2024-10-30 01:06:41,431 INFO MainThread:321594 [wandb_setup.py:_flush():79] Loading settings from /home/chunhui/.config/wandb/settings +2024-10-30 01:06:41,431 INFO MainThread:321594 [wandb_setup.py:_flush():79] Loading settings from /mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/settings +2024-10-30 01:06:41,431 INFO MainThread:321594 [wandb_setup.py:_flush():79] Loading settings from environment variables: {} +2024-10-30 01:06:41,431 INFO MainThread:321594 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': None, '_disable_service': None} +2024-10-30 01:06:41,431 INFO MainThread:321594 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'train/train_deep_wandb.py', 'program_abspath': '/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py', 'program': '/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py'} +2024-10-30 01:06:41,431 INFO MainThread:321594 [wandb_setup.py:_flush():79] Applying login settings: {} +2024-10-30 01:06:41,431 INFO MainThread:321594 [wandb_init.py:_log_setup():534] Logging user logs to /mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/run-20241030_010641-fp8gbo2l/logs/debug.log +2024-10-30 01:06:41,431 INFO MainThread:321594 [wandb_init.py:_log_setup():535] Logging internal logs to /mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/run-20241030_010641-fp8gbo2l/logs/debug-internal.log +2024-10-30 01:06:41,431 INFO MainThread:321594 [wandb_init.py:init():621] calling init triggers +2024-10-30 01:06:41,431 INFO MainThread:321594 [wandb_init.py:init():628] wandb.init called with sweep_config: {} +config: {} +2024-10-30 01:06:41,431 INFO MainThread:321594 [wandb_init.py:init():671] starting backend +2024-10-30 01:06:41,431 INFO MainThread:321594 [wandb_init.py:init():675] sending inform_init request +2024-10-30 01:06:41,432 INFO MainThread:321594 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2024-10-30 01:06:41,432 INFO MainThread:321594 [wandb_init.py:init():688] backend started and connected +2024-10-30 01:06:41,436 INFO MainThread:321594 [wandb_init.py:init():783] updated telemetry +2024-10-30 01:06:41,456 INFO MainThread:321594 [wandb_init.py:init():816] communicating run to backend with 90.0 second timeout +2024-10-30 01:06:43,029 INFO MainThread:321594 [wandb_init.py:init():867] starting run threads in backend +2024-10-30 01:06:43,125 INFO MainThread:321594 [wandb_run.py:_console_start():2463] atexit reg +2024-10-30 01:06:43,125 INFO MainThread:321594 [wandb_run.py:_redirect():2311] redirect: wrap_raw +2024-10-30 01:06:43,125 INFO MainThread:321594 [wandb_run.py:_redirect():2376] Wrapping output streams. +2024-10-30 01:06:43,125 INFO MainThread:321594 [wandb_run.py:_redirect():2401] Redirects installed. +2024-10-30 01:06:43,128 INFO MainThread:321594 [wandb_init.py:init():911] run started, returning control to user process +2024-10-30 01:06:43,128 INFO MainThread:321594 [wandb_run.py:_config_callback():1390] config_cb None None {'perturbation': 'reverse_control', 'train_set': '10M', 'batch_size': 3, 'epoch': 7, 'seed': 0} +2024-10-30 01:06:43,129 WARNING MsgRouterThr:321594 [router.py:message_loop():77] message_loop has been closed diff --git a/wandb/run-20241030_010641-fp8gbo2l/run-fp8gbo2l.wandb b/wandb/run-20241030_010641-fp8gbo2l/run-fp8gbo2l.wandb new file mode 100644 index 0000000000000000000000000000000000000000..a4619ee5046e45cd223b2c97236f4ee1bb3be9bc Binary files /dev/null and b/wandb/run-20241030_010641-fp8gbo2l/run-fp8gbo2l.wandb differ diff --git a/wandb/run-20241030_011509-zmlu7388/files/output.log b/wandb/run-20241030_011509-zmlu7388/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..92f7313e35fea966eab2b8648443f957b4616099 --- /dev/null +++ b/wandb/run-20241030_011509-zmlu7388/files/output.log @@ -0,0 +1,15 @@ +Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:04<00:00, 2.33s/it] +Map: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 17519/17519 [00:52<00:00, 336.12 examples/s] +Map: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 18140/18140 [00:53<00:00, 341.91 examples/s] +tokenized_valid: Dataset({ + features: ['input_ids', 'attention_mask'], + num_rows: 600 +}) +/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead + warnings.warn( +[2024-10-30 01:17:01,020] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2024-10-30 01:17:07,829] [INFO] [comm.py:652:init_distributed] cdb=None +Installed CUDA version 11.8 does not match the version torch was compiled with 11.7 but since the APIs are compatible, accepting this combination +Using /home/chunhui/.cache/torch_extensions/py39_cu117 as PyTorch extensions root... +Loading extension module cpu_adam... +Time to load cpu_adam op: 4.184397220611572 seconds diff --git a/wandb/run-20241030_011509-zmlu7388/files/requirements.txt b/wandb/run-20241030_011509-zmlu7388/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..95a931302e269cc9e4fa5b719b6511f176ee2416 --- /dev/null +++ b/wandb/run-20241030_011509-zmlu7388/files/requirements.txt @@ -0,0 +1,147 @@ +funcsigs==1.0.2 +sentry-sdk==2.17.0 +multiprocess==0.70.16 +numpy==1.26.2 +pluralizer==1.2.0 +debugpy==1.6.7 +nvidia-cudnn-cu11==8.5.0.96 +deepspeed==0.15.2 +data==0.4 +pandas==2.1.3 +tomli==2.0.1 +charset-normalizer==3.3.2 +attrs==24.2.0 +aiosignal==1.3.1 +fsspec==2023.10.0 +nvidia-cusparse-cu11==11.7.4.91 +zipp==3.12.0 +mypy-extensions==1.0.0 +datasets==3.0.1 +joblib==1.3.2 +hjson==3.1.0 +traitlets==5.7.1 +stack-data==0.6.0 +transformers==4.45.1 +sympy==1.11.1 +Pygments==2.15.0 +docker-pycreds==0.4.0 +dill==0.3.8 +wheel==0.44.0 +prompt-toolkit==3.0.30 +parso==0.8.3 +ipykernel==6.23.1 +pyarrow==17.0.0 +certifi==2023.11.17 +nvidia-cufft-cu11==10.9.0.58 +six==1.16.0 +pydantic==2.9.2 +click==8.1.7 +nest-asyncio==1.5.6 +gmpy2==2.1.0 +matplotlib==3.8.2 +scipy==1.11.4 +typing_extensions==4.12.2 +statsmodels==0.14.0 +huggingface-hub==0.25.0 +frozenlist==1.4.1 +gpustat==1.1.1 +nvidia-nvtx-cu11==11.7.91 +safetensors==0.4.5 +stanza==1.9.2 +decorator==5.1.1 +seaborn==0.13.0 +sentencepiece==0.2.0 +PyYAML==6.0.1 +black==24.8.0 +protobuf==4.25.1 +pickleshare==0.7.5 +peft==0.13.0 +triton==2.0.0 +nvidia-cuda-runtime-cu11==11.7.99 +Jinja2==3.1.2 +nvidia-cusolver-cu11==11.4.0.1 +executing==1.2.0 +jupyter_client==8.1.0 +pluggy==1.3.0 +cmake==3.30.3 +pytz==2023.3.post1 +aiohappyeyeballs==2.4.2 +kiwisolver==1.4.5 +py-cpuinfo==9.0.0 +Pillow==10.1.0 +ptyprocess==0.7.0 +importlib_resources==6.4.5 +GitPython==3.1.43 +importlib-metadata==6.0.0 +iniconfig==2.0.0 +scikit-learn==1.3.2 +exceptiongroup==1.1.0 +networkx==2.8.6 +accelerate==1.0.0 +nltk==3.8.1 +shutilwhich==1.1.0 +fonttools==4.45.1 +future==0.18.3 +aiohttp==3.10.6 +wcwidth==0.2.5 +idna==3.6 +filelock==3.12.2 +pathspec==0.12.1 +jupyter_core==5.1.0 +lit==18.1.8 +nvidia-curand-cu11==10.2.10.91 +nvidia-cublas-cu11==11.10.3.66 +nvidia-ml-py==12.560.30 +msgpack==1.1.0 +python-dateutil==2.8.2 +blessed==1.20.0 +packaging==23.0 +gitdb==4.0.11 +yarl==1.13.0 +emoji==2.8.0 +tzdata==2023.3 +cycler==0.12.1 +tornado==6.2 +backcall==0.2.0 +plotnine==0.12.4 +ninja==1.11.1.1 +latex==0.7.0 +wandb==0.18.5 +setproctitle==1.3.3 +threadpoolctl==3.2.0 +requests==2.32.3 +pyparsing==3.1.1 +smmap==5.0.1 +pyzmq==23.0.0 +async-timeout==4.0.3 +annotated-types==0.7.0 +matplotlib-inline==0.1.6 +latexcodec==1.0.0 +ipython==8.0.0 +patsy==0.5.3 +contourpy==1.2.0 +multidict==6.1.0 +mizani==0.9.3 +urllib3==2.1.0 +tokenizers==0.20.0 +MarkupSafe==2.1.2 +pip==24.2 +pexpect==4.8.0 +tqdm==4.66.5 +jedi==0.18.2 +pydantic_core==2.23.4 +tempdir==0.7.1 +mpmath==1.2.1 +setuptools==72.1.0 +pytest==7.4.3 +pure-eval==0.2.2 +psutil==5.9.1 +comm==0.1.2 +nvidia-cuda-cupti-cu11==11.7.101 +nvidia-cuda-nvrtc-cu11==11.7.99 +regex==2023.10.3 +platformdirs==2.5.2 +asttokens==2.2.1 +torch==2.0.0 +nvidia-nccl-cu11==2.14.3 +xxhash==3.5.0 diff --git a/wandb/run-20241030_011509-zmlu7388/files/wandb-metadata.json b/wandb/run-20241030_011509-zmlu7388/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..7aac7a9bef34b9dd4fd24509cc451368c402d6be --- /dev/null +++ b/wandb/run-20241030_011509-zmlu7388/files/wandb-metadata.json @@ -0,0 +1,97 @@ +{ + "os": "Linux-5.4.0-162-generic-x86_64-with-glibc2.31", + "python": "3.9.19", + "startedAt": "2024-10-30T05:15:09.352977Z", + "args": [ + "--perturbation", + "reverse_control", + "--train_set", + "10M", + "--batch_size", + "3", + "--epoch", + "7", + "--seed", + "0" + ], + "program": "/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py", + "codePath": "train/train_deep_wandb.py", + "git": { + "remote": "git@hf.co:Yaning1001/Impossible_llm.git", + "commit": "ed716cdcfcdea02b67f7ed0f3504c2b1c8b737c4" + }, + "email": "yaning1001@gmail.com", + "root": "/mnt/ssd3/chunhui/yaning/project/impossible_llm/train", + "host": "mms-large-2", + "username": "chunhui", + "executable": "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/bin/python", + "codePathLocal": "train_deep_wandb.py", + "cpu_count": 32, + "cpu_count_logical": 64, + "gpu": "NVIDIA RTX A6000", + "gpu_count": 8, + "disk": { + "/": { + "total": "1888559353856", + "used": "1719287033856" + } + }, + "memory": { + "total": "202617098240" + }, + "cpu": { + "count": 32, + "countLogical": 64 + }, + "gpu_nvidia": [ + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + } + ], + "cudaVersion": "11.8" +} \ No newline at end of file diff --git a/wandb/run-20241030_011509-zmlu7388/logs/debug-internal.log b/wandb/run-20241030_011509-zmlu7388/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..feca05f5547153be3b1ef5e98b456e4034f9b3fb --- /dev/null +++ b/wandb/run-20241030_011509-zmlu7388/logs/debug-internal.log @@ -0,0 +1,8 @@ +{"time":"2024-10-30T01:15:09.354988919-04:00","level":"INFO","msg":"using version","core version":"0.18.5"} +{"time":"2024-10-30T01:15:09.355002219-04:00","level":"INFO","msg":"created symlink","path":"/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/run-20241030_011509-zmlu7388/logs/debug-core.log"} +{"time":"2024-10-30T01:15:09.462920738-04:00","level":"INFO","msg":"created new stream","id":"zmlu7388"} +{"time":"2024-10-30T01:15:09.462953758-04:00","level":"INFO","msg":"stream: started","id":"zmlu7388"} +{"time":"2024-10-30T01:15:09.462991088-04:00","level":"INFO","msg":"sender: started","stream_id":"zmlu7388"} +{"time":"2024-10-30T01:15:09.462971648-04:00","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"zmlu7388"}} +{"time":"2024-10-30T01:15:09.463001818-04:00","level":"INFO","msg":"handler: started","stream_id":{"value":"zmlu7388"}} +{"time":"2024-10-30T01:15:09.699769799-04:00","level":"INFO","msg":"Starting system monitor"} diff --git a/wandb/run-20241030_011509-zmlu7388/logs/debug.log b/wandb/run-20241030_011509-zmlu7388/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..566805922d5b449c7aec927c408202b6e2917735 --- /dev/null +++ b/wandb/run-20241030_011509-zmlu7388/logs/debug.log @@ -0,0 +1,26 @@ +2024-10-30 01:15:09,350 INFO MainThread:324929 [wandb_setup.py:_flush():79] Current SDK version is 0.18.5 +2024-10-30 01:15:09,350 INFO MainThread:324929 [wandb_setup.py:_flush():79] Configure stats pid to 324929 +2024-10-30 01:15:09,351 INFO MainThread:324929 [wandb_setup.py:_flush():79] Loading settings from /home/chunhui/.config/wandb/settings +2024-10-30 01:15:09,351 INFO MainThread:324929 [wandb_setup.py:_flush():79] Loading settings from /mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/settings +2024-10-30 01:15:09,351 INFO MainThread:324929 [wandb_setup.py:_flush():79] Loading settings from environment variables: {} +2024-10-30 01:15:09,351 INFO MainThread:324929 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': None, '_disable_service': None} +2024-10-30 01:15:09,351 INFO MainThread:324929 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'train/train_deep_wandb.py', 'program_abspath': '/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py', 'program': '/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py'} +2024-10-30 01:15:09,351 INFO MainThread:324929 [wandb_setup.py:_flush():79] Applying login settings: {} +2024-10-30 01:15:09,351 INFO MainThread:324929 [wandb_init.py:_log_setup():534] Logging user logs to /mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/run-20241030_011509-zmlu7388/logs/debug.log +2024-10-30 01:15:09,351 INFO MainThread:324929 [wandb_init.py:_log_setup():535] Logging internal logs to /mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/run-20241030_011509-zmlu7388/logs/debug-internal.log +2024-10-30 01:15:09,351 INFO MainThread:324929 [wandb_init.py:init():621] calling init triggers +2024-10-30 01:15:09,351 INFO MainThread:324929 [wandb_init.py:init():628] wandb.init called with sweep_config: {} +config: {} +2024-10-30 01:15:09,351 INFO MainThread:324929 [wandb_init.py:init():671] starting backend +2024-10-30 01:15:09,351 INFO MainThread:324929 [wandb_init.py:init():675] sending inform_init request +2024-10-30 01:15:09,352 INFO MainThread:324929 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2024-10-30 01:15:09,352 INFO MainThread:324929 [wandb_init.py:init():688] backend started and connected +2024-10-30 01:15:09,355 INFO MainThread:324929 [wandb_init.py:init():783] updated telemetry +2024-10-30 01:15:09,380 INFO MainThread:324929 [wandb_init.py:init():816] communicating run to backend with 90.0 second timeout +2024-10-30 01:15:09,696 INFO MainThread:324929 [wandb_init.py:init():867] starting run threads in backend +2024-10-30 01:15:09,791 INFO MainThread:324929 [wandb_run.py:_console_start():2463] atexit reg +2024-10-30 01:15:09,791 INFO MainThread:324929 [wandb_run.py:_redirect():2311] redirect: wrap_raw +2024-10-30 01:15:09,791 INFO MainThread:324929 [wandb_run.py:_redirect():2376] Wrapping output streams. +2024-10-30 01:15:09,791 INFO MainThread:324929 [wandb_run.py:_redirect():2401] Redirects installed. +2024-10-30 01:15:09,793 INFO MainThread:324929 [wandb_init.py:init():911] run started, returning control to user process +2024-10-30 01:15:09,794 INFO MainThread:324929 [wandb_run.py:_config_callback():1390] config_cb None None {'perturbation': 'reverse_control', 'train_set': '10M', 'batch_size': 3, 'epoch': 7, 'seed': 0} diff --git a/wandb/run-20241031_002020-q6ot1vz6/run-q6ot1vz6.wandb b/wandb/run-20241031_002020-q6ot1vz6/run-q6ot1vz6.wandb new file mode 100644 index 0000000000000000000000000000000000000000..ab7a84e722017531c553bfbb1aadb00b2bb3df9f --- /dev/null +++ b/wandb/run-20241031_002020-q6ot1vz6/run-q6ot1vz6.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:950afa23825d7fc6e85fb932c976838964b187a182167c4b2250b3eff1d7f35e +size 19443127 diff --git a/wandb/run-20241031_122006-f2ep45tp/files/config.yaml b/wandb/run-20241031_122006-f2ep45tp/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..725c8381c5f9fe81efa0c182e9fe88850f0f19e9 --- /dev/null +++ b/wandb/run-20241031_122006-f2ep45tp/files/config.yaml @@ -0,0 +1,49 @@ +_wandb: + value: + cli_version: 0.18.5 + m: [] + python_version: 3.9.19 + t: + "1": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 55 + - 71 + - 98 + "2": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 55 + - 71 + - 98 + "3": + - 13 + - 23 + - 55 + "4": 3.9.19 + "5": 0.18.5 + "6": 4.45.1 + "8": + - 5 + "12": 0.18.5 + "13": linux-x86_64 +batch_size: + value: 3 +epoch: + value: 6 +lr: + value: 5e-06 +perturbation: + value: reverse_full +seed: + value: 0 +train_set: + value: 10M diff --git a/wandb/run-20241031_122006-f2ep45tp/files/output.log b/wandb/run-20241031_122006-f2ep45tp/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..2d836d095928a45fafe5dd7b6dde4915866eb6b5 --- /dev/null +++ b/wandb/run-20241031_122006-f2ep45tp/files/output.log @@ -0,0 +1,60 @@ +Downloading shards: 0%| | 0/2 [00:22 +Traceback (most recent call last): + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/tqdm/std.py", line 1196, in __iter__ + self.close() + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/tqdm/std.py", line 1303, in close + fp_write('\n') + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/tqdm/std.py", line 1287, in fp_write + self.fp.write(str(s)) + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/tqdm/utils.py", line 196, in inner + return func(*args, **kwargs) + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/wandb/sdk/lib/redirect.py", line 648, in write + cb(data) + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/wandb/sdk/wandb_run.py", line 2386, in + lambda data: self._console_raw_callback("stderr", data), + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/wandb/sdk/wandb_run.py", line 403, in wrapper_fn + return func(self, *args, **kwargs) + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/wandb/sdk/wandb_run.py", line 1547, in _console_raw_callback + self._backend.interface.publish_output_raw(name, data) + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/wandb/sdk/interface/interface.py", line 721, in publish_output_raw + self._publish_output_raw(o) + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/wandb/sdk/interface/interface_shared.py", line 79, in _publish_output_raw + self._publish(rec) + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/wandb/sdk/interface/interface_sock.py", line 51, in _publish + self._sock_client.send_record_publish(record) + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/wandb/sdk/lib/sock_client.py", line 225, in send_record_publish + self.send_server_request(server_req) + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/wandb/sdk/lib/sock_client.py", line 157, in send_server_request + self._send_message(msg) + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/wandb/sdk/lib/sock_client.py", line 154, in _send_message + self._sendall_with_error_handle(header + data) + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/wandb/sdk/lib/sock_client.py", line 132, in _sendall_with_error_handle + sent = self._sock.send(data) +KeyboardInterrupt: +Traceback (most recent call last): + File "/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py", line 173, in + model = AutoModelForCausalLM.from_pretrained(model_name, + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/transformers/models/auto/auto_factory.py", line 564, in from_pretrained + return model_class.from_pretrained( + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/transformers/modeling_utils.py", line 3769, in from_pretrained + resolved_archive_file, sharded_metadata = get_checkpoint_shard_files( + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/transformers/utils/hub.py", line 1098, in get_checkpoint_shard_files + cached_filename = cached_file( + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/transformers/utils/hub.py", line 403, in cached_file + resolved_file = hf_hub_download( + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/huggingface_hub/utils/_deprecation.py", line 101, in inner_f + return f(*args, **kwargs) + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn + return fn(*args, **kwargs) + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/huggingface_hub/file_download.py", line 1232, in hf_hub_download + return _hf_hub_download_to_cache_dir( + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/huggingface_hub/file_download.py", line 1380, in _hf_hub_download_to_cache_dir + with WeakFileLock(lock_path): + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/contextlib.py", line 119, in __enter__ + return next(self.gen) + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/huggingface_hub/utils/_fixes.py", line 98, in WeakFileLock + lock.acquire() + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/filelock/_api.py", line 225, in acquire + time.sleep(poll_interval) +KeyboardInterrupt diff --git a/wandb/run-20241031_122006-f2ep45tp/files/requirements.txt b/wandb/run-20241031_122006-f2ep45tp/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..95a931302e269cc9e4fa5b719b6511f176ee2416 --- /dev/null +++ b/wandb/run-20241031_122006-f2ep45tp/files/requirements.txt @@ -0,0 +1,147 @@ +funcsigs==1.0.2 +sentry-sdk==2.17.0 +multiprocess==0.70.16 +numpy==1.26.2 +pluralizer==1.2.0 +debugpy==1.6.7 +nvidia-cudnn-cu11==8.5.0.96 +deepspeed==0.15.2 +data==0.4 +pandas==2.1.3 +tomli==2.0.1 +charset-normalizer==3.3.2 +attrs==24.2.0 +aiosignal==1.3.1 +fsspec==2023.10.0 +nvidia-cusparse-cu11==11.7.4.91 +zipp==3.12.0 +mypy-extensions==1.0.0 +datasets==3.0.1 +joblib==1.3.2 +hjson==3.1.0 +traitlets==5.7.1 +stack-data==0.6.0 +transformers==4.45.1 +sympy==1.11.1 +Pygments==2.15.0 +docker-pycreds==0.4.0 +dill==0.3.8 +wheel==0.44.0 +prompt-toolkit==3.0.30 +parso==0.8.3 +ipykernel==6.23.1 +pyarrow==17.0.0 +certifi==2023.11.17 +nvidia-cufft-cu11==10.9.0.58 +six==1.16.0 +pydantic==2.9.2 +click==8.1.7 +nest-asyncio==1.5.6 +gmpy2==2.1.0 +matplotlib==3.8.2 +scipy==1.11.4 +typing_extensions==4.12.2 +statsmodels==0.14.0 +huggingface-hub==0.25.0 +frozenlist==1.4.1 +gpustat==1.1.1 +nvidia-nvtx-cu11==11.7.91 +safetensors==0.4.5 +stanza==1.9.2 +decorator==5.1.1 +seaborn==0.13.0 +sentencepiece==0.2.0 +PyYAML==6.0.1 +black==24.8.0 +protobuf==4.25.1 +pickleshare==0.7.5 +peft==0.13.0 +triton==2.0.0 +nvidia-cuda-runtime-cu11==11.7.99 +Jinja2==3.1.2 +nvidia-cusolver-cu11==11.4.0.1 +executing==1.2.0 +jupyter_client==8.1.0 +pluggy==1.3.0 +cmake==3.30.3 +pytz==2023.3.post1 +aiohappyeyeballs==2.4.2 +kiwisolver==1.4.5 +py-cpuinfo==9.0.0 +Pillow==10.1.0 +ptyprocess==0.7.0 +importlib_resources==6.4.5 +GitPython==3.1.43 +importlib-metadata==6.0.0 +iniconfig==2.0.0 +scikit-learn==1.3.2 +exceptiongroup==1.1.0 +networkx==2.8.6 +accelerate==1.0.0 +nltk==3.8.1 +shutilwhich==1.1.0 +fonttools==4.45.1 +future==0.18.3 +aiohttp==3.10.6 +wcwidth==0.2.5 +idna==3.6 +filelock==3.12.2 +pathspec==0.12.1 +jupyter_core==5.1.0 +lit==18.1.8 +nvidia-curand-cu11==10.2.10.91 +nvidia-cublas-cu11==11.10.3.66 +nvidia-ml-py==12.560.30 +msgpack==1.1.0 +python-dateutil==2.8.2 +blessed==1.20.0 +packaging==23.0 +gitdb==4.0.11 +yarl==1.13.0 +emoji==2.8.0 +tzdata==2023.3 +cycler==0.12.1 +tornado==6.2 +backcall==0.2.0 +plotnine==0.12.4 +ninja==1.11.1.1 +latex==0.7.0 +wandb==0.18.5 +setproctitle==1.3.3 +threadpoolctl==3.2.0 +requests==2.32.3 +pyparsing==3.1.1 +smmap==5.0.1 +pyzmq==23.0.0 +async-timeout==4.0.3 +annotated-types==0.7.0 +matplotlib-inline==0.1.6 +latexcodec==1.0.0 +ipython==8.0.0 +patsy==0.5.3 +contourpy==1.2.0 +multidict==6.1.0 +mizani==0.9.3 +urllib3==2.1.0 +tokenizers==0.20.0 +MarkupSafe==2.1.2 +pip==24.2 +pexpect==4.8.0 +tqdm==4.66.5 +jedi==0.18.2 +pydantic_core==2.23.4 +tempdir==0.7.1 +mpmath==1.2.1 +setuptools==72.1.0 +pytest==7.4.3 +pure-eval==0.2.2 +psutil==5.9.1 +comm==0.1.2 +nvidia-cuda-cupti-cu11==11.7.101 +nvidia-cuda-nvrtc-cu11==11.7.99 +regex==2023.10.3 +platformdirs==2.5.2 +asttokens==2.2.1 +torch==2.0.0 +nvidia-nccl-cu11==2.14.3 +xxhash==3.5.0 diff --git a/wandb/run-20241031_122006-f2ep45tp/files/wandb-metadata.json b/wandb/run-20241031_122006-f2ep45tp/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..6304bd0f83964a8ef38172d8000898c9d987944e --- /dev/null +++ b/wandb/run-20241031_122006-f2ep45tp/files/wandb-metadata.json @@ -0,0 +1,97 @@ +{ + "os": "Linux-5.4.0-162-generic-x86_64-with-glibc2.31", + "python": "3.9.19", + "startedAt": "2024-10-31T16:20:06.045293Z", + "args": [ + "--perturbation", + "reverse_full", + "--train_set", + "10M", + "--batch_size", + "3", + "--epoch", + "6", + "--seed", + "0" + ], + "program": "/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py", + "codePath": "train/train_deep_wandb.py", + "git": { + "remote": "git@hf.co:Yaning1001/Impossible_llm.git", + "commit": "ed716cdcfcdea02b67f7ed0f3504c2b1c8b737c4" + }, + "email": "yaning1001@gmail.com", + "root": "/mnt/ssd3/chunhui/yaning/project/impossible_llm/train", + "host": "mms-large-2", + "username": "chunhui", + "executable": "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/bin/python", + "codePathLocal": "train_deep_wandb.py", + "cpu_count": 32, + "cpu_count_logical": 64, + "gpu": "NVIDIA RTX A6000", + "gpu_count": 8, + "disk": { + "/": { + "total": "1888559353856", + "used": "1753159847936" + } + }, + "memory": { + "total": "202617098240" + }, + "cpu": { + "count": 32, + "countLogical": 64 + }, + "gpu_nvidia": [ + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + } + ], + "cudaVersion": "11.8" +} \ No newline at end of file diff --git a/wandb/run-20241031_122006-f2ep45tp/files/wandb-summary.json b/wandb/run-20241031_122006-f2ep45tp/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..15f6b8e9049a55292dab131278b3f2fc1f52e50d --- /dev/null +++ b/wandb/run-20241031_122006-f2ep45tp/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb":{"runtime":23}} \ No newline at end of file diff --git a/wandb/run-20241031_122006-f2ep45tp/logs/debug-internal.log b/wandb/run-20241031_122006-f2ep45tp/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..0edb6d9b96035db3989cb6d7346c483320b92ead --- /dev/null +++ b/wandb/run-20241031_122006-f2ep45tp/logs/debug-internal.log @@ -0,0 +1,11 @@ +{"time":"2024-10-31T12:20:06.047606029-04:00","level":"INFO","msg":"using version","core version":"0.18.5"} +{"time":"2024-10-31T12:20:06.047621499-04:00","level":"INFO","msg":"created symlink","path":"/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/run-20241031_122006-f2ep45tp/logs/debug-core.log"} +{"time":"2024-10-31T12:20:06.154938982-04:00","level":"INFO","msg":"created new stream","id":"f2ep45tp"} +{"time":"2024-10-31T12:20:06.155004252-04:00","level":"INFO","msg":"stream: started","id":"f2ep45tp"} +{"time":"2024-10-31T12:20:06.155045152-04:00","level":"INFO","msg":"sender: started","stream_id":"f2ep45tp"} +{"time":"2024-10-31T12:20:06.155033412-04:00","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"f2ep45tp"}} +{"time":"2024-10-31T12:20:06.155042312-04:00","level":"INFO","msg":"handler: started","stream_id":{"value":"f2ep45tp"}} +{"time":"2024-10-31T12:20:06.357525131-04:00","level":"INFO","msg":"Starting system monitor"} +{"time":"2024-10-31T12:20:29.350768191-04:00","level":"INFO","msg":"stream: closing","id":"f2ep45tp"} +{"time":"2024-10-31T12:20:29.350824761-04:00","level":"INFO","msg":"Stopping system monitor"} +{"time":"2024-10-31T12:20:29.351697402-04:00","level":"INFO","msg":"Stopped system monitor"} diff --git a/wandb/run-20241031_122006-f2ep45tp/logs/debug.log b/wandb/run-20241031_122006-f2ep45tp/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..84aa9e30e7a50093f4bea930f9d4097e0a934839 --- /dev/null +++ b/wandb/run-20241031_122006-f2ep45tp/logs/debug.log @@ -0,0 +1,27 @@ +2024-10-31 12:20:06,042 INFO MainThread:557182 [wandb_setup.py:_flush():79] Current SDK version is 0.18.5 +2024-10-31 12:20:06,042 INFO MainThread:557182 [wandb_setup.py:_flush():79] Configure stats pid to 557182 +2024-10-31 12:20:06,042 INFO MainThread:557182 [wandb_setup.py:_flush():79] Loading settings from /home/chunhui/.config/wandb/settings +2024-10-31 12:20:06,042 INFO MainThread:557182 [wandb_setup.py:_flush():79] Loading settings from /mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/settings +2024-10-31 12:20:06,042 INFO MainThread:557182 [wandb_setup.py:_flush():79] Loading settings from environment variables: {} +2024-10-31 12:20:06,042 INFO MainThread:557182 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': None, '_disable_service': None} +2024-10-31 12:20:06,042 INFO MainThread:557182 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'train/train_deep_wandb.py', 'program_abspath': '/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py', 'program': '/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py'} +2024-10-31 12:20:06,042 INFO MainThread:557182 [wandb_setup.py:_flush():79] Applying login settings: {} +2024-10-31 12:20:06,042 INFO MainThread:557182 [wandb_init.py:_log_setup():534] Logging user logs to /mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/run-20241031_122006-f2ep45tp/logs/debug.log +2024-10-31 12:20:06,042 INFO MainThread:557182 [wandb_init.py:_log_setup():535] Logging internal logs to /mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/run-20241031_122006-f2ep45tp/logs/debug-internal.log +2024-10-31 12:20:06,042 INFO MainThread:557182 [wandb_init.py:init():621] calling init triggers +2024-10-31 12:20:06,042 INFO MainThread:557182 [wandb_init.py:init():628] wandb.init called with sweep_config: {} +config: {} +2024-10-31 12:20:06,042 INFO MainThread:557182 [wandb_init.py:init():671] starting backend +2024-10-31 12:20:06,042 INFO MainThread:557182 [wandb_init.py:init():675] sending inform_init request +2024-10-31 12:20:06,044 INFO MainThread:557182 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2024-10-31 12:20:06,045 INFO MainThread:557182 [wandb_init.py:init():688] backend started and connected +2024-10-31 12:20:06,049 INFO MainThread:557182 [wandb_init.py:init():783] updated telemetry +2024-10-31 12:20:06,079 INFO MainThread:557182 [wandb_init.py:init():816] communicating run to backend with 90.0 second timeout +2024-10-31 12:20:06,353 INFO MainThread:557182 [wandb_init.py:init():867] starting run threads in backend +2024-10-31 12:20:06,461 INFO MainThread:557182 [wandb_run.py:_console_start():2463] atexit reg +2024-10-31 12:20:06,461 INFO MainThread:557182 [wandb_run.py:_redirect():2311] redirect: wrap_raw +2024-10-31 12:20:06,461 INFO MainThread:557182 [wandb_run.py:_redirect():2376] Wrapping output streams. +2024-10-31 12:20:06,461 INFO MainThread:557182 [wandb_run.py:_redirect():2401] Redirects installed. +2024-10-31 12:20:06,463 INFO MainThread:557182 [wandb_init.py:init():911] run started, returning control to user process +2024-10-31 12:20:06,463 INFO MainThread:557182 [wandb_run.py:_config_callback():1390] config_cb None None {'perturbation': 'reverse_full', 'train_set': '10M', 'batch_size': 3, 'epoch': 6, 'seed': 0, 'lr': 5e-06} +2024-10-31 12:20:29,350 WARNING MsgRouterThr:557182 [router.py:message_loop():77] message_loop has been closed diff --git a/wandb/run-20241031_122006-f2ep45tp/run-f2ep45tp.wandb b/wandb/run-20241031_122006-f2ep45tp/run-f2ep45tp.wandb new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/run-20241031_122114-2k9672ya/run-2k9672ya.wandb b/wandb/run-20241031_122114-2k9672ya/run-2k9672ya.wandb new file mode 100644 index 0000000000000000000000000000000000000000..ee966b1c925e7a7162627734dccc0d001378d945 --- /dev/null +++ b/wandb/run-20241031_122114-2k9672ya/run-2k9672ya.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:861609c20aeebe97837b894194bce5f8603f3e17a86171d54909acd17db795e9 +size 19251238 diff --git a/wandb/run-20241101_093116-8434p043/files/output.log b/wandb/run-20241101_093116-8434p043/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..ff64aec8bf6c3c4a88a8f4e3c5181187c8a67817 --- /dev/null +++ b/wandb/run-20241101_093116-8434p043/files/output.log @@ -0,0 +1,16 @@ +Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:18<00:00, 9.05s/it] +tokenized_valid: Dataset({ + features: ['input_ids', 'attention_mask'], + num_rows: 600 +}) +/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead + warnings.warn( +[2024-11-01 09:31:36,808] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2024-11-01 09:31:44,772] [INFO] [comm.py:652:init_distributed] cdb=None +Installed CUDA version 11.8 does not match the version torch was compiled with 11.7 but since the APIs are compatible, accepting this combination +Using /home/chunhui/.cache/torch_extensions/py39_cu117 as PyTorch extensions root... +Emitting ninja build file /home/chunhui/.cache/torch_extensions/py39_cu117/cpu_adam/build.ninja... +Building extension module cpu_adam... +Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +Loading extension module cpu_adam... +Time to load cpu_adam op: 4.949513912200928 seconds diff --git a/wandb/run-20241101_093116-8434p043/files/requirements.txt b/wandb/run-20241101_093116-8434p043/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..95a931302e269cc9e4fa5b719b6511f176ee2416 --- /dev/null +++ b/wandb/run-20241101_093116-8434p043/files/requirements.txt @@ -0,0 +1,147 @@ +funcsigs==1.0.2 +sentry-sdk==2.17.0 +multiprocess==0.70.16 +numpy==1.26.2 +pluralizer==1.2.0 +debugpy==1.6.7 +nvidia-cudnn-cu11==8.5.0.96 +deepspeed==0.15.2 +data==0.4 +pandas==2.1.3 +tomli==2.0.1 +charset-normalizer==3.3.2 +attrs==24.2.0 +aiosignal==1.3.1 +fsspec==2023.10.0 +nvidia-cusparse-cu11==11.7.4.91 +zipp==3.12.0 +mypy-extensions==1.0.0 +datasets==3.0.1 +joblib==1.3.2 +hjson==3.1.0 +traitlets==5.7.1 +stack-data==0.6.0 +transformers==4.45.1 +sympy==1.11.1 +Pygments==2.15.0 +docker-pycreds==0.4.0 +dill==0.3.8 +wheel==0.44.0 +prompt-toolkit==3.0.30 +parso==0.8.3 +ipykernel==6.23.1 +pyarrow==17.0.0 +certifi==2023.11.17 +nvidia-cufft-cu11==10.9.0.58 +six==1.16.0 +pydantic==2.9.2 +click==8.1.7 +nest-asyncio==1.5.6 +gmpy2==2.1.0 +matplotlib==3.8.2 +scipy==1.11.4 +typing_extensions==4.12.2 +statsmodels==0.14.0 +huggingface-hub==0.25.0 +frozenlist==1.4.1 +gpustat==1.1.1 +nvidia-nvtx-cu11==11.7.91 +safetensors==0.4.5 +stanza==1.9.2 +decorator==5.1.1 +seaborn==0.13.0 +sentencepiece==0.2.0 +PyYAML==6.0.1 +black==24.8.0 +protobuf==4.25.1 +pickleshare==0.7.5 +peft==0.13.0 +triton==2.0.0 +nvidia-cuda-runtime-cu11==11.7.99 +Jinja2==3.1.2 +nvidia-cusolver-cu11==11.4.0.1 +executing==1.2.0 +jupyter_client==8.1.0 +pluggy==1.3.0 +cmake==3.30.3 +pytz==2023.3.post1 +aiohappyeyeballs==2.4.2 +kiwisolver==1.4.5 +py-cpuinfo==9.0.0 +Pillow==10.1.0 +ptyprocess==0.7.0 +importlib_resources==6.4.5 +GitPython==3.1.43 +importlib-metadata==6.0.0 +iniconfig==2.0.0 +scikit-learn==1.3.2 +exceptiongroup==1.1.0 +networkx==2.8.6 +accelerate==1.0.0 +nltk==3.8.1 +shutilwhich==1.1.0 +fonttools==4.45.1 +future==0.18.3 +aiohttp==3.10.6 +wcwidth==0.2.5 +idna==3.6 +filelock==3.12.2 +pathspec==0.12.1 +jupyter_core==5.1.0 +lit==18.1.8 +nvidia-curand-cu11==10.2.10.91 +nvidia-cublas-cu11==11.10.3.66 +nvidia-ml-py==12.560.30 +msgpack==1.1.0 +python-dateutil==2.8.2 +blessed==1.20.0 +packaging==23.0 +gitdb==4.0.11 +yarl==1.13.0 +emoji==2.8.0 +tzdata==2023.3 +cycler==0.12.1 +tornado==6.2 +backcall==0.2.0 +plotnine==0.12.4 +ninja==1.11.1.1 +latex==0.7.0 +wandb==0.18.5 +setproctitle==1.3.3 +threadpoolctl==3.2.0 +requests==2.32.3 +pyparsing==3.1.1 +smmap==5.0.1 +pyzmq==23.0.0 +async-timeout==4.0.3 +annotated-types==0.7.0 +matplotlib-inline==0.1.6 +latexcodec==1.0.0 +ipython==8.0.0 +patsy==0.5.3 +contourpy==1.2.0 +multidict==6.1.0 +mizani==0.9.3 +urllib3==2.1.0 +tokenizers==0.20.0 +MarkupSafe==2.1.2 +pip==24.2 +pexpect==4.8.0 +tqdm==4.66.5 +jedi==0.18.2 +pydantic_core==2.23.4 +tempdir==0.7.1 +mpmath==1.2.1 +setuptools==72.1.0 +pytest==7.4.3 +pure-eval==0.2.2 +psutil==5.9.1 +comm==0.1.2 +nvidia-cuda-cupti-cu11==11.7.101 +nvidia-cuda-nvrtc-cu11==11.7.99 +regex==2023.10.3 +platformdirs==2.5.2 +asttokens==2.2.1 +torch==2.0.0 +nvidia-nccl-cu11==2.14.3 +xxhash==3.5.0 diff --git a/wandb/run-20241101_093116-8434p043/files/wandb-metadata.json b/wandb/run-20241101_093116-8434p043/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..64757511a4e17438e2cbe1eaabbd57dd7ad6dc7a --- /dev/null +++ b/wandb/run-20241101_093116-8434p043/files/wandb-metadata.json @@ -0,0 +1,97 @@ +{ + "os": "Linux-5.4.0-162-generic-x86_64-with-glibc2.31", + "python": "3.9.19", + "startedAt": "2024-11-01T13:31:16.528969Z", + "args": [ + "--perturbation", + "reverse_control", + "--train_set", + "10M", + "--batch_size", + "3", + "--epoch", + "7", + "--seed", + "0" + ], + "program": "/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py", + "codePath": "train/train_deep_wandb.py", + "git": { + "remote": "git@hf.co:Yaning1001/Impossible_llm.git", + "commit": "ed716cdcfcdea02b67f7ed0f3504c2b1c8b737c4" + }, + "email": "yaning1001@gmail.com", + "root": "/mnt/ssd3/chunhui/yaning/project/impossible_llm/train", + "host": "mms-large-2", + "username": "chunhui", + "executable": "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/bin/python", + "codePathLocal": "train_deep_wandb.py", + "cpu_count": 32, + "cpu_count_logical": 64, + "gpu": "NVIDIA RTX A6000", + "gpu_count": 8, + "disk": { + "/": { + "total": "1888559353856", + "used": "1754716262400" + } + }, + "memory": { + "total": "202617098240" + }, + "cpu": { + "count": 32, + "countLogical": 64 + }, + "gpu_nvidia": [ + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + } + ], + "cudaVersion": "11.8" +} \ No newline at end of file diff --git a/wandb/run-20241101_093116-8434p043/logs/debug-internal.log b/wandb/run-20241101_093116-8434p043/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..c32c49964d657ff4f0d112e0ecd5f036b883a844 --- /dev/null +++ b/wandb/run-20241101_093116-8434p043/logs/debug-internal.log @@ -0,0 +1,8 @@ +{"time":"2024-11-01T09:31:16.530787979-04:00","level":"INFO","msg":"using version","core version":"0.18.5"} +{"time":"2024-11-01T09:31:16.530798049-04:00","level":"INFO","msg":"created symlink","path":"/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/run-20241101_093116-8434p043/logs/debug-core.log"} +{"time":"2024-11-01T09:31:16.637062272-04:00","level":"INFO","msg":"created new stream","id":"8434p043"} +{"time":"2024-11-01T09:31:16.637096822-04:00","level":"INFO","msg":"stream: started","id":"8434p043"} +{"time":"2024-11-01T09:31:16.637119892-04:00","level":"INFO","msg":"sender: started","stream_id":"8434p043"} +{"time":"2024-11-01T09:31:16.637111572-04:00","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"8434p043"}} +{"time":"2024-11-01T09:31:16.637139322-04:00","level":"INFO","msg":"handler: started","stream_id":{"value":"8434p043"}} +{"time":"2024-11-01T09:31:16.85617314-04:00","level":"INFO","msg":"Starting system monitor"} diff --git a/wandb/run-20241101_093116-8434p043/logs/debug.log b/wandb/run-20241101_093116-8434p043/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..fcde920c2449068f22048f719817a574837a38f9 --- /dev/null +++ b/wandb/run-20241101_093116-8434p043/logs/debug.log @@ -0,0 +1,26 @@ +2024-11-01 09:31:16,527 INFO MainThread:781948 [wandb_setup.py:_flush():79] Current SDK version is 0.18.5 +2024-11-01 09:31:16,527 INFO MainThread:781948 [wandb_setup.py:_flush():79] Configure stats pid to 781948 +2024-11-01 09:31:16,527 INFO MainThread:781948 [wandb_setup.py:_flush():79] Loading settings from /home/chunhui/.config/wandb/settings +2024-11-01 09:31:16,527 INFO MainThread:781948 [wandb_setup.py:_flush():79] Loading settings from /mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/settings +2024-11-01 09:31:16,527 INFO MainThread:781948 [wandb_setup.py:_flush():79] Loading settings from environment variables: {} +2024-11-01 09:31:16,527 INFO MainThread:781948 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': None, '_disable_service': None} +2024-11-01 09:31:16,527 INFO MainThread:781948 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'train/train_deep_wandb.py', 'program_abspath': '/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py', 'program': '/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py'} +2024-11-01 09:31:16,527 INFO MainThread:781948 [wandb_setup.py:_flush():79] Applying login settings: {} +2024-11-01 09:31:16,527 INFO MainThread:781948 [wandb_init.py:_log_setup():534] Logging user logs to /mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/run-20241101_093116-8434p043/logs/debug.log +2024-11-01 09:31:16,527 INFO MainThread:781948 [wandb_init.py:_log_setup():535] Logging internal logs to /mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/run-20241101_093116-8434p043/logs/debug-internal.log +2024-11-01 09:31:16,527 INFO MainThread:781948 [wandb_init.py:init():621] calling init triggers +2024-11-01 09:31:16,527 INFO MainThread:781948 [wandb_init.py:init():628] wandb.init called with sweep_config: {} +config: {} +2024-11-01 09:31:16,527 INFO MainThread:781948 [wandb_init.py:init():671] starting backend +2024-11-01 09:31:16,527 INFO MainThread:781948 [wandb_init.py:init():675] sending inform_init request +2024-11-01 09:31:16,528 INFO MainThread:781948 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2024-11-01 09:31:16,528 INFO MainThread:781948 [wandb_init.py:init():688] backend started and connected +2024-11-01 09:31:16,531 INFO MainThread:781948 [wandb_init.py:init():783] updated telemetry +2024-11-01 09:31:16,553 INFO MainThread:781948 [wandb_init.py:init():816] communicating run to backend with 90.0 second timeout +2024-11-01 09:31:16,853 INFO MainThread:781948 [wandb_init.py:init():867] starting run threads in backend +2024-11-01 09:31:16,946 INFO MainThread:781948 [wandb_run.py:_console_start():2463] atexit reg +2024-11-01 09:31:16,946 INFO MainThread:781948 [wandb_run.py:_redirect():2311] redirect: wrap_raw +2024-11-01 09:31:16,946 INFO MainThread:781948 [wandb_run.py:_redirect():2376] Wrapping output streams. +2024-11-01 09:31:16,946 INFO MainThread:781948 [wandb_run.py:_redirect():2401] Redirects installed. +2024-11-01 09:31:16,947 INFO MainThread:781948 [wandb_init.py:init():911] run started, returning control to user process +2024-11-01 09:31:16,948 INFO MainThread:781948 [wandb_run.py:_config_callback():1390] config_cb None None {'perturbation': 'reverse_control', 'train_set': '10M', 'batch_size': 3, 'epoch': 7, 'seed': 0, 'lr': 5e-06} diff --git a/wandb/run-20241101_093116-8434p043/run-8434p043.wandb b/wandb/run-20241101_093116-8434p043/run-8434p043.wandb new file mode 100644 index 0000000000000000000000000000000000000000..36b76d73b7988155fb6ded11cd300f9e8c177079 Binary files /dev/null and b/wandb/run-20241101_093116-8434p043/run-8434p043.wandb differ diff --git a/wandb/run-20241101_094656-v2rxhny6/files/wandb-metadata.json b/wandb/run-20241101_094656-v2rxhny6/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..4a96d57a829e4420387435e06a94d777e867c8b3 --- /dev/null +++ b/wandb/run-20241101_094656-v2rxhny6/files/wandb-metadata.json @@ -0,0 +1,97 @@ +{ + "os": "Linux-5.4.0-162-generic-x86_64-with-glibc2.31", + "python": "3.9.19", + "startedAt": "2024-11-01T13:46:56.279635Z", + "args": [ + "--perturbation", + "reverse_control", + "--train_set", + "10M", + "--batch_size", + "3", + "--epoch", + "7", + "--seed", + "0" + ], + "program": "/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py", + "codePath": "train/train_deep_wandb.py", + "git": { + "remote": "git@hf.co:Yaning1001/Impossible_llm.git", + "commit": "ed716cdcfcdea02b67f7ed0f3504c2b1c8b737c4" + }, + "email": "yaning1001@gmail.com", + "root": "/mnt/ssd3/chunhui/yaning/project/impossible_llm/train", + "host": "mms-large-2", + "username": "chunhui", + "executable": "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/bin/python", + "codePathLocal": "train_deep_wandb.py", + "cpu_count": 32, + "cpu_count_logical": 64, + "gpu": "NVIDIA RTX A6000", + "gpu_count": 8, + "disk": { + "/": { + "total": "1888559353856", + "used": "1754695659520" + } + }, + "memory": { + "total": "202617098240" + }, + "cpu": { + "count": 32, + "countLogical": 64 + }, + "gpu_nvidia": [ + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + } + ], + "cudaVersion": "11.8" +} \ No newline at end of file diff --git a/wandb/run-20241101_094656-v2rxhny6/run-v2rxhny6.wandb b/wandb/run-20241101_094656-v2rxhny6/run-v2rxhny6.wandb new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wandb/run-20241101_200535-6xsf0vem/files/output.log b/wandb/run-20241101_200535-6xsf0vem/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..2786cafeaf23a9cfd53cb0abafe933ade4416c1b --- /dev/null +++ b/wandb/run-20241101_200535-6xsf0vem/files/output.log @@ -0,0 +1,21 @@ +Downloading shards: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [02:25<00:00, 72.90s/it] +Loading checkpoint shards: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:05<00:00, 2.67s/it] +tokenized_valid: Dataset({ + features: ['input_ids', 'attention_mask'], + num_rows: 600 +}) +/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead + warnings.warn( +[2024-11-01 20:08:09,443] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2024-11-01 20:08:19,298] [INFO] [comm.py:652:init_distributed] cdb=None +[2024-11-01 20:08:19,298] [INFO] [comm.py:683:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl +Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher. +Installed CUDA version 11.8 does not match the version torch was compiled with 11.7 but since the APIs are compatible, accepting this combination +Using /home/chunhui/.cache/torch_extensions/py39_cu117 as PyTorch extensions root... +Emitting ninja build file /home/chunhui/.cache/torch_extensions/py39_cu117/cpu_adam/build.ninja... +Building extension module cpu_adam... +Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +Loading extension module cpu_adam... +Time to load cpu_adam op: 5.526603698730469 seconds +wandb: WARNING The `run_name` is currently set to the same value as `TrainingArguments.output_dir`. If this was not intended, please specify a different run name by setting the `TrainingArguments.run_name` parameter. + 0%| | 0/2739 [00:00', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False} +2024-11-01 20:08:48,836 INFO MainThread:871224 [wandb_config.py:__setitem__():154] config set model/num_parameters = 3212749824 - > +2024-11-01 20:08:48,837 INFO MainThread:871224 [wandb_run.py:_config_callback():1390] config_cb model/num_parameters 3212749824 None diff --git a/wandb/run-20241101_200535-hnfjoqai/files/output.log b/wandb/run-20241101_200535-hnfjoqai/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..962b7bcf978bbfe624f8db0eb917de298181154d --- /dev/null +++ b/wandb/run-20241101_200535-hnfjoqai/files/output.log @@ -0,0 +1,14 @@ +Downloading shards: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [02:25<00:00, 72.82s/it] +Loading checkpoint shards: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:05<00:00, 2.68s/it] +tokenized_valid: Dataset({ + features: ['input_ids', 'attention_mask'], + num_rows: 600 +}) +/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead + warnings.warn( +[2024-11-01 20:08:09,442] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2024-11-01 20:08:19,404] [INFO] [comm.py:652:init_distributed] cdb=None +Installed CUDA version 11.8 does not match the version torch was compiled with 11.7 but since the APIs are compatible, accepting this combination +Using /home/chunhui/.cache/torch_extensions/py39_cu117 as PyTorch extensions root... +Loading extension module cpu_adam... +Time to load cpu_adam op: 5.504069089889526 seconds diff --git a/wandb/run-20241101_200535-hnfjoqai/files/requirements.txt b/wandb/run-20241101_200535-hnfjoqai/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..95a931302e269cc9e4fa5b719b6511f176ee2416 --- /dev/null +++ b/wandb/run-20241101_200535-hnfjoqai/files/requirements.txt @@ -0,0 +1,147 @@ +funcsigs==1.0.2 +sentry-sdk==2.17.0 +multiprocess==0.70.16 +numpy==1.26.2 +pluralizer==1.2.0 +debugpy==1.6.7 +nvidia-cudnn-cu11==8.5.0.96 +deepspeed==0.15.2 +data==0.4 +pandas==2.1.3 +tomli==2.0.1 +charset-normalizer==3.3.2 +attrs==24.2.0 +aiosignal==1.3.1 +fsspec==2023.10.0 +nvidia-cusparse-cu11==11.7.4.91 +zipp==3.12.0 +mypy-extensions==1.0.0 +datasets==3.0.1 +joblib==1.3.2 +hjson==3.1.0 +traitlets==5.7.1 +stack-data==0.6.0 +transformers==4.45.1 +sympy==1.11.1 +Pygments==2.15.0 +docker-pycreds==0.4.0 +dill==0.3.8 +wheel==0.44.0 +prompt-toolkit==3.0.30 +parso==0.8.3 +ipykernel==6.23.1 +pyarrow==17.0.0 +certifi==2023.11.17 +nvidia-cufft-cu11==10.9.0.58 +six==1.16.0 +pydantic==2.9.2 +click==8.1.7 +nest-asyncio==1.5.6 +gmpy2==2.1.0 +matplotlib==3.8.2 +scipy==1.11.4 +typing_extensions==4.12.2 +statsmodels==0.14.0 +huggingface-hub==0.25.0 +frozenlist==1.4.1 +gpustat==1.1.1 +nvidia-nvtx-cu11==11.7.91 +safetensors==0.4.5 +stanza==1.9.2 +decorator==5.1.1 +seaborn==0.13.0 +sentencepiece==0.2.0 +PyYAML==6.0.1 +black==24.8.0 +protobuf==4.25.1 +pickleshare==0.7.5 +peft==0.13.0 +triton==2.0.0 +nvidia-cuda-runtime-cu11==11.7.99 +Jinja2==3.1.2 +nvidia-cusolver-cu11==11.4.0.1 +executing==1.2.0 +jupyter_client==8.1.0 +pluggy==1.3.0 +cmake==3.30.3 +pytz==2023.3.post1 +aiohappyeyeballs==2.4.2 +kiwisolver==1.4.5 +py-cpuinfo==9.0.0 +Pillow==10.1.0 +ptyprocess==0.7.0 +importlib_resources==6.4.5 +GitPython==3.1.43 +importlib-metadata==6.0.0 +iniconfig==2.0.0 +scikit-learn==1.3.2 +exceptiongroup==1.1.0 +networkx==2.8.6 +accelerate==1.0.0 +nltk==3.8.1 +shutilwhich==1.1.0 +fonttools==4.45.1 +future==0.18.3 +aiohttp==3.10.6 +wcwidth==0.2.5 +idna==3.6 +filelock==3.12.2 +pathspec==0.12.1 +jupyter_core==5.1.0 +lit==18.1.8 +nvidia-curand-cu11==10.2.10.91 +nvidia-cublas-cu11==11.10.3.66 +nvidia-ml-py==12.560.30 +msgpack==1.1.0 +python-dateutil==2.8.2 +blessed==1.20.0 +packaging==23.0 +gitdb==4.0.11 +yarl==1.13.0 +emoji==2.8.0 +tzdata==2023.3 +cycler==0.12.1 +tornado==6.2 +backcall==0.2.0 +plotnine==0.12.4 +ninja==1.11.1.1 +latex==0.7.0 +wandb==0.18.5 +setproctitle==1.3.3 +threadpoolctl==3.2.0 +requests==2.32.3 +pyparsing==3.1.1 +smmap==5.0.1 +pyzmq==23.0.0 +async-timeout==4.0.3 +annotated-types==0.7.0 +matplotlib-inline==0.1.6 +latexcodec==1.0.0 +ipython==8.0.0 +patsy==0.5.3 +contourpy==1.2.0 +multidict==6.1.0 +mizani==0.9.3 +urllib3==2.1.0 +tokenizers==0.20.0 +MarkupSafe==2.1.2 +pip==24.2 +pexpect==4.8.0 +tqdm==4.66.5 +jedi==0.18.2 +pydantic_core==2.23.4 +tempdir==0.7.1 +mpmath==1.2.1 +setuptools==72.1.0 +pytest==7.4.3 +pure-eval==0.2.2 +psutil==5.9.1 +comm==0.1.2 +nvidia-cuda-cupti-cu11==11.7.101 +nvidia-cuda-nvrtc-cu11==11.7.99 +regex==2023.10.3 +platformdirs==2.5.2 +asttokens==2.2.1 +torch==2.0.0 +nvidia-nccl-cu11==2.14.3 +xxhash==3.5.0 diff --git a/wandb/run-20241101_200535-hnfjoqai/files/wandb-metadata.json b/wandb/run-20241101_200535-hnfjoqai/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..ac844102b43a46bcc70aa60325743f21d05562c7 --- /dev/null +++ b/wandb/run-20241101_200535-hnfjoqai/files/wandb-metadata.json @@ -0,0 +1,97 @@ +{ + "os": "Linux-5.4.0-162-generic-x86_64-with-glibc2.31", + "python": "3.9.19", + "startedAt": "2024-11-02T00:05:35.852878Z", + "args": [ + "--perturbation", + "shuffle_nondeterministic", + "--train_set", + "10M", + "--batch_size", + "3", + "--epoch", + "3", + "--seed", + "0" + ], + "program": "/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py", + "codePath": "train/train_deep_wandb.py", + "git": { + "remote": "git@hf.co:Yaning1001/Impossible_llm.git", + "commit": "ed716cdcfcdea02b67f7ed0f3504c2b1c8b737c4" + }, + "email": "yaning1001@gmail.com", + "root": "/mnt/ssd3/chunhui/yaning/project/impossible_llm/train", + "host": "mms-large-2", + "username": "chunhui", + "executable": "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/bin/python", + "codePathLocal": "train_deep_wandb.py", + "cpu_count": 32, + "cpu_count_logical": 64, + "gpu": "NVIDIA RTX A6000", + "gpu_count": 8, + "disk": { + "/": { + "total": "1888559353856", + "used": "1754801680384" + } + }, + "memory": { + "total": "202617098240" + }, + "cpu": { + "count": 32, + "countLogical": 64 + }, + "gpu_nvidia": [ + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + } + ], + "cudaVersion": "11.8" +} \ No newline at end of file diff --git a/wandb/run-20241101_200535-hnfjoqai/logs/debug-internal.log b/wandb/run-20241101_200535-hnfjoqai/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..7db2ed45e3cf98a36d22a5c084ea9fd6a66df5ef --- /dev/null +++ b/wandb/run-20241101_200535-hnfjoqai/logs/debug-internal.log @@ -0,0 +1,8 @@ +{"time":"2024-11-01T20:05:35.855602934-04:00","level":"INFO","msg":"using version","core version":"0.18.5"} +{"time":"2024-11-01T20:05:35.855625184-04:00","level":"INFO","msg":"created symlink","path":"/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/run-20241101_200535-hnfjoqai/logs/debug-core.log"} +{"time":"2024-11-01T20:05:35.961440222-04:00","level":"INFO","msg":"created new stream","id":"hnfjoqai"} +{"time":"2024-11-01T20:05:35.961471872-04:00","level":"INFO","msg":"stream: started","id":"hnfjoqai"} +{"time":"2024-11-01T20:05:35.961498182-04:00","level":"INFO","msg":"sender: started","stream_id":"hnfjoqai"} +{"time":"2024-11-01T20:05:35.961486682-04:00","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"hnfjoqai"}} +{"time":"2024-11-01T20:05:35.961499202-04:00","level":"INFO","msg":"handler: started","stream_id":{"value":"hnfjoqai"}} +{"time":"2024-11-01T20:05:36.182162772-04:00","level":"INFO","msg":"Starting system monitor"} diff --git a/wandb/run-20241101_200535-hnfjoqai/logs/debug.log b/wandb/run-20241101_200535-hnfjoqai/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..d9bc87aa645a66339f9a6f892a4b0b50a3fc0a44 --- /dev/null +++ b/wandb/run-20241101_200535-hnfjoqai/logs/debug.log @@ -0,0 +1,26 @@ +2024-11-01 20:05:35,849 INFO MainThread:871227 [wandb_setup.py:_flush():79] Current SDK version is 0.18.5 +2024-11-01 20:05:35,849 INFO MainThread:871227 [wandb_setup.py:_flush():79] Configure stats pid to 871227 +2024-11-01 20:05:35,849 INFO MainThread:871227 [wandb_setup.py:_flush():79] Loading settings from /home/chunhui/.config/wandb/settings +2024-11-01 20:05:35,849 INFO MainThread:871227 [wandb_setup.py:_flush():79] Loading settings from /mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/settings +2024-11-01 20:05:35,850 INFO MainThread:871227 [wandb_setup.py:_flush():79] Loading settings from environment variables: {} +2024-11-01 20:05:35,850 INFO MainThread:871227 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': None, '_disable_service': None} +2024-11-01 20:05:35,850 INFO MainThread:871227 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'train/train_deep_wandb.py', 'program_abspath': '/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py', 'program': '/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py'} +2024-11-01 20:05:35,850 INFO MainThread:871227 [wandb_setup.py:_flush():79] Applying login settings: {} +2024-11-01 20:05:35,850 INFO MainThread:871227 [wandb_init.py:_log_setup():534] Logging user logs to /mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/run-20241101_200535-hnfjoqai/logs/debug.log +2024-11-01 20:05:35,850 INFO MainThread:871227 [wandb_init.py:_log_setup():535] Logging internal logs to /mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/run-20241101_200535-hnfjoqai/logs/debug-internal.log +2024-11-01 20:05:35,850 INFO MainThread:871227 [wandb_init.py:init():621] calling init triggers +2024-11-01 20:05:35,850 INFO MainThread:871227 [wandb_init.py:init():628] wandb.init called with sweep_config: {} +config: {} +2024-11-01 20:05:35,850 INFO MainThread:871227 [wandb_init.py:init():671] starting backend +2024-11-01 20:05:35,850 INFO MainThread:871227 [wandb_init.py:init():675] sending inform_init request +2024-11-01 20:05:35,852 INFO MainThread:871227 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2024-11-01 20:05:35,852 INFO MainThread:871227 [wandb_init.py:init():688] backend started and connected +2024-11-01 20:05:35,855 INFO MainThread:871227 [wandb_init.py:init():783] updated telemetry +2024-11-01 20:05:35,877 INFO MainThread:871227 [wandb_init.py:init():816] communicating run to backend with 90.0 second timeout +2024-11-01 20:05:36,179 INFO MainThread:871227 [wandb_init.py:init():867] starting run threads in backend +2024-11-01 20:05:36,268 INFO MainThread:871227 [wandb_run.py:_console_start():2463] atexit reg +2024-11-01 20:05:36,268 INFO MainThread:871227 [wandb_run.py:_redirect():2311] redirect: wrap_raw +2024-11-01 20:05:36,268 INFO MainThread:871227 [wandb_run.py:_redirect():2376] Wrapping output streams. +2024-11-01 20:05:36,268 INFO MainThread:871227 [wandb_run.py:_redirect():2401] Redirects installed. +2024-11-01 20:05:36,269 INFO MainThread:871227 [wandb_init.py:init():911] run started, returning control to user process +2024-11-01 20:05:36,270 INFO MainThread:871227 [wandb_run.py:_config_callback():1390] config_cb None None {'perturbation': 'shuffle_nondeterministic', 'train_set': '10M', 'batch_size': 3, 'epoch': 3, 'seed': 0, 'lr': 5e-06} diff --git a/wandb/run-20241101_201910-hnwfqg73/files/output.log b/wandb/run-20241101_201910-hnwfqg73/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..04ca77dae62c42467f2e3713ee4aa0dba0d01be1 --- /dev/null +++ b/wandb/run-20241101_201910-hnwfqg73/files/output.log @@ -0,0 +1 @@ +Loading checkpoint shards: 0%| | 0/2 [00:00 + dataset = load_dataset('babylm_dataset_test.py', name=dataset_name, trust_remote_code=True) + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/datasets/load.py", line 2096, in load_dataset + builder_instance.download_and_prepare( + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/datasets/builder.py", line 855, in download_and_prepare + Path(self._output_dir).parent.mkdir(parents=True, exist_ok=True) + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/pathlib.py", line 1327, in mkdir + self.parent.mkdir(parents=True, exist_ok=True) + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/pathlib.py", line 1323, in mkdir + self._accessor.mkdir(self, mode) +OSError: [Errno 28] No space left on device: '/home/chunhui/.cache/huggingface/datasets/babylm_dataset_test/babylm_shuffle_deterministic21_10M_seed0' diff --git a/wandb/run-20241105_160217-21j8oh7z/files/requirements.txt b/wandb/run-20241105_160217-21j8oh7z/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..95a931302e269cc9e4fa5b719b6511f176ee2416 --- /dev/null +++ b/wandb/run-20241105_160217-21j8oh7z/files/requirements.txt @@ -0,0 +1,147 @@ +funcsigs==1.0.2 +sentry-sdk==2.17.0 +multiprocess==0.70.16 +numpy==1.26.2 +pluralizer==1.2.0 +debugpy==1.6.7 +nvidia-cudnn-cu11==8.5.0.96 +deepspeed==0.15.2 +data==0.4 +pandas==2.1.3 +tomli==2.0.1 +charset-normalizer==3.3.2 +attrs==24.2.0 +aiosignal==1.3.1 +fsspec==2023.10.0 +nvidia-cusparse-cu11==11.7.4.91 +zipp==3.12.0 +mypy-extensions==1.0.0 +datasets==3.0.1 +joblib==1.3.2 +hjson==3.1.0 +traitlets==5.7.1 +stack-data==0.6.0 +transformers==4.45.1 +sympy==1.11.1 +Pygments==2.15.0 +docker-pycreds==0.4.0 +dill==0.3.8 +wheel==0.44.0 +prompt-toolkit==3.0.30 +parso==0.8.3 +ipykernel==6.23.1 +pyarrow==17.0.0 +certifi==2023.11.17 +nvidia-cufft-cu11==10.9.0.58 +six==1.16.0 +pydantic==2.9.2 +click==8.1.7 +nest-asyncio==1.5.6 +gmpy2==2.1.0 +matplotlib==3.8.2 +scipy==1.11.4 +typing_extensions==4.12.2 +statsmodels==0.14.0 +huggingface-hub==0.25.0 +frozenlist==1.4.1 +gpustat==1.1.1 +nvidia-nvtx-cu11==11.7.91 +safetensors==0.4.5 +stanza==1.9.2 +decorator==5.1.1 +seaborn==0.13.0 +sentencepiece==0.2.0 +PyYAML==6.0.1 +black==24.8.0 +protobuf==4.25.1 +pickleshare==0.7.5 +peft==0.13.0 +triton==2.0.0 +nvidia-cuda-runtime-cu11==11.7.99 +Jinja2==3.1.2 +nvidia-cusolver-cu11==11.4.0.1 +executing==1.2.0 +jupyter_client==8.1.0 +pluggy==1.3.0 +cmake==3.30.3 +pytz==2023.3.post1 +aiohappyeyeballs==2.4.2 +kiwisolver==1.4.5 +py-cpuinfo==9.0.0 +Pillow==10.1.0 +ptyprocess==0.7.0 +importlib_resources==6.4.5 +GitPython==3.1.43 +importlib-metadata==6.0.0 +iniconfig==2.0.0 +scikit-learn==1.3.2 +exceptiongroup==1.1.0 +networkx==2.8.6 +accelerate==1.0.0 +nltk==3.8.1 +shutilwhich==1.1.0 +fonttools==4.45.1 +future==0.18.3 +aiohttp==3.10.6 +wcwidth==0.2.5 +idna==3.6 +filelock==3.12.2 +pathspec==0.12.1 +jupyter_core==5.1.0 +lit==18.1.8 +nvidia-curand-cu11==10.2.10.91 +nvidia-cublas-cu11==11.10.3.66 +nvidia-ml-py==12.560.30 +msgpack==1.1.0 +python-dateutil==2.8.2 +blessed==1.20.0 +packaging==23.0 +gitdb==4.0.11 +yarl==1.13.0 +emoji==2.8.0 +tzdata==2023.3 +cycler==0.12.1 +tornado==6.2 +backcall==0.2.0 +plotnine==0.12.4 +ninja==1.11.1.1 +latex==0.7.0 +wandb==0.18.5 +setproctitle==1.3.3 +threadpoolctl==3.2.0 +requests==2.32.3 +pyparsing==3.1.1 +smmap==5.0.1 +pyzmq==23.0.0 +async-timeout==4.0.3 +annotated-types==0.7.0 +matplotlib-inline==0.1.6 +latexcodec==1.0.0 +ipython==8.0.0 +patsy==0.5.3 +contourpy==1.2.0 +multidict==6.1.0 +mizani==0.9.3 +urllib3==2.1.0 +tokenizers==0.20.0 +MarkupSafe==2.1.2 +pip==24.2 +pexpect==4.8.0 +tqdm==4.66.5 +jedi==0.18.2 +pydantic_core==2.23.4 +tempdir==0.7.1 +mpmath==1.2.1 +setuptools==72.1.0 +pytest==7.4.3 +pure-eval==0.2.2 +psutil==5.9.1 +comm==0.1.2 +nvidia-cuda-cupti-cu11==11.7.101 +nvidia-cuda-nvrtc-cu11==11.7.99 +regex==2023.10.3 +platformdirs==2.5.2 +asttokens==2.2.1 +torch==2.0.0 +nvidia-nccl-cu11==2.14.3 +xxhash==3.5.0 diff --git a/wandb/run-20241105_160217-21j8oh7z/files/wandb-metadata.json b/wandb/run-20241105_160217-21j8oh7z/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..55741a5cce8ce003af4e675e613960cd6ad6bec9 --- /dev/null +++ b/wandb/run-20241105_160217-21j8oh7z/files/wandb-metadata.json @@ -0,0 +1,44 @@ +{ + "os": "Linux-5.4.0-162-generic-x86_64-with-glibc2.31", + "python": "3.9.19", + "startedAt": "2024-11-05T21:02:17.597054Z", + "args": [ + "--perturbation", + "shuffle_deterministic21", + "--train_set", + "10M", + "--batch_size", + "3", + "--epoch", + "3", + "--seed", + "0" + ], + "program": "/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py", + "codePath": "train/train_deep_wandb.py", + "git": { + "remote": "git@hf.co:Yaning1001/Impossible_llm.git", + "commit": "ed716cdcfcdea02b67f7ed0f3504c2b1c8b737c4" + }, + "email": "yaning1001@gmail.com", + "root": "/mnt/ssd3/chunhui/yaning/project/impossible_llm/train", + "host": "mms-large-2", + "username": "chunhui", + "executable": "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/bin/python", + "codePathLocal": "train_deep_wandb.py", + "cpu_count": 32, + "cpu_count_logical": 64, + "disk": { + "/": { + "total": "1888559353856", + "used": "1792550322176" + } + }, + "memory": { + "total": "202617098240" + }, + "cpu": { + "count": 32, + "countLogical": 64 + } +} \ No newline at end of file diff --git a/wandb/run-20241105_160217-21j8oh7z/files/wandb-summary.json b/wandb/run-20241105_160217-21j8oh7z/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..a59211b910c7b68e6827eb6c887d30d98244727c --- /dev/null +++ b/wandb/run-20241105_160217-21j8oh7z/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb":{"runtime":5}} \ No newline at end of file diff --git a/wandb/run-20241105_160217-21j8oh7z/logs/debug-internal.log b/wandb/run-20241105_160217-21j8oh7z/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..3d5738a00b8cca20e1e2fa4fddb56cf411e59d22 --- /dev/null +++ b/wandb/run-20241105_160217-21j8oh7z/logs/debug-internal.log @@ -0,0 +1,17 @@ +{"time":"2024-11-05T16:02:17.599322616-05:00","level":"INFO","msg":"using version","core version":"0.18.5"} +{"time":"2024-11-05T16:02:17.599336826-05:00","level":"INFO","msg":"created symlink","path":"/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/run-20241105_160217-21j8oh7z/logs/debug-core.log"} +{"time":"2024-11-05T16:02:22.624797468-05:00","level":"INFO","msg":"created new stream","id":"21j8oh7z"} +{"time":"2024-11-05T16:02:22.624911108-05:00","level":"INFO","msg":"stream: started","id":"21j8oh7z"} +{"time":"2024-11-05T16:02:22.624954939-05:00","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"21j8oh7z"}} +{"time":"2024-11-05T16:02:22.62531649-05:00","level":"INFO","msg":"sender: started","stream_id":"21j8oh7z"} +{"time":"2024-11-05T16:02:22.62530827-05:00","level":"INFO","msg":"handler: started","stream_id":{"value":"21j8oh7z"}} +{"time":"2024-11-05T16:02:22.843404741-05:00","level":"INFO","msg":"Starting system monitor"} +{"time":"2024-11-05T16:02:22.969038166-05:00","level":"INFO","msg":"stream: closing","id":"21j8oh7z"} +{"time":"2024-11-05T16:02:22.969085756-05:00","level":"INFO","msg":"Stopping system monitor"} +{"time":"2024-11-05T16:02:22.969161546-05:00","level":"INFO","msg":"Stopped system monitor"} +{"time":"2024-11-05T16:02:23.219586953-05:00","level":"ERROR","msg":"sender: sendDefer: failed to build job artifact","error":"failed to write data to file: write /tmp/tmpfile-994168657: no space left on device"} +{"time":"2024-11-05T16:02:23.510500832-05:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2024-11-05T16:02:23.642087785-05:00","level":"INFO","msg":"handler: closed","stream_id":{"value":"21j8oh7z"}} +{"time":"2024-11-05T16:02:23.642112355-05:00","level":"INFO","msg":"writer: Close: closed","stream_id":{"value":"21j8oh7z"}} +{"time":"2024-11-05T16:02:23.642123165-05:00","level":"INFO","msg":"sender: closed","stream_id":"21j8oh7z"} +{"time":"2024-11-05T16:02:23.642162225-05:00","level":"INFO","msg":"stream: closed","id":"21j8oh7z"} diff --git a/wandb/run-20241105_160217-21j8oh7z/logs/debug.log b/wandb/run-20241105_160217-21j8oh7z/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..28d1e1fc011251fb30594029e71eaf213b78ba9b --- /dev/null +++ b/wandb/run-20241105_160217-21j8oh7z/logs/debug.log @@ -0,0 +1,27 @@ +2024-11-05 16:02:17,593 INFO MainThread:1770491 [wandb_setup.py:_flush():79] Current SDK version is 0.18.5 +2024-11-05 16:02:17,593 INFO MainThread:1770491 [wandb_setup.py:_flush():79] Configure stats pid to 1770491 +2024-11-05 16:02:17,594 INFO MainThread:1770491 [wandb_setup.py:_flush():79] Loading settings from /home/chunhui/.config/wandb/settings +2024-11-05 16:02:17,594 INFO MainThread:1770491 [wandb_setup.py:_flush():79] Loading settings from /mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/settings +2024-11-05 16:02:17,594 INFO MainThread:1770491 [wandb_setup.py:_flush():79] Loading settings from environment variables: {} +2024-11-05 16:02:17,594 INFO MainThread:1770491 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': None, '_disable_service': None} +2024-11-05 16:02:17,594 INFO MainThread:1770491 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'train/train_deep_wandb.py', 'program_abspath': '/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py', 'program': '/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py'} +2024-11-05 16:02:17,594 INFO MainThread:1770491 [wandb_setup.py:_flush():79] Applying login settings: {} +2024-11-05 16:02:17,594 INFO MainThread:1770491 [wandb_init.py:_log_setup():534] Logging user logs to /mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/run-20241105_160217-21j8oh7z/logs/debug.log +2024-11-05 16:02:17,594 INFO MainThread:1770491 [wandb_init.py:_log_setup():535] Logging internal logs to /mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/run-20241105_160217-21j8oh7z/logs/debug-internal.log +2024-11-05 16:02:17,594 INFO MainThread:1770491 [wandb_init.py:init():621] calling init triggers +2024-11-05 16:02:17,594 INFO MainThread:1770491 [wandb_init.py:init():628] wandb.init called with sweep_config: {} +config: {} +2024-11-05 16:02:17,594 INFO MainThread:1770491 [wandb_init.py:init():671] starting backend +2024-11-05 16:02:17,594 INFO MainThread:1770491 [wandb_init.py:init():675] sending inform_init request +2024-11-05 16:02:17,596 INFO MainThread:1770491 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2024-11-05 16:02:17,596 INFO MainThread:1770491 [wandb_init.py:init():688] backend started and connected +2024-11-05 16:02:17,601 INFO MainThread:1770491 [wandb_init.py:init():783] updated telemetry +2024-11-05 16:02:17,627 INFO MainThread:1770491 [wandb_init.py:init():816] communicating run to backend with 90.0 second timeout +2024-11-05 16:02:22,840 INFO MainThread:1770491 [wandb_init.py:init():867] starting run threads in backend +2024-11-05 16:02:22,944 INFO MainThread:1770491 [wandb_run.py:_console_start():2463] atexit reg +2024-11-05 16:02:22,944 INFO MainThread:1770491 [wandb_run.py:_redirect():2311] redirect: wrap_raw +2024-11-05 16:02:22,944 INFO MainThread:1770491 [wandb_run.py:_redirect():2376] Wrapping output streams. +2024-11-05 16:02:22,944 INFO MainThread:1770491 [wandb_run.py:_redirect():2401] Redirects installed. +2024-11-05 16:02:22,945 INFO MainThread:1770491 [wandb_init.py:init():911] run started, returning control to user process +2024-11-05 16:02:22,946 INFO MainThread:1770491 [wandb_run.py:_config_callback():1390] config_cb None None {'perturbation': 'shuffle_deterministic21', 'train_set': '10M', 'batch_size': 3, 'epoch': 3, 'seed': 0, 'lr': 5e-06} +2024-11-05 16:02:22,969 WARNING MsgRouterThr:1770491 [router.py:message_loop():77] message_loop has been closed diff --git a/wandb/run-20241105_160217-21j8oh7z/run-21j8oh7z.wandb b/wandb/run-20241105_160217-21j8oh7z/run-21j8oh7z.wandb new file mode 100644 index 0000000000000000000000000000000000000000..54d9ebd13f26404fc6bffd3f3ff76148602618cd Binary files /dev/null and b/wandb/run-20241105_160217-21j8oh7z/run-21j8oh7z.wandb differ diff --git a/wandb/run-20241105_161113-6baonvj0/files/config.yaml b/wandb/run-20241105_161113-6baonvj0/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ba0e0eb5aa81d7186d91b3c8f342ad5574a4c100 --- /dev/null +++ b/wandb/run-20241105_161113-6baonvj0/files/config.yaml @@ -0,0 +1,49 @@ +_wandb: + value: + cli_version: 0.18.5 + m: [] + python_version: 3.9.19 + t: + "1": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 55 + - 71 + - 98 + "2": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 55 + - 71 + - 98 + "3": + - 13 + - 23 + - 55 + "4": 3.9.19 + "5": 0.18.5 + "6": 4.45.1 + "8": + - 5 + "12": 0.18.5 + "13": linux-x86_64 +batch_size: + value: 3 +epoch: + value: 3 +lr: + value: 5e-06 +perturbation: + value: shuffle_deterministic21 +seed: + value: 0 +train_set: + value: 10M diff --git a/wandb/run-20241105_161113-6baonvj0/files/output.log b/wandb/run-20241105_161113-6baonvj0/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..330f79ccad12d3f3a0b8e78019eca22e1861ade7 --- /dev/null +++ b/wandb/run-20241105_161113-6baonvj0/files/output.log @@ -0,0 +1,58 @@ +100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1098312/1098312 [00:04<00:00, 274402.05it/s] +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1098312/1098312 [00:00<00:00, 3125210.76it/s] +100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 16426/16426 [00:00<00:00, 37199.10it/s] +Generating train split: 1999 examples [00:05, 365.46 examples/s]███████████████████████████████████████████████████████████ | 14150/16426 [00:00<00:00, 39727.11it/s] +Traceback (most recent call last): + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/datasets/builder.py", line 1624, in _prepare_split_single + writer.write(example, key) + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/datasets/arrow_writer.py", line 495, in write + self.write_examples_on_file() + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/datasets/arrow_writer.py", line 453, in write_examples_on_file + self.write_batch(batch_examples=batch_examples) + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/datasets/arrow_writer.py", line 567, in write_batch + self.write_table(pa_table, writer_batch_size) + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/datasets/arrow_writer.py", line 585, in write_table + self.pa_writer.write_table(pa_table, writer_batch_size) + File "pyarrow/ipc.pxi", line 529, in pyarrow.lib._CRecordBatchWriter.write_table + File "pyarrow/error.pxi", line 89, in pyarrow.lib.check_status + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/fsspec/implementations/local.py", line 369, in write + return self.f.write(*args, **kwargs) +OSError: [Errno 28] No space left on device + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/datasets/builder.py", line 1633, in _prepare_split_single + num_examples, num_bytes = writer.finalize() + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/datasets/arrow_writer.py", line 594, in finalize + self.write_examples_on_file() + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/datasets/arrow_writer.py", line 453, in write_examples_on_file + self.write_batch(batch_examples=batch_examples) + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/datasets/arrow_writer.py", line 567, in write_batch + self.write_table(pa_table, writer_batch_size) + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/datasets/arrow_writer.py", line 585, in write_table + self.pa_writer.write_table(pa_table, writer_batch_size) + File "pyarrow/ipc.pxi", line 529, in pyarrow.lib._CRecordBatchWriter.write_table + File "pyarrow/error.pxi", line 89, in pyarrow.lib.check_status + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/fsspec/implementations/local.py", line 369, in write + return self.f.write(*args, **kwargs) +OSError: [Errno 28] No space left on device + +The above exception was the direct cause of the following exception: + +Traceback (most recent call last): + File "/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py", line 165, in + dataset = load_dataset('babylm_dataset_test.py', name=dataset_name, trust_remote_code=True) + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/datasets/load.py", line 2096, in load_dataset + builder_instance.download_and_prepare( + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/datasets/builder.py", line 924, in download_and_prepare + self._download_and_prepare( + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/datasets/builder.py", line 1647, in _download_and_prepare + super()._download_and_prepare( + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/datasets/builder.py", line 999, in _download_and_prepare + self._prepare_split(split_generator, **prepare_split_kwargs) + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/datasets/builder.py", line 1485, in _prepare_split + for job_id, done, content in self._prepare_split_single( + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/datasets/builder.py", line 1642, in _prepare_split_single + raise DatasetGenerationError("An error occurred while generating the dataset") from e +datasets.exceptions.DatasetGenerationError: An error occurred while generating the dataset diff --git a/wandb/run-20241105_161113-6baonvj0/files/requirements.txt b/wandb/run-20241105_161113-6baonvj0/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..95a931302e269cc9e4fa5b719b6511f176ee2416 --- /dev/null +++ b/wandb/run-20241105_161113-6baonvj0/files/requirements.txt @@ -0,0 +1,147 @@ +funcsigs==1.0.2 +sentry-sdk==2.17.0 +multiprocess==0.70.16 +numpy==1.26.2 +pluralizer==1.2.0 +debugpy==1.6.7 +nvidia-cudnn-cu11==8.5.0.96 +deepspeed==0.15.2 +data==0.4 +pandas==2.1.3 +tomli==2.0.1 +charset-normalizer==3.3.2 +attrs==24.2.0 +aiosignal==1.3.1 +fsspec==2023.10.0 +nvidia-cusparse-cu11==11.7.4.91 +zipp==3.12.0 +mypy-extensions==1.0.0 +datasets==3.0.1 +joblib==1.3.2 +hjson==3.1.0 +traitlets==5.7.1 +stack-data==0.6.0 +transformers==4.45.1 +sympy==1.11.1 +Pygments==2.15.0 +docker-pycreds==0.4.0 +dill==0.3.8 +wheel==0.44.0 +prompt-toolkit==3.0.30 +parso==0.8.3 +ipykernel==6.23.1 +pyarrow==17.0.0 +certifi==2023.11.17 +nvidia-cufft-cu11==10.9.0.58 +six==1.16.0 +pydantic==2.9.2 +click==8.1.7 +nest-asyncio==1.5.6 +gmpy2==2.1.0 +matplotlib==3.8.2 +scipy==1.11.4 +typing_extensions==4.12.2 +statsmodels==0.14.0 +huggingface-hub==0.25.0 +frozenlist==1.4.1 +gpustat==1.1.1 +nvidia-nvtx-cu11==11.7.91 +safetensors==0.4.5 +stanza==1.9.2 +decorator==5.1.1 +seaborn==0.13.0 +sentencepiece==0.2.0 +PyYAML==6.0.1 +black==24.8.0 +protobuf==4.25.1 +pickleshare==0.7.5 +peft==0.13.0 +triton==2.0.0 +nvidia-cuda-runtime-cu11==11.7.99 +Jinja2==3.1.2 +nvidia-cusolver-cu11==11.4.0.1 +executing==1.2.0 +jupyter_client==8.1.0 +pluggy==1.3.0 +cmake==3.30.3 +pytz==2023.3.post1 +aiohappyeyeballs==2.4.2 +kiwisolver==1.4.5 +py-cpuinfo==9.0.0 +Pillow==10.1.0 +ptyprocess==0.7.0 +importlib_resources==6.4.5 +GitPython==3.1.43 +importlib-metadata==6.0.0 +iniconfig==2.0.0 +scikit-learn==1.3.2 +exceptiongroup==1.1.0 +networkx==2.8.6 +accelerate==1.0.0 +nltk==3.8.1 +shutilwhich==1.1.0 +fonttools==4.45.1 +future==0.18.3 +aiohttp==3.10.6 +wcwidth==0.2.5 +idna==3.6 +filelock==3.12.2 +pathspec==0.12.1 +jupyter_core==5.1.0 +lit==18.1.8 +nvidia-curand-cu11==10.2.10.91 +nvidia-cublas-cu11==11.10.3.66 +nvidia-ml-py==12.560.30 +msgpack==1.1.0 +python-dateutil==2.8.2 +blessed==1.20.0 +packaging==23.0 +gitdb==4.0.11 +yarl==1.13.0 +emoji==2.8.0 +tzdata==2023.3 +cycler==0.12.1 +tornado==6.2 +backcall==0.2.0 +plotnine==0.12.4 +ninja==1.11.1.1 +latex==0.7.0 +wandb==0.18.5 +setproctitle==1.3.3 +threadpoolctl==3.2.0 +requests==2.32.3 +pyparsing==3.1.1 +smmap==5.0.1 +pyzmq==23.0.0 +async-timeout==4.0.3 +annotated-types==0.7.0 +matplotlib-inline==0.1.6 +latexcodec==1.0.0 +ipython==8.0.0 +patsy==0.5.3 +contourpy==1.2.0 +multidict==6.1.0 +mizani==0.9.3 +urllib3==2.1.0 +tokenizers==0.20.0 +MarkupSafe==2.1.2 +pip==24.2 +pexpect==4.8.0 +tqdm==4.66.5 +jedi==0.18.2 +pydantic_core==2.23.4 +tempdir==0.7.1 +mpmath==1.2.1 +setuptools==72.1.0 +pytest==7.4.3 +pure-eval==0.2.2 +psutil==5.9.1 +comm==0.1.2 +nvidia-cuda-cupti-cu11==11.7.101 +nvidia-cuda-nvrtc-cu11==11.7.99 +regex==2023.10.3 +platformdirs==2.5.2 +asttokens==2.2.1 +torch==2.0.0 +nvidia-nccl-cu11==2.14.3 +xxhash==3.5.0 diff --git a/wandb/run-20241105_161113-6baonvj0/files/wandb-metadata.json b/wandb/run-20241105_161113-6baonvj0/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..b3eb2248cc2d6ee8791a38e88f2f827eb6a7b4eb --- /dev/null +++ b/wandb/run-20241105_161113-6baonvj0/files/wandb-metadata.json @@ -0,0 +1,97 @@ +{ + "os": "Linux-5.4.0-162-generic-x86_64-with-glibc2.31", + "python": "3.9.19", + "startedAt": "2024-11-05T21:11:13.767924Z", + "args": [ + "--perturbation", + "shuffle_deterministic21", + "--train_set", + "10M", + "--batch_size", + "3", + "--epoch", + "3", + "--seed", + "0" + ], + "program": "/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py", + "codePath": "train/train_deep_wandb.py", + "git": { + "remote": "git@hf.co:Yaning1001/Impossible_llm.git", + "commit": "ed716cdcfcdea02b67f7ed0f3504c2b1c8b737c4" + }, + "email": "yaning1001@gmail.com", + "root": "/mnt/ssd3/chunhui/yaning/project/impossible_llm/train", + "host": "mms-large-2", + "username": "chunhui", + "executable": "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/bin/python", + "codePathLocal": "train_deep_wandb.py", + "cpu_count": 32, + "cpu_count_logical": 64, + "gpu": "NVIDIA RTX A6000", + "gpu_count": 8, + "disk": { + "/": { + "total": "1888559353856", + "used": "1792542838784" + } + }, + "memory": { + "total": "202617098240" + }, + "cpu": { + "count": 32, + "countLogical": 64 + }, + "gpu_nvidia": [ + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + } + ], + "cudaVersion": "11.8" +} \ No newline at end of file diff --git a/wandb/run-20241105_161113-6baonvj0/files/wandb-summary.json b/wandb/run-20241105_161113-6baonvj0/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..a59211b910c7b68e6827eb6c887d30d98244727c --- /dev/null +++ b/wandb/run-20241105_161113-6baonvj0/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb":{"runtime":5}} \ No newline at end of file diff --git a/wandb/run-20241105_161113-6baonvj0/logs/debug-internal.log b/wandb/run-20241105_161113-6baonvj0/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..91859f7c4c8f090598156bf034f971144867d934 --- /dev/null +++ b/wandb/run-20241105_161113-6baonvj0/logs/debug-internal.log @@ -0,0 +1,17 @@ +{"time":"2024-11-05T16:11:13.770093254-05:00","level":"INFO","msg":"using version","core version":"0.18.5"} +{"time":"2024-11-05T16:11:13.770105194-05:00","level":"INFO","msg":"created symlink","path":"/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/run-20241105_161113-6baonvj0/logs/debug-core.log"} +{"time":"2024-11-05T16:11:13.876440546-05:00","level":"INFO","msg":"created new stream","id":"6baonvj0"} +{"time":"2024-11-05T16:11:13.876670797-05:00","level":"INFO","msg":"stream: started","id":"6baonvj0"} +{"time":"2024-11-05T16:11:13.876773058-05:00","level":"INFO","msg":"sender: started","stream_id":"6baonvj0"} +{"time":"2024-11-05T16:11:13.876715137-05:00","level":"INFO","msg":"handler: started","stream_id":{"value":"6baonvj0"}} +{"time":"2024-11-05T16:11:13.876761958-05:00","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"6baonvj0"}} +{"time":"2024-11-05T16:11:14.089836214-05:00","level":"INFO","msg":"Starting system monitor"} +{"time":"2024-11-05T16:11:19.67130911-05:00","level":"INFO","msg":"stream: closing","id":"6baonvj0"} +{"time":"2024-11-05T16:11:19.67133415-05:00","level":"INFO","msg":"Stopping system monitor"} +{"time":"2024-11-05T16:11:19.671993254-05:00","level":"INFO","msg":"Stopped system monitor"} +{"time":"2024-11-05T16:11:19.737389962-05:00","level":"ERROR","msg":"sender: sendDefer: failed to build job artifact","error":"failed to write data to file: write /tmp/tmpfile-79569462: no space left on device"} +{"time":"2024-11-05T16:11:20.000869155-05:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2024-11-05T16:11:20.138426872-05:00","level":"INFO","msg":"handler: closed","stream_id":{"value":"6baonvj0"}} +{"time":"2024-11-05T16:11:20.138461552-05:00","level":"INFO","msg":"writer: Close: closed","stream_id":{"value":"6baonvj0"}} +{"time":"2024-11-05T16:11:20.138503763-05:00","level":"INFO","msg":"sender: closed","stream_id":"6baonvj0"} +{"time":"2024-11-05T16:11:20.138566163-05:00","level":"INFO","msg":"stream: closed","id":"6baonvj0"} diff --git a/wandb/run-20241105_161113-6baonvj0/logs/debug.log b/wandb/run-20241105_161113-6baonvj0/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..b41683a4e40589de9d7fe249ff8a545878cf9d89 --- /dev/null +++ b/wandb/run-20241105_161113-6baonvj0/logs/debug.log @@ -0,0 +1,27 @@ +2024-11-05 16:11:13,766 INFO MainThread:1772521 [wandb_setup.py:_flush():79] Current SDK version is 0.18.5 +2024-11-05 16:11:13,766 INFO MainThread:1772521 [wandb_setup.py:_flush():79] Configure stats pid to 1772521 +2024-11-05 16:11:13,766 INFO MainThread:1772521 [wandb_setup.py:_flush():79] Loading settings from /home/chunhui/.config/wandb/settings +2024-11-05 16:11:13,766 INFO MainThread:1772521 [wandb_setup.py:_flush():79] Loading settings from /mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/settings +2024-11-05 16:11:13,766 INFO MainThread:1772521 [wandb_setup.py:_flush():79] Loading settings from environment variables: {} +2024-11-05 16:11:13,766 INFO MainThread:1772521 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': None, '_disable_service': None} +2024-11-05 16:11:13,766 INFO MainThread:1772521 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'train/train_deep_wandb.py', 'program_abspath': '/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py', 'program': '/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py'} +2024-11-05 16:11:13,766 INFO MainThread:1772521 [wandb_setup.py:_flush():79] Applying login settings: {} +2024-11-05 16:11:13,766 INFO MainThread:1772521 [wandb_init.py:_log_setup():534] Logging user logs to /mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/run-20241105_161113-6baonvj0/logs/debug.log +2024-11-05 16:11:13,766 INFO MainThread:1772521 [wandb_init.py:_log_setup():535] Logging internal logs to /mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/run-20241105_161113-6baonvj0/logs/debug-internal.log +2024-11-05 16:11:13,766 INFO MainThread:1772521 [wandb_init.py:init():621] calling init triggers +2024-11-05 16:11:13,766 INFO MainThread:1772521 [wandb_init.py:init():628] wandb.init called with sweep_config: {} +config: {} +2024-11-05 16:11:13,766 INFO MainThread:1772521 [wandb_init.py:init():671] starting backend +2024-11-05 16:11:13,766 INFO MainThread:1772521 [wandb_init.py:init():675] sending inform_init request +2024-11-05 16:11:13,767 INFO MainThread:1772521 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2024-11-05 16:11:13,767 INFO MainThread:1772521 [wandb_init.py:init():688] backend started and connected +2024-11-05 16:11:13,770 INFO MainThread:1772521 [wandb_init.py:init():783] updated telemetry +2024-11-05 16:11:13,803 INFO MainThread:1772521 [wandb_init.py:init():816] communicating run to backend with 90.0 second timeout +2024-11-05 16:11:14,087 INFO MainThread:1772521 [wandb_init.py:init():867] starting run threads in backend +2024-11-05 16:11:14,173 INFO MainThread:1772521 [wandb_run.py:_console_start():2463] atexit reg +2024-11-05 16:11:14,173 INFO MainThread:1772521 [wandb_run.py:_redirect():2311] redirect: wrap_raw +2024-11-05 16:11:14,173 INFO MainThread:1772521 [wandb_run.py:_redirect():2376] Wrapping output streams. +2024-11-05 16:11:14,173 INFO MainThread:1772521 [wandb_run.py:_redirect():2401] Redirects installed. +2024-11-05 16:11:14,175 INFO MainThread:1772521 [wandb_init.py:init():911] run started, returning control to user process +2024-11-05 16:11:14,175 INFO MainThread:1772521 [wandb_run.py:_config_callback():1390] config_cb None None {'perturbation': 'shuffle_deterministic21', 'train_set': '10M', 'batch_size': 3, 'epoch': 3, 'seed': 0, 'lr': 5e-06} +2024-11-05 16:11:19,671 WARNING MsgRouterThr:1772521 [router.py:message_loop():77] message_loop has been closed diff --git a/wandb/run-20241105_161113-6baonvj0/run-6baonvj0.wandb b/wandb/run-20241105_161113-6baonvj0/run-6baonvj0.wandb new file mode 100644 index 0000000000000000000000000000000000000000..fe41fa1d9b1be38266bf55ffcb6b09bf67e053ef Binary files /dev/null and b/wandb/run-20241105_161113-6baonvj0/run-6baonvj0.wandb differ diff --git a/wandb/run-20241105_163029-2rkpz70q/run-2rkpz70q.wandb b/wandb/run-20241105_163029-2rkpz70q/run-2rkpz70q.wandb new file mode 100644 index 0000000000000000000000000000000000000000..77b482d3411e7a194265e73da75d6492e4e31e6e --- /dev/null +++ b/wandb/run-20241105_163029-2rkpz70q/run-2rkpz70q.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76abc67a2178a32e7e1a15b449b57061d2676374f620476a4003dee9370b517e +size 229376 diff --git a/wandb/run-20241105_163029-rjj288ue/files/config.yaml b/wandb/run-20241105_163029-rjj288ue/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3771760e607bdb2dcb978de96a047ebe2475f806 --- /dev/null +++ b/wandb/run-20241105_163029-rjj288ue/files/config.yaml @@ -0,0 +1,49 @@ +_wandb: + value: + cli_version: 0.18.5 + m: [] + python_version: 3.9.19 + t: + "1": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 55 + - 71 + - 98 + "2": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 55 + - 71 + - 98 + "3": + - 13 + - 23 + - 55 + "4": 3.9.19 + "5": 0.18.5 + "6": 4.45.1 + "8": + - 5 + "12": 0.18.5 + "13": linux-x86_64 +batch_size: + value: 3 +epoch: + value: 3 +lr: + value: 5e-06 +perturbation: + value: shuffle_deterministic57 +seed: + value: 0 +train_set: + value: 10M diff --git a/wandb/run-20241105_163029-rjj288ue/files/output.log b/wandb/run-20241105_163029-rjj288ue/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..c767fed2a50101a63766c7819d0c024fbba2bde0 --- /dev/null +++ b/wandb/run-20241105_163029-rjj288ue/files/output.log @@ -0,0 +1,34 @@ +Downloading shards: 0%| | 0/2 [01:34 + model = AutoModelForCausalLM.from_pretrained(model_name, + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/transformers/models/auto/auto_factory.py", line 564, in from_pretrained + return model_class.from_pretrained( + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/transformers/modeling_utils.py", line 3769, in from_pretrained + resolved_archive_file, sharded_metadata = get_checkpoint_shard_files( + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/transformers/utils/hub.py", line 1098, in get_checkpoint_shard_files + cached_filename = cached_file( + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/transformers/utils/hub.py", line 403, in cached_file + resolved_file = hf_hub_download( + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/huggingface_hub/utils/_deprecation.py", line 101, in inner_f + return f(*args, **kwargs) + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn + return fn(*args, **kwargs) + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/huggingface_hub/file_download.py", line 1232, in hf_hub_download + return _hf_hub_download_to_cache_dir( + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/huggingface_hub/file_download.py", line 1380, in _hf_hub_download_to_cache_dir + with WeakFileLock(lock_path): + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/contextlib.py", line 119, in __enter__ + return next(self.gen) + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/huggingface_hub/utils/_fixes.py", line 98, in WeakFileLock + lock.acquire() + File "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/filelock/_api.py", line 225, in acquire + time.sleep(poll_interval) +KeyboardInterrupt diff --git a/wandb/run-20241105_163029-rjj288ue/files/requirements.txt b/wandb/run-20241105_163029-rjj288ue/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..95a931302e269cc9e4fa5b719b6511f176ee2416 --- /dev/null +++ b/wandb/run-20241105_163029-rjj288ue/files/requirements.txt @@ -0,0 +1,147 @@ +funcsigs==1.0.2 +sentry-sdk==2.17.0 +multiprocess==0.70.16 +numpy==1.26.2 +pluralizer==1.2.0 +debugpy==1.6.7 +nvidia-cudnn-cu11==8.5.0.96 +deepspeed==0.15.2 +data==0.4 +pandas==2.1.3 +tomli==2.0.1 +charset-normalizer==3.3.2 +attrs==24.2.0 +aiosignal==1.3.1 +fsspec==2023.10.0 +nvidia-cusparse-cu11==11.7.4.91 +zipp==3.12.0 +mypy-extensions==1.0.0 +datasets==3.0.1 +joblib==1.3.2 +hjson==3.1.0 +traitlets==5.7.1 +stack-data==0.6.0 +transformers==4.45.1 +sympy==1.11.1 +Pygments==2.15.0 +docker-pycreds==0.4.0 +dill==0.3.8 +wheel==0.44.0 +prompt-toolkit==3.0.30 +parso==0.8.3 +ipykernel==6.23.1 +pyarrow==17.0.0 +certifi==2023.11.17 +nvidia-cufft-cu11==10.9.0.58 +six==1.16.0 +pydantic==2.9.2 +click==8.1.7 +nest-asyncio==1.5.6 +gmpy2==2.1.0 +matplotlib==3.8.2 +scipy==1.11.4 +typing_extensions==4.12.2 +statsmodels==0.14.0 +huggingface-hub==0.25.0 +frozenlist==1.4.1 +gpustat==1.1.1 +nvidia-nvtx-cu11==11.7.91 +safetensors==0.4.5 +stanza==1.9.2 +decorator==5.1.1 +seaborn==0.13.0 +sentencepiece==0.2.0 +PyYAML==6.0.1 +black==24.8.0 +protobuf==4.25.1 +pickleshare==0.7.5 +peft==0.13.0 +triton==2.0.0 +nvidia-cuda-runtime-cu11==11.7.99 +Jinja2==3.1.2 +nvidia-cusolver-cu11==11.4.0.1 +executing==1.2.0 +jupyter_client==8.1.0 +pluggy==1.3.0 +cmake==3.30.3 +pytz==2023.3.post1 +aiohappyeyeballs==2.4.2 +kiwisolver==1.4.5 +py-cpuinfo==9.0.0 +Pillow==10.1.0 +ptyprocess==0.7.0 +importlib_resources==6.4.5 +GitPython==3.1.43 +importlib-metadata==6.0.0 +iniconfig==2.0.0 +scikit-learn==1.3.2 +exceptiongroup==1.1.0 +networkx==2.8.6 +accelerate==1.0.0 +nltk==3.8.1 +shutilwhich==1.1.0 +fonttools==4.45.1 +future==0.18.3 +aiohttp==3.10.6 +wcwidth==0.2.5 +idna==3.6 +filelock==3.12.2 +pathspec==0.12.1 +jupyter_core==5.1.0 +lit==18.1.8 +nvidia-curand-cu11==10.2.10.91 +nvidia-cublas-cu11==11.10.3.66 +nvidia-ml-py==12.560.30 +msgpack==1.1.0 +python-dateutil==2.8.2 +blessed==1.20.0 +packaging==23.0 +gitdb==4.0.11 +yarl==1.13.0 +emoji==2.8.0 +tzdata==2023.3 +cycler==0.12.1 +tornado==6.2 +backcall==0.2.0 +plotnine==0.12.4 +ninja==1.11.1.1 +latex==0.7.0 +wandb==0.18.5 +setproctitle==1.3.3 +threadpoolctl==3.2.0 +requests==2.32.3 +pyparsing==3.1.1 +smmap==5.0.1 +pyzmq==23.0.0 +async-timeout==4.0.3 +annotated-types==0.7.0 +matplotlib-inline==0.1.6 +latexcodec==1.0.0 +ipython==8.0.0 +patsy==0.5.3 +contourpy==1.2.0 +multidict==6.1.0 +mizani==0.9.3 +urllib3==2.1.0 +tokenizers==0.20.0 +MarkupSafe==2.1.2 +pip==24.2 +pexpect==4.8.0 +tqdm==4.66.5 +jedi==0.18.2 +pydantic_core==2.23.4 +tempdir==0.7.1 +mpmath==1.2.1 +setuptools==72.1.0 +pytest==7.4.3 +pure-eval==0.2.2 +psutil==5.9.1 +comm==0.1.2 +nvidia-cuda-cupti-cu11==11.7.101 +nvidia-cuda-nvrtc-cu11==11.7.99 +regex==2023.10.3 +platformdirs==2.5.2 +asttokens==2.2.1 +torch==2.0.0 +nvidia-nccl-cu11==2.14.3 +xxhash==3.5.0 diff --git a/wandb/run-20241105_163029-rjj288ue/files/wandb-metadata.json b/wandb/run-20241105_163029-rjj288ue/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..86eb855edee1b297e5441fd52d6993dc24fe2848 --- /dev/null +++ b/wandb/run-20241105_163029-rjj288ue/files/wandb-metadata.json @@ -0,0 +1,97 @@ +{ + "os": "Linux-5.4.0-162-generic-x86_64-with-glibc2.31", + "python": "3.9.19", + "startedAt": "2024-11-05T21:30:29.680256Z", + "args": [ + "--perturbation", + "shuffle_deterministic57", + "--train_set", + "10M", + "--batch_size", + "3", + "--epoch", + "3", + "--seed", + "0" + ], + "program": "/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py", + "codePath": "train/train_deep_wandb.py", + "git": { + "remote": "git@hf.co:Yaning1001/Impossible_llm.git", + "commit": "ed716cdcfcdea02b67f7ed0f3504c2b1c8b737c4" + }, + "email": "yaning1001@gmail.com", + "root": "/mnt/ssd3/chunhui/yaning/project/impossible_llm/train", + "host": "mms-large-2", + "username": "chunhui", + "executable": "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/bin/python", + "codePathLocal": "train_deep_wandb.py", + "cpu_count": 32, + "cpu_count_logical": 64, + "gpu": "NVIDIA RTX A6000", + "gpu_count": 8, + "disk": { + "/": { + "total": "1888559353856", + "used": "1785984946176" + } + }, + "memory": { + "total": "202617098240" + }, + "cpu": { + "count": 32, + "countLogical": 64 + }, + "gpu_nvidia": [ + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + } + ], + "cudaVersion": "11.8" +} \ No newline at end of file diff --git a/wandb/run-20241105_163029-rjj288ue/files/wandb-summary.json b/wandb/run-20241105_163029-rjj288ue/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..b155621703e444311fe2da9b782a2b70b5491169 --- /dev/null +++ b/wandb/run-20241105_163029-rjj288ue/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb":{"runtime":94}} \ No newline at end of file diff --git a/wandb/run-20241105_163029-rjj288ue/logs/debug-internal.log b/wandb/run-20241105_163029-rjj288ue/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..fbeded6a8b140b81091a47a2e6ae742a7c253f4d --- /dev/null +++ b/wandb/run-20241105_163029-rjj288ue/logs/debug-internal.log @@ -0,0 +1,11 @@ +{"time":"2024-11-05T16:30:29.684568605-05:00","level":"INFO","msg":"using version","core version":"0.18.5"} +{"time":"2024-11-05T16:30:29.684584315-05:00","level":"INFO","msg":"created symlink","path":"/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/run-20241105_163029-rjj288ue/logs/debug-core.log"} +{"time":"2024-11-05T16:30:29.793249589-05:00","level":"INFO","msg":"created new stream","id":"rjj288ue"} +{"time":"2024-11-05T16:30:29.793301479-05:00","level":"INFO","msg":"stream: started","id":"rjj288ue"} +{"time":"2024-11-05T16:30:29.793373389-05:00","level":"INFO","msg":"sender: started","stream_id":"rjj288ue"} +{"time":"2024-11-05T16:30:29.793335359-05:00","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"rjj288ue"}} +{"time":"2024-11-05T16:30:29.79340863-05:00","level":"INFO","msg":"handler: started","stream_id":{"value":"rjj288ue"}} +{"time":"2024-11-05T16:30:29.968702207-05:00","level":"INFO","msg":"Starting system monitor"} +{"time":"2024-11-05T16:32:04.675067907-05:00","level":"INFO","msg":"stream: closing","id":"rjj288ue"} +{"time":"2024-11-05T16:32:04.675115848-05:00","level":"INFO","msg":"Stopping system monitor"} +{"time":"2024-11-05T16:32:04.67559538-05:00","level":"INFO","msg":"Stopped system monitor"} diff --git a/wandb/run-20241105_163029-rjj288ue/logs/debug.log b/wandb/run-20241105_163029-rjj288ue/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..6dad79e1849c19e79c9ea76aab85e719b9673658 --- /dev/null +++ b/wandb/run-20241105_163029-rjj288ue/logs/debug.log @@ -0,0 +1,27 @@ +2024-11-05 16:30:29,677 INFO MainThread:1779705 [wandb_setup.py:_flush():79] Current SDK version is 0.18.5 +2024-11-05 16:30:29,677 INFO MainThread:1779705 [wandb_setup.py:_flush():79] Configure stats pid to 1779705 +2024-11-05 16:30:29,677 INFO MainThread:1779705 [wandb_setup.py:_flush():79] Loading settings from /home/chunhui/.config/wandb/settings +2024-11-05 16:30:29,677 INFO MainThread:1779705 [wandb_setup.py:_flush():79] Loading settings from /mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/settings +2024-11-05 16:30:29,677 INFO MainThread:1779705 [wandb_setup.py:_flush():79] Loading settings from environment variables: {} +2024-11-05 16:30:29,677 INFO MainThread:1779705 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': None, '_disable_service': None} +2024-11-05 16:30:29,677 INFO MainThread:1779705 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'train/train_deep_wandb.py', 'program_abspath': '/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py', 'program': '/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py'} +2024-11-05 16:30:29,677 INFO MainThread:1779705 [wandb_setup.py:_flush():79] Applying login settings: {} +2024-11-05 16:30:29,677 INFO MainThread:1779705 [wandb_init.py:_log_setup():534] Logging user logs to /mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/run-20241105_163029-rjj288ue/logs/debug.log +2024-11-05 16:30:29,677 INFO MainThread:1779705 [wandb_init.py:_log_setup():535] Logging internal logs to /mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/run-20241105_163029-rjj288ue/logs/debug-internal.log +2024-11-05 16:30:29,677 INFO MainThread:1779705 [wandb_init.py:init():621] calling init triggers +2024-11-05 16:30:29,677 INFO MainThread:1779705 [wandb_init.py:init():628] wandb.init called with sweep_config: {} +config: {} +2024-11-05 16:30:29,678 INFO MainThread:1779705 [wandb_init.py:init():671] starting backend +2024-11-05 16:30:29,678 INFO MainThread:1779705 [wandb_init.py:init():675] sending inform_init request +2024-11-05 16:30:29,679 INFO MainThread:1779705 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2024-11-05 16:30:29,680 INFO MainThread:1779705 [wandb_init.py:init():688] backend started and connected +2024-11-05 16:30:29,683 INFO MainThread:1779705 [wandb_init.py:init():783] updated telemetry +2024-11-05 16:30:29,712 INFO MainThread:1779705 [wandb_init.py:init():816] communicating run to backend with 90.0 second timeout +2024-11-05 16:30:29,965 INFO MainThread:1779705 [wandb_init.py:init():867] starting run threads in backend +2024-11-05 16:30:30,061 INFO MainThread:1779705 [wandb_run.py:_console_start():2463] atexit reg +2024-11-05 16:30:30,061 INFO MainThread:1779705 [wandb_run.py:_redirect():2311] redirect: wrap_raw +2024-11-05 16:30:30,061 INFO MainThread:1779705 [wandb_run.py:_redirect():2376] Wrapping output streams. +2024-11-05 16:30:30,061 INFO MainThread:1779705 [wandb_run.py:_redirect():2401] Redirects installed. +2024-11-05 16:30:30,064 INFO MainThread:1779705 [wandb_init.py:init():911] run started, returning control to user process +2024-11-05 16:30:30,064 INFO MainThread:1779705 [wandb_run.py:_config_callback():1390] config_cb None None {'perturbation': 'shuffle_deterministic57', 'train_set': '10M', 'batch_size': 3, 'epoch': 3, 'seed': 0, 'lr': 5e-06} +2024-11-05 16:32:04,675 WARNING MsgRouterThr:1779705 [router.py:message_loop():77] message_loop has been closed diff --git a/wandb/run-20241105_163029-rjj288ue/run-rjj288ue.wandb b/wandb/run-20241105_163029-rjj288ue/run-rjj288ue.wandb new file mode 100644 index 0000000000000000000000000000000000000000..63736dbdde834eb4cb22be19b2ee74a6170b89a2 Binary files /dev/null and b/wandb/run-20241105_163029-rjj288ue/run-rjj288ue.wandb differ diff --git a/wandb/run-20241105_163248-rhhc1g6i/run-rhhc1g6i.wandb b/wandb/run-20241105_163248-rhhc1g6i/run-rhhc1g6i.wandb new file mode 100644 index 0000000000000000000000000000000000000000..2088c3d55b667120da2497a31d1aa05ac6e236ff --- /dev/null +++ b/wandb/run-20241105_163248-rhhc1g6i/run-rhhc1g6i.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a85c150502ac2023421307fb0b9001f0aeb81d00a6c17af4bd7e4543eca0c56c +size 16678912 diff --git a/wandb/run-20241105_163248-thalxhcd/run-thalxhcd.wandb b/wandb/run-20241105_163248-thalxhcd/run-thalxhcd.wandb new file mode 100644 index 0000000000000000000000000000000000000000..b6a460fc71b401bf92f4e46abaef3ce8829a8043 --- /dev/null +++ b/wandb/run-20241105_163248-thalxhcd/run-thalxhcd.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da7eaf5c3f2190188bf6f140d16dcf1c36c58d50702d331d32a89d9865d867f7 +size 13041664 diff --git a/wandb/run-20241105_223842-qidxbq6i/files/config.yaml b/wandb/run-20241105_223842-qidxbq6i/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bad7a642dde2d14def3184e72e6bfa44feda0e32 --- /dev/null +++ b/wandb/run-20241105_223842-qidxbq6i/files/config.yaml @@ -0,0 +1,50 @@ +_wandb: + value: + cli_version: 0.18.5 + m: [] + python_version: 3.9.19 + t: + "1": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 55 + - 71 + - 98 + "2": + - 1 + - 5 + - 11 + - 49 + - 51 + - 53 + - 55 + - 71 + - 98 + "3": + - 2 + - 13 + - 23 + - 55 + "4": 3.9.19 + "5": 0.18.5 + "6": 4.45.1 + "8": + - 5 + "12": 0.18.5 + "13": linux-x86_64 +batch_size: + value: 3 +epoch: + value: 3 +lr: + value: 5e-06 +perturbation: + value: shuffle_deterministic57 +seed: + value: 0 +train_set: + value: 10M diff --git a/wandb/run-20241105_223842-qidxbq6i/files/output.log b/wandb/run-20241105_223842-qidxbq6i/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..8988ad53ff2b1d18a9da86ef4e857e660f7da074 --- /dev/null +++ b/wandb/run-20241105_223842-qidxbq6i/files/output.log @@ -0,0 +1,15 @@ +Downloading shards: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [02:32<00:00, 76.48s/it] +Loading checkpoint shards: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:05<00:00, 2.58s/it] +Map: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 17013/17013 [00:45<00:00, 371.76 examples/s] +tokenized_valid: Dataset({ + features: ['input_ids', 'attention_mask'], + num_rows: 1000 +}) +/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/lib/python3.9/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead + warnings.warn( +[2024-11-05 22:42:08,583] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2024-11-05 22:42:16,455] [INFO] [comm.py:652:init_distributed] cdb=None +Installed CUDA version 11.8 does not match the version torch was compiled with 11.7 but since the APIs are compatible, accepting this combination +Using /home/chunhui/.cache/torch_extensions/py39_cu117 as PyTorch extensions root... +Loading extension module cpu_adam... +Time to load cpu_adam op: 4.613825559616089 seconds diff --git a/wandb/run-20241105_223842-qidxbq6i/files/wandb-metadata.json b/wandb/run-20241105_223842-qidxbq6i/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..29b2e22ecacb2f9f708e586511148f4562e2f67c --- /dev/null +++ b/wandb/run-20241105_223842-qidxbq6i/files/wandb-metadata.json @@ -0,0 +1,97 @@ +{ + "os": "Linux-5.4.0-162-generic-x86_64-with-glibc2.31", + "python": "3.9.19", + "startedAt": "2024-11-06T03:38:42.650597Z", + "args": [ + "--perturbation", + "shuffle_deterministic57", + "--train_set", + "10M", + "--batch_size", + "3", + "--epoch", + "3", + "--seed", + "0" + ], + "program": "/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py", + "codePath": "train/train_deep_wandb.py", + "git": { + "remote": "git@hf.co:Yaning1001/Impossible_llm.git", + "commit": "ed716cdcfcdea02b67f7ed0f3504c2b1c8b737c4" + }, + "email": "yaning1001@gmail.com", + "root": "/mnt/ssd3/chunhui/yaning/project/impossible_llm/train", + "host": "mms-large-2", + "username": "chunhui", + "executable": "/mnt/ssd3/chunhui/miniconda/envs/impossible_llm/bin/python", + "codePathLocal": "train_deep_wandb.py", + "cpu_count": 32, + "cpu_count_logical": 64, + "gpu": "NVIDIA RTX A6000", + "gpu_count": 8, + "disk": { + "/": { + "total": "1888559353856", + "used": "1785078444032" + } + }, + "memory": { + "total": "202617098240" + }, + "cpu": { + "count": 32, + "countLogical": 64 + }, + "gpu_nvidia": [ + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + }, + { + "name": "NVIDIA RTX A6000", + "memoryTotal": "51527024640", + "cudaCores": 10752, + "architecture": "Ampere" + } + ], + "cudaVersion": "11.8" +} \ No newline at end of file diff --git a/wandb/run-20241105_223842-qidxbq6i/files/wandb-summary.json b/wandb/run-20241105_223842-qidxbq6i/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..85c9b56f922c6e89b27d13fe9d47ad364f6752e3 --- /dev/null +++ b/wandb/run-20241105_223842-qidxbq6i/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb":{"runtime":77981}} \ No newline at end of file diff --git a/wandb/run-20241105_223842-qidxbq6i/logs/debug-internal.log b/wandb/run-20241105_223842-qidxbq6i/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..d7f6f4d6910b8b62af6e6bc0248175695458235a --- /dev/null +++ b/wandb/run-20241105_223842-qidxbq6i/logs/debug-internal.log @@ -0,0 +1,18 @@ +{"time":"2024-11-05T22:38:42.65292855-05:00","level":"INFO","msg":"using version","core version":"0.18.5"} +{"time":"2024-11-05T22:38:42.652945341-05:00","level":"INFO","msg":"created symlink","path":"/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/run-20241105_223842-qidxbq6i/logs/debug-core.log"} +{"time":"2024-11-05T22:38:42.760060074-05:00","level":"INFO","msg":"created new stream","id":"qidxbq6i"} +{"time":"2024-11-05T22:38:42.760094185-05:00","level":"INFO","msg":"stream: started","id":"qidxbq6i"} +{"time":"2024-11-05T22:38:42.760128925-05:00","level":"INFO","msg":"sender: started","stream_id":"qidxbq6i"} +{"time":"2024-11-05T22:38:42.760122525-05:00","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"qidxbq6i"}} +{"time":"2024-11-05T22:38:42.760153975-05:00","level":"INFO","msg":"handler: started","stream_id":{"value":"qidxbq6i"}} +{"time":"2024-11-05T22:38:42.931905906-05:00","level":"INFO","msg":"Starting system monitor"} +{"time":"2024-11-06T08:33:32.07331941-05:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"} +{"time":"2024-11-06T20:18:24.44127207-05:00","level":"INFO","msg":"Stopping system monitor"} +{"time":"2024-11-06T20:18:24.467965089-05:00","level":"INFO","msg":"Stopped system monitor"} +{"time":"2024-11-06T20:18:24.912240227-05:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2024-11-06T20:18:25.050966004-05:00","level":"INFO","msg":"handler: operation stats","stats":{}} +{"time":"2024-11-06T20:18:26.067627866-05:00","level":"INFO","msg":"stream: closing","id":"qidxbq6i"} +{"time":"2024-11-06T20:18:26.067659266-05:00","level":"INFO","msg":"handler: closed","stream_id":{"value":"qidxbq6i"}} +{"time":"2024-11-06T20:18:26.067688907-05:00","level":"INFO","msg":"writer: Close: closed","stream_id":{"value":"qidxbq6i"}} +{"time":"2024-11-06T20:18:26.067712327-05:00","level":"INFO","msg":"sender: closed","stream_id":"qidxbq6i"} +{"time":"2024-11-06T20:18:26.067836208-05:00","level":"INFO","msg":"stream: closed","id":"qidxbq6i"} diff --git a/wandb/run-20241105_223842-qidxbq6i/logs/debug.log b/wandb/run-20241105_223842-qidxbq6i/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..5c43e12ac939bda466036a5243e07449b34e435a --- /dev/null +++ b/wandb/run-20241105_223842-qidxbq6i/logs/debug.log @@ -0,0 +1,33 @@ +2024-11-05 22:38:42,647 INFO MainThread:1803215 [wandb_setup.py:_flush():79] Current SDK version is 0.18.5 +2024-11-05 22:38:42,647 INFO MainThread:1803215 [wandb_setup.py:_flush():79] Configure stats pid to 1803215 +2024-11-05 22:38:42,647 INFO MainThread:1803215 [wandb_setup.py:_flush():79] Loading settings from /home/chunhui/.config/wandb/settings +2024-11-05 22:38:42,647 INFO MainThread:1803215 [wandb_setup.py:_flush():79] Loading settings from /mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/settings +2024-11-05 22:38:42,647 INFO MainThread:1803215 [wandb_setup.py:_flush():79] Loading settings from environment variables: {} +2024-11-05 22:38:42,647 INFO MainThread:1803215 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': None, '_disable_service': None} +2024-11-05 22:38:42,647 INFO MainThread:1803215 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'train/train_deep_wandb.py', 'program_abspath': '/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py', 'program': '/mnt/ssd3/chunhui/yaning/project/impossible_llm/train/train_deep_wandb.py'} +2024-11-05 22:38:42,647 INFO MainThread:1803215 [wandb_setup.py:_flush():79] Applying login settings: {} +2024-11-05 22:38:42,647 INFO MainThread:1803215 [wandb_init.py:_log_setup():534] Logging user logs to /mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/run-20241105_223842-qidxbq6i/logs/debug.log +2024-11-05 22:38:42,647 INFO MainThread:1803215 [wandb_init.py:_log_setup():535] Logging internal logs to /mnt/ssd3/chunhui/yaning/project/impossible_llm/train/wandb/run-20241105_223842-qidxbq6i/logs/debug-internal.log +2024-11-05 22:38:42,647 INFO MainThread:1803215 [wandb_init.py:init():621] calling init triggers +2024-11-05 22:38:42,648 INFO MainThread:1803215 [wandb_init.py:init():628] wandb.init called with sweep_config: {} +config: {} +2024-11-05 22:38:42,648 INFO MainThread:1803215 [wandb_init.py:init():671] starting backend +2024-11-05 22:38:42,648 INFO MainThread:1803215 [wandb_init.py:init():675] sending inform_init request +2024-11-05 22:38:42,649 INFO MainThread:1803215 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2024-11-05 22:38:42,650 INFO MainThread:1803215 [wandb_init.py:init():688] backend started and connected +2024-11-05 22:38:42,653 INFO MainThread:1803215 [wandb_init.py:init():783] updated telemetry +2024-11-05 22:38:42,684 INFO MainThread:1803215 [wandb_init.py:init():816] communicating run to backend with 90.0 second timeout +2024-11-05 22:38:42,929 INFO MainThread:1803215 [wandb_init.py:init():867] starting run threads in backend +2024-11-05 22:38:43,019 INFO MainThread:1803215 [wandb_run.py:_console_start():2463] atexit reg +2024-11-05 22:38:43,019 INFO MainThread:1803215 [wandb_run.py:_redirect():2311] redirect: wrap_raw +2024-11-05 22:38:43,019 INFO MainThread:1803215 [wandb_run.py:_redirect():2376] Wrapping output streams. +2024-11-05 22:38:43,019 INFO MainThread:1803215 [wandb_run.py:_redirect():2401] Redirects installed. +2024-11-05 22:38:43,021 INFO MainThread:1803215 [wandb_init.py:init():911] run started, returning control to user process +2024-11-05 22:38:43,022 INFO MainThread:1803215 [wandb_run.py:_config_callback():1390] config_cb None None {'perturbation': 'shuffle_deterministic57', 'train_set': '10M', 'batch_size': 3, 'epoch': 3, 'seed': 0, 'lr': 5e-06} +2024-11-06 20:18:24,430 INFO MainThread:1803215 [wandb_run.py:_finish():2158] finishing run yaning1001-dartmouth-college/exp-impo-shuffle/qidxbq6i +2024-11-06 20:18:24,439 INFO MainThread:1803215 [wandb_run.py:_atexit_cleanup():2426] got exitcode: 0 +2024-11-06 20:18:24,440 INFO MainThread:1803215 [wandb_run.py:_restore():2408] restore +2024-11-06 20:18:24,440 INFO MainThread:1803215 [wandb_run.py:_restore():2414] restore done +2024-11-06 20:18:26,054 INFO MainThread:1803215 [wandb_run.py:_footer_history_summary_info():3975] rendering history +2024-11-06 20:18:26,054 INFO MainThread:1803215 [wandb_run.py:_footer_history_summary_info():4007] rendering summary +2024-11-06 20:18:26,067 INFO MainThread:1803215 [wandb_run.py:_footer_sync_info():3934] logging synced files diff --git a/wandb/run-20241106_234348-l3eig11b/run-l3eig11b.wandb b/wandb/run-20241106_234348-l3eig11b/run-l3eig11b.wandb new file mode 100644 index 0000000000000000000000000000000000000000..dec1c9684e73146892e31a88e5ac7a9d14d98a19 --- /dev/null +++ b/wandb/run-20241106_234348-l3eig11b/run-l3eig11b.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:206fb9fb543093ab2a919ab0f592903f309dffbe8281e36a1967a784f72aaecd +size 18186240 diff --git a/wandb/run-20241106_234348-zan8h57j/run-zan8h57j.wandb b/wandb/run-20241106_234348-zan8h57j/run-zan8h57j.wandb new file mode 100644 index 0000000000000000000000000000000000000000..7c51845fee9ecd4cbc78e18b67d3248789714217 --- /dev/null +++ b/wandb/run-20241106_234348-zan8h57j/run-zan8h57j.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:459fc026c26bcdc681988810b77264c64aaf78a5633c26322526711bf8e7d1cd +size 18219008