nvan13 committed on
Commit 46caca0 · verified · 1 Parent(s): 2005915

Upload folder using huggingface_hub

Files changed (36)
  1. .gitattributes +3 -0
  2. llama/output/cms3/ft/README.md +202 -0
  3. llama/output/cms3/ft/adapter_config.json +23 -0
  4. llama/output/cms3/ft/adapter_model.safetensors +3 -0
  5. llama/output/cms3/ft/added_tokens.json +3 -0
  6. llama/output/cms3/ft/special_tokens_map.json +30 -0
  7. llama/output/cms3/ft/tokenizer.json +0 -0
  8. llama/output/cms3/ft/tokenizer.model +3 -0
  9. llama/output/cms3/ft/tokenizer_config.json +51 -0
  10. llama/output/cpr2/ft/adapter_config.json +3 -3
  11. llama/output/cpr2/ft/adapter_model.safetensors +2 -2
  12. llama/test.sh +1 -1
  13. llama/tune.sh +36 -7
  14. llama/wandb/debug-internal.log +12 -0
  15. llama/wandb/debug.log +26 -0
  16. llama/wandb/offline-run-20260113_162154-a4ea78sb/files/requirements.txt +199 -0
  17. llama/wandb/offline-run-20260113_162154-a4ea78sb/logs/debug-core.log +14 -0
  18. llama/wandb/offline-run-20260113_162154-a4ea78sb/logs/debug-internal.log +12 -0
  19. llama/wandb/offline-run-20260113_162154-a4ea78sb/logs/debug.log +26 -0
  20. llama/wandb/offline-run-20260113_162154-a4ea78sb/run-a4ea78sb.wandb +3 -0
  21. llama/wandb/offline-run-20260113_213836-a3j2m1nj/files/requirements.txt +199 -0
  22. llama/wandb/offline-run-20260113_213836-a3j2m1nj/logs/debug-core.log +14 -0
  23. llama/wandb/offline-run-20260113_213836-a3j2m1nj/logs/debug-internal.log +12 -0
  24. llama/wandb/offline-run-20260113_213836-a3j2m1nj/logs/debug.log +26 -0
  25. llama/wandb/offline-run-20260113_213836-a3j2m1nj/run-a3j2m1nj.wandb +3 -0
  26. llama/wandb/offline-run-20260114_165804-73rsvobf/files/requirements.txt +199 -0
  27. llama/wandb/offline-run-20260114_165804-73rsvobf/logs/debug-core.log +14 -0
  28. llama/wandb/offline-run-20260114_165804-73rsvobf/logs/debug-internal.log +12 -0
  29. llama/wandb/offline-run-20260114_165804-73rsvobf/logs/debug.log +26 -0
  30. llama/wandb/offline-run-20260114_165804-73rsvobf/run-73rsvobf.wandb +0 -0
  31. llama/wandb/offline-run-20260114_173548-7ubed6qe/files/requirements.txt +199 -0
  32. llama/wandb/offline-run-20260114_173548-7ubed6qe/logs/debug-core.log +14 -0
  33. llama/wandb/offline-run-20260114_173548-7ubed6qe/logs/debug-internal.log +12 -0
  34. llama/wandb/offline-run-20260114_173548-7ubed6qe/logs/debug.log +26 -0
  35. llama/wandb/offline-run-20260114_173548-7ubed6qe/run-7ubed6qe.wandb +3 -0
  36. llama/wandb/settings +3 -0
.gitattributes CHANGED
@@ -39,3 +39,6 @@ generation/control/ControlNet/font/DejaVuSans.ttf filter=lfs diff=lfs merge=lfs -text
  generation/control/ControlNet/ldm/modules/image_degradation/utils/test.png filter=lfs diff=lfs merge=lfs -text
  llama/data/MetaMathQA-40K.json filter=lfs diff=lfs merge=lfs -text
  llama/data/MetaMathQA.json filter=lfs diff=lfs merge=lfs -text
+ llama/wandb/offline-run-20260113_162154-a4ea78sb/run-a4ea78sb.wandb filter=lfs diff=lfs merge=lfs -text
+ llama/wandb/offline-run-20260113_213836-a3j2m1nj/run-a3j2m1nj.wandb filter=lfs diff=lfs merge=lfs -text
+ llama/wandb/offline-run-20260114_173548-7ubed6qe/run-7ubed6qe.wandb filter=lfs diff=lfs merge=lfs -text
llama/output/cms3/ft/README.md ADDED
@@ -0,0 +1,202 @@
+ ---
+ base_model: meta-llama/Llama-2-7b-hf
+ library_name: peft
+ ---
+ 
+ # Model Card for Model ID
+ 
+ <!-- Provide a quick summary of what the model is/does. -->
+ 
+ 
+ 
+ ## Model Details
+ 
+ ### Model Description
+ 
+ <!-- Provide a longer summary of what this model is. -->
+ 
+ 
+ 
+ - **Developed by:** [More Information Needed]
+ - **Funded by [optional]:** [More Information Needed]
+ - **Shared by [optional]:** [More Information Needed]
+ - **Model type:** [More Information Needed]
+ - **Language(s) (NLP):** [More Information Needed]
+ - **License:** [More Information Needed]
+ - **Finetuned from model [optional]:** [More Information Needed]
+ 
+ ### Model Sources [optional]
+ 
+ <!-- Provide the basic links for the model. -->
+ 
+ - **Repository:** [More Information Needed]
+ - **Paper [optional]:** [More Information Needed]
+ - **Demo [optional]:** [More Information Needed]
+ 
+ ## Uses
+ 
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+ 
+ ### Direct Use
+ 
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+ 
+ [More Information Needed]
+ 
+ ### Downstream Use [optional]
+ 
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+ 
+ [More Information Needed]
+ 
+ ### Out-of-Scope Use
+ 
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+ 
+ [More Information Needed]
+ 
+ ## Bias, Risks, and Limitations
+ 
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
+ 
+ [More Information Needed]
+ 
+ ### Recommendations
+ 
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+ 
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+ 
+ ## How to Get Started with the Model
+ 
+ Use the code below to get started with the model.
+ 
+ [More Information Needed]
+ 
+ ## Training Details
+ 
+ ### Training Data
+ 
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+ 
+ [More Information Needed]
+ 
+ ### Training Procedure
+ 
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+ 
+ #### Preprocessing [optional]
+ 
+ [More Information Needed]
+ 
+ 
+ #### Training Hyperparameters
+ 
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+ 
+ #### Speeds, Sizes, Times [optional]
+ 
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+ 
+ [More Information Needed]
+ 
+ ## Evaluation
+ 
+ <!-- This section describes the evaluation protocols and provides the results. -->
+ 
+ ### Testing Data, Factors & Metrics
+ 
+ #### Testing Data
+ 
+ <!-- This should link to a Dataset Card if possible. -->
+ 
+ [More Information Needed]
+ 
+ #### Factors
+ 
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+ 
+ [More Information Needed]
+ 
+ #### Metrics
+ 
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
+ 
+ [More Information Needed]
+ 
+ ### Results
+ 
+ [More Information Needed]
+ 
+ #### Summary
+ 
+ 
+ 
+ ## Model Examination [optional]
+ 
+ <!-- Relevant interpretability work for the model goes here -->
+ 
+ [More Information Needed]
+ 
+ ## Environmental Impact
+ 
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+ 
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+ 
+ - **Hardware Type:** [More Information Needed]
+ - **Hours used:** [More Information Needed]
+ - **Cloud Provider:** [More Information Needed]
+ - **Compute Region:** [More Information Needed]
+ - **Carbon Emitted:** [More Information Needed]
+ 
+ ## Technical Specifications [optional]
+ 
+ ### Model Architecture and Objective
+ 
+ [More Information Needed]
+ 
+ ### Compute Infrastructure
+ 
+ [More Information Needed]
+ 
+ #### Hardware
+ 
+ [More Information Needed]
+ 
+ #### Software
+ 
+ [More Information Needed]
+ 
+ ## Citation [optional]
+ 
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+ 
+ **BibTeX:**
+ 
+ [More Information Needed]
+ 
+ **APA:**
+ 
+ [More Information Needed]
+ 
+ ## Glossary [optional]
+ 
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+ 
+ [More Information Needed]
+ 
+ ## More Information [optional]
+ 
+ [More Information Needed]
+ 
+ ## Model Card Authors [optional]
+ 
+ [More Information Needed]
+ 
+ ## Model Card Contact
+ 
+ [More Information Needed]
+ ### Framework versions
+ 
+ - PEFT 0.10.0
llama/output/cms3/ft/adapter_config.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "alpha_pattern": {},
+   "auto_mapping": null,
+   "base_model_name_or_path": "meta-llama/Llama-2-7b-hf",
+   "block_share": false,
+   "coft": false,
+   "eps": 0.0001,
+   "inference_mode": true,
+   "init_weights": true,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "module_dropout": 0.0,
+   "modules_to_save": null,
+   "peft_type": "OFT",
+   "r": 32,
+   "rank_pattern": {},
+   "revision": null,
+   "target_modules": [
+     "v_proj",
+     "q_proj"
+   ],
+   "task_type": "CAUSAL_LM"
+ }
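For readers unfamiliar with PEFT's OFT adapter format, the sketch below parses a config like the one added above and checks the fields this commit cares about (`peft_type`, `r`, `target_modules`). The JSON literal is an abridged copy of the file; actually applying the adapter would go through `peft.PeftModel.from_pretrained` against the base model, which is not shown here since it requires downloading Llama-2-7b.

```python
import json

# Abridged copy of llama/output/cms3/ft/adapter_config.json.
config_text = """
{
  "base_model_name_or_path": "meta-llama/Llama-2-7b-hf",
  "peft_type": "OFT",
  "r": 32,
  "target_modules": ["v_proj", "q_proj"],
  "task_type": "CAUSAL_LM"
}
"""

cfg = json.loads(config_text)
assert cfg["peft_type"] == "OFT"                            # orthogonal fine-tuning adapter
assert cfg["r"] == 32                                       # block size, raised from 1 in the cpr2 run
assert set(cfg["target_modules"]) == {"q_proj", "v_proj"}   # attention projections only
print(cfg["base_model_name_or_path"])
```

Only the attention query/value projections are adapted, which is why the adapter checkpoint stays around 1 GB rather than the full 13 GB base model.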
llama/output/cms3/ft/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:08bbd4b1c59c317764a1e946eff8aab4249c6aba2195d714ba8f6900b906f691
+ size 1082171824
llama/output/cms3/ft/added_tokens.json ADDED
@@ -0,0 +1,3 @@
+ {
+   "[PAD]": 32000
+ }
llama/output/cms3/ft/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+ {
+   "bos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "[PAD]",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
llama/output/cms3/ft/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
llama/output/cms3/ft/tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+ size 499723
llama/output/cms3/ft/tokenizer_config.json ADDED
@@ -0,0 +1,51 @@
+ {
+   "add_bos_token": true,
+   "add_eos_token": false,
+   "add_prefix_space": null,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "32000": {
+       "content": "[PAD]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "</s>",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "</s>",
+   "extra_special_tokens": {},
+   "legacy": false,
+   "model_max_length": 512,
+   "pad_token": "[PAD]",
+   "padding_side": "right",
+   "sp_model_kwargs": {},
+   "tokenizer_class": "LlamaTokenizer",
+   "unk_token": "</s>",
+   "use_default_system_prompt": false
+ }
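A quick sanity check on the tokenizer files above: the stock Llama-2 vocabulary (32000 tokens) gains one `[PAD]` token at id 32000, and `bos_token`/`unk_token` are recorded as `</s>` rather than the usual `<s>`/`<unk>` — that is what the file actually says, not an assumption. The sketch parses an abridged copy of the config and confirms both points:

```python
import json

# Abridged copy of llama/output/cms3/ft/tokenizer_config.json.
tok_cfg = json.loads("""
{
  "added_tokens_decoder": {
    "0": {"content": "<unk>"},
    "1": {"content": "<s>"},
    "2": {"content": "</s>"},
    "32000": {"content": "[PAD]"}
  },
  "bos_token": "</s>",
  "eos_token": "</s>",
  "unk_token": "</s>",
  "pad_token": "[PAD]",
  "model_max_length": 512
}
""")

# bos/eos/unk all map to </s>; only [PAD] is genuinely new.
assert tok_cfg["bos_token"] == tok_cfg["eos_token"] == tok_cfg["unk_token"] == "</s>"
pad_id = max(int(i) for i in tok_cfg["added_tokens_decoder"])
assert pad_id == 32000  # appending [PAD] grows the effective vocab to 32001
```

This matches `added_tokens.json` (`"[PAD]": 32000`) and the `vocab_size: 32001` recorded in the wandb run config further down.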
llama/output/cpr2/ft/adapter_config.json CHANGED
@@ -12,12 +12,12 @@
    "module_dropout": 0.0,
    "modules_to_save": null,
    "peft_type": "OFT",
-   "r": 1,
+   "r": 32,
    "rank_pattern": {},
    "revision": null,
    "target_modules": [
-     "q_proj",
-     "v_proj"
+     "v_proj",
+     "q_proj"
    ],
    "task_type": "CAUSAL_LM"
  }
llama/output/cpr2/ft/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:daede58d9fd4806298d90f9af12ba478c119afab844244f355f35ab3829eb029
- size 1049665904
+ oid sha256:0e77e7b826764e15763f84d06752d0dae06c0a6b97003d20593a0070567e417a
+ size 1082171824
llama/test.sh CHANGED
@@ -2,7 +2,7 @@ BASE_MODEL="meta-llama/Llama-2-7b-hf"
  # OUTPUT="output/cp3e5"
  # OUTPUT="output/cp1e5N"
  # OUTPUT="output/cp1e5N"
- OUTPUT="output/cpr2"
+ OUTPUT="output/cms3"
  python merge_adapter_to_base_model.py --base_mode $BASE_MODEL --adapter $OUTPUT/ft/ --output_path $OUTPUT/merged/
  python inference/gsm8k_inference.py --model $OUTPUT/merged/
  python inference/MATH_inference.py --model $OUTPUT/merged/
llama/tune.sh CHANGED
@@ -89,11 +89,39 @@ export WANDB_PROJECT="HRA_MetaMath395"
  # --report_to "wandb"
  # wandb sync wandb/latest-run
  
- OUTPUT="output/cpr2"
+ # OUTPUT="output/cpr2"
+ # python finetune_32.py \
+ #     --model_name_or_path $BASE_MODEL \
+ #     --output_dir $OUTPUT \
+ #     --hrft_r 1 \
+ #     --init_a 1e-4 \
+ #     --eps 1e-4 \
+ #     --add_orth "none" \
+ #     --lamda 1e-4 \
+ #     --data_path $DATA_PATH \
+ #     --dataset_split "train"\
+ #     --dataset_field query response \
+ #     --num_train_epochs 3 \
+ #     --per_device_train_batch_size 32 \
+ #     --gradient_accumulation_steps 1 \
+ #     --save_strategy "steps" \
+ #     --save_steps 0 \
+ #     --save_total_limit 1 \
+ #     --learning_rate 3e-5 \
+ #     --weight_decay 0. \
+ #     --warmup_ratio 0.005 \
+ #     --lr_scheduler_type "cosine" \
+ #     --logging_steps 200 \
+ #     --bf16 True \
+ #     --tf32 True \
+ #     --report_to "wandb"
+ # wandb sync wandb/latest-run
+ 
+ OUTPUT="output/cms3"
  python finetune_32.py \
      --model_name_or_path $BASE_MODEL \
      --output_dir $OUTPUT \
-     --hrft_r 1 \
+     --hrft_r 32 \
      --init_a 1e-4 \
      --eps 1e-4 \
      --add_orth "none" \
@@ -101,13 +129,13 @@
      --data_path $DATA_PATH \
      --dataset_split "train"\
      --dataset_field query response \
-     --num_train_epochs 3 \
-     --per_device_train_batch_size 32 \
-     --gradient_accumulation_steps 1 \
+     --num_train_epochs 2 \
+     --per_device_train_batch_size 8 \
+     --gradient_accumulation_steps 4 \
      --save_strategy "steps" \
      --save_steps 0 \
      --save_total_limit 1 \
-     --learning_rate 3e-5 \
+     --learning_rate 1e-5 \
      --weight_decay 0. \
      --warmup_ratio 0.005 \
      --lr_scheduler_type "cosine" \
@@ -115,4 +143,5 @@
      --bf16 True \
      --tf32 True \
      --report_to "wandb"
- wandb sync wandb/latest-run
+ date +"%F %T"
+ # wandb sync wandb/latest-run
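Note that the batch-size change in `tune.sh` preserves the effective batch size: the commented-out cpr2 run used 32 examples per device with no accumulation, while the new cms3 run uses 8 per device with 4 accumulation steps. A one-line check (assuming a single-GPU launch, since the script invokes plain `python` rather than a distributed launcher):

```python
# Effective batch = per_device_train_batch_size * gradient_accumulation_steps (* num_gpus).
cpr2 = 32 * 1  # old run: --per_device_train_batch_size 32, --gradient_accumulation_steps 1
cms3 = 8 * 4   # new run: --per_device_train_batch_size 8,  --gradient_accumulation_steps 4
assert cpr2 == cms3 == 32  # same effective batch, lower peak memory per optimizer step
```

Trading per-device batch for accumulation keeps the optimizer schedule comparable while fitting the larger r=32 adapter in memory.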
llama/wandb/debug-internal.log ADDED
@@ -0,0 +1,12 @@
+ {"time":"2026-01-14T17:35:49.006544401+09:00","level":"INFO","msg":"stream: starting","core version":"0.23.0"}
+ {"time":"2026-01-14T17:35:49.149824363+09:00","level":"WARN","msg":"featurechecker: GraphQL client is nil, skipping feature loading"}
+ {"time":"2026-01-14T17:35:49.149873743+09:00","level":"INFO","msg":"stream: created new stream","id":"7ubed6qe"}
+ {"time":"2026-01-14T17:35:49.149898431+09:00","level":"INFO","msg":"handler: started","stream_id":"7ubed6qe"}
+ {"time":"2026-01-14T17:35:49.151612025+09:00","level":"INFO","msg":"stream: started","id":"7ubed6qe"}
+ {"time":"2026-01-14T17:35:49.151616181+09:00","level":"INFO","msg":"writer: started","stream_id":"7ubed6qe"}
+ {"time":"2026-01-14T17:35:49.151631131+09:00","level":"INFO","msg":"sender: started","stream_id":"7ubed6qe"}
+ {"time":"2026-01-14T17:35:49.152375031+09:00","level":"WARN","msg":"runupserter: server does not expand metric globs but the x_server_side_expand_glob_metrics setting is set; ignoring"}
+ {"time":"2026-01-14T21:46:20.466650711+09:00","level":"INFO","msg":"stream: closing","id":"7ubed6qe"}
+ {"time":"2026-01-14T21:46:20.466968327+09:00","level":"INFO","msg":"handler: closed","stream_id":"7ubed6qe"}
+ {"time":"2026-01-14T21:46:20.468836334+09:00","level":"INFO","msg":"sender: closed","stream_id":"7ubed6qe"}
+ {"time":"2026-01-14T21:46:20.468852562+09:00","level":"INFO","msg":"stream: closed","id":"7ubed6qe"}
llama/wandb/debug.log ADDED
@@ -0,0 +1,26 @@
+ 2026-01-14 17:35:48,731 INFO MainThread:1752905 [wandb_setup.py:_flush():80] Current SDK version is 0.23.0
+ 2026-01-14 17:35:48,731 INFO MainThread:1752905 [wandb_setup.py:_flush():80] Configure stats pid to 1752905
+ 2026-01-14 17:35:48,731 INFO MainThread:1752905 [wandb_setup.py:_flush():80] Loading settings from /home/work/.config/wandb/settings
+ 2026-01-14 17:35:48,731 INFO MainThread:1752905 [wandb_setup.py:_flush():80] Loading settings from /home/work/an_nguyen/HRA/llama/wandb/settings
+ 2026-01-14 17:35:48,731 INFO MainThread:1752905 [wandb_setup.py:_flush():80] Loading settings from environment variables
+ 2026-01-14 17:35:48,731 INFO MainThread:1752905 [wandb_init.py:setup_run_log_directory():713] Logging user logs to /home/work/an_nguyen/HRA/llama/wandb/offline-run-20260114_173548-7ubed6qe/logs/debug.log
+ 2026-01-14 17:35:48,731 INFO MainThread:1752905 [wandb_init.py:setup_run_log_directory():714] Logging internal logs to /home/work/an_nguyen/HRA/llama/wandb/offline-run-20260114_173548-7ubed6qe/logs/debug-internal.log
+ 2026-01-14 17:35:48,731 INFO MainThread:1752905 [wandb_init.py:init():840] calling init triggers
+ 2026-01-14 17:35:48,731 INFO MainThread:1752905 [wandb_init.py:init():845] wandb.init called with sweep_config: {}
+ config: {'_wandb': {}}
+ 2026-01-14 17:35:48,731 INFO MainThread:1752905 [wandb_init.py:init():888] starting backend
+ 2026-01-14 17:35:48,984 INFO MainThread:1752905 [wandb_init.py:init():891] sending inform_init request
+ 2026-01-14 17:35:48,997 INFO MainThread:1752905 [wandb_init.py:init():899] backend started and connected
+ 2026-01-14 17:35:48,998 INFO MainThread:1752905 [wandb_init.py:init():969] updated telemetry
+ 2026-01-14 17:35:48,999 INFO MainThread:1752905 [wandb_init.py:init():993] communicating run to backend with 90.0 second timeout
+ 2026-01-14 17:35:49,154 INFO MainThread:1752905 [wandb_init.py:init():1040] starting run threads in backend
+ 2026-01-14 17:35:49,257 INFO MainThread:1752905 [wandb_run.py:_console_start():2504] atexit reg
+ 2026-01-14 17:35:49,257 INFO MainThread:1752905 [wandb_run.py:_redirect():2352] redirect: wrap_raw
+ 2026-01-14 17:35:49,257 INFO MainThread:1752905 [wandb_run.py:_redirect():2421] Wrapping output streams.
+ 2026-01-14 17:35:49,257 INFO MainThread:1752905 [wandb_run.py:_redirect():2444] Redirects installed.
+ 2026-01-14 17:35:49,259 INFO MainThread:1752905 [wandb_init.py:init():1080] run started, returning control to user process
+ 2026-01-14 17:35:49,260 INFO MainThread:1752905 [wandb_run.py:_config_callback():1385] config_cb None None {'peft_config': {'default': {'peft_type': 'OFT', 'auto_mapping': None, 'base_model_name_or_path': 'meta-llama/Llama-2-7b-hf', 'revision': None, 'task_type': 'CAUSAL_LM', 'inference_mode': False, 'rank_pattern': {}, 'alpha_pattern': {}, 'r': 32, 'module_dropout': 0.0, 'target_modules': ['v_proj', 'q_proj'], 'init_weights': True, 'layers_to_transform': None, 'layers_pattern': None, 'modules_to_save': None, 'coft': False, 'eps': 0.0001, 'block_share': False}}, 'vocab_size': 32001, 'max_position_embeddings': 4096, 'hidden_size': 4096, 'intermediate_size': 11008, 'num_hidden_layers': 32, 'num_attention_heads': 32, 'num_key_value_heads': 32, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-05, 'pretraining_tp': 1, 'use_cache': False, 'rope_theta': 10000.0, 'rope_scaling': None, 'attention_bias': False, 'attention_dropout': 0.0, 'mlp_bias': False, 'head_dim': 128, 'return_dict': True, 'output_hidden_states': False, 'torchscript': False, 'dtype': 'float32', 'pruned_heads': {}, 'tie_word_embeddings': False, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'architectures': ['LlamaForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'task_specific_params': None, 'problem_type': None, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 2, 'pad_token_id': 32000, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 
'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'num_beam_groups': 1, 'diversity_penalty': 0.0, '_name_or_path': 'meta-llama/Llama-2-7b-hf', 'transformers_version': '4.57.3', 'model_type': 'llama', 'tf_legacy_loss': False, 'use_bfloat16': False, 'output_attentions': False, 'output_dir': 'output/cms3', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'eval_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 2.0, 'max_steps': -1, 'lr_scheduler_type': 'cosine', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.005, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 'output/cms3/runs/Jan14_17-35-42_main1', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 200, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 0.0, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'bf16': True, 'fp16': False, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': True, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 
'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': None, 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'parallelism_config': None, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'project': 'huggingface', 'trackio_space_id': 'trackio', 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'hub_revision': None, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'include_tokens_per_second': False, 
'include_num_input_tokens_seen': 'no', 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'liger_kernel_config': None, 'eval_use_gather_object': False, 'average_tokens_across_devices': True, 'model_name_or_path': 'meta-llama/Llama-2-7b-hf', 'adapter_name_or_path': None, 'data_path': './data/MetaMathQA-40K.json', 'dataset_split': 'train', 'dataset_field': ['query', 'response'], 'model_max_length': 512, 'hrft_r': 32, 'init_a': 0.0001, 'eps': 0.0001, 'lamda': 0.0001, 'add_orth': 'none', 'init_weights': True}
+ 2026-01-14 17:35:49,269 INFO MainThread:1752905 [wandb_config.py:__setitem__():154] [no run ID] config set model/num_parameters = 6746812416 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7f1ba1cabf40>>
+ 2026-01-14 17:35:49,269 INFO MainThread:1752905 [wandb_run.py:_config_callback():1385] config_cb model/num_parameters 6746812416 None
+ 2026-01-14 21:46:20,466 INFO wandb-AsyncioManager-main:1752905 [service_client.py:_forward_responses():80] Reached EOF.
+ 2026-01-14 21:46:20,467 INFO wandb-AsyncioManager-main:1752905 [mailbox.py:close():137] Closing mailbox, abandoning 0 handles.
llama/wandb/offline-run-20260113_162154-a4ea78sb/files/requirements.txt ADDED
@@ -0,0 +1,199 @@
+ setuptools==80.9.0
+ wheel==0.45.1
+ pip==25.3
+ Brotli==1.1.0
+ certifi==2025.11.12
+ charset-normalizer==3.4.4
+ filelock==3.20.0
+ hpack==4.1.0
+ hyperframe==6.1.0
+ idna==3.11
+ MarkupSafe==3.0.3
+ mpmath==1.3.0
+ networkx==3.4.2
+ pycparser==2.22
+ PySocks==1.7.1
+ PyYAML==6.0.3
+ typing_extensions==4.15.0
+ cffi==2.0.0
+ gmpy2==2.2.1
+ h2==4.3.0
+ Jinja2==3.1.6
+ sympy==1.14.0
+ zstandard==0.23.0
+ urllib3==2.5.0
+ requests==2.32.5
+ appdirs==1.4.4
+ rich-toolkit==0.17.0
+ torchaudio==2.9.0
+ triton==3.5.0
+ tqdm==4.67.1
+ safetensors==0.7.0
+ regex==2025.11.3
+ packaging==25.0
+ hf-xet==1.2.0
+ hf-xet==1.2.1
+ huggingface_hub==0.36.0
+ tokenizers==0.22.1
+ pytz==2025.2
+ xxhash==3.6.0
+ tzdata==2025.2
+ six==1.17.0
+ pyarrow-hotfix==0.7
+ pyarrow==22.0.0
+ pyarrow==21.0.0
+ propcache==0.4.1
+ propcache==0.3.1
+ multidict==6.7.0
+ multidict==6.6.3
+ aiohappyeyeballs==2.6.1
+ fsspec==2024.3.1
+ fsspec==2025.10.0
+ frozenlist==1.8.0
+ frozenlist==1.7.0
+ dill==0.3.8
+ dill==0.4.0
+ attrs==25.4.0
+ async-timeout==5.0.1
+ yarl==1.22.0
+ python-dateutil==2.9.0.post0
+ multiprocess==0.70.16
+ multiprocess==0.70.18
+ aiosignal==1.4.0
+ pandas==2.3.3
+ aiohttp==3.13.2
+ pycountry==24.6.1
+ psutil==7.1.3
+ accelerate==1.12.0
+ peft==0.10.0
+ Pygments==2.19.2
+ colorama==0.4.6
+ shellingham==1.5.4
+ sniffio==1.3.1
+ exceptiongroup==1.3.1
+ h11==0.16.0
+ typer-slim==0.20.0
+ anyio==4.12.0
+ httpcore==1.0.9
+ httpx==0.28.1
+ datasets==4.4.1
+ ninja==1.13.0
+ docker-pycreds==0.4.0
+ eval_type_backport==0.3.1
+ platformdirs==4.5.0
+ sentry-sdk==2.47.0
+ annotated-types==0.7.0
+ typing-inspection==0.4.2
+ smmap==5.0.2
+ gitdb==4.0.12
+ GitPython==3.1.45
+ protobuf==6.31.1
+ setproctitle==1.3.6
+ pydantic_core==2.41.5
+ pydantic==2.12.5
+ wandb==0.23.0
+ jsonlines==4.0.0
+ supervisor==4.3.0
+ py-cpuinfo==9.0.0
+ nvidia-ml-py==13.580.82
+ nvidia-cusparselt-cu12==0.7.1
+ fastrlock==0.8.3
+ websockets==15.0.1
+ uvloop==0.22.1
+ tomli==2.3.0
+ tabulate==0.9.0
+ sentencepiece==0.2.1
+ rpds-py==0.30.0
+ rignore==0.7.6
+ pyzmq==27.1.0
+ python-multipart==0.0.20
+ python-json-logger==4.0.0
+ python-dotenv==1.2.1
+ pybase64==1.4.2
+ prometheus_client==0.23.1
+ starlette==0.50.0
+ pillow==12.0.0
+ partial-json-parser==0.2.1.1.post7
+ outlines_core==0.2.11
+ nvidia-nvtx-cu12==12.8.90
+ nvidia-nvshmem-cu12==3.3.20
+ nvidia-nvjitlink-cu12==12.8.93
+ nvidia-nccl-cu12==2.27.5
+ nvidia-curand-cu12==10.3.9.90
+ nvidia-cufile-cu12==1.13.1.3
+ nvidia-cudnn-frontend==1.16.0
+ nvidia-cuda-runtime-cu12==12.8.90
+ nvidia-cuda-nvrtc-cu12==12.8.93
+ nvidia-cuda-cupti-cu12==12.8.90
+ nvidia-cublas-cu12==12.8.4.1
+ numpy==2.2.6
+ msgspec==0.20.0
+ msgpack==1.1.2
+ mdurl==0.1.2
+ loguru==0.7.3
+ llvmlite==0.44.0
+ llguidance==1.3.0
+ lark==1.2.2
+ jmespath==1.0.1
+ jiter==0.12.0
+ interegular==0.3.3
+ httptools==0.7.1
+ fastar==0.8.0
+ einops==0.8.1
+ docstring_parser==0.17.0
+ dnspython==2.8.0
+ distro==1.9.0
+ diskcache==5.6.3
+ cuda-pathfinder==1.3.3
+ cloudpickle==3.1.2
+ rich==14.2.0
+ click==8.2.1
+ cbor2==5.7.1
+ cachetools==6.2.2
+ blake3==1.0.8
+ astor==0.8.1
+ apache-tvm-ffi==0.1.4
+ annotated-doc==0.0.4
+ uvicorn==0.38.0
+ tiktoken==0.12.0
+ scipy==1.15.3
+ referencing==0.37.0
+ opencv-python-headless==4.12.0.88
+ nvidia-cusparse-cu12==12.5.8.93
+ nvidia-cufft-cu12==11.3.3.83
+ nvidia-cudnn-cu12==9.10.2.21
+ numba==0.61.2
+ markdown-it-py==4.0.0
+ gguf==0.17.1
+ email-validator==2.3.0
+ depyf==0.20.0
+ cupy-cuda12x==13.6.0
+ cuda-bindings==13.1.0
+ watchfiles==1.1.1
+ pydantic-extra-types==2.10.6
+ openai-harmony==0.0.8
+ nvidia-cusolver-cu12==11.7.3.90
+ lm-format-enforcer==0.11.3
+ jsonschema-specifications==2025.9.1
+ cuda-python==13.1.0
+ typer==0.20.0
+ transformers==4.57.3
+ torch==2.9.0
+ prometheus-fastapi-instrumentator==7.1.0
183
+ openai==2.9.0
184
+ nvidia-cutlass-dsl==4.3.2
185
+ jsonschema==4.25.1
186
+ fastapi==0.123.10
187
+ anthropic==0.71.0
188
+ xgrammar==0.1.27
189
+ torchvision==0.24.0
190
+ ray==2.52.1
191
+ model-hosting-container-standards==0.1.9
192
+ mistral_common==1.8.6
193
+ flashinfer-python==0.5.3
194
+ fastapi-cloud-cli==0.6.0
195
+ fastapi-cli==0.0.16
196
+ compressed-tensors==0.12.2
197
+ vllm==0.12.0
198
+ Fraction==2.2.0
199
+ DeBERTa==0.1.13
llama/wandb/offline-run-20260113_162154-a4ea78sb/logs/debug-core.log ADDED
@@ -0,0 +1,14 @@
+ {"time":"2026-01-13T16:21:54.904171277+09:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp76uloz6z/port-478237.txt","pid":478237,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+ {"time":"2026-01-13T16:21:54.904639602+09:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":478237}
+ {"time":"2026-01-13T16:21:54.904620011+09:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-478237-478489-4043973307/socket","Net":"unix"}}
+ {"time":"2026-01-13T16:21:55.082336692+09:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
+ {"time":"2026-01-13T16:21:55.096600338+09:00","level":"INFO","msg":"handleInformInit: received","streamId":"a4ea78sb","id":"1(@)"}
+ {"time":"2026-01-13T16:21:55.246575771+09:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"a4ea78sb","id":"1(@)"}
+ {"time":"2026-01-13T20:27:02.083772511+09:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
+ {"time":"2026-01-13T20:27:02.083834376+09:00","level":"INFO","msg":"server is shutting down"}
+ {"time":"2026-01-13T20:27:02.083834598+09:00","level":"INFO","msg":"connection: closing","id":"1(@)"}
+ {"time":"2026-01-13T20:27:02.083885849+09:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
+ {"time":"2026-01-13T20:27:02.083928772+09:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-478237-478489-4043973307/socket","Net":"unix"}}
+ {"time":"2026-01-13T20:27:02.084663238+09:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
+ {"time":"2026-01-13T20:27:02.084673431+09:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
+ {"time":"2026-01-13T20:27:02.084679026+09:00","level":"INFO","msg":"server is closed"}
llama/wandb/offline-run-20260113_162154-a4ea78sb/logs/debug-internal.log ADDED
@@ -0,0 +1,12 @@
+ {"time":"2026-01-13T16:21:55.104436934+09:00","level":"INFO","msg":"stream: starting","core version":"0.23.0"}
+ {"time":"2026-01-13T16:21:55.245591634+09:00","level":"WARN","msg":"featurechecker: GraphQL client is nil, skipping feature loading"}
+ {"time":"2026-01-13T16:21:55.245629072+09:00","level":"INFO","msg":"stream: created new stream","id":"a4ea78sb"}
+ {"time":"2026-01-13T16:21:55.245660605+09:00","level":"INFO","msg":"handler: started","stream_id":"a4ea78sb"}
+ {"time":"2026-01-13T16:21:55.246570364+09:00","level":"INFO","msg":"stream: started","id":"a4ea78sb"}
+ {"time":"2026-01-13T16:21:55.246576469+09:00","level":"INFO","msg":"writer: started","stream_id":"a4ea78sb"}
+ {"time":"2026-01-13T16:21:55.24658713+09:00","level":"INFO","msg":"sender: started","stream_id":"a4ea78sb"}
+ {"time":"2026-01-13T16:21:55.246982093+09:00","level":"WARN","msg":"runupserter: server does not expand metric globs but the x_server_side_expand_glob_metrics setting is set; ignoring"}
+ {"time":"2026-01-13T20:27:02.083832579+09:00","level":"INFO","msg":"stream: closing","id":"a4ea78sb"}
+ {"time":"2026-01-13T20:27:02.083995312+09:00","level":"INFO","msg":"handler: closed","stream_id":"a4ea78sb"}
+ {"time":"2026-01-13T20:27:02.084380422+09:00","level":"INFO","msg":"sender: closed","stream_id":"a4ea78sb"}
+ {"time":"2026-01-13T20:27:02.084390327+09:00","level":"INFO","msg":"stream: closed","id":"a4ea78sb"}
llama/wandb/offline-run-20260113_162154-a4ea78sb/logs/debug.log ADDED
@@ -0,0 +1,26 @@
+ 2026-01-13 16:21:54,830 INFO MainThread:478237 [wandb_setup.py:_flush():80] Current SDK version is 0.23.0
+ 2026-01-13 16:21:54,830 INFO MainThread:478237 [wandb_setup.py:_flush():80] Configure stats pid to 478237
+ 2026-01-13 16:21:54,830 INFO MainThread:478237 [wandb_setup.py:_flush():80] Loading settings from /home/work/.config/wandb/settings
+ 2026-01-13 16:21:54,830 INFO MainThread:478237 [wandb_setup.py:_flush():80] Loading settings from /home/work/an_nguyen/HRA/llama/wandb/settings
+ 2026-01-13 16:21:54,830 INFO MainThread:478237 [wandb_setup.py:_flush():80] Loading settings from environment variables
+ 2026-01-13 16:21:54,830 INFO MainThread:478237 [wandb_init.py:setup_run_log_directory():713] Logging user logs to /home/work/an_nguyen/HRA/llama/wandb/offline-run-20260113_162154-a4ea78sb/logs/debug.log
+ 2026-01-13 16:21:54,830 INFO MainThread:478237 [wandb_init.py:setup_run_log_directory():714] Logging internal logs to /home/work/an_nguyen/HRA/llama/wandb/offline-run-20260113_162154-a4ea78sb/logs/debug-internal.log
+ 2026-01-13 16:21:54,830 INFO MainThread:478237 [wandb_init.py:init():840] calling init triggers
+ 2026-01-13 16:21:54,830 INFO MainThread:478237 [wandb_init.py:init():845] wandb.init called with sweep_config: {}
+ config: {'_wandb': {}}
+ 2026-01-13 16:21:54,830 INFO MainThread:478237 [wandb_init.py:init():888] starting backend
+ 2026-01-13 16:21:55,082 INFO MainThread:478237 [wandb_init.py:init():891] sending inform_init request
+ 2026-01-13 16:21:55,095 INFO MainThread:478237 [wandb_init.py:init():899] backend started and connected
+ 2026-01-13 16:21:55,096 INFO MainThread:478237 [wandb_init.py:init():969] updated telemetry
+ 2026-01-13 16:21:55,096 INFO MainThread:478237 [wandb_init.py:init():993] communicating run to backend with 90.0 second timeout
+ 2026-01-13 16:21:55,248 INFO MainThread:478237 [wandb_init.py:init():1040] starting run threads in backend
+ 2026-01-13 16:21:55,351 INFO MainThread:478237 [wandb_run.py:_console_start():2504] atexit reg
+ 2026-01-13 16:21:55,351 INFO MainThread:478237 [wandb_run.py:_redirect():2352] redirect: wrap_raw
+ 2026-01-13 16:21:55,351 INFO MainThread:478237 [wandb_run.py:_redirect():2421] Wrapping output streams.
+ 2026-01-13 16:21:55,351 INFO MainThread:478237 [wandb_run.py:_redirect():2444] Redirects installed.
+ 2026-01-13 16:21:55,352 INFO MainThread:478237 [wandb_init.py:init():1080] run started, returning control to user process
+ 2026-01-13 16:21:55,354 INFO MainThread:478237 [wandb_run.py:_config_callback():1385] config_cb None None {'peft_config': {'default': {'peft_type': 'OFT', 'auto_mapping': None, 'base_model_name_or_path': 'meta-llama/Llama-2-7b-hf', 'revision': None, 'task_type': 'CAUSAL_LM', 'inference_mode': False, 'rank_pattern': {}, 'alpha_pattern': {}, 'r': 32, 'module_dropout': 0.0, 'target_modules': ['v_proj', 'q_proj'], 'init_weights': True, 'layers_to_transform': None, 'layers_pattern': None, 'modules_to_save': None, 'coft': False, 'eps': 0.0001, 'block_share': False}}, 'vocab_size': 32001, 'max_position_embeddings': 4096, 'hidden_size': 4096, 'intermediate_size': 11008, 'num_hidden_layers': 32, 'num_attention_heads': 32, 'num_key_value_heads': 32, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-05, 'pretraining_tp': 1, 'use_cache': False, 'rope_theta': 10000.0, 'rope_scaling': None, 'attention_bias': False, 'attention_dropout': 0.0, 'mlp_bias': False, 'head_dim': 128, 'return_dict': True, 'output_hidden_states': False, 'torchscript': False, 'dtype': 'float32', 'pruned_heads': {}, 'tie_word_embeddings': False, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'architectures': ['LlamaForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'task_specific_params': None, 'problem_type': None, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 2, 'pad_token_id': 32000, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'num_beam_groups': 1, 'diversity_penalty': 0.0, '_name_or_path': 'meta-llama/Llama-2-7b-hf', 'transformers_version': '4.57.3', 'model_type': 'llama', 'tf_legacy_loss': False, 'use_bfloat16': False, 'output_attentions': False, 'output_dir': 'output/cpr2', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'eval_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 2.0, 'max_steps': -1, 'lr_scheduler_type': 'cosine', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.005, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 'output/cpr2/runs/Jan13_16-21-48_main1', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 200, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 0.0, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'bf16': True, 'fp16': False, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': True, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': None, 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'parallelism_config': None, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'project': 'huggingface', 'trackio_space_id': 'trackio', 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'hub_revision': None, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': 'no', 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'liger_kernel_config': None, 'eval_use_gather_object': False, 'average_tokens_across_devices': True, 'model_name_or_path': 'meta-llama/Llama-2-7b-hf', 'adapter_name_or_path': None, 'data_path': './data/MetaMathQA-40K.json', 'dataset_split': 'train', 'dataset_field': ['query', 'response'], 'model_max_length': 512, 'hrft_r': 32, 'init_a': 0.0001, 'eps': 0.0001, 'lamda': 0.0001, 'add_orth': 'none', 'init_weights': True}
+ 2026-01-13 16:21:55,362 INFO MainThread:478237 [wandb_config.py:__setitem__():154] [no run ID] config set model/num_parameters = 6746812416 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7fd7f1c57f40>>
+ 2026-01-13 16:21:55,362 INFO MainThread:478237 [wandb_run.py:_config_callback():1385] config_cb model/num_parameters 6746812416 None
+ 2026-01-13 20:27:02,083 INFO wandb-AsyncioManager-main:478237 [service_client.py:_forward_responses():80] Reached EOF.
+ 2026-01-13 20:27:02,084 INFO wandb-AsyncioManager-main:478237 [mailbox.py:close():137] Closing mailbox, abandoning 0 handles.
llama/wandb/offline-run-20260113_162154-a4ea78sb/run-a4ea78sb.wandb ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bfe9389a013542101753a8627157c2f4f4846c82c6dca4ef43ae47e7d501c863
+ size 2049726
llama/wandb/offline-run-20260113_213836-a3j2m1nj/files/requirements.txt ADDED
@@ -0,0 +1,199 @@
+ setuptools==80.9.0
+ wheel==0.45.1
+ pip==25.3
+ Brotli==1.1.0
+ certifi==2025.11.12
+ charset-normalizer==3.4.4
+ filelock==3.20.0
+ hpack==4.1.0
+ hyperframe==6.1.0
+ idna==3.11
+ MarkupSafe==3.0.3
+ mpmath==1.3.0
+ networkx==3.4.2
+ pycparser==2.22
+ PySocks==1.7.1
+ PyYAML==6.0.3
+ typing_extensions==4.15.0
+ cffi==2.0.0
+ gmpy2==2.2.1
+ h2==4.3.0
+ Jinja2==3.1.6
+ sympy==1.14.0
+ zstandard==0.23.0
+ urllib3==2.5.0
+ requests==2.32.5
+ appdirs==1.4.4
+ rich-toolkit==0.17.0
+ torchaudio==2.9.0
+ triton==3.5.0
+ tqdm==4.67.1
+ safetensors==0.7.0
+ regex==2025.11.3
+ packaging==25.0
+ hf-xet==1.2.0
+ hf-xet==1.2.1
+ huggingface_hub==0.36.0
+ tokenizers==0.22.1
+ pytz==2025.2
+ xxhash==3.6.0
+ tzdata==2025.2
+ six==1.17.0
+ pyarrow-hotfix==0.7
+ pyarrow==22.0.0
+ pyarrow==21.0.0
+ propcache==0.4.1
+ propcache==0.3.1
+ multidict==6.7.0
+ multidict==6.6.3
+ aiohappyeyeballs==2.6.1
+ fsspec==2024.3.1
+ fsspec==2025.10.0
+ frozenlist==1.8.0
+ frozenlist==1.7.0
+ dill==0.3.8
+ dill==0.4.0
+ attrs==25.4.0
+ async-timeout==5.0.1
+ yarl==1.22.0
+ python-dateutil==2.9.0.post0
+ multiprocess==0.70.16
+ multiprocess==0.70.18
+ aiosignal==1.4.0
+ pandas==2.3.3
+ aiohttp==3.13.2
+ pycountry==24.6.1
+ psutil==7.1.3
+ accelerate==1.12.0
+ peft==0.10.0
+ Pygments==2.19.2
+ colorama==0.4.6
+ shellingham==1.5.4
+ sniffio==1.3.1
+ exceptiongroup==1.3.1
+ h11==0.16.0
+ typer-slim==0.20.0
+ anyio==4.12.0
+ httpcore==1.0.9
+ httpx==0.28.1
+ datasets==4.4.1
+ ninja==1.13.0
+ docker-pycreds==0.4.0
+ eval_type_backport==0.3.1
+ platformdirs==4.5.0
+ sentry-sdk==2.47.0
+ annotated-types==0.7.0
+ typing-inspection==0.4.2
+ smmap==5.0.2
+ gitdb==4.0.12
+ GitPython==3.1.45
+ protobuf==6.31.1
+ setproctitle==1.3.6
+ pydantic_core==2.41.5
+ pydantic==2.12.5
+ wandb==0.23.0
+ jsonlines==4.0.0
+ supervisor==4.3.0
+ py-cpuinfo==9.0.0
+ nvidia-ml-py==13.580.82
+ nvidia-cusparselt-cu12==0.7.1
+ fastrlock==0.8.3
+ websockets==15.0.1
+ uvloop==0.22.1
+ tomli==2.3.0
+ tabulate==0.9.0
+ sentencepiece==0.2.1
+ rpds-py==0.30.0
+ rignore==0.7.6
+ pyzmq==27.1.0
+ python-multipart==0.0.20
+ python-json-logger==4.0.0
+ python-dotenv==1.2.1
+ pybase64==1.4.2
+ prometheus_client==0.23.1
+ starlette==0.50.0
+ pillow==12.0.0
+ partial-json-parser==0.2.1.1.post7
+ outlines_core==0.2.11
+ nvidia-nvtx-cu12==12.8.90
+ nvidia-nvshmem-cu12==3.3.20
+ nvidia-nvjitlink-cu12==12.8.93
+ nvidia-nccl-cu12==2.27.5
+ nvidia-curand-cu12==10.3.9.90
+ nvidia-cufile-cu12==1.13.1.3
+ nvidia-cudnn-frontend==1.16.0
+ nvidia-cuda-runtime-cu12==12.8.90
+ nvidia-cuda-nvrtc-cu12==12.8.93
+ nvidia-cuda-cupti-cu12==12.8.90
+ nvidia-cublas-cu12==12.8.4.1
+ numpy==2.2.6
+ msgspec==0.20.0
+ msgpack==1.1.2
+ mdurl==0.1.2
+ loguru==0.7.3
+ llvmlite==0.44.0
+ llguidance==1.3.0
+ lark==1.2.2
+ jmespath==1.0.1
+ jiter==0.12.0
+ interegular==0.3.3
+ httptools==0.7.1
+ fastar==0.8.0
+ einops==0.8.1
+ docstring_parser==0.17.0
+ dnspython==2.8.0
+ distro==1.9.0
+ diskcache==5.6.3
+ cuda-pathfinder==1.3.3
+ cloudpickle==3.1.2
+ rich==14.2.0
+ click==8.2.1
+ cbor2==5.7.1
+ cachetools==6.2.2
+ blake3==1.0.8
+ astor==0.8.1
+ apache-tvm-ffi==0.1.4
+ annotated-doc==0.0.4
+ uvicorn==0.38.0
+ tiktoken==0.12.0
+ scipy==1.15.3
+ referencing==0.37.0
+ opencv-python-headless==4.12.0.88
+ nvidia-cusparse-cu12==12.5.8.93
+ nvidia-cufft-cu12==11.3.3.83
+ nvidia-cudnn-cu12==9.10.2.21
+ numba==0.61.2
+ markdown-it-py==4.0.0
+ gguf==0.17.1
+ email-validator==2.3.0
+ depyf==0.20.0
+ cupy-cuda12x==13.6.0
+ cuda-bindings==13.1.0
+ watchfiles==1.1.1
+ pydantic-extra-types==2.10.6
+ openai-harmony==0.0.8
+ nvidia-cusolver-cu12==11.7.3.90
+ lm-format-enforcer==0.11.3
+ jsonschema-specifications==2025.9.1
+ cuda-python==13.1.0
+ typer==0.20.0
+ transformers==4.57.3
+ torch==2.9.0
+ prometheus-fastapi-instrumentator==7.1.0
+ openai==2.9.0
+ nvidia-cutlass-dsl==4.3.2
+ jsonschema==4.25.1
+ fastapi==0.123.10
+ anthropic==0.71.0
+ xgrammar==0.1.27
+ torchvision==0.24.0
+ ray==2.52.1
+ model-hosting-container-standards==0.1.9
+ mistral_common==1.8.6
+ flashinfer-python==0.5.3
+ fastapi-cloud-cli==0.6.0
+ fastapi-cli==0.0.16
+ compressed-tensors==0.12.2
+ vllm==0.12.0
+ Fraction==2.2.0
+ DeBERTa==0.1.13
llama/wandb/offline-run-20260113_213836-a3j2m1nj/logs/debug-core.log ADDED
@@ -0,0 +1,14 @@
+ {"time":"2026-01-13T21:38:36.612826143+09:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpmps2xwqt/port-891360.txt","pid":891360,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+ {"time":"2026-01-13T21:38:36.61331276+09:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":891360}
+ {"time":"2026-01-13T21:38:36.613298464+09:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-891360-891645-1667871202/socket","Net":"unix"}}
+ {"time":"2026-01-13T21:38:36.783607525+09:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
+ {"time":"2026-01-13T21:38:36.79752518+09:00","level":"INFO","msg":"handleInformInit: received","streamId":"a3j2m1nj","id":"1(@)"}
+ {"time":"2026-01-13T21:38:36.974920158+09:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"a3j2m1nj","id":"1(@)"}
+ {"time":"2026-01-14T01:44:49.729274048+09:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
+ {"time":"2026-01-14T01:44:49.7293491+09:00","level":"INFO","msg":"server is shutting down"}
+ {"time":"2026-01-14T01:44:49.729335449+09:00","level":"INFO","msg":"connection: closing","id":"1(@)"}
+ {"time":"2026-01-14T01:44:49.729432172+09:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-891360-891645-1667871202/socket","Net":"unix"}}
+ {"time":"2026-01-14T01:44:49.729474793+09:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
+ {"time":"2026-01-14T01:44:49.730357876+09:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
+ {"time":"2026-01-14T01:44:49.730376141+09:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
+ {"time":"2026-01-14T01:44:49.730383674+09:00","level":"INFO","msg":"server is closed"}
llama/wandb/offline-run-20260113_213836-a3j2m1nj/logs/debug-internal.log ADDED
@@ -0,0 +1,12 @@
+ {"time":"2026-01-13T21:38:36.808750595+09:00","level":"INFO","msg":"stream: starting","core version":"0.23.0"}
+ {"time":"2026-01-13T21:38:36.965984438+09:00","level":"WARN","msg":"featurechecker: GraphQL client is nil, skipping feature loading"}
+ {"time":"2026-01-13T21:38:36.966027626+09:00","level":"INFO","msg":"stream: created new stream","id":"a3j2m1nj"}
+ {"time":"2026-01-13T21:38:36.96613096+09:00","level":"INFO","msg":"handler: started","stream_id":"a3j2m1nj"}
+ {"time":"2026-01-13T21:38:36.974910942+09:00","level":"INFO","msg":"stream: started","id":"a3j2m1nj"}
+ {"time":"2026-01-13T21:38:36.97493371+09:00","level":"INFO","msg":"sender: started","stream_id":"a3j2m1nj"}
+ {"time":"2026-01-13T21:38:36.974933869+09:00","level":"INFO","msg":"writer: started","stream_id":"a3j2m1nj"}
+ {"time":"2026-01-13T21:38:36.975342432+09:00","level":"WARN","msg":"runupserter: server does not expand metric globs but the x_server_side_expand_glob_metrics setting is set; ignoring"}
+ {"time":"2026-01-14T01:44:49.729333759+09:00","level":"INFO","msg":"stream: closing","id":"a3j2m1nj"}
+ {"time":"2026-01-14T01:44:49.729533525+09:00","level":"INFO","msg":"handler: closed","stream_id":"a3j2m1nj"}
+ {"time":"2026-01-14T01:44:49.730075227+09:00","level":"INFO","msg":"sender: closed","stream_id":"a3j2m1nj"}
+ {"time":"2026-01-14T01:44:49.730094075+09:00","level":"INFO","msg":"stream: closed","id":"a3j2m1nj"}
llama/wandb/offline-run-20260113_213836-a3j2m1nj/logs/debug.log ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2026-01-13 21:38:36,531 INFO MainThread:891360 [wandb_setup.py:_flush():80] Current SDK version is 0.23.0
2
+ 2026-01-13 21:38:36,531 INFO MainThread:891360 [wandb_setup.py:_flush():80] Configure stats pid to 891360
3
+ 2026-01-13 21:38:36,531 INFO MainThread:891360 [wandb_setup.py:_flush():80] Loading settings from /home/work/.config/wandb/settings
4
+ 2026-01-13 21:38:36,531 INFO MainThread:891360 [wandb_setup.py:_flush():80] Loading settings from /home/work/an_nguyen/HRA/llama/wandb/settings
5
+ 2026-01-13 21:38:36,531 INFO MainThread:891360 [wandb_setup.py:_flush():80] Loading settings from environment variables
6
+ 2026-01-13 21:38:36,531 INFO MainThread:891360 [wandb_init.py:setup_run_log_directory():713] Logging user logs to /home/work/an_nguyen/HRA/llama/wandb/offline-run-20260113_213836-a3j2m1nj/logs/debug.log
7
+ 2026-01-13 21:38:36,531 INFO MainThread:891360 [wandb_init.py:setup_run_log_directory():714] Logging internal logs to /home/work/an_nguyen/HRA/llama/wandb/offline-run-20260113_213836-a3j2m1nj/logs/debug-internal.log
8
+ 2026-01-13 21:38:36,531 INFO MainThread:891360 [wandb_init.py:init():840] calling init triggers
9
+ 2026-01-13 21:38:36,531 INFO MainThread:891360 [wandb_init.py:init():845] wandb.init called with sweep_config: {}
10
+ config: {'_wandb': {}}
11
+ 2026-01-13 21:38:36,531 INFO MainThread:891360 [wandb_init.py:init():888] starting backend
12
+ 2026-01-13 21:38:36,783 INFO MainThread:891360 [wandb_init.py:init():891] sending inform_init request
13
+ 2026-01-13 21:38:36,796 INFO MainThread:891360 [wandb_init.py:init():899] backend started and connected
14
+ 2026-01-13 21:38:36,796 INFO MainThread:891360 [wandb_init.py:init():969] updated telemetry
15
+ 2026-01-13 21:38:36,797 INFO MainThread:891360 [wandb_init.py:init():993] communicating run to backend with 90.0 second timeout
16
+ 2026-01-13 21:38:36,976 INFO MainThread:891360 [wandb_init.py:init():1040] starting run threads in backend
17
+ 2026-01-13 21:38:37,082 INFO MainThread:891360 [wandb_run.py:_console_start():2504] atexit reg
18
+ 2026-01-13 21:38:37,082 INFO MainThread:891360 [wandb_run.py:_redirect():2352] redirect: wrap_raw
19
+ 2026-01-13 21:38:37,082 INFO MainThread:891360 [wandb_run.py:_redirect():2421] Wrapping output streams.
20
+ 2026-01-13 21:38:37,082 INFO MainThread:891360 [wandb_run.py:_redirect():2444] Redirects installed.
21
+ 2026-01-13 21:38:37,084 INFO MainThread:891360 [wandb_init.py:init():1080] run started, returning control to user process
22
+ 2026-01-13 21:38:37,085 INFO MainThread:891360 [wandb_run.py:_config_callback():1385] config_cb None None {'peft_config': {'default': {'peft_type': 'OFT', 'auto_mapping': None, 'base_model_name_or_path': 'meta-llama/Llama-2-7b-hf', 'revision': None, 'task_type': 'CAUSAL_LM', 'inference_mode': False, 'rank_pattern': {}, 'alpha_pattern': {}, 'r': 32, 'module_dropout': 0.0, 'target_modules': ['v_proj', 'q_proj'], 'init_weights': True, 'layers_to_transform': None, 'layers_pattern': None, 'modules_to_save': None, 'coft': False, 'eps': 0.0001, 'block_share': False}}, 'vocab_size': 32001, 'max_position_embeddings': 4096, 'hidden_size': 4096, 'intermediate_size': 11008, 'num_hidden_layers': 32, 'num_attention_heads': 32, 'num_key_value_heads': 32, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-05, 'pretraining_tp': 1, 'use_cache': False, 'rope_theta': 10000.0, 'rope_scaling': None, 'attention_bias': False, 'attention_dropout': 0.0, 'mlp_bias': False, 'head_dim': 128, 'return_dict': True, 'output_hidden_states': False, 'torchscript': False, 'dtype': 'float32', 'pruned_heads': {}, 'tie_word_embeddings': False, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'architectures': ['LlamaForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'task_specific_params': None, 'problem_type': None, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 2, 'pad_token_id': 32000, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 
'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'num_beam_groups': 1, 'diversity_penalty': 0.0, '_name_or_path': 'meta-llama/Llama-2-7b-hf', 'transformers_version': '4.57.3', 'model_type': 'llama', 'tf_legacy_loss': False, 'use_bfloat16': False, 'output_attentions': False, 'output_dir': 'output/cms3', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'eval_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 2.0, 'max_steps': -1, 'lr_scheduler_type': 'cosine', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.005, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 'output/cms3/runs/Jan13_21-38-29_main1', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 200, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 0.0, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'bf16': True, 'fp16': False, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': True, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 
'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': None, 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'parallelism_config': None, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'project': 'huggingface', 'trackio_space_id': 'trackio', 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'hub_revision': None, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'include_tokens_per_second': False, 
'include_num_input_tokens_seen': 'no', 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'liger_kernel_config': None, 'eval_use_gather_object': False, 'average_tokens_across_devices': True, 'model_name_or_path': 'meta-llama/Llama-2-7b-hf', 'adapter_name_or_path': None, 'data_path': './data/MetaMathQA-40K.json', 'dataset_split': 'train', 'dataset_field': ['query', 'response'], 'model_max_length': 512, 'hrft_r': 32, 'init_a': 0.0001, 'eps': 0.0001, 'lamda': 0.0001, 'add_orth': 'none', 'init_weights': True}
+ 2026-01-13 21:38:37,094 INFO MainThread:891360 [wandb_config.py:__setitem__():154] [no run ID] config set model/num_parameters = 6746812416 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7f2e11cb7fa0>>
+ 2026-01-13 21:38:37,094 INFO MainThread:891360 [wandb_run.py:_config_callback():1385] config_cb model/num_parameters 6746812416 None
+ 2026-01-14 01:44:49,729 INFO wandb-AsyncioManager-main:891360 [service_client.py:_forward_responses():80] Reached EOF.
+ 2026-01-14 01:44:49,729 INFO wandb-AsyncioManager-main:891360 [mailbox.py:close():137] Closing mailbox, abandoning 0 handles.
llama/wandb/offline-run-20260113_213836-a3j2m1nj/run-a3j2m1nj.wandb ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0a8c64e47b303b43dd85aceddf6406bef9b0ee26cfa09d90f822e95343fb236e
+ size 2101615
llama/wandb/offline-run-20260114_165804-73rsvobf/files/requirements.txt ADDED
@@ -0,0 +1,199 @@
+ setuptools==80.9.0
+ wheel==0.45.1
+ pip==25.3
+ Brotli==1.1.0
+ certifi==2025.11.12
+ charset-normalizer==3.4.4
+ filelock==3.20.0
+ hpack==4.1.0
+ hyperframe==6.1.0
+ idna==3.11
+ MarkupSafe==3.0.3
+ mpmath==1.3.0
+ networkx==3.4.2
+ pycparser==2.22
+ PySocks==1.7.1
+ PyYAML==6.0.3
+ typing_extensions==4.15.0
+ cffi==2.0.0
+ gmpy2==2.2.1
+ h2==4.3.0
+ Jinja2==3.1.6
+ sympy==1.14.0
+ zstandard==0.23.0
+ urllib3==2.5.0
+ requests==2.32.5
+ appdirs==1.4.4
+ rich-toolkit==0.17.0
+ torchaudio==2.9.0
+ triton==3.5.0
+ tqdm==4.67.1
+ safetensors==0.7.0
+ regex==2025.11.3
+ packaging==25.0
+ hf-xet==1.2.0
+ hf-xet==1.2.1
+ huggingface_hub==0.36.0
+ tokenizers==0.22.1
+ pytz==2025.2
+ xxhash==3.6.0
+ tzdata==2025.2
+ six==1.17.0
+ pyarrow-hotfix==0.7
+ pyarrow==22.0.0
+ pyarrow==21.0.0
+ propcache==0.4.1
+ propcache==0.3.1
+ multidict==6.7.0
+ multidict==6.6.3
+ aiohappyeyeballs==2.6.1
+ fsspec==2024.3.1
+ fsspec==2025.10.0
+ frozenlist==1.8.0
+ frozenlist==1.7.0
+ dill==0.3.8
+ dill==0.4.0
+ attrs==25.4.0
+ async-timeout==5.0.1
+ yarl==1.22.0
+ python-dateutil==2.9.0.post0
+ multiprocess==0.70.16
+ multiprocess==0.70.18
+ aiosignal==1.4.0
+ pandas==2.3.3
+ aiohttp==3.13.2
+ pycountry==24.6.1
+ psutil==7.1.3
+ accelerate==1.12.0
+ peft==0.10.0
+ Pygments==2.19.2
+ colorama==0.4.6
+ shellingham==1.5.4
+ sniffio==1.3.1
+ exceptiongroup==1.3.1
+ h11==0.16.0
+ typer-slim==0.20.0
+ anyio==4.12.0
+ httpcore==1.0.9
+ httpx==0.28.1
+ datasets==4.4.1
+ ninja==1.13.0
+ docker-pycreds==0.4.0
+ eval_type_backport==0.3.1
+ platformdirs==4.5.0
+ sentry-sdk==2.47.0
+ annotated-types==0.7.0
+ typing-inspection==0.4.2
+ smmap==5.0.2
+ gitdb==4.0.12
+ GitPython==3.1.45
+ protobuf==6.31.1
+ setproctitle==1.3.6
+ pydantic_core==2.41.5
+ pydantic==2.12.5
+ wandb==0.23.0
+ jsonlines==4.0.0
+ supervisor==4.3.0
+ py-cpuinfo==9.0.0
+ nvidia-ml-py==13.580.82
+ nvidia-cusparselt-cu12==0.7.1
+ fastrlock==0.8.3
+ websockets==15.0.1
+ uvloop==0.22.1
+ tomli==2.3.0
+ tabulate==0.9.0
+ sentencepiece==0.2.1
+ rpds-py==0.30.0
+ rignore==0.7.6
+ pyzmq==27.1.0
+ python-multipart==0.0.20
+ python-json-logger==4.0.0
+ python-dotenv==1.2.1
+ pybase64==1.4.2
+ prometheus_client==0.23.1
+ starlette==0.50.0
+ pillow==12.0.0
+ partial-json-parser==0.2.1.1.post7
+ outlines_core==0.2.11
+ nvidia-nvtx-cu12==12.8.90
+ nvidia-nvshmem-cu12==3.3.20
+ nvidia-nvjitlink-cu12==12.8.93
+ nvidia-nccl-cu12==2.27.5
+ nvidia-curand-cu12==10.3.9.90
+ nvidia-cufile-cu12==1.13.1.3
+ nvidia-cudnn-frontend==1.16.0
+ nvidia-cuda-runtime-cu12==12.8.90
+ nvidia-cuda-nvrtc-cu12==12.8.93
+ nvidia-cuda-cupti-cu12==12.8.90
+ nvidia-cublas-cu12==12.8.4.1
+ numpy==2.2.6
+ msgspec==0.20.0
+ msgpack==1.1.2
+ mdurl==0.1.2
+ loguru==0.7.3
+ llvmlite==0.44.0
+ llguidance==1.3.0
+ lark==1.2.2
+ jmespath==1.0.1
+ jiter==0.12.0
+ interegular==0.3.3
+ httptools==0.7.1
+ fastar==0.8.0
+ einops==0.8.1
+ docstring_parser==0.17.0
+ dnspython==2.8.0
+ distro==1.9.0
+ diskcache==5.6.3
+ cuda-pathfinder==1.3.3
+ cloudpickle==3.1.2
+ rich==14.2.0
+ click==8.2.1
+ cbor2==5.7.1
+ cachetools==6.2.2
+ blake3==1.0.8
+ astor==0.8.1
+ apache-tvm-ffi==0.1.4
+ annotated-doc==0.0.4
+ uvicorn==0.38.0
+ tiktoken==0.12.0
+ scipy==1.15.3
+ referencing==0.37.0
+ opencv-python-headless==4.12.0.88
+ nvidia-cusparse-cu12==12.5.8.93
+ nvidia-cufft-cu12==11.3.3.83
+ nvidia-cudnn-cu12==9.10.2.21
+ numba==0.61.2
+ markdown-it-py==4.0.0
+ gguf==0.17.1
+ email-validator==2.3.0
+ depyf==0.20.0
+ cupy-cuda12x==13.6.0
+ cuda-bindings==13.1.0
+ watchfiles==1.1.1
+ pydantic-extra-types==2.10.6
+ openai-harmony==0.0.8
+ nvidia-cusolver-cu12==11.7.3.90
+ lm-format-enforcer==0.11.3
+ jsonschema-specifications==2025.9.1
+ cuda-python==13.1.0
+ typer==0.20.0
+ transformers==4.57.3
+ torch==2.9.0
+ prometheus-fastapi-instrumentator==7.1.0
+ openai==2.9.0
+ nvidia-cutlass-dsl==4.3.2
+ jsonschema==4.25.1
+ fastapi==0.123.10
+ anthropic==0.71.0
+ xgrammar==0.1.27
+ torchvision==0.24.0
+ ray==2.52.1
+ model-hosting-container-standards==0.1.9
+ mistral_common==1.8.6
+ flashinfer-python==0.5.3
+ fastapi-cloud-cli==0.6.0
+ fastapi-cli==0.0.16
+ compressed-tensors==0.12.2
+ vllm==0.12.0
+ Fraction==2.2.0
+ DeBERTa==0.1.13
llama/wandb/offline-run-20260114_165804-73rsvobf/logs/debug-core.log ADDED
@@ -0,0 +1,14 @@
+ {"time":"2026-01-14T16:58:04.868620839+09:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpelwrvnf6/port-1741342.txt","pid":1741342,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+ {"time":"2026-01-14T16:58:04.86909634+09:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":1741342}
+ {"time":"2026-01-14T16:58:04.869074421+09:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-1741342-1741535-2497333284/socket","Net":"unix"}}
+ {"time":"2026-01-14T16:58:05.042266919+09:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
+ {"time":"2026-01-14T16:58:05.057856705+09:00","level":"INFO","msg":"handleInformInit: received","streamId":"73rsvobf","id":"1(@)"}
+ {"time":"2026-01-14T16:58:05.211099004+09:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"73rsvobf","id":"1(@)"}
+ {"time":"2026-01-14T16:59:21.72449823+09:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
+ {"time":"2026-01-14T16:59:21.725623244+09:00","level":"INFO","msg":"connection: closing","id":"1(@)"}
+ {"time":"2026-01-14T16:59:21.725669826+09:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
+ {"time":"2026-01-14T16:59:21.725637604+09:00","level":"INFO","msg":"server is shutting down"}
+ {"time":"2026-01-14T16:59:21.725755633+09:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-1741342-1741535-2497333284/socket","Net":"unix"}}
+ {"time":"2026-01-14T16:59:21.733756261+09:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
+ {"time":"2026-01-14T16:59:21.733773028+09:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
+ {"time":"2026-01-14T16:59:21.733781827+09:00","level":"INFO","msg":"server is closed"}
llama/wandb/offline-run-20260114_165804-73rsvobf/logs/debug-internal.log ADDED
@@ -0,0 +1,12 @@
+ {"time":"2026-01-14T16:58:05.066543155+09:00","level":"INFO","msg":"stream: starting","core version":"0.23.0"}
+ {"time":"2026-01-14T16:58:05.210029267+09:00","level":"WARN","msg":"featurechecker: GraphQL client is nil, skipping feature loading"}
+ {"time":"2026-01-14T16:58:05.210075058+09:00","level":"INFO","msg":"stream: created new stream","id":"73rsvobf"}
+ {"time":"2026-01-14T16:58:05.210149108+09:00","level":"INFO","msg":"handler: started","stream_id":"73rsvobf"}
+ {"time":"2026-01-14T16:58:05.211090279+09:00","level":"INFO","msg":"stream: started","id":"73rsvobf"}
+ {"time":"2026-01-14T16:58:05.211104813+09:00","level":"INFO","msg":"writer: started","stream_id":"73rsvobf"}
+ {"time":"2026-01-14T16:58:05.211113248+09:00","level":"INFO","msg":"sender: started","stream_id":"73rsvobf"}
+ {"time":"2026-01-14T16:58:05.211515731+09:00","level":"WARN","msg":"runupserter: server does not expand metric globs but the x_server_side_expand_glob_metrics setting is set; ignoring"}
+ {"time":"2026-01-14T16:59:21.72563689+09:00","level":"INFO","msg":"stream: closing","id":"73rsvobf"}
+ {"time":"2026-01-14T16:59:21.725868691+09:00","level":"INFO","msg":"handler: closed","stream_id":"73rsvobf"}
+ {"time":"2026-01-14T16:59:21.733478727+09:00","level":"INFO","msg":"sender: closed","stream_id":"73rsvobf"}
+ {"time":"2026-01-14T16:59:21.733495479+09:00","level":"INFO","msg":"stream: closed","id":"73rsvobf"}
llama/wandb/offline-run-20260114_165804-73rsvobf/logs/debug.log ADDED
@@ -0,0 +1,26 @@
+ 2026-01-14 16:58:04,788 INFO MainThread:1741342 [wandb_setup.py:_flush():80] Current SDK version is 0.23.0
+ 2026-01-14 16:58:04,788 INFO MainThread:1741342 [wandb_setup.py:_flush():80] Configure stats pid to 1741342
+ 2026-01-14 16:58:04,788 INFO MainThread:1741342 [wandb_setup.py:_flush():80] Loading settings from /home/work/.config/wandb/settings
+ 2026-01-14 16:58:04,788 INFO MainThread:1741342 [wandb_setup.py:_flush():80] Loading settings from /home/work/an_nguyen/HRA/llama/wandb/settings
+ 2026-01-14 16:58:04,788 INFO MainThread:1741342 [wandb_setup.py:_flush():80] Loading settings from environment variables
+ 2026-01-14 16:58:04,788 INFO MainThread:1741342 [wandb_init.py:setup_run_log_directory():713] Logging user logs to /home/work/an_nguyen/HRA/llama/wandb/offline-run-20260114_165804-73rsvobf/logs/debug.log
+ 2026-01-14 16:58:04,788 INFO MainThread:1741342 [wandb_init.py:setup_run_log_directory():714] Logging internal logs to /home/work/an_nguyen/HRA/llama/wandb/offline-run-20260114_165804-73rsvobf/logs/debug-internal.log
+ 2026-01-14 16:58:04,788 INFO MainThread:1741342 [wandb_init.py:init():840] calling init triggers
+ 2026-01-14 16:58:04,788 INFO MainThread:1741342 [wandb_init.py:init():845] wandb.init called with sweep_config: {}
+ config: {'_wandb': {}}
+ 2026-01-14 16:58:04,789 INFO MainThread:1741342 [wandb_init.py:init():888] starting backend
+ 2026-01-14 16:58:05,042 INFO MainThread:1741342 [wandb_init.py:init():891] sending inform_init request
+ 2026-01-14 16:58:05,056 INFO MainThread:1741342 [wandb_init.py:init():899] backend started and connected
+ 2026-01-14 16:58:05,057 INFO MainThread:1741342 [wandb_init.py:init():969] updated telemetry
+ 2026-01-14 16:58:05,058 INFO MainThread:1741342 [wandb_init.py:init():993] communicating run to backend with 90.0 second timeout
+ 2026-01-14 16:58:05,213 INFO MainThread:1741342 [wandb_init.py:init():1040] starting run threads in backend
+ 2026-01-14 16:58:05,317 INFO MainThread:1741342 [wandb_run.py:_console_start():2504] atexit reg
+ 2026-01-14 16:58:05,317 INFO MainThread:1741342 [wandb_run.py:_redirect():2352] redirect: wrap_raw
+ 2026-01-14 16:58:05,317 INFO MainThread:1741342 [wandb_run.py:_redirect():2421] Wrapping output streams.
+ 2026-01-14 16:58:05,317 INFO MainThread:1741342 [wandb_run.py:_redirect():2444] Redirects installed.
+ 2026-01-14 16:58:05,319 INFO MainThread:1741342 [wandb_init.py:init():1080] run started, returning control to user process
+ 2026-01-14 16:58:05,321 INFO MainThread:1741342 [wandb_run.py:_config_callback():1385] config_cb None None {'peft_config': {'default': {'peft_type': 'OFT', 'auto_mapping': None, 'base_model_name_or_path': 'meta-llama/Llama-2-7b-hf', 'revision': None, 'task_type': 'CAUSAL_LM', 'inference_mode': False, 'rank_pattern': {}, 'alpha_pattern': {}, 'r': 32, 'module_dropout': 0.0, 'target_modules': ['v_proj', 'q_proj'], 'init_weights': True, 'layers_to_transform': None, 'layers_pattern': None, 'modules_to_save': None, 'coft': False, 'eps': 0.0001, 'block_share': False}}, 'vocab_size': 32001, 'max_position_embeddings': 4096, 'hidden_size': 4096, 'intermediate_size': 11008, 'num_hidden_layers': 32, 'num_attention_heads': 32, 'num_key_value_heads': 32, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-05, 'pretraining_tp': 1, 'use_cache': False, 'rope_theta': 10000.0, 'rope_scaling': None, 'attention_bias': False, 'attention_dropout': 0.0, 'mlp_bias': False, 'head_dim': 128, 'return_dict': True, 'output_hidden_states': False, 'torchscript': False, 'dtype': 'float32', 'pruned_heads': {}, 'tie_word_embeddings': False, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'architectures': ['LlamaForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'task_specific_params': None, 'problem_type': None, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 2, 'pad_token_id': 32000, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 
'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'num_beam_groups': 1, 'diversity_penalty': 0.0, '_name_or_path': 'meta-llama/Llama-2-7b-hf', 'transformers_version': '4.57.3', 'model_type': 'llama', 'tf_legacy_loss': False, 'use_bfloat16': False, 'output_attentions': False, 'output_dir': 'output/cms3', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'eval_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 2.0, 'max_steps': -1, 'lr_scheduler_type': 'cosine', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.005, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 'output/cms3/runs/Jan14_16-57-58_main1', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 200, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 0.0, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'bf16': True, 'fp16': False, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': True, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 
'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': None, 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'parallelism_config': None, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'project': 'huggingface', 'trackio_space_id': 'trackio', 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'hub_revision': None, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'include_tokens_per_second': False, 
'include_num_input_tokens_seen': 'no', 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'liger_kernel_config': None, 'eval_use_gather_object': False, 'average_tokens_across_devices': True, 'model_name_or_path': 'meta-llama/Llama-2-7b-hf', 'adapter_name_or_path': None, 'data_path': './data/MetaMathQA-40K.json', 'dataset_split': 'train', 'dataset_field': ['query', 'response'], 'model_max_length': 512, 'hrft_r': 32, 'init_a': 0.0001, 'eps': 0.0001, 'lamda': 0.0001, 'add_orth': 'none', 'init_weights': True}
+ 2026-01-14 16:58:05,329 INFO MainThread:1741342 [wandb_config.py:__setitem__():154] [no run ID] config set model/num_parameters = 6746812416 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7f4761caff40>>
+ 2026-01-14 16:58:05,329 INFO MainThread:1741342 [wandb_run.py:_config_callback():1385] config_cb model/num_parameters 6746812416 None
+ 2026-01-14 16:59:21,724 INFO wandb-AsyncioManager-main:1741342 [service_client.py:_forward_responses():80] Reached EOF.
+ 2026-01-14 16:59:21,725 INFO wandb-AsyncioManager-main:1741342 [mailbox.py:close():137] Closing mailbox, abandoning 0 handles.
llama/wandb/offline-run-20260114_165804-73rsvobf/run-73rsvobf.wandb ADDED
Binary file (21.5 kB)
llama/wandb/offline-run-20260114_173548-7ubed6qe/files/requirements.txt ADDED
@@ -0,0 +1,199 @@
+ setuptools==80.9.0
+ wheel==0.45.1
+ pip==25.3
+ Brotli==1.1.0
+ certifi==2025.11.12
+ charset-normalizer==3.4.4
+ filelock==3.20.0
+ hpack==4.1.0
+ hyperframe==6.1.0
+ idna==3.11
+ MarkupSafe==3.0.3
+ mpmath==1.3.0
+ networkx==3.4.2
+ pycparser==2.22
+ PySocks==1.7.1
+ PyYAML==6.0.3
+ typing_extensions==4.15.0
+ cffi==2.0.0
+ gmpy2==2.2.1
+ h2==4.3.0
+ Jinja2==3.1.6
+ sympy==1.14.0
+ zstandard==0.23.0
+ urllib3==2.5.0
+ requests==2.32.5
+ appdirs==1.4.4
+ rich-toolkit==0.17.0
+ torchaudio==2.9.0
+ triton==3.5.0
+ tqdm==4.67.1
+ safetensors==0.7.0
+ regex==2025.11.3
+ packaging==25.0
+ hf-xet==1.2.0
+ hf-xet==1.2.1
+ huggingface_hub==0.36.0
+ tokenizers==0.22.1
+ pytz==2025.2
+ xxhash==3.6.0
+ tzdata==2025.2
+ six==1.17.0
+ pyarrow-hotfix==0.7
+ pyarrow==22.0.0
+ pyarrow==21.0.0
+ propcache==0.4.1
+ propcache==0.3.1
+ multidict==6.7.0
+ multidict==6.6.3
+ aiohappyeyeballs==2.6.1
+ fsspec==2024.3.1
+ fsspec==2025.10.0
+ frozenlist==1.8.0
+ frozenlist==1.7.0
+ dill==0.3.8
+ dill==0.4.0
+ attrs==25.4.0
+ async-timeout==5.0.1
+ yarl==1.22.0
+ python-dateutil==2.9.0.post0
+ multiprocess==0.70.16
+ multiprocess==0.70.18
+ aiosignal==1.4.0
+ pandas==2.3.3
+ aiohttp==3.13.2
+ pycountry==24.6.1
+ psutil==7.1.3
+ accelerate==1.12.0
+ peft==0.10.0
+ Pygments==2.19.2
+ colorama==0.4.6
+ shellingham==1.5.4
+ sniffio==1.3.1
+ exceptiongroup==1.3.1
+ h11==0.16.0
+ typer-slim==0.20.0
+ anyio==4.12.0
+ httpcore==1.0.9
+ httpx==0.28.1
+ datasets==4.4.1
+ ninja==1.13.0
+ docker-pycreds==0.4.0
+ eval_type_backport==0.3.1
+ platformdirs==4.5.0
+ sentry-sdk==2.47.0
+ annotated-types==0.7.0
+ typing-inspection==0.4.2
+ smmap==5.0.2
+ gitdb==4.0.12
+ GitPython==3.1.45
+ protobuf==6.31.1
+ setproctitle==1.3.6
+ pydantic_core==2.41.5
+ pydantic==2.12.5
+ wandb==0.23.0
+ jsonlines==4.0.0
+ supervisor==4.3.0
+ py-cpuinfo==9.0.0
+ nvidia-ml-py==13.580.82
+ nvidia-cusparselt-cu12==0.7.1
+ fastrlock==0.8.3
+ websockets==15.0.1
+ uvloop==0.22.1
+ tomli==2.3.0
+ tabulate==0.9.0
+ sentencepiece==0.2.1
+ rpds-py==0.30.0
+ rignore==0.7.6
+ pyzmq==27.1.0
+ python-multipart==0.0.20
+ python-json-logger==4.0.0
+ python-dotenv==1.2.1
+ pybase64==1.4.2
+ prometheus_client==0.23.1
+ starlette==0.50.0
+ pillow==12.0.0
+ partial-json-parser==0.2.1.1.post7
+ outlines_core==0.2.11
+ nvidia-nvtx-cu12==12.8.90
+ nvidia-nvshmem-cu12==3.3.20
+ nvidia-nvjitlink-cu12==12.8.93
+ nvidia-nccl-cu12==2.27.5
+ nvidia-curand-cu12==10.3.9.90
+ nvidia-cufile-cu12==1.13.1.3
+ nvidia-cudnn-frontend==1.16.0
+ nvidia-cuda-runtime-cu12==12.8.90
+ nvidia-cuda-nvrtc-cu12==12.8.93
+ nvidia-cuda-cupti-cu12==12.8.90
+ nvidia-cublas-cu12==12.8.4.1
+ numpy==2.2.6
+ msgspec==0.20.0
+ msgpack==1.1.2
+ mdurl==0.1.2
+ loguru==0.7.3
+ llvmlite==0.44.0
+ llguidance==1.3.0
+ lark==1.2.2
+ jmespath==1.0.1
+ jiter==0.12.0
+ interegular==0.3.3
+ httptools==0.7.1
+ fastar==0.8.0
+ einops==0.8.1
+ docstring_parser==0.17.0
+ dnspython==2.8.0
+ distro==1.9.0
+ diskcache==5.6.3
+ cuda-pathfinder==1.3.3
+ cloudpickle==3.1.2
+ rich==14.2.0
+ click==8.2.1
+ cbor2==5.7.1
+ cachetools==6.2.2
+ blake3==1.0.8
+ astor==0.8.1
+ apache-tvm-ffi==0.1.4
+ annotated-doc==0.0.4
+ uvicorn==0.38.0
+ tiktoken==0.12.0
+ scipy==1.15.3
+ referencing==0.37.0
+ opencv-python-headless==4.12.0.88
+ nvidia-cusparse-cu12==12.5.8.93
+ nvidia-cufft-cu12==11.3.3.83
+ nvidia-cudnn-cu12==9.10.2.21
+ numba==0.61.2
+ markdown-it-py==4.0.0
+ gguf==0.17.1
+ email-validator==2.3.0
+ depyf==0.20.0
+ cupy-cuda12x==13.6.0
+ cuda-bindings==13.1.0
+ watchfiles==1.1.1
+ pydantic-extra-types==2.10.6
+ openai-harmony==0.0.8
+ nvidia-cusolver-cu12==11.7.3.90
+ lm-format-enforcer==0.11.3
+ jsonschema-specifications==2025.9.1
+ cuda-python==13.1.0
+ typer==0.20.0
+ transformers==4.57.3
+ torch==2.9.0
+ prometheus-fastapi-instrumentator==7.1.0
+ openai==2.9.0
+ nvidia-cutlass-dsl==4.3.2
+ jsonschema==4.25.1
+ fastapi==0.123.10
+ anthropic==0.71.0
+ xgrammar==0.1.27
+ torchvision==0.24.0
+ ray==2.52.1
+ model-hosting-container-standards==0.1.9
+ mistral_common==1.8.6
+ flashinfer-python==0.5.3
+ fastapi-cloud-cli==0.6.0
+ fastapi-cli==0.0.16
+ compressed-tensors==0.12.2
+ vllm==0.12.0
+ Fraction==2.2.0
+ DeBERTa==0.1.13
llama/wandb/offline-run-20260114_173548-7ubed6qe/logs/debug-core.log ADDED
@@ -0,0 +1,14 @@
+ {"time":"2026-01-14T17:35:48.80538283+09:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmph6vv11_n/port-1752905.txt","pid":1752905,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+ {"time":"2026-01-14T17:35:48.805860127+09:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":1752905}
+ {"time":"2026-01-14T17:35:48.805841032+09:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-1752905-1753049-2894214458/socket","Net":"unix"}}
+ {"time":"2026-01-14T17:35:48.983834145+09:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
+ {"time":"2026-01-14T17:35:48.998863815+09:00","level":"INFO","msg":"handleInformInit: received","streamId":"7ubed6qe","id":"1(@)"}
+ {"time":"2026-01-14T17:35:49.151617773+09:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"7ubed6qe","id":"1(@)"}
+ {"time":"2026-01-14T21:46:20.466585934+09:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
+ {"time":"2026-01-14T21:46:20.466651973+09:00","level":"INFO","msg":"connection: closing","id":"1(@)"}
+ {"time":"2026-01-14T21:46:20.466694957+09:00","level":"INFO","msg":"server is shutting down"}
+ {"time":"2026-01-14T21:46:20.466711701+09:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
+ {"time":"2026-01-14T21:46:20.46679412+09:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-1752905-1753049-2894214458/socket","Net":"unix"}}
+ {"time":"2026-01-14T21:46:20.46924732+09:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
+ {"time":"2026-01-14T21:46:20.469263788+09:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
+ {"time":"2026-01-14T21:46:20.469270741+09:00","level":"INFO","msg":"server is closed"}
llama/wandb/offline-run-20260114_173548-7ubed6qe/logs/debug-internal.log ADDED
@@ -0,0 +1,12 @@
+ {"time":"2026-01-14T17:35:49.006544401+09:00","level":"INFO","msg":"stream: starting","core version":"0.23.0"}
+ {"time":"2026-01-14T17:35:49.149824363+09:00","level":"WARN","msg":"featurechecker: GraphQL client is nil, skipping feature loading"}
+ {"time":"2026-01-14T17:35:49.149873743+09:00","level":"INFO","msg":"stream: created new stream","id":"7ubed6qe"}
+ {"time":"2026-01-14T17:35:49.149898431+09:00","level":"INFO","msg":"handler: started","stream_id":"7ubed6qe"}
+ {"time":"2026-01-14T17:35:49.151612025+09:00","level":"INFO","msg":"stream: started","id":"7ubed6qe"}
+ {"time":"2026-01-14T17:35:49.151616181+09:00","level":"INFO","msg":"writer: started","stream_id":"7ubed6qe"}
+ {"time":"2026-01-14T17:35:49.151631131+09:00","level":"INFO","msg":"sender: started","stream_id":"7ubed6qe"}
+ {"time":"2026-01-14T17:35:49.152375031+09:00","level":"WARN","msg":"runupserter: server does not expand metric globs but the x_server_side_expand_glob_metrics setting is set; ignoring"}
+ {"time":"2026-01-14T21:46:20.466650711+09:00","level":"INFO","msg":"stream: closing","id":"7ubed6qe"}
+ {"time":"2026-01-14T21:46:20.466968327+09:00","level":"INFO","msg":"handler: closed","stream_id":"7ubed6qe"}
+ {"time":"2026-01-14T21:46:20.468836334+09:00","level":"INFO","msg":"sender: closed","stream_id":"7ubed6qe"}
+ {"time":"2026-01-14T21:46:20.468852562+09:00","level":"INFO","msg":"stream: closed","id":"7ubed6qe"}
llama/wandb/offline-run-20260114_173548-7ubed6qe/logs/debug.log ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2026-01-14 17:35:48,731 INFO MainThread:1752905 [wandb_setup.py:_flush():80] Current SDK version is 0.23.0
2
+ 2026-01-14 17:35:48,731 INFO MainThread:1752905 [wandb_setup.py:_flush():80] Configure stats pid to 1752905
3
+ 2026-01-14 17:35:48,731 INFO MainThread:1752905 [wandb_setup.py:_flush():80] Loading settings from /home/work/.config/wandb/settings
4
+ 2026-01-14 17:35:48,731 INFO MainThread:1752905 [wandb_setup.py:_flush():80] Loading settings from /home/work/an_nguyen/HRA/llama/wandb/settings
5
+ 2026-01-14 17:35:48,731 INFO MainThread:1752905 [wandb_setup.py:_flush():80] Loading settings from environment variables
6
+ 2026-01-14 17:35:48,731 INFO MainThread:1752905 [wandb_init.py:setup_run_log_directory():713] Logging user logs to /home/work/an_nguyen/HRA/llama/wandb/offline-run-20260114_173548-7ubed6qe/logs/debug.log
7
+ 2026-01-14 17:35:48,731 INFO MainThread:1752905 [wandb_init.py:setup_run_log_directory():714] Logging internal logs to /home/work/an_nguyen/HRA/llama/wandb/offline-run-20260114_173548-7ubed6qe/logs/debug-internal.log
8
+ 2026-01-14 17:35:48,731 INFO MainThread:1752905 [wandb_init.py:init():840] calling init triggers
9
+ 2026-01-14 17:35:48,731 INFO MainThread:1752905 [wandb_init.py:init():845] wandb.init called with sweep_config: {}
10
+ config: {'_wandb': {}}
11
+ 2026-01-14 17:35:48,731 INFO MainThread:1752905 [wandb_init.py:init():888] starting backend
12
+ 2026-01-14 17:35:48,984 INFO MainThread:1752905 [wandb_init.py:init():891] sending inform_init request
13
+ 2026-01-14 17:35:48,997 INFO MainThread:1752905 [wandb_init.py:init():899] backend started and connected
14
+ 2026-01-14 17:35:48,998 INFO MainThread:1752905 [wandb_init.py:init():969] updated telemetry
15
+ 2026-01-14 17:35:48,999 INFO MainThread:1752905 [wandb_init.py:init():993] communicating run to backend with 90.0 second timeout
16
+ 2026-01-14 17:35:49,154 INFO MainThread:1752905 [wandb_init.py:init():1040] starting run threads in backend
17
+ 2026-01-14 17:35:49,257 INFO MainThread:1752905 [wandb_run.py:_console_start():2504] atexit reg
18
+ 2026-01-14 17:35:49,257 INFO MainThread:1752905 [wandb_run.py:_redirect():2352] redirect: wrap_raw
19
+ 2026-01-14 17:35:49,257 INFO MainThread:1752905 [wandb_run.py:_redirect():2421] Wrapping output streams.
20
+ 2026-01-14 17:35:49,257 INFO MainThread:1752905 [wandb_run.py:_redirect():2444] Redirects installed.
+ 2026-01-14 17:35:49,259 INFO MainThread:1752905 [wandb_init.py:init():1080] run started, returning control to user process
+ 2026-01-14 17:35:49,260 INFO MainThread:1752905 [wandb_run.py:_config_callback():1385] config_cb None None {'peft_config': {'default': {'peft_type': 'OFT', 'auto_mapping': None, 'base_model_name_or_path': 'meta-llama/Llama-2-7b-hf', 'revision': None, 'task_type': 'CAUSAL_LM', 'inference_mode': False, 'rank_pattern': {}, 'alpha_pattern': {}, 'r': 32, 'module_dropout': 0.0, 'target_modules': ['v_proj', 'q_proj'], 'init_weights': True, 'layers_to_transform': None, 'layers_pattern': None, 'modules_to_save': None, 'coft': False, 'eps': 0.0001, 'block_share': False}}, 'vocab_size': 32001, 'max_position_embeddings': 4096, 'hidden_size': 4096, 'intermediate_size': 11008, 'num_hidden_layers': 32, 'num_attention_heads': 32, 'num_key_value_heads': 32, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-05, 'pretraining_tp': 1, 'use_cache': False, 'rope_theta': 10000.0, 'rope_scaling': None, 'attention_bias': False, 'attention_dropout': 0.0, 'mlp_bias': False, 'head_dim': 128, 'return_dict': True, 'output_hidden_states': False, 'torchscript': False, 'dtype': 'float32', 'pruned_heads': {}, 'tie_word_embeddings': False, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'architectures': ['LlamaForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'task_specific_params': None, 'problem_type': None, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 2, 'pad_token_id': 32000, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 
'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'num_beam_groups': 1, 'diversity_penalty': 0.0, '_name_or_path': 'meta-llama/Llama-2-7b-hf', 'transformers_version': '4.57.3', 'model_type': 'llama', 'tf_legacy_loss': False, 'use_bfloat16': False, 'output_attentions': False, 'output_dir': 'output/cms3', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'eval_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 2.0, 'max_steps': -1, 'lr_scheduler_type': 'cosine', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.005, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 'output/cms3/runs/Jan14_17-35-42_main1', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 200, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 0.0, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'bf16': True, 'fp16': False, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': True, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 
'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': None, 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'parallelism_config': None, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'project': 'huggingface', 'trackio_space_id': 'trackio', 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'hub_revision': None, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'include_tokens_per_second': False, 
'include_num_input_tokens_seen': 'no', 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'liger_kernel_config': None, 'eval_use_gather_object': False, 'average_tokens_across_devices': True, 'model_name_or_path': 'meta-llama/Llama-2-7b-hf', 'adapter_name_or_path': None, 'data_path': './data/MetaMathQA-40K.json', 'dataset_split': 'train', 'dataset_field': ['query', 'response'], 'model_max_length': 512, 'hrft_r': 32, 'init_a': 0.0001, 'eps': 0.0001, 'lamda': 0.0001, 'add_orth': 'none', 'init_weights': True}
+ 2026-01-14 17:35:49,269 INFO MainThread:1752905 [wandb_config.py:__setitem__():154] [no run ID] config set model/num_parameters = 6746812416 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7f1ba1cabf40>>
+ 2026-01-14 17:35:49,269 INFO MainThread:1752905 [wandb_run.py:_config_callback():1385] config_cb model/num_parameters 6746812416 None
+ 2026-01-14 21:46:20,466 INFO wandb-AsyncioManager-main:1752905 [service_client.py:_forward_responses():80] Reached EOF.
+ 2026-01-14 21:46:20,467 INFO wandb-AsyncioManager-main:1752905 [mailbox.py:close():137] Closing mailbox, abandoning 0 handles.
llama/wandb/offline-run-20260114_173548-7ubed6qe/run-7ubed6qe.wandb ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3e5c9530847687fcc34a120a91c7775ee55117bf74b57e04f06a07d00a0ce36a
+ size 2021175
llama/wandb/settings ADDED
@@ -0,0 +1,3 @@
+ [default]
+ mode = offline
+