diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..01bf7d27961ff911ddd0bdc655dbd72c9347151e --- /dev/null +++ b/README.md @@ -0,0 +1,51 @@ +# Backdoor Detection Checkpoints + +Fine-tuned Qwen 2.5 Coder 14B models for backdoor detection experiments. + +## Models + +### 1. `ca_v3_run_20251215/` +- **Base model:** Qwen/Qwen2.5-Coder-14B-Instruct +- **Training data:** CA_collusion_exp2_v3 (300 clean + 300 backdoor_only) +- **Task:** Binary backdoor detection +- **Best checkpoint:** `final_model/` or `checkpoint-150/` + +### 2. `exp1_original_data_rerun_20251215/` +- **Base model:** Qwen/Qwen2.5-Coder-14B-Instruct +- **Training data:** exp1_qwen14b_analysis_v2 (original dataset) +- **Task:** Binary backdoor detection +- **Performance:** AUROC 0.947 +- **Best checkpoint:** `checkpoint-75/` + +### 3. `exp2_signalc_20251215/` +- **Base model:** Qwen/Qwen2.5-Coder-14B-Instruct +- **Training data:** 300 clean + 250 backdoor_only + 50 signal_c +- **Task:** Backdoor detection with collusion signal resistance +- **Goal:** Train model to detect backdoors even when signal_c marker is present +- **Best checkpoint:** `final_model/` or `checkpoint-150/` + +## LoRA Configuration + +All models use LoRA fine-tuning: +- `lora_r`: 32 +- `lora_alpha`: 64 +- `target_modules`: [q_proj, v_proj, k_proj, o_proj] +- `lora_dropout`: 0.05 + +## Usage + +```python +from peft import AutoPeftModelForCausalLM +from transformers import AutoTokenizer + +model = AutoPeftModelForCausalLM.from_pretrained( + "path/to/checkpoint", + torch_dtype="bfloat16", + device_map="cuda" +) +tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-Coder-14B-Instruct") +``` + +## Date +December 15, 2025 + diff --git a/ca_v3_run_20251215/checkpoint-147/README.md b/ca_v3_run_20251215/checkpoint-147/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d283785603342a12561751304db4c120bbd293fc --- /dev/null +++ b/ca_v3_run_20251215/checkpoint-147/README.md @@ -0,0 +1,207 @@ +--- +base_model: Qwen/Qwen2.5-Coder-14B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-Coder-14B-Instruct +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/ca_v3_run_20251215/checkpoint-147/adapter_config.json b/ca_v3_run_20251215/checkpoint-147/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bd100e2ef4775697a7cb0c0abaae99f244053d68 --- /dev/null +++ b/ca_v3_run_20251215/checkpoint-147/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-Coder-14B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "k_proj", + "q_proj", + "v_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/ca_v3_run_20251215/checkpoint-147/adapter_model.safetensors b/ca_v3_run_20251215/checkpoint-147/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dd7cd68ab4f5848f38d415367c145815ff77e220 --- /dev/null +++ b/ca_v3_run_20251215/checkpoint-147/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:216da2b46929ee9ee09b5b5efb6fb9c3a8c58e2e7acad45800b49def741cefa4 +size 201378736 diff --git a/ca_v3_run_20251215/checkpoint-147/optimizer.pt b/ca_v3_run_20251215/checkpoint-147/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..d65649d4ca445ac2b6e9d2e5a2b01acd02030adf --- /dev/null +++ b/ca_v3_run_20251215/checkpoint-147/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc755d6cc95a2fa70b493792f6fcfb3f7765a8fd2ada1ce28ae2fe5836099c6b +size 402982627 diff --git a/ca_v3_run_20251215/checkpoint-147/rng_state.pth b/ca_v3_run_20251215/checkpoint-147/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0178e697ee9dfc09593374e91de3e5c7e0217aee --- /dev/null +++ b/ca_v3_run_20251215/checkpoint-147/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:283c4259167c1de6fc9b0d70fb38093d1c85343ca9ff0f2e605bd2011addd045 +size 14645 diff --git a/ca_v3_run_20251215/checkpoint-147/scheduler.pt b/ca_v3_run_20251215/checkpoint-147/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..5b196c8e7a4834975b028188c1c8c2559af111ec --- /dev/null +++ b/ca_v3_run_20251215/checkpoint-147/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e6e49a0a1dc514c3062fdc71df85d50b768febd62f2e9b412554326fd48eb9c +size 1465 diff --git a/ca_v3_run_20251215/checkpoint-147/trainer_state.json b/ca_v3_run_20251215/checkpoint-147/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3a74374a055c0286f5cae6366880900478033bd6 --- /dev/null +++ b/ca_v3_run_20251215/checkpoint-147/trainer_state.json @@ -0,0 +1,1071 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.98, + "eval_steps": 100, + "global_step": 147, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.006666666666666667, + "grad_norm": 2.267627239227295, + "learning_rate": 0.0, + "loss": 0.8939, + "step": 1 + }, + { + "epoch": 0.013333333333333334, + "grad_norm": 1.5160397291183472, + "learning_rate": 3.3333333333333333e-06, + "loss": 0.6392, + "step": 2 + }, + { + "epoch": 0.02, + "grad_norm": 1.4698739051818848, + "learning_rate": 6.666666666666667e-06, + "loss": 0.8476, + "step": 3 + }, + { + "epoch": 0.02666666666666667, + "grad_norm": 4.279767036437988, + "learning_rate": 1e-05, + "loss": 1.2988, + "step": 4 + }, + { + "epoch": 0.03333333333333333, + "grad_norm": 3.669569730758667, + "learning_rate": 1.3333333333333333e-05, + "loss": 0.95, + "step": 5 + }, + { + "epoch": 0.04, + "grad_norm": 1.2172225713729858, + "learning_rate": 1.6666666666666667e-05, + "loss": 0.3448, + "step": 6 + }, + { + "epoch": 0.04666666666666667, + "grad_norm": 1.891830325126648, + "learning_rate": 2e-05, + "loss": 0.7179, + "step": 7 + }, + { + "epoch": 0.05333333333333334, + "grad_norm": 2.352062940597534, + "learning_rate": 2.3333333333333336e-05, + "loss": 0.5782, + "step": 8 + }, + { + "epoch": 0.06, + "grad_norm": 1.2546859979629517, + "learning_rate": 2.6666666666666667e-05, + "loss": 0.686, + "step": 9 + }, + { + "epoch": 0.06666666666666667, + "grad_norm": 1.1951119899749756, + "learning_rate": 3e-05, + "loss": 0.2151, + "step": 10 + }, + { + "epoch": 0.07333333333333333, + "grad_norm": 1.5607569217681885, + "learning_rate": 3.3333333333333335e-05, + "loss": 0.3339, + "step": 11 + }, + { + "epoch": 0.08, + "grad_norm": 1.513392448425293, + "learning_rate": 3.6666666666666666e-05, + "loss": 0.7432, + "step": 12 + }, + { + "epoch": 0.08666666666666667, + "grad_norm": 1.3942737579345703, + "learning_rate": 4e-05, + "loss": 0.5372, + "step": 13 + }, + { + "epoch": 0.09333333333333334, + "grad_norm": 2.1662518978118896, + "learning_rate": 4.3333333333333334e-05, + "loss": 0.5742, + "step": 14 + }, + { + "epoch": 0.1, + "grad_norm": 2.074810028076172, + "learning_rate": 4.666666666666667e-05, + "loss": 0.6484, + "step": 15 + }, + { + "epoch": 0.10666666666666667, + "grad_norm": 2.5651564598083496, + "learning_rate": 5e-05, + "loss": 0.678, + "step": 16 + }, + { + "epoch": 0.11333333333333333, + "grad_norm": 3.3820931911468506, + "learning_rate": 4.9993231029486544e-05, + "loss": 0.6619, + "step": 17 + }, + { + "epoch": 0.12, + "grad_norm": 1.7587597370147705, + "learning_rate": 4.997292778346312e-05, + "loss": 0.665, + "step": 18 + }, + { + "epoch": 0.12666666666666668, + "grad_norm": 2.234240770339966, + "learning_rate": 4.993910125649561e-05, + "loss": 0.4308, + "step": 19 + }, + { + "epoch": 0.13333333333333333, + "grad_norm": 1.0802106857299805, + "learning_rate": 4.989176976624511e-05, + "loss": 0.6038, + "step": 20 + }, + { + "epoch": 0.14, + "grad_norm": 2.3648719787597656, + "learning_rate": 4.983095894354858e-05, + "loss": 0.5436, + "step": 21 + }, + { + "epoch": 0.14666666666666667, + "grad_norm": 1.2552648782730103, + "learning_rate": 4.975670171853926e-05, + "loss": 0.6223, + "step": 22 + }, + { + "epoch": 0.15333333333333332, + "grad_norm": 2.2331910133361816, + "learning_rate": 4.966903830281449e-05, + "loss": 0.6468, + "step": 23 + }, + { + "epoch": 0.16, + "grad_norm": 1.7010774612426758, + "learning_rate": 4.9568016167660334e-05, + "loss": 0.6019, + "step": 24 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 1.6806925535202026, + "learning_rate": 4.9453690018345144e-05, + "loss": 0.3979, + "step": 25 + }, + { + "epoch": 0.17333333333333334, + "grad_norm": 1.4811266660690308, + "learning_rate": 4.9326121764495596e-05, + "loss": 0.8038, + "step": 26 + }, + { + "epoch": 0.18, + "grad_norm": 1.5548367500305176, + "learning_rate": 4.9185380486571595e-05, + "loss": 0.5653, + "step": 27 + }, + { + "epoch": 0.18666666666666668, + "grad_norm": 1.4900885820388794, + "learning_rate": 4.9031542398457974e-05, + "loss": 0.7534, + "step": 28 + }, + { + "epoch": 0.19333333333333333, + "grad_norm": 1.2963446378707886, + "learning_rate": 4.88646908061933e-05, + "loss": 0.6946, + "step": 29 + }, + { + "epoch": 0.2, + "grad_norm": 1.885505199432373, + "learning_rate": 4.868491606285823e-05, + "loss": 0.8456, + "step": 30 + }, + { + "epoch": 0.20666666666666667, + "grad_norm": 1.0747458934783936, + "learning_rate": 4.849231551964771e-05, + "loss": 0.336, + "step": 31 + }, + { + "epoch": 0.21333333333333335, + "grad_norm": 2.0049006938934326, + "learning_rate": 4.828699347315356e-05, + "loss": 0.6669, + "step": 32 + }, + { + "epoch": 0.22, + "grad_norm": 1.020938515663147, + "learning_rate": 4.806906110888606e-05, + "loss": 0.6849, + "step": 33 + }, + { + "epoch": 0.22666666666666666, + "grad_norm": 1.3365986347198486, + "learning_rate": 4.783863644106502e-05, + "loss": 0.4813, + "step": 34 + }, + { + "epoch": 0.23333333333333334, + "grad_norm": 2.2962701320648193, + "learning_rate": 4.759584424871302e-05, + "loss": 1.156, + "step": 35 + }, + { + "epoch": 0.24, + "grad_norm": 1.1328681707382202, + "learning_rate": 4.734081600808531e-05, + "loss": 0.4652, + "step": 36 + }, + { + "epoch": 0.24666666666666667, + "grad_norm": 1.3263046741485596, + "learning_rate": 4.707368982147318e-05, + "loss": 0.4638, + "step": 37 + }, + { + "epoch": 0.25333333333333335, + "grad_norm": 1.6372419595718384, + "learning_rate": 4.679461034241906e-05, + "loss": 0.6662, + "step": 38 + }, + { + "epoch": 0.26, + "grad_norm": 1.3750609159469604, + "learning_rate": 4.650372869738414e-05, + "loss": 0.3029, + "step": 39 + }, + { + "epoch": 0.26666666666666666, + "grad_norm": 1.442150354385376, + "learning_rate": 4.620120240391065e-05, + "loss": 0.7255, + "step": 40 + }, + { + "epoch": 0.2733333333333333, + "grad_norm": 1.3133295774459839, + "learning_rate": 4.588719528532342e-05, + "loss": 0.6364, + "step": 41 + }, + { + "epoch": 0.28, + "grad_norm": 1.2394391298294067, + "learning_rate": 4.556187738201656e-05, + "loss": 0.6514, + "step": 42 + }, + { + "epoch": 0.2866666666666667, + "grad_norm": 2.0945804119110107, + "learning_rate": 4.522542485937369e-05, + "loss": 0.4957, + "step": 43 + }, + { + "epoch": 0.29333333333333333, + "grad_norm": 2.036482810974121, + "learning_rate": 4.48780199123712e-05, + "loss": 0.5471, + "step": 44 + }, + { + "epoch": 0.3, + "grad_norm": 1.7951217889785767, + "learning_rate": 4.4519850666916484e-05, + "loss": 0.4969, + "step": 45 + }, + { + "epoch": 0.30666666666666664, + "grad_norm": 1.3251290321350098, + "learning_rate": 4.415111107797445e-05, + "loss": 0.5828, + "step": 46 + }, + { + "epoch": 0.31333333333333335, + "grad_norm": 1.5717222690582275, + "learning_rate": 4.377200082453749e-05, + "loss": 0.378, + "step": 47 + }, + { + "epoch": 0.32, + "grad_norm": 1.4296456575393677, + "learning_rate": 4.3382725201495723e-05, + "loss": 0.3957, + "step": 48 + }, + { + "epoch": 0.32666666666666666, + "grad_norm": 1.3804540634155273, + "learning_rate": 4.2983495008466276e-05, + "loss": 0.4861, + "step": 49 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 3.184690237045288, + "learning_rate": 4.257452643564155e-05, + "loss": 0.8223, + "step": 50 + }, + { + "epoch": 0.34, + "grad_norm": 1.361177682876587, + "learning_rate": 4.215604094671835e-05, + "loss": 0.7269, + "step": 51 + }, + { + "epoch": 0.3466666666666667, + "grad_norm": 1.2764381170272827, + "learning_rate": 4.172826515897146e-05, + "loss": 0.3787, + "step": 52 + }, + { + "epoch": 0.35333333333333333, + "grad_norm": 1.4495773315429688, + "learning_rate": 4.129143072053638e-05, + "loss": 0.2966, + "step": 53 + }, + { + "epoch": 0.36, + "grad_norm": 2.055936813354492, + "learning_rate": 4.0845774184967754e-05, + "loss": 0.4485, + "step": 54 + }, + { + "epoch": 0.36666666666666664, + "grad_norm": 1.8481249809265137, + "learning_rate": 4.039153688314145e-05, + "loss": 0.4182, + "step": 55 + }, + { + "epoch": 0.37333333333333335, + "grad_norm": 1.6037787199020386, + "learning_rate": 3.9928964792569655e-05, + "loss": 0.6953, + "step": 56 + }, + { + "epoch": 0.38, + "grad_norm": 1.5313551425933838, + "learning_rate": 3.945830840419966e-05, + "loss": 0.3223, + "step": 57 + }, + { + "epoch": 0.38666666666666666, + "grad_norm": 1.3661636114120483, + "learning_rate": 3.897982258676867e-05, + "loss": 0.4186, + "step": 58 + }, + { + "epoch": 0.3933333333333333, + "grad_norm": 2.39021372795105, + "learning_rate": 3.8493766448787825e-05, + "loss": 0.7415, + "step": 59 + }, + { + "epoch": 0.4, + "grad_norm": 2.5035974979400635, + "learning_rate": 3.8000403198230387e-05, + "loss": 0.9131, + "step": 60 + }, + { + "epoch": 0.4066666666666667, + "grad_norm": 1.3086011409759521, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.397, + "step": 61 + }, + { + "epoch": 0.41333333333333333, + "grad_norm": 1.9651494026184082, + "learning_rate": 3.699282783125616e-05, + "loss": 0.9127, + "step": 62 + }, + { + "epoch": 0.42, + "grad_norm": 2.166289806365967, + "learning_rate": 3.6479161334675296e-05, + "loss": 0.6211, + "step": 63 + }, + { + "epoch": 0.4266666666666667, + "grad_norm": 1.1649678945541382, + "learning_rate": 3.5959278669726935e-05, + "loss": 0.3828, + "step": 64 + }, + { + "epoch": 0.43333333333333335, + "grad_norm": 0.7926881909370422, + "learning_rate": 3.543346136204545e-05, + "loss": 0.1886, + "step": 65 + }, + { + "epoch": 0.44, + "grad_norm": 1.2959918975830078, + "learning_rate": 3.490199415097892e-05, + "loss": 0.3381, + "step": 66 + }, + { + "epoch": 0.44666666666666666, + "grad_norm": 1.3724843263626099, + "learning_rate": 3.436516483539781e-05, + "loss": 0.5337, + "step": 67 + }, + { + "epoch": 0.4533333333333333, + "grad_norm": 1.743539810180664, + "learning_rate": 3.382326411784672e-05, + "loss": 0.3935, + "step": 68 + }, + { + "epoch": 0.46, + "grad_norm": 2.310335397720337, + "learning_rate": 3.327658544712395e-05, + "loss": 0.3857, + "step": 69 + }, + { + "epoch": 0.4666666666666667, + "grad_norm": 2.399306058883667, + "learning_rate": 3.272542485937369e-05, + "loss": 1.0896, + "step": 70 + }, + { + "epoch": 0.47333333333333333, + "grad_norm": 1.2522423267364502, + "learning_rate": 3.217008081777726e-05, + "loss": 0.1551, + "step": 71 + }, + { + "epoch": 0.48, + "grad_norm": 2.307708740234375, + "learning_rate": 3.161085405093006e-05, + "loss": 0.3004, + "step": 72 + }, + { + "epoch": 0.4866666666666667, + "grad_norm": 1.527667760848999, + "learning_rate": 3.104804738999169e-05, + "loss": 0.3367, + "step": 73 + }, + { + "epoch": 0.49333333333333335, + "grad_norm": 1.2848842144012451, + "learning_rate": 3.048196560469758e-05, + "loss": 0.264, + "step": 74 + }, + { + "epoch": 0.5, + "grad_norm": 1.7577385902404785, + "learning_rate": 2.9912915238320754e-05, + "loss": 0.3975, + "step": 75 + }, + { + "epoch": 0.5066666666666667, + "grad_norm": 1.2079553604125977, + "learning_rate": 2.9341204441673266e-05, + "loss": 0.1619, + "step": 76 + }, + { + "epoch": 0.5133333333333333, + "grad_norm": 1.2687911987304688, + "learning_rate": 2.876714280623708e-05, + "loss": 0.1523, + "step": 77 + }, + { + "epoch": 0.52, + "grad_norm": 1.3904497623443604, + "learning_rate": 2.8191041196514873e-05, + "loss": 0.6355, + "step": 78 + }, + { + "epoch": 0.5266666666666666, + "grad_norm": 1.2210139036178589, + "learning_rate": 2.761321158169134e-05, + "loss": 0.1803, + "step": 79 + }, + { + "epoch": 0.5333333333333333, + "grad_norm": 2.2414464950561523, + "learning_rate": 2.7033966866696457e-05, + "loss": 0.2332, + "step": 80 + }, + { + "epoch": 0.54, + "grad_norm": 1.8386731147766113, + "learning_rate": 2.6453620722761896e-05, + "loss": 0.6357, + "step": 81 + }, + { + "epoch": 0.5466666666666666, + "grad_norm": 2.6382551193237305, + "learning_rate": 2.587248741756253e-05, + "loss": 0.3541, + "step": 82 + }, + { + "epoch": 0.5533333333333333, + "grad_norm": 3.260303020477295, + "learning_rate": 2.5290881645034932e-05, + "loss": 0.7056, + "step": 83 + }, + { + "epoch": 0.56, + "grad_norm": 4.604124546051025, + "learning_rate": 2.470911835496508e-05, + "loss": 0.8796, + "step": 84 + }, + { + "epoch": 0.5666666666666667, + "grad_norm": 3.7330353260040283, + "learning_rate": 2.4127512582437485e-05, + "loss": 0.2487, + "step": 85 + }, + { + "epoch": 0.5733333333333334, + "grad_norm": 1.9771713018417358, + "learning_rate": 2.3546379277238107e-05, + "loss": 0.3601, + "step": 86 + }, + { + "epoch": 0.58, + "grad_norm": 6.817903995513916, + "learning_rate": 2.2966033133303545e-05, + "loss": 1.3453, + "step": 87 + }, + { + "epoch": 0.5866666666666667, + "grad_norm": 2.8639299869537354, + "learning_rate": 2.238678841830867e-05, + "loss": 0.3974, + "step": 88 + }, + { + "epoch": 0.5933333333333334, + "grad_norm": 2.082796573638916, + "learning_rate": 2.1808958803485136e-05, + "loss": 0.4019, + "step": 89 + }, + { + "epoch": 0.6, + "grad_norm": 0.427212655544281, + "learning_rate": 2.1232857193762924e-05, + "loss": 0.0465, + "step": 90 + }, + { + "epoch": 0.6066666666666667, + "grad_norm": 1.7197939157485962, + "learning_rate": 2.0658795558326743e-05, + "loss": 0.2682, + "step": 91 + }, + { + "epoch": 0.6133333333333333, + "grad_norm": 0.09823339432477951, + "learning_rate": 2.0087084761679245e-05, + "loss": 0.0111, + "step": 92 + }, + { + "epoch": 0.62, + "grad_norm": 1.9435588121414185, + "learning_rate": 1.9518034395302414e-05, + "loss": 0.124, + "step": 93 + }, + { + "epoch": 0.6266666666666667, + "grad_norm": 2.9964282512664795, + "learning_rate": 1.895195261000831e-05, + "loss": 0.3849, + "step": 94 + }, + { + "epoch": 0.6333333333333333, + "grad_norm": 1.595879077911377, + "learning_rate": 1.838914594906995e-05, + "loss": 0.2477, + "step": 95 + }, + { + "epoch": 0.64, + "grad_norm": 5.441842555999756, + "learning_rate": 1.7829919182222752e-05, + "loss": 0.5461, + "step": 96 + }, + { + "epoch": 0.6466666666666666, + "grad_norm": 2.448079824447632, + "learning_rate": 1.7274575140626318e-05, + "loss": 0.5091, + "step": 97 + }, + { + "epoch": 0.6533333333333333, + "grad_norm": 3.598569631576538, + "learning_rate": 1.672341455287605e-05, + "loss": 0.4161, + "step": 98 + }, + { + "epoch": 0.66, + "grad_norm": 5.45053243637085, + "learning_rate": 1.617673588215328e-05, + "loss": 0.8538, + "step": 99 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 1.3044579029083252, + "learning_rate": 1.56348351646022e-05, + "loss": 0.171, + "step": 100 + }, + { + "epoch": 0.6666666666666666, + "eval_loss": 0.4516909718513489, + "eval_runtime": 66.1438, + "eval_samples_per_second": 2.253, + "eval_steps_per_second": 1.134, + "step": 100 + }, + { + "epoch": 0.6733333333333333, + "grad_norm": 8.098329544067383, + "learning_rate": 1.509800584902108e-05, + "loss": 1.1298, + "step": 101 + }, + { + "epoch": 0.68, + "grad_norm": 1.519363284111023, + "learning_rate": 1.4566538637954554e-05, + "loss": 0.1545, + "step": 102 + }, + { + "epoch": 0.6866666666666666, + "grad_norm": 2.4320404529571533, + "learning_rate": 1.4040721330273062e-05, + "loss": 0.1781, + "step": 103 + }, + { + "epoch": 0.6933333333333334, + "grad_norm": 4.600680828094482, + "learning_rate": 1.3520838665324703e-05, + "loss": 0.4178, + "step": 104 + }, + { + "epoch": 0.7, + "grad_norm": 1.4558080434799194, + "learning_rate": 1.3007172168743854e-05, + "loss": 0.1014, + "step": 105 + }, + { + "epoch": 0.7066666666666667, + "grad_norm": 1.6424729824066162, + "learning_rate": 1.2500000000000006e-05, + "loss": 0.1476, + "step": 106 + }, + { + "epoch": 0.7133333333333334, + "grad_norm": 3.927393913269043, + "learning_rate": 1.1999596801769616e-05, + "loss": 1.0977, + "step": 107 + }, + { + "epoch": 0.72, + "grad_norm": 0.8934208154678345, + "learning_rate": 1.1506233551212186e-05, + "loss": 0.1165, + "step": 108 + }, + { + "epoch": 0.7266666666666667, + "grad_norm": 2.651909589767456, + "learning_rate": 1.1020177413231334e-05, + "loss": 0.5939, + "step": 109 + }, + { + "epoch": 0.7333333333333333, + "grad_norm": 1.0333629846572876, + "learning_rate": 1.0541691595800337e-05, + "loss": 0.152, + "step": 110 + }, + { + "epoch": 0.74, + "grad_norm": 2.1822710037231445, + "learning_rate": 1.0071035207430352e-05, + "loss": 0.4252, + "step": 111 + }, + { + "epoch": 0.7466666666666667, + "grad_norm": 1.6616872549057007, + "learning_rate": 9.608463116858542e-06, + "loss": 0.1772, + "step": 112 + }, + { + "epoch": 0.7533333333333333, + "grad_norm": 4.919057846069336, + "learning_rate": 9.154225815032242e-06, + "loss": 0.4062, + "step": 113 + }, + { + "epoch": 0.76, + "grad_norm": 4.426158905029297, + "learning_rate": 8.708569279463622e-06, + "loss": 0.6537, + "step": 114 + }, + { + "epoch": 0.7666666666666667, + "grad_norm": 2.4902124404907227, + "learning_rate": 8.271734841028553e-06, + "loss": 0.1949, + "step": 115 + }, + { + "epoch": 0.7733333333333333, + "grad_norm": 0.6176963448524475, + "learning_rate": 7.843959053281663e-06, + "loss": 0.0729, + "step": 116 + }, + { + "epoch": 0.78, + "grad_norm": 2.257253408432007, + "learning_rate": 7.4254735643584564e-06, + "loss": 0.2163, + "step": 117 + }, + { + "epoch": 0.7866666666666666, + "grad_norm": 1.069271445274353, + "learning_rate": 7.016504991533726e-06, + "loss": 0.156, + "step": 118 + }, + { + "epoch": 0.7933333333333333, + "grad_norm": 0.6102529764175415, + "learning_rate": 6.617274798504286e-06, + "loss": 0.0632, + "step": 119 + }, + { + "epoch": 0.8, + "grad_norm": 4.137846946716309, + "learning_rate": 6.22799917546252e-06, + "loss": 0.5415, + "step": 120 + }, + { + "epoch": 0.8066666666666666, + "grad_norm": 2.0759520530700684, + "learning_rate": 5.848888922025553e-06, + "loss": 0.3255, + "step": 121 + }, + { + "epoch": 0.8133333333333334, + "grad_norm": 4.551874160766602, + "learning_rate": 5.48014933308352e-06, + "loss": 0.3398, + "step": 122 + }, + { + "epoch": 0.82, + "grad_norm": 1.0534967184066772, + "learning_rate": 5.121980087628803e-06, + "loss": 0.1326, + "step": 123 + }, + { + "epoch": 0.8266666666666667, + "grad_norm": 2.607910394668579, + "learning_rate": 4.7745751406263165e-06, + "loss": 0.8089, + "step": 124 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 2.044987201690674, + "learning_rate": 4.438122617983443e-06, + "loss": 0.238, + "step": 125 + }, + { + "epoch": 0.84, + "grad_norm": 1.440517544746399, + "learning_rate": 4.112804714676594e-06, + "loss": 0.1659, + "step": 126 + }, + { + "epoch": 0.8466666666666667, + "grad_norm": 3.426896333694458, + "learning_rate": 3.798797596089351e-06, + "loss": 0.4279, + "step": 127 + }, + { + "epoch": 0.8533333333333334, + "grad_norm": 2.2087795734405518, + "learning_rate": 3.4962713026158694e-06, + "loss": 1.0499, + "step": 128 + }, + { + "epoch": 0.86, + "grad_norm": 0.9487075805664062, + "learning_rate": 3.205389657580943e-06, + "loss": 0.1213, + "step": 129 + }, + { + "epoch": 0.8666666666666667, + "grad_norm": 2.0362558364868164, + "learning_rate": 2.9263101785268254e-06, + "loss": 0.6995, + "step": 130 + }, + { + "epoch": 0.8733333333333333, + "grad_norm": 4.185983180999756, + "learning_rate": 2.659183991914696e-06, + "loss": 0.6105, + "step": 131 + }, + { + "epoch": 0.88, + "grad_norm": 2.219158411026001, + "learning_rate": 2.4041557512869878e-06, + "loss": 0.2062, + "step": 132 + }, + { + "epoch": 0.8866666666666667, + "grad_norm": 0.7529279589653015, + "learning_rate": 2.1613635589349756e-06, + "loss": 0.0839, + "step": 133 + }, + { + "epoch": 0.8933333333333333, + "grad_norm": 4.446479320526123, + "learning_rate": 1.9309388911139426e-06, + "loss": 1.0925, + "step": 134 + }, + { + "epoch": 0.9, + "grad_norm": 5.211433410644531, + "learning_rate": 1.713006526846439e-06, + "loss": 0.6347, + "step": 135 + }, + { + "epoch": 0.9066666666666666, + "grad_norm": 2.469029426574707, + "learning_rate": 1.5076844803522922e-06, + "loss": 0.4994, + "step": 136 + }, + { + "epoch": 0.9133333333333333, + "grad_norm": 5.426701545715332, + "learning_rate": 1.31508393714177e-06, + "loss": 0.4131, + "step": 137 + }, + { + "epoch": 0.92, + "grad_norm": 2.973341941833496, + "learning_rate": 1.1353091938067023e-06, + "loss": 0.3887, + "step": 138 + }, + { + "epoch": 0.9266666666666666, + "grad_norm": 2.101803779602051, + "learning_rate": 9.684576015420278e-07, + "loss": 0.2598, + "step": 139 + }, + { + "epoch": 0.9333333333333333, + "grad_norm": 0.5676771998405457, + "learning_rate": 8.146195134284052e-07, + "loss": 0.0698, + "step": 140 + }, + { + "epoch": 0.94, + "grad_norm": 0.5499122142791748, + "learning_rate": 6.738782355044049e-07, + "loss": 0.0823, + "step": 141 + }, + { + "epoch": 0.9466666666666667, + "grad_norm": 0.36405226588249207, + "learning_rate": 5.463099816548579e-07, + "loss": 0.0406, + "step": 142 + }, + { + "epoch": 0.9533333333333334, + "grad_norm": 1.898514747619629, + "learning_rate": 4.319838323396691e-07, + "loss": 0.4404, + "step": 143 + }, + { + "epoch": 0.96, + "grad_norm": 1.7996866703033447, + "learning_rate": 3.309616971855195e-07, + "loss": 0.2531, + "step": 144 + }, + { + "epoch": 0.9666666666666667, + "grad_norm": 2.6673455238342285, + "learning_rate": 2.4329828146074095e-07, + "loss": 0.7721, + "step": 145 + }, + { + "epoch": 0.9733333333333334, + "grad_norm": 7.725250244140625, + "learning_rate": 1.6904105645142444e-07, + "loss": 1.7164, + "step": 146 + }, + { + "epoch": 0.98, + "grad_norm": 2.411264419555664, + "learning_rate": 1.0823023375489127e-07, + "loss": 0.4909, + "step": 147 + } + ], + "logging_steps": 1, + "max_steps": 150, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 49, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.014568182695854e+17, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/ca_v3_run_20251215/checkpoint-147/training_args.bin b/ca_v3_run_20251215/checkpoint-147/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..88b4ad4e5f06f68e064e676272032d9cd8af9147 --- /dev/null +++ b/ca_v3_run_20251215/checkpoint-147/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63c0e75d45a7050daffe2e65badc27748032d13318e1d0af24fcae2c215424f0 +size 5777 diff --git a/ca_v3_run_20251215/checkpoint-150/README.md b/ca_v3_run_20251215/checkpoint-150/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d283785603342a12561751304db4c120bbd293fc --- /dev/null +++ b/ca_v3_run_20251215/checkpoint-150/README.md @@ -0,0 +1,207 @@ +--- +base_model: Qwen/Qwen2.5-Coder-14B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-Coder-14B-Instruct +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/ca_v3_run_20251215/checkpoint-150/adapter_config.json b/ca_v3_run_20251215/checkpoint-150/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bd100e2ef4775697a7cb0c0abaae99f244053d68 --- /dev/null +++ b/ca_v3_run_20251215/checkpoint-150/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-Coder-14B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "k_proj", + "q_proj", + "v_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/ca_v3_run_20251215/checkpoint-150/adapter_model.safetensors b/ca_v3_run_20251215/checkpoint-150/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ba2d53f00df555c320d5fa8bfd92a9508b7313e3 --- /dev/null +++ b/ca_v3_run_20251215/checkpoint-150/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c6966fb106cba724091bb384a14877dd0d857a05bd7354cef7953e56fdecb77 +size 201378736 diff --git a/ca_v3_run_20251215/checkpoint-150/optimizer.pt b/ca_v3_run_20251215/checkpoint-150/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..65b00be983c44726678d780022d5ad5e15976150 --- /dev/null +++ b/ca_v3_run_20251215/checkpoint-150/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:392183f58bb6c3c87cc8b1040b1bd5940b7f2522896624f3668d965de80ee89b +size 402982627 diff --git a/ca_v3_run_20251215/checkpoint-150/rng_state.pth b/ca_v3_run_20251215/checkpoint-150/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..b6e5ebccc9e6a700fd9d1abbdf919ba33a76cbed --- /dev/null +++ b/ca_v3_run_20251215/checkpoint-150/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a47d1ead692de2bce57a9ae7ba59450da50376926e3a3a2a0ebc83b70d9ba03 +size 14645 diff --git a/ca_v3_run_20251215/checkpoint-150/scheduler.pt b/ca_v3_run_20251215/checkpoint-150/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..3b259f6f75e583e4febfc080b5b8331e00665a2f --- /dev/null +++ b/ca_v3_run_20251215/checkpoint-150/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30781be5b5c4396cc620b6281ac1b634d4288d62bfab3224155044b29d42cdb1 +size 1465 diff --git a/ca_v3_run_20251215/checkpoint-150/trainer_state.json b/ca_v3_run_20251215/checkpoint-150/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..008b27243cee0eb2e38fd9d96131f1a346233be4 --- /dev/null +++ b/ca_v3_run_20251215/checkpoint-150/trainer_state.json @@ -0,0 +1,1092 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 100, + "global_step": 150, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.006666666666666667, + "grad_norm": 2.267627239227295, + "learning_rate": 0.0, + "loss": 0.8939, + "step": 1 + }, + { + "epoch": 0.013333333333333334, + "grad_norm": 1.5160397291183472, + "learning_rate": 3.3333333333333333e-06, + "loss": 0.6392, + "step": 2 + }, + { + "epoch": 0.02, + "grad_norm": 1.4698739051818848, + "learning_rate": 6.666666666666667e-06, + "loss": 0.8476, + "step": 3 + }, + { + "epoch": 0.02666666666666667, + "grad_norm": 4.279767036437988, + "learning_rate": 1e-05, + "loss": 1.2988, + "step": 4 + }, + { + "epoch": 0.03333333333333333, + "grad_norm": 3.669569730758667, + "learning_rate": 1.3333333333333333e-05, + "loss": 0.95, + "step": 5 + }, + { + "epoch": 0.04, + "grad_norm": 1.2172225713729858, + "learning_rate": 1.6666666666666667e-05, + "loss": 0.3448, + "step": 6 + }, + { + "epoch": 0.04666666666666667, + "grad_norm": 1.891830325126648, + "learning_rate": 2e-05, + "loss": 0.7179, + "step": 7 + }, + { + "epoch": 0.05333333333333334, + "grad_norm": 2.352062940597534, + "learning_rate": 2.3333333333333336e-05, + "loss": 0.5782, + "step": 8 + }, + { + "epoch": 0.06, + "grad_norm": 1.2546859979629517, + "learning_rate": 2.6666666666666667e-05, + "loss": 0.686, + "step": 9 + }, + { + "epoch": 0.06666666666666667, + "grad_norm": 1.1951119899749756, + "learning_rate": 3e-05, + "loss": 0.2151, + "step": 10 + }, + { + "epoch": 0.07333333333333333, + "grad_norm": 1.5607569217681885, + "learning_rate": 3.3333333333333335e-05, + "loss": 0.3339, + "step": 11 + }, + { + "epoch": 0.08, + "grad_norm": 1.513392448425293, + "learning_rate": 3.6666666666666666e-05, + "loss": 0.7432, + "step": 12 + }, + { + "epoch": 0.08666666666666667, + "grad_norm": 1.3942737579345703, + "learning_rate": 4e-05, + "loss": 0.5372, + "step": 13 + }, + { + "epoch": 0.09333333333333334, + "grad_norm": 2.1662518978118896, + "learning_rate": 4.3333333333333334e-05, + "loss": 0.5742, + "step": 14 + }, + { + "epoch": 0.1, + "grad_norm": 2.074810028076172, + "learning_rate": 4.666666666666667e-05, + "loss": 0.6484, + "step": 15 + }, + { + "epoch": 0.10666666666666667, + "grad_norm": 2.5651564598083496, + "learning_rate": 5e-05, + "loss": 0.678, + "step": 16 + }, + { + "epoch": 0.11333333333333333, + "grad_norm": 3.3820931911468506, + "learning_rate": 4.9993231029486544e-05, + "loss": 0.6619, + "step": 17 + }, + { + "epoch": 0.12, + "grad_norm": 1.7587597370147705, + "learning_rate": 4.997292778346312e-05, + "loss": 0.665, + "step": 18 + }, + { + "epoch": 0.12666666666666668, + "grad_norm": 2.234240770339966, + "learning_rate": 4.993910125649561e-05, + "loss": 0.4308, + "step": 19 + }, + { + "epoch": 0.13333333333333333, + "grad_norm": 1.0802106857299805, + "learning_rate": 4.989176976624511e-05, + "loss": 0.6038, + "step": 20 + }, + { + "epoch": 0.14, + "grad_norm": 2.3648719787597656, + "learning_rate": 4.983095894354858e-05, + "loss": 0.5436, + "step": 21 + }, + { + "epoch": 0.14666666666666667, + "grad_norm": 1.2552648782730103, + "learning_rate": 4.975670171853926e-05, + "loss": 0.6223, + "step": 22 + }, + { + "epoch": 0.15333333333333332, + "grad_norm": 2.2331910133361816, + "learning_rate": 4.966903830281449e-05, + "loss": 0.6468, + "step": 23 + }, + { + "epoch": 0.16, + "grad_norm": 1.7010774612426758, + "learning_rate": 4.9568016167660334e-05, + "loss": 0.6019, + "step": 24 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 1.6806925535202026, + "learning_rate": 4.9453690018345144e-05, + "loss": 0.3979, + "step": 25 + }, + { + "epoch": 0.17333333333333334, + "grad_norm": 1.4811266660690308, + "learning_rate": 4.9326121764495596e-05, + "loss": 0.8038, + "step": 26 + }, + { + "epoch": 0.18, + "grad_norm": 1.5548367500305176, + "learning_rate": 4.9185380486571595e-05, + "loss": 0.5653, + "step": 27 + }, + { + "epoch": 0.18666666666666668, + "grad_norm": 1.4900885820388794, + "learning_rate": 4.9031542398457974e-05, + "loss": 0.7534, + "step": 28 + }, + { + "epoch": 0.19333333333333333, + "grad_norm": 1.2963446378707886, + "learning_rate": 4.88646908061933e-05, + "loss": 0.6946, + "step": 29 + }, + { + "epoch": 0.2, + "grad_norm": 1.885505199432373, + "learning_rate": 4.868491606285823e-05, + "loss": 0.8456, + "step": 30 + }, + { + "epoch": 0.20666666666666667, + "grad_norm": 1.0747458934783936, + "learning_rate": 4.849231551964771e-05, + "loss": 0.336, + "step": 31 + }, + { + "epoch": 0.21333333333333335, + "grad_norm": 2.0049006938934326, + "learning_rate": 4.828699347315356e-05, + "loss": 0.6669, + "step": 32 + }, + { + "epoch": 0.22, + "grad_norm": 1.020938515663147, + "learning_rate": 4.806906110888606e-05, + "loss": 0.6849, + "step": 33 + }, + { + "epoch": 0.22666666666666666, + "grad_norm": 1.3365986347198486, + "learning_rate": 4.783863644106502e-05, + "loss": 0.4813, + "step": 34 + }, + { + "epoch": 0.23333333333333334, + "grad_norm": 2.2962701320648193, + "learning_rate": 4.759584424871302e-05, + "loss": 1.156, + "step": 35 + }, + { + "epoch": 0.24, + "grad_norm": 1.1328681707382202, + "learning_rate": 4.734081600808531e-05, + "loss": 0.4652, + "step": 36 + }, + { + "epoch": 0.24666666666666667, + "grad_norm": 1.3263046741485596, + "learning_rate": 4.707368982147318e-05, + "loss": 0.4638, + "step": 37 + }, + { + "epoch": 0.25333333333333335, + "grad_norm": 1.6372419595718384, + "learning_rate": 4.679461034241906e-05, + "loss": 0.6662, + "step": 38 + }, + { + "epoch": 0.26, + "grad_norm": 1.3750609159469604, + "learning_rate": 4.650372869738414e-05, + "loss": 0.3029, + "step": 39 + }, + { + "epoch": 0.26666666666666666, + "grad_norm": 1.442150354385376, + "learning_rate": 4.620120240391065e-05, + "loss": 0.7255, + "step": 40 + }, + { + "epoch": 0.2733333333333333, + "grad_norm": 1.3133295774459839, + "learning_rate": 4.588719528532342e-05, + "loss": 0.6364, + "step": 41 + }, + { + "epoch": 0.28, + "grad_norm": 1.2394391298294067, + "learning_rate": 4.556187738201656e-05, + "loss": 0.6514, + "step": 42 + }, + { + "epoch": 0.2866666666666667, + "grad_norm": 2.0945804119110107, + "learning_rate": 4.522542485937369e-05, + "loss": 0.4957, + "step": 43 + }, + { + "epoch": 0.29333333333333333, + "grad_norm": 2.036482810974121, + "learning_rate": 4.48780199123712e-05, + "loss": 0.5471, + "step": 44 + }, + { + "epoch": 0.3, + "grad_norm": 1.7951217889785767, + "learning_rate": 4.4519850666916484e-05, + "loss": 0.4969, + "step": 45 + }, + { + "epoch": 0.30666666666666664, + "grad_norm": 1.3251290321350098, + "learning_rate": 4.415111107797445e-05, + "loss": 0.5828, + "step": 46 + }, + { + "epoch": 0.31333333333333335, + "grad_norm": 1.5717222690582275, + "learning_rate": 4.377200082453749e-05, + "loss": 0.378, + "step": 47 + }, + { + "epoch": 0.32, + "grad_norm": 1.4296456575393677, + "learning_rate": 4.3382725201495723e-05, + "loss": 0.3957, + "step": 48 + }, + { + "epoch": 0.32666666666666666, + "grad_norm": 1.3804540634155273, + "learning_rate": 4.2983495008466276e-05, + "loss": 0.4861, + "step": 49 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 3.184690237045288, + "learning_rate": 4.257452643564155e-05, + "loss": 0.8223, + "step": 50 + }, + { + "epoch": 0.34, + "grad_norm": 1.361177682876587, + "learning_rate": 4.215604094671835e-05, + "loss": 0.7269, + "step": 51 + }, + { + "epoch": 0.3466666666666667, + "grad_norm": 1.2764381170272827, + "learning_rate": 4.172826515897146e-05, + "loss": 0.3787, + "step": 52 + }, + { + "epoch": 0.35333333333333333, + "grad_norm": 1.4495773315429688, + "learning_rate": 4.129143072053638e-05, + "loss": 0.2966, + "step": 53 + }, + { + "epoch": 0.36, + "grad_norm": 2.055936813354492, + "learning_rate": 4.0845774184967754e-05, + "loss": 0.4485, + "step": 54 + }, + { + "epoch": 0.36666666666666664, + "grad_norm": 1.8481249809265137, + "learning_rate": 4.039153688314145e-05, + "loss": 0.4182, + "step": 55 + }, + { + "epoch": 0.37333333333333335, + "grad_norm": 1.6037787199020386, + "learning_rate": 3.9928964792569655e-05, + "loss": 0.6953, + "step": 56 + }, + { + "epoch": 0.38, + "grad_norm": 1.5313551425933838, + "learning_rate": 3.945830840419966e-05, + "loss": 0.3223, + "step": 57 + }, + { + "epoch": 0.38666666666666666, + "grad_norm": 1.3661636114120483, + "learning_rate": 3.897982258676867e-05, + "loss": 0.4186, + "step": 58 + }, + { + "epoch": 0.3933333333333333, + "grad_norm": 2.39021372795105, + "learning_rate": 3.8493766448787825e-05, + "loss": 0.7415, + "step": 59 + }, + { + "epoch": 0.4, + "grad_norm": 2.5035974979400635, + "learning_rate": 3.8000403198230387e-05, + "loss": 0.9131, + "step": 60 + }, + { + "epoch": 0.4066666666666667, + "grad_norm": 1.3086011409759521, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.397, + "step": 61 + }, + { + "epoch": 0.41333333333333333, + "grad_norm": 1.9651494026184082, + "learning_rate": 3.699282783125616e-05, + "loss": 0.9127, + "step": 62 + }, + { + "epoch": 0.42, + "grad_norm": 2.166289806365967, + "learning_rate": 3.6479161334675296e-05, + "loss": 0.6211, + "step": 63 + }, + { + "epoch": 0.4266666666666667, + "grad_norm": 1.1649678945541382, + "learning_rate": 3.5959278669726935e-05, + "loss": 0.3828, + "step": 64 + }, + { + "epoch": 0.43333333333333335, + "grad_norm": 0.7926881909370422, + "learning_rate": 3.543346136204545e-05, + "loss": 0.1886, + "step": 65 + }, + { + "epoch": 0.44, + "grad_norm": 1.2959918975830078, + "learning_rate": 3.490199415097892e-05, + "loss": 0.3381, + "step": 66 + }, + { + "epoch": 0.44666666666666666, + "grad_norm": 1.3724843263626099, + "learning_rate": 3.436516483539781e-05, + "loss": 0.5337, + "step": 67 + }, + { + "epoch": 0.4533333333333333, + "grad_norm": 1.743539810180664, + "learning_rate": 3.382326411784672e-05, + "loss": 0.3935, + "step": 68 + }, + { + "epoch": 0.46, + "grad_norm": 2.310335397720337, + "learning_rate": 3.327658544712395e-05, + "loss": 0.3857, + "step": 69 + }, + { + "epoch": 0.4666666666666667, + "grad_norm": 2.399306058883667, + "learning_rate": 3.272542485937369e-05, + "loss": 1.0896, + "step": 70 + }, + { + "epoch": 0.47333333333333333, + "grad_norm": 1.2522423267364502, + "learning_rate": 3.217008081777726e-05, + "loss": 0.1551, + "step": 71 + }, + { + "epoch": 0.48, + "grad_norm": 2.307708740234375, + "learning_rate": 3.161085405093006e-05, + "loss": 0.3004, + "step": 72 + }, + { + "epoch": 0.4866666666666667, + "grad_norm": 1.527667760848999, + "learning_rate": 3.104804738999169e-05, + "loss": 0.3367, + "step": 73 + }, + { + "epoch": 0.49333333333333335, + "grad_norm": 1.2848842144012451, + "learning_rate": 3.048196560469758e-05, + "loss": 0.264, + "step": 74 + }, + { + "epoch": 0.5, + "grad_norm": 1.7577385902404785, + "learning_rate": 2.9912915238320754e-05, + "loss": 0.3975, + "step": 75 + }, + { + "epoch": 0.5066666666666667, + "grad_norm": 1.2079553604125977, + "learning_rate": 2.9341204441673266e-05, + "loss": 0.1619, + "step": 76 + }, + { + "epoch": 0.5133333333333333, + "grad_norm": 1.2687911987304688, + "learning_rate": 2.876714280623708e-05, + "loss": 0.1523, + "step": 77 + }, + { + "epoch": 0.52, + "grad_norm": 1.3904497623443604, + "learning_rate": 2.8191041196514873e-05, + "loss": 0.6355, + "step": 78 + }, + { + "epoch": 0.5266666666666666, + "grad_norm": 1.2210139036178589, + "learning_rate": 2.761321158169134e-05, + "loss": 0.1803, + "step": 79 + }, + { + "epoch": 0.5333333333333333, + "grad_norm": 2.2414464950561523, + "learning_rate": 2.7033966866696457e-05, + "loss": 0.2332, + "step": 80 + }, + { + "epoch": 0.54, + "grad_norm": 1.8386731147766113, + "learning_rate": 2.6453620722761896e-05, + "loss": 0.6357, + "step": 81 + }, + { + "epoch": 0.5466666666666666, + "grad_norm": 2.6382551193237305, + "learning_rate": 2.587248741756253e-05, + "loss": 0.3541, + "step": 82 + }, + { + "epoch": 0.5533333333333333, + "grad_norm": 3.260303020477295, + "learning_rate": 2.5290881645034932e-05, + "loss": 0.7056, + "step": 83 + }, + { + "epoch": 0.56, + "grad_norm": 4.604124546051025, + "learning_rate": 2.470911835496508e-05, + "loss": 0.8796, + "step": 84 + }, + { + "epoch": 0.5666666666666667, + "grad_norm": 3.7330353260040283, + "learning_rate": 2.4127512582437485e-05, + "loss": 0.2487, + "step": 85 + }, + { + "epoch": 0.5733333333333334, + "grad_norm": 1.9771713018417358, + "learning_rate": 2.3546379277238107e-05, + "loss": 0.3601, + "step": 86 + }, + { + "epoch": 0.58, + "grad_norm": 6.817903995513916, + "learning_rate": 2.2966033133303545e-05, + "loss": 1.3453, + "step": 87 + }, + { + "epoch": 0.5866666666666667, + "grad_norm": 2.8639299869537354, + "learning_rate": 2.238678841830867e-05, + "loss": 0.3974, + "step": 88 + }, + { + "epoch": 0.5933333333333334, + "grad_norm": 2.082796573638916, + "learning_rate": 2.1808958803485136e-05, + "loss": 0.4019, + "step": 89 + }, + { + "epoch": 0.6, + "grad_norm": 0.427212655544281, + "learning_rate": 2.1232857193762924e-05, + "loss": 0.0465, + "step": 90 + }, + { + "epoch": 0.6066666666666667, + "grad_norm": 1.7197939157485962, + "learning_rate": 2.0658795558326743e-05, + "loss": 0.2682, + "step": 91 + }, + { + "epoch": 0.6133333333333333, + "grad_norm": 0.09823339432477951, + "learning_rate": 2.0087084761679245e-05, + "loss": 0.0111, + "step": 92 + }, + { + "epoch": 0.62, + "grad_norm": 1.9435588121414185, + "learning_rate": 1.9518034395302414e-05, + "loss": 0.124, + "step": 93 + }, + { + "epoch": 0.6266666666666667, + "grad_norm": 2.9964282512664795, + "learning_rate": 1.895195261000831e-05, + "loss": 0.3849, + "step": 94 + }, + { + "epoch": 0.6333333333333333, + "grad_norm": 1.595879077911377, + "learning_rate": 1.838914594906995e-05, + "loss": 0.2477, + "step": 95 + }, + { + "epoch": 0.64, + "grad_norm": 5.441842555999756, + "learning_rate": 1.7829919182222752e-05, + "loss": 0.5461, + "step": 96 + }, + { + "epoch": 0.6466666666666666, + "grad_norm": 2.448079824447632, + "learning_rate": 1.7274575140626318e-05, + "loss": 0.5091, + "step": 97 + }, + { + "epoch": 0.6533333333333333, + "grad_norm": 3.598569631576538, + "learning_rate": 1.672341455287605e-05, + "loss": 0.4161, + "step": 98 + }, + { + "epoch": 0.66, + "grad_norm": 5.45053243637085, + "learning_rate": 1.617673588215328e-05, + "loss": 0.8538, + "step": 99 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 1.3044579029083252, + "learning_rate": 1.56348351646022e-05, + "loss": 0.171, + "step": 100 + }, + { + "epoch": 0.6666666666666666, + "eval_loss": 0.4516909718513489, + "eval_runtime": 66.1438, + "eval_samples_per_second": 2.253, + "eval_steps_per_second": 1.134, + "step": 100 + }, + { + "epoch": 0.6733333333333333, + "grad_norm": 8.098329544067383, + "learning_rate": 1.509800584902108e-05, + "loss": 1.1298, + "step": 101 + }, + { + "epoch": 0.68, + "grad_norm": 1.519363284111023, + "learning_rate": 1.4566538637954554e-05, + "loss": 0.1545, + "step": 102 + }, + { + "epoch": 0.6866666666666666, + "grad_norm": 2.4320404529571533, + "learning_rate": 1.4040721330273062e-05, + "loss": 0.1781, + "step": 103 + }, + { + "epoch": 0.6933333333333334, + "grad_norm": 4.600680828094482, + "learning_rate": 1.3520838665324703e-05, + "loss": 0.4178, + "step": 104 + }, + { + "epoch": 0.7, + "grad_norm": 1.4558080434799194, + "learning_rate": 1.3007172168743854e-05, + "loss": 0.1014, + "step": 105 + }, + { + "epoch": 0.7066666666666667, + "grad_norm": 1.6424729824066162, + "learning_rate": 1.2500000000000006e-05, + "loss": 0.1476, + "step": 106 + }, + { + "epoch": 0.7133333333333334, + "grad_norm": 3.927393913269043, + "learning_rate": 1.1999596801769616e-05, + "loss": 1.0977, + "step": 107 + }, + { + "epoch": 0.72, + "grad_norm": 0.8934208154678345, + "learning_rate": 1.1506233551212186e-05, + "loss": 0.1165, + "step": 108 + }, + { + "epoch": 0.7266666666666667, + "grad_norm": 2.651909589767456, + "learning_rate": 1.1020177413231334e-05, + "loss": 0.5939, + "step": 109 + }, + { + "epoch": 0.7333333333333333, + "grad_norm": 1.0333629846572876, + "learning_rate": 1.0541691595800337e-05, + "loss": 0.152, + "step": 110 + }, + { + "epoch": 0.74, + "grad_norm": 2.1822710037231445, + "learning_rate": 1.0071035207430352e-05, + "loss": 0.4252, + "step": 111 + }, + { + "epoch": 0.7466666666666667, + "grad_norm": 1.6616872549057007, + "learning_rate": 9.608463116858542e-06, + "loss": 0.1772, + "step": 112 + }, + { + "epoch": 0.7533333333333333, + "grad_norm": 4.919057846069336, + "learning_rate": 9.154225815032242e-06, + "loss": 0.4062, + "step": 113 + }, + { + "epoch": 0.76, + "grad_norm": 4.426158905029297, + "learning_rate": 8.708569279463622e-06, + "loss": 0.6537, + "step": 114 + }, + { + "epoch": 0.7666666666666667, + "grad_norm": 2.4902124404907227, + "learning_rate": 8.271734841028553e-06, + "loss": 0.1949, + "step": 115 + }, + { + "epoch": 0.7733333333333333, + "grad_norm": 0.6176963448524475, + "learning_rate": 7.843959053281663e-06, + "loss": 0.0729, + "step": 116 + }, + { + "epoch": 0.78, + "grad_norm": 2.257253408432007, + "learning_rate": 7.4254735643584564e-06, + "loss": 0.2163, + "step": 117 + }, + { + "epoch": 0.7866666666666666, + "grad_norm": 1.069271445274353, + "learning_rate": 7.016504991533726e-06, + "loss": 0.156, + "step": 118 + }, + { + "epoch": 0.7933333333333333, + "grad_norm": 0.6102529764175415, + "learning_rate": 6.617274798504286e-06, + "loss": 0.0632, + "step": 119 + }, + { + "epoch": 0.8, + "grad_norm": 4.137846946716309, + "learning_rate": 6.22799917546252e-06, + "loss": 0.5415, + "step": 120 + }, + { + "epoch": 0.8066666666666666, + "grad_norm": 2.0759520530700684, + "learning_rate": 5.848888922025553e-06, + "loss": 0.3255, + "step": 121 + }, + { + "epoch": 0.8133333333333334, + "grad_norm": 4.551874160766602, + "learning_rate": 5.48014933308352e-06, + "loss": 0.3398, + "step": 122 + }, + { + "epoch": 0.82, + "grad_norm": 1.0534967184066772, + "learning_rate": 5.121980087628803e-06, + "loss": 0.1326, + "step": 123 + }, + { + "epoch": 0.8266666666666667, + "grad_norm": 2.607910394668579, + "learning_rate": 4.7745751406263165e-06, + "loss": 0.8089, + "step": 124 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 2.044987201690674, + "learning_rate": 4.438122617983443e-06, + "loss": 0.238, + "step": 125 + }, + { + "epoch": 0.84, + "grad_norm": 1.440517544746399, + "learning_rate": 4.112804714676594e-06, + "loss": 0.1659, + "step": 126 + }, + { + "epoch": 0.8466666666666667, + "grad_norm": 3.426896333694458, + "learning_rate": 3.798797596089351e-06, + "loss": 0.4279, + "step": 127 + }, + { + "epoch": 0.8533333333333334, + "grad_norm": 2.2087795734405518, + "learning_rate": 3.4962713026158694e-06, + "loss": 1.0499, + "step": 128 + }, + { + "epoch": 0.86, + "grad_norm": 0.9487075805664062, + "learning_rate": 3.205389657580943e-06, + "loss": 0.1213, + "step": 129 + }, + { + "epoch": 0.8666666666666667, + "grad_norm": 2.0362558364868164, + "learning_rate": 2.9263101785268254e-06, + "loss": 0.6995, + "step": 130 + }, + { + "epoch": 0.8733333333333333, + "grad_norm": 4.185983180999756, + "learning_rate": 2.659183991914696e-06, + "loss": 0.6105, + "step": 131 + }, + { + "epoch": 0.88, + "grad_norm": 2.219158411026001, + "learning_rate": 2.4041557512869878e-06, + "loss": 0.2062, + "step": 132 + }, + { + "epoch": 0.8866666666666667, + "grad_norm": 0.7529279589653015, + "learning_rate": 2.1613635589349756e-06, + "loss": 0.0839, + "step": 133 + }, + { + "epoch": 0.8933333333333333, + "grad_norm": 4.446479320526123, + "learning_rate": 1.9309388911139426e-06, + "loss": 1.0925, + "step": 134 + }, + { + "epoch": 0.9, + "grad_norm": 5.211433410644531, + "learning_rate": 1.713006526846439e-06, + "loss": 0.6347, + "step": 135 + }, + { + "epoch": 0.9066666666666666, + "grad_norm": 2.469029426574707, + "learning_rate": 1.5076844803522922e-06, + "loss": 0.4994, + "step": 136 + }, + { + "epoch": 0.9133333333333333, + "grad_norm": 5.426701545715332, + "learning_rate": 1.31508393714177e-06, + "loss": 0.4131, + "step": 137 + }, + { + "epoch": 0.92, + "grad_norm": 2.973341941833496, + "learning_rate": 1.1353091938067023e-06, + "loss": 0.3887, + "step": 138 + }, + { + "epoch": 0.9266666666666666, + "grad_norm": 2.101803779602051, + "learning_rate": 9.684576015420278e-07, + "loss": 0.2598, + "step": 139 + }, + { + "epoch": 0.9333333333333333, + "grad_norm": 0.5676771998405457, + "learning_rate": 8.146195134284052e-07, + "loss": 0.0698, + "step": 140 + }, + { + "epoch": 0.94, + "grad_norm": 0.5499122142791748, + "learning_rate": 6.738782355044049e-07, + "loss": 0.0823, + "step": 141 + }, + { + "epoch": 0.9466666666666667, + "grad_norm": 0.36405226588249207, + "learning_rate": 5.463099816548579e-07, + "loss": 0.0406, + "step": 142 + }, + { + "epoch": 0.9533333333333334, + "grad_norm": 1.898514747619629, + "learning_rate": 4.319838323396691e-07, + "loss": 0.4404, + "step": 143 + }, + { + "epoch": 0.96, + "grad_norm": 1.7996866703033447, + "learning_rate": 3.309616971855195e-07, + "loss": 0.2531, + "step": 144 + }, + { + "epoch": 0.9666666666666667, + "grad_norm": 2.6673455238342285, + "learning_rate": 2.4329828146074095e-07, + "loss": 0.7721, + "step": 145 + }, + { + "epoch": 0.9733333333333334, + "grad_norm": 7.725250244140625, + "learning_rate": 1.6904105645142444e-07, + "loss": 1.7164, + "step": 146 + }, + { + "epoch": 0.98, + "grad_norm": 2.411264419555664, + "learning_rate": 1.0823023375489127e-07, + "loss": 0.4909, + "step": 147 + }, + { + "epoch": 0.9866666666666667, + "grad_norm": 1.3376047611236572, + "learning_rate": 6.089874350439506e-08, + "loss": 0.1719, + "step": 148 + }, + { + "epoch": 0.9933333333333333, + "grad_norm": 1.7119640111923218, + "learning_rate": 2.7072216536885853e-08, + "loss": 0.1636, + "step": 149 + }, + { + "epoch": 1.0, + "grad_norm": 3.7593977451324463, + "learning_rate": 6.768970513457151e-09, + "loss": 0.5289, + "step": 150 + } + ], + "logging_steps": 1, + "max_steps": 150, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 49, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.0335481997190758e+17, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/ca_v3_run_20251215/checkpoint-150/training_args.bin b/ca_v3_run_20251215/checkpoint-150/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..88b4ad4e5f06f68e064e676272032d9cd8af9147 --- /dev/null +++ b/ca_v3_run_20251215/checkpoint-150/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63c0e75d45a7050daffe2e65badc27748032d13318e1d0af24fcae2c215424f0 +size 5777 diff --git a/ca_v3_run_20251215/final_model/README.md b/ca_v3_run_20251215/final_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d283785603342a12561751304db4c120bbd293fc --- /dev/null +++ b/ca_v3_run_20251215/final_model/README.md @@ -0,0 +1,207 @@ +--- +base_model: Qwen/Qwen2.5-Coder-14B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-Coder-14B-Instruct +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/ca_v3_run_20251215/final_model/adapter_config.json b/ca_v3_run_20251215/final_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bd100e2ef4775697a7cb0c0abaae99f244053d68 --- /dev/null +++ b/ca_v3_run_20251215/final_model/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-Coder-14B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "k_proj", + "q_proj", + "v_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/ca_v3_run_20251215/final_model/adapter_model.safetensors b/ca_v3_run_20251215/final_model/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ba2d53f00df555c320d5fa8bfd92a9508b7313e3 --- /dev/null +++ b/ca_v3_run_20251215/final_model/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c6966fb106cba724091bb384a14877dd0d857a05bd7354cef7953e56fdecb77 +size 201378736 diff --git a/ca_v3_run_20251215/final_model/optimizer.pt b/ca_v3_run_20251215/final_model/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..65b00be983c44726678d780022d5ad5e15976150 --- /dev/null +++ b/ca_v3_run_20251215/final_model/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:392183f58bb6c3c87cc8b1040b1bd5940b7f2522896624f3668d965de80ee89b +size 402982627 diff --git a/ca_v3_run_20251215/final_model/rng_state.pth b/ca_v3_run_20251215/final_model/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..b6e5ebccc9e6a700fd9d1abbdf919ba33a76cbed --- /dev/null +++ b/ca_v3_run_20251215/final_model/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a47d1ead692de2bce57a9ae7ba59450da50376926e3a3a2a0ebc83b70d9ba03 +size 14645 diff --git a/ca_v3_run_20251215/final_model/scheduler.pt b/ca_v3_run_20251215/final_model/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..3b259f6f75e583e4febfc080b5b8331e00665a2f --- /dev/null +++ b/ca_v3_run_20251215/final_model/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30781be5b5c4396cc620b6281ac1b634d4288d62bfab3224155044b29d42cdb1 +size 1465 diff --git a/ca_v3_run_20251215/final_model/trainer_state.json b/ca_v3_run_20251215/final_model/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..008b27243cee0eb2e38fd9d96131f1a346233be4 --- /dev/null +++ b/ca_v3_run_20251215/final_model/trainer_state.json @@ -0,0 +1,1092 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 100, + "global_step": 150, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.006666666666666667, + "grad_norm": 2.267627239227295, + "learning_rate": 0.0, + "loss": 0.8939, + "step": 1 + }, + { + "epoch": 0.013333333333333334, + "grad_norm": 1.5160397291183472, + "learning_rate": 3.3333333333333333e-06, + "loss": 0.6392, + "step": 2 + }, + { + "epoch": 0.02, + "grad_norm": 1.4698739051818848, + "learning_rate": 6.666666666666667e-06, + "loss": 0.8476, + "step": 3 + }, + { + "epoch": 0.02666666666666667, + "grad_norm": 4.279767036437988, + "learning_rate": 1e-05, + "loss": 1.2988, + "step": 4 + }, + { + "epoch": 0.03333333333333333, + "grad_norm": 3.669569730758667, + "learning_rate": 1.3333333333333333e-05, + "loss": 0.95, + "step": 5 + }, + { + "epoch": 0.04, + "grad_norm": 1.2172225713729858, + "learning_rate": 1.6666666666666667e-05, + "loss": 0.3448, + "step": 6 + }, + { + "epoch": 0.04666666666666667, + "grad_norm": 1.891830325126648, + "learning_rate": 2e-05, + "loss": 0.7179, + "step": 7 + }, + { + "epoch": 0.05333333333333334, + "grad_norm": 2.352062940597534, + "learning_rate": 2.3333333333333336e-05, + "loss": 0.5782, + "step": 8 + }, + { + "epoch": 0.06, + "grad_norm": 1.2546859979629517, + "learning_rate": 2.6666666666666667e-05, + "loss": 0.686, + "step": 9 + }, + { + "epoch": 0.06666666666666667, + "grad_norm": 1.1951119899749756, + "learning_rate": 3e-05, + "loss": 0.2151, + "step": 10 + }, + { + "epoch": 0.07333333333333333, + "grad_norm": 1.5607569217681885, + "learning_rate": 3.3333333333333335e-05, + "loss": 0.3339, + "step": 11 + }, + { + "epoch": 0.08, + "grad_norm": 1.513392448425293, + "learning_rate": 3.6666666666666666e-05, + "loss": 0.7432, + "step": 12 + }, + { + "epoch": 0.08666666666666667, + "grad_norm": 1.3942737579345703, + "learning_rate": 4e-05, + "loss": 0.5372, + "step": 13 + }, + { + "epoch": 0.09333333333333334, + "grad_norm": 2.1662518978118896, + "learning_rate": 4.3333333333333334e-05, + "loss": 0.5742, + "step": 14 + }, + { + "epoch": 0.1, + "grad_norm": 2.074810028076172, + "learning_rate": 4.666666666666667e-05, + "loss": 0.6484, + "step": 15 + }, + { + "epoch": 0.10666666666666667, + "grad_norm": 2.5651564598083496, + "learning_rate": 5e-05, + "loss": 0.678, + "step": 16 + }, + { + "epoch": 0.11333333333333333, + "grad_norm": 3.3820931911468506, + "learning_rate": 4.9993231029486544e-05, + "loss": 0.6619, + "step": 17 + }, + { + "epoch": 0.12, + "grad_norm": 1.7587597370147705, + "learning_rate": 4.997292778346312e-05, + "loss": 0.665, + "step": 18 + }, + { + "epoch": 0.12666666666666668, + "grad_norm": 2.234240770339966, + "learning_rate": 4.993910125649561e-05, + "loss": 0.4308, + "step": 19 + }, + { + "epoch": 0.13333333333333333, + "grad_norm": 1.0802106857299805, + "learning_rate": 4.989176976624511e-05, + "loss": 0.6038, + "step": 20 + }, + { + "epoch": 0.14, + "grad_norm": 2.3648719787597656, + "learning_rate": 4.983095894354858e-05, + "loss": 0.5436, + "step": 21 + }, + { + "epoch": 0.14666666666666667, + "grad_norm": 1.2552648782730103, + "learning_rate": 4.975670171853926e-05, + "loss": 0.6223, + "step": 22 + }, + { + "epoch": 0.15333333333333332, + "grad_norm": 2.2331910133361816, + "learning_rate": 4.966903830281449e-05, + "loss": 0.6468, + "step": 23 + }, + { + "epoch": 0.16, + "grad_norm": 1.7010774612426758, + "learning_rate": 4.9568016167660334e-05, + "loss": 0.6019, + "step": 24 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 1.6806925535202026, + "learning_rate": 4.9453690018345144e-05, + "loss": 0.3979, + "step": 25 + }, + { + "epoch": 0.17333333333333334, + "grad_norm": 1.4811266660690308, + "learning_rate": 4.9326121764495596e-05, + "loss": 0.8038, + "step": 26 + }, + { + "epoch": 0.18, + "grad_norm": 1.5548367500305176, + "learning_rate": 4.9185380486571595e-05, + "loss": 0.5653, + "step": 27 + }, + { + "epoch": 0.18666666666666668, + "grad_norm": 1.4900885820388794, + "learning_rate": 4.9031542398457974e-05, + "loss": 0.7534, + "step": 28 + }, + { + "epoch": 0.19333333333333333, + "grad_norm": 1.2963446378707886, + "learning_rate": 4.88646908061933e-05, + "loss": 0.6946, + "step": 29 + }, + { + "epoch": 0.2, + "grad_norm": 1.885505199432373, + "learning_rate": 4.868491606285823e-05, + "loss": 0.8456, + "step": 30 + }, + { + "epoch": 0.20666666666666667, + "grad_norm": 1.0747458934783936, + "learning_rate": 4.849231551964771e-05, + "loss": 0.336, + "step": 31 + }, + { + "epoch": 0.21333333333333335, + "grad_norm": 2.0049006938934326, + "learning_rate": 4.828699347315356e-05, + "loss": 0.6669, + "step": 32 + }, + { + "epoch": 0.22, + "grad_norm": 1.020938515663147, + "learning_rate": 4.806906110888606e-05, + "loss": 0.6849, + "step": 33 + }, + { + "epoch": 0.22666666666666666, + "grad_norm": 1.3365986347198486, + "learning_rate": 4.783863644106502e-05, + "loss": 0.4813, + "step": 34 + }, + { + "epoch": 0.23333333333333334, + "grad_norm": 2.2962701320648193, + "learning_rate": 4.759584424871302e-05, + "loss": 1.156, + "step": 35 + }, + { + "epoch": 0.24, + "grad_norm": 1.1328681707382202, + "learning_rate": 4.734081600808531e-05, + "loss": 0.4652, + "step": 36 + }, + { + "epoch": 0.24666666666666667, + "grad_norm": 1.3263046741485596, + "learning_rate": 4.707368982147318e-05, + "loss": 0.4638, + "step": 37 + }, + { + "epoch": 0.25333333333333335, + "grad_norm": 1.6372419595718384, + "learning_rate": 4.679461034241906e-05, + "loss": 0.6662, + "step": 38 + }, + { + "epoch": 0.26, + "grad_norm": 1.3750609159469604, + "learning_rate": 4.650372869738414e-05, + "loss": 0.3029, + "step": 39 + }, + { + "epoch": 0.26666666666666666, + "grad_norm": 1.442150354385376, + "learning_rate": 4.620120240391065e-05, + "loss": 0.7255, + "step": 40 + }, + { + "epoch": 0.2733333333333333, + "grad_norm": 1.3133295774459839, + "learning_rate": 4.588719528532342e-05, + "loss": 0.6364, + "step": 41 + }, + { + "epoch": 0.28, + "grad_norm": 1.2394391298294067, + "learning_rate": 4.556187738201656e-05, + "loss": 0.6514, + "step": 42 + }, + { + "epoch": 0.2866666666666667, + "grad_norm": 2.0945804119110107, + "learning_rate": 4.522542485937369e-05, + "loss": 0.4957, + "step": 43 + }, + { + "epoch": 0.29333333333333333, + "grad_norm": 2.036482810974121, + "learning_rate": 4.48780199123712e-05, + "loss": 0.5471, + "step": 44 + }, + { + "epoch": 0.3, + "grad_norm": 1.7951217889785767, + "learning_rate": 4.4519850666916484e-05, + "loss": 0.4969, + "step": 45 + }, + { + "epoch": 0.30666666666666664, + "grad_norm": 1.3251290321350098, + "learning_rate": 4.415111107797445e-05, + "loss": 0.5828, + "step": 46 + }, + { + "epoch": 0.31333333333333335, + "grad_norm": 1.5717222690582275, + "learning_rate": 4.377200082453749e-05, + "loss": 0.378, + "step": 47 + }, + { + "epoch": 0.32, + "grad_norm": 1.4296456575393677, + "learning_rate": 4.3382725201495723e-05, + "loss": 0.3957, + "step": 48 + }, + { + "epoch": 0.32666666666666666, + "grad_norm": 1.3804540634155273, + "learning_rate": 4.2983495008466276e-05, + "loss": 0.4861, + "step": 49 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 3.184690237045288, + "learning_rate": 4.257452643564155e-05, + "loss": 0.8223, + "step": 50 + }, + { + "epoch": 0.34, + "grad_norm": 1.361177682876587, + "learning_rate": 4.215604094671835e-05, + "loss": 0.7269, + "step": 51 + }, + { + "epoch": 0.3466666666666667, + "grad_norm": 1.2764381170272827, + "learning_rate": 4.172826515897146e-05, + "loss": 0.3787, + "step": 52 + }, + { + "epoch": 0.35333333333333333, + "grad_norm": 1.4495773315429688, + "learning_rate": 4.129143072053638e-05, + "loss": 0.2966, + "step": 53 + }, + { + "epoch": 0.36, + "grad_norm": 2.055936813354492, + "learning_rate": 4.0845774184967754e-05, + "loss": 0.4485, + "step": 54 + }, + { + "epoch": 0.36666666666666664, + "grad_norm": 1.8481249809265137, + "learning_rate": 4.039153688314145e-05, + "loss": 0.4182, + "step": 55 + }, + { + "epoch": 0.37333333333333335, + "grad_norm": 1.6037787199020386, + "learning_rate": 3.9928964792569655e-05, + "loss": 0.6953, + "step": 56 + }, + { + "epoch": 0.38, + "grad_norm": 1.5313551425933838, + "learning_rate": 3.945830840419966e-05, + "loss": 0.3223, + "step": 57 + }, + { + "epoch": 0.38666666666666666, + "grad_norm": 1.3661636114120483, + "learning_rate": 3.897982258676867e-05, + "loss": 0.4186, + "step": 58 + }, + { + "epoch": 0.3933333333333333, + "grad_norm": 2.39021372795105, + "learning_rate": 3.8493766448787825e-05, + "loss": 0.7415, + "step": 59 + }, + { + "epoch": 0.4, + "grad_norm": 2.5035974979400635, + "learning_rate": 3.8000403198230387e-05, + "loss": 0.9131, + "step": 60 + }, + { + "epoch": 0.4066666666666667, + "grad_norm": 1.3086011409759521, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.397, + "step": 61 + }, + { + "epoch": 0.41333333333333333, + "grad_norm": 1.9651494026184082, + "learning_rate": 3.699282783125616e-05, + "loss": 0.9127, + "step": 62 + }, + { + "epoch": 0.42, + "grad_norm": 2.166289806365967, + "learning_rate": 3.6479161334675296e-05, + "loss": 0.6211, + "step": 63 + }, + { + "epoch": 0.4266666666666667, + "grad_norm": 1.1649678945541382, + "learning_rate": 3.5959278669726935e-05, + "loss": 0.3828, + "step": 64 + }, + { + "epoch": 0.43333333333333335, + "grad_norm": 0.7926881909370422, + "learning_rate": 3.543346136204545e-05, + "loss": 0.1886, + "step": 65 + }, + { + "epoch": 0.44, + "grad_norm": 1.2959918975830078, + "learning_rate": 3.490199415097892e-05, + "loss": 0.3381, + "step": 66 + }, + { + "epoch": 0.44666666666666666, + "grad_norm": 1.3724843263626099, + "learning_rate": 3.436516483539781e-05, + "loss": 0.5337, + "step": 67 + }, + { + "epoch": 0.4533333333333333, + "grad_norm": 1.743539810180664, + "learning_rate": 3.382326411784672e-05, + "loss": 0.3935, + "step": 68 + }, + { + "epoch": 0.46, + "grad_norm": 2.310335397720337, + "learning_rate": 3.327658544712395e-05, + "loss": 0.3857, + "step": 69 + }, + { + "epoch": 0.4666666666666667, + "grad_norm": 2.399306058883667, + "learning_rate": 3.272542485937369e-05, + "loss": 1.0896, + "step": 70 + }, + { + "epoch": 0.47333333333333333, + "grad_norm": 1.2522423267364502, + "learning_rate": 3.217008081777726e-05, + "loss": 0.1551, + "step": 71 + }, + { + "epoch": 0.48, + "grad_norm": 2.307708740234375, + "learning_rate": 3.161085405093006e-05, + "loss": 0.3004, + "step": 72 + }, + { + "epoch": 0.4866666666666667, + "grad_norm": 1.527667760848999, + "learning_rate": 3.104804738999169e-05, + "loss": 0.3367, + "step": 73 + }, + { + "epoch": 0.49333333333333335, + "grad_norm": 1.2848842144012451, + "learning_rate": 3.048196560469758e-05, + "loss": 0.264, + "step": 74 + }, + { + "epoch": 0.5, + "grad_norm": 1.7577385902404785, + "learning_rate": 2.9912915238320754e-05, + "loss": 0.3975, + "step": 75 + }, + { + "epoch": 0.5066666666666667, + "grad_norm": 1.2079553604125977, + "learning_rate": 2.9341204441673266e-05, + "loss": 0.1619, + "step": 76 + }, + { + "epoch": 0.5133333333333333, + "grad_norm": 1.2687911987304688, + "learning_rate": 2.876714280623708e-05, + "loss": 0.1523, + "step": 77 + }, + { + "epoch": 0.52, + "grad_norm": 1.3904497623443604, + "learning_rate": 2.8191041196514873e-05, + "loss": 0.6355, + "step": 78 + }, + { + "epoch": 0.5266666666666666, + "grad_norm": 1.2210139036178589, + "learning_rate": 2.761321158169134e-05, + "loss": 0.1803, + "step": 79 + }, + { + "epoch": 0.5333333333333333, + "grad_norm": 2.2414464950561523, + "learning_rate": 2.7033966866696457e-05, + "loss": 0.2332, + "step": 80 + }, + { + "epoch": 0.54, + "grad_norm": 1.8386731147766113, + "learning_rate": 2.6453620722761896e-05, + "loss": 0.6357, + "step": 81 + }, + { + "epoch": 0.5466666666666666, + "grad_norm": 2.6382551193237305, + "learning_rate": 2.587248741756253e-05, + "loss": 0.3541, + "step": 82 + }, + { + "epoch": 0.5533333333333333, + "grad_norm": 3.260303020477295, + "learning_rate": 2.5290881645034932e-05, + "loss": 0.7056, + "step": 83 + }, + { + "epoch": 0.56, + "grad_norm": 4.604124546051025, + "learning_rate": 2.470911835496508e-05, + "loss": 0.8796, + "step": 84 + }, + { + "epoch": 0.5666666666666667, + "grad_norm": 3.7330353260040283, + "learning_rate": 2.4127512582437485e-05, + "loss": 0.2487, + "step": 85 + }, + { + "epoch": 0.5733333333333334, + "grad_norm": 1.9771713018417358, + "learning_rate": 2.3546379277238107e-05, + "loss": 0.3601, + "step": 86 + }, + { + "epoch": 0.58, + "grad_norm": 6.817903995513916, + "learning_rate": 2.2966033133303545e-05, + "loss": 1.3453, + "step": 87 + }, + { + "epoch": 0.5866666666666667, + "grad_norm": 2.8639299869537354, + "learning_rate": 2.238678841830867e-05, + "loss": 0.3974, + "step": 88 + }, + { + "epoch": 0.5933333333333334, + "grad_norm": 2.082796573638916, + "learning_rate": 2.1808958803485136e-05, + "loss": 0.4019, + "step": 89 + }, + { + "epoch": 0.6, + "grad_norm": 0.427212655544281, + "learning_rate": 2.1232857193762924e-05, + "loss": 0.0465, + "step": 90 + }, + { + "epoch": 0.6066666666666667, + "grad_norm": 1.7197939157485962, + "learning_rate": 2.0658795558326743e-05, + "loss": 0.2682, + "step": 91 + }, + { + "epoch": 0.6133333333333333, + "grad_norm": 0.09823339432477951, + "learning_rate": 2.0087084761679245e-05, + "loss": 0.0111, + "step": 92 + }, + { + "epoch": 0.62, + "grad_norm": 1.9435588121414185, + "learning_rate": 1.9518034395302414e-05, + "loss": 0.124, + "step": 93 + }, + { + "epoch": 0.6266666666666667, + "grad_norm": 2.9964282512664795, + "learning_rate": 1.895195261000831e-05, + "loss": 0.3849, + "step": 94 + }, + { + "epoch": 0.6333333333333333, + "grad_norm": 1.595879077911377, + "learning_rate": 1.838914594906995e-05, + "loss": 0.2477, + "step": 95 + }, + { + "epoch": 0.64, + "grad_norm": 5.441842555999756, + "learning_rate": 1.7829919182222752e-05, + "loss": 0.5461, + "step": 96 + }, + { + "epoch": 0.6466666666666666, + "grad_norm": 2.448079824447632, + "learning_rate": 1.7274575140626318e-05, + "loss": 0.5091, + "step": 97 + }, + { + "epoch": 0.6533333333333333, + "grad_norm": 3.598569631576538, + "learning_rate": 1.672341455287605e-05, + "loss": 0.4161, + "step": 98 + }, + { + "epoch": 0.66, + "grad_norm": 5.45053243637085, + "learning_rate": 1.617673588215328e-05, + "loss": 0.8538, + "step": 99 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 1.3044579029083252, + "learning_rate": 1.56348351646022e-05, + "loss": 0.171, + "step": 100 + }, + { + "epoch": 0.6666666666666666, + "eval_loss": 0.4516909718513489, + "eval_runtime": 66.1438, + "eval_samples_per_second": 2.253, + "eval_steps_per_second": 1.134, + "step": 100 + }, + { + "epoch": 0.6733333333333333, + "grad_norm": 8.098329544067383, + "learning_rate": 1.509800584902108e-05, + "loss": 1.1298, + "step": 101 + }, + { + "epoch": 0.68, + "grad_norm": 1.519363284111023, + "learning_rate": 1.4566538637954554e-05, + "loss": 0.1545, + "step": 102 + }, + { + "epoch": 0.6866666666666666, + "grad_norm": 2.4320404529571533, + "learning_rate": 1.4040721330273062e-05, + "loss": 0.1781, + "step": 103 + }, + { + "epoch": 0.6933333333333334, + "grad_norm": 4.600680828094482, + "learning_rate": 1.3520838665324703e-05, + "loss": 0.4178, + "step": 104 + }, + { + "epoch": 0.7, + "grad_norm": 1.4558080434799194, + "learning_rate": 1.3007172168743854e-05, + "loss": 0.1014, + "step": 105 + }, + { + "epoch": 0.7066666666666667, + "grad_norm": 1.6424729824066162, + "learning_rate": 1.2500000000000006e-05, + "loss": 0.1476, + "step": 106 + }, + { + "epoch": 0.7133333333333334, + "grad_norm": 3.927393913269043, + "learning_rate": 1.1999596801769616e-05, + "loss": 1.0977, + "step": 107 + }, + { + "epoch": 0.72, + "grad_norm": 0.8934208154678345, + "learning_rate": 1.1506233551212186e-05, + "loss": 0.1165, + "step": 108 + }, + { + "epoch": 0.7266666666666667, + "grad_norm": 2.651909589767456, + "learning_rate": 1.1020177413231334e-05, + "loss": 0.5939, + "step": 109 + }, + { + "epoch": 0.7333333333333333, + "grad_norm": 1.0333629846572876, + "learning_rate": 1.0541691595800337e-05, + "loss": 0.152, + "step": 110 + }, + { + "epoch": 0.74, + "grad_norm": 2.1822710037231445, + "learning_rate": 1.0071035207430352e-05, + "loss": 0.4252, + "step": 111 + }, + { + "epoch": 0.7466666666666667, + "grad_norm": 1.6616872549057007, + "learning_rate": 9.608463116858542e-06, + "loss": 0.1772, + "step": 112 + }, + { + "epoch": 0.7533333333333333, + "grad_norm": 4.919057846069336, + "learning_rate": 9.154225815032242e-06, + "loss": 0.4062, + "step": 113 + }, + { + "epoch": 0.76, + "grad_norm": 4.426158905029297, + "learning_rate": 8.708569279463622e-06, + "loss": 0.6537, + "step": 114 + }, + { + "epoch": 0.7666666666666667, + "grad_norm": 2.4902124404907227, + "learning_rate": 8.271734841028553e-06, + "loss": 0.1949, + "step": 115 + }, + { + "epoch": 0.7733333333333333, + "grad_norm": 0.6176963448524475, + "learning_rate": 7.843959053281663e-06, + "loss": 0.0729, + "step": 116 + }, + { + "epoch": 0.78, + "grad_norm": 2.257253408432007, + "learning_rate": 7.4254735643584564e-06, + "loss": 0.2163, + "step": 117 + }, + { + "epoch": 0.7866666666666666, + "grad_norm": 1.069271445274353, + "learning_rate": 7.016504991533726e-06, + "loss": 0.156, + "step": 118 + }, + { + "epoch": 0.7933333333333333, + "grad_norm": 0.6102529764175415, + "learning_rate": 6.617274798504286e-06, + "loss": 0.0632, + "step": 119 + }, + { + "epoch": 0.8, + "grad_norm": 4.137846946716309, + "learning_rate": 6.22799917546252e-06, + "loss": 0.5415, + "step": 120 + }, + { + "epoch": 0.8066666666666666, + "grad_norm": 2.0759520530700684, + "learning_rate": 5.848888922025553e-06, + "loss": 0.3255, + "step": 121 + }, + { + "epoch": 0.8133333333333334, + "grad_norm": 4.551874160766602, + "learning_rate": 5.48014933308352e-06, + "loss": 0.3398, + "step": 122 + }, + { + "epoch": 0.82, + "grad_norm": 1.0534967184066772, + "learning_rate": 5.121980087628803e-06, + "loss": 0.1326, + "step": 123 + }, + { + "epoch": 0.8266666666666667, + "grad_norm": 2.607910394668579, + "learning_rate": 4.7745751406263165e-06, + "loss": 0.8089, + "step": 124 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 2.044987201690674, + "learning_rate": 4.438122617983443e-06, + "loss": 0.238, + "step": 125 + }, + { + "epoch": 0.84, + "grad_norm": 1.440517544746399, + "learning_rate": 4.112804714676594e-06, + "loss": 0.1659, + "step": 126 + }, + { + "epoch": 0.8466666666666667, + "grad_norm": 3.426896333694458, + "learning_rate": 3.798797596089351e-06, + "loss": 0.4279, + "step": 127 + }, + { + "epoch": 0.8533333333333334, + "grad_norm": 2.2087795734405518, + "learning_rate": 3.4962713026158694e-06, + "loss": 1.0499, + "step": 128 + }, + { + "epoch": 0.86, + "grad_norm": 0.9487075805664062, + "learning_rate": 3.205389657580943e-06, + "loss": 0.1213, + "step": 129 + }, + { + "epoch": 0.8666666666666667, + "grad_norm": 2.0362558364868164, + "learning_rate": 2.9263101785268254e-06, + "loss": 0.6995, + "step": 130 + }, + { + "epoch": 0.8733333333333333, + "grad_norm": 4.185983180999756, + "learning_rate": 2.659183991914696e-06, + "loss": 0.6105, + "step": 131 + }, + { + "epoch": 0.88, + "grad_norm": 2.219158411026001, + "learning_rate": 2.4041557512869878e-06, + "loss": 0.2062, + "step": 132 + }, + { + "epoch": 0.8866666666666667, + "grad_norm": 0.7529279589653015, + "learning_rate": 2.1613635589349756e-06, + "loss": 0.0839, + "step": 133 + }, + { + "epoch": 0.8933333333333333, + "grad_norm": 4.446479320526123, + "learning_rate": 1.9309388911139426e-06, + "loss": 1.0925, + "step": 134 + }, + { + "epoch": 0.9, + "grad_norm": 5.211433410644531, + "learning_rate": 1.713006526846439e-06, + "loss": 0.6347, + "step": 135 + }, + { + "epoch": 0.9066666666666666, + "grad_norm": 2.469029426574707, + "learning_rate": 1.5076844803522922e-06, + "loss": 0.4994, + "step": 136 + }, + { + "epoch": 0.9133333333333333, + "grad_norm": 5.426701545715332, + "learning_rate": 1.31508393714177e-06, + "loss": 0.4131, + "step": 137 + }, + { + "epoch": 0.92, + "grad_norm": 2.973341941833496, + "learning_rate": 1.1353091938067023e-06, + "loss": 0.3887, + "step": 138 + }, + { + "epoch": 0.9266666666666666, + "grad_norm": 2.101803779602051, + "learning_rate": 9.684576015420278e-07, + "loss": 0.2598, + "step": 139 + }, + { + "epoch": 0.9333333333333333, + "grad_norm": 0.5676771998405457, + "learning_rate": 8.146195134284052e-07, + "loss": 0.0698, + "step": 140 + }, + { + "epoch": 0.94, + "grad_norm": 0.5499122142791748, + "learning_rate": 6.738782355044049e-07, + "loss": 0.0823, + "step": 141 + }, + { + "epoch": 0.9466666666666667, + "grad_norm": 0.36405226588249207, + "learning_rate": 5.463099816548579e-07, + "loss": 0.0406, + "step": 142 + }, + { + "epoch": 0.9533333333333334, + "grad_norm": 1.898514747619629, + "learning_rate": 4.319838323396691e-07, + "loss": 0.4404, + "step": 143 + }, + { + "epoch": 0.96, + "grad_norm": 1.7996866703033447, + "learning_rate": 3.309616971855195e-07, + "loss": 0.2531, + "step": 144 + }, + { + "epoch": 0.9666666666666667, + "grad_norm": 2.6673455238342285, + "learning_rate": 2.4329828146074095e-07, + "loss": 0.7721, + "step": 145 + }, + { + "epoch": 0.9733333333333334, + "grad_norm": 7.725250244140625, + "learning_rate": 1.6904105645142444e-07, + "loss": 1.7164, + "step": 146 + }, + { + "epoch": 0.98, + "grad_norm": 2.411264419555664, + "learning_rate": 1.0823023375489127e-07, + "loss": 0.4909, + "step": 147 + }, + { + "epoch": 0.9866666666666667, + "grad_norm": 1.3376047611236572, + "learning_rate": 6.089874350439506e-08, + "loss": 0.1719, + "step": 148 + }, + { + "epoch": 0.9933333333333333, + "grad_norm": 1.7119640111923218, + "learning_rate": 2.7072216536885853e-08, + "loss": 0.1636, + "step": 149 + }, + { + "epoch": 1.0, + "grad_norm": 3.7593977451324463, + "learning_rate": 6.768970513457151e-09, + "loss": 0.5289, + "step": 150 + } + ], + "logging_steps": 1, + "max_steps": 150, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 49, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.0335481997190758e+17, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/ca_v3_run_20251215/final_model/training_args.bin b/ca_v3_run_20251215/final_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..88b4ad4e5f06f68e064e676272032d9cd8af9147 --- /dev/null +++ b/ca_v3_run_20251215/final_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63c0e75d45a7050daffe2e65badc27748032d13318e1d0af24fcae2c215424f0 +size 5777 diff --git a/exp1_original_data_rerun_20251215/checkpoint-25/README.md b/exp1_original_data_rerun_20251215/checkpoint-25/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d283785603342a12561751304db4c120bbd293fc --- /dev/null +++ b/exp1_original_data_rerun_20251215/checkpoint-25/README.md @@ -0,0 +1,207 @@ +--- +base_model: Qwen/Qwen2.5-Coder-14B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-Coder-14B-Instruct +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/exp1_original_data_rerun_20251215/checkpoint-25/adapter_config.json b/exp1_original_data_rerun_20251215/checkpoint-25/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6e1b97d58cc89e1ba143a0341bc76f020124b949 --- /dev/null +++ b/exp1_original_data_rerun_20251215/checkpoint-25/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-Coder-14B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "q_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/exp1_original_data_rerun_20251215/checkpoint-25/adapter_model.safetensors b/exp1_original_data_rerun_20251215/checkpoint-25/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0387276dec93a50675a02817f13478f51f9b5c66 --- /dev/null +++ b/exp1_original_data_rerun_20251215/checkpoint-25/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3e2c9cdaa5dc25faa9d1735f8da9dc9ec68820f238391edf058b46bbdbf2a58 +size 50357488 diff --git a/exp1_original_data_rerun_20251215/checkpoint-25/optimizer.pt b/exp1_original_data_rerun_20251215/checkpoint-25/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..041c10e778bcdd5ac9b92220db147b4244196a27 --- /dev/null +++ b/exp1_original_data_rerun_20251215/checkpoint-25/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94857594d6b10edc10a2b1ffe1b431f107b4df4f16f96fe2f09baf7829127bfc +size 100828235 diff --git a/exp1_original_data_rerun_20251215/checkpoint-25/rng_state.pth b/exp1_original_data_rerun_20251215/checkpoint-25/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..96b89c979c97a96a49759cb62c7f5d70b347c754 --- /dev/null +++ b/exp1_original_data_rerun_20251215/checkpoint-25/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59d835afb5a277eef9cb89e6f2ae639ae653e14a7cb8a24b9466a653d407505e +size 14645 diff --git a/exp1_original_data_rerun_20251215/checkpoint-25/scheduler.pt b/exp1_original_data_rerun_20251215/checkpoint-25/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..1827864dd06296ae43b1e6a492ceadb698ff377f --- /dev/null +++ b/exp1_original_data_rerun_20251215/checkpoint-25/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90c8359e7027a1d5dd6df276c01589220164c80a154e5feeb97d92110a26709f +size 1465 diff --git a/exp1_original_data_rerun_20251215/checkpoint-25/trainer_state.json b/exp1_original_data_rerun_20251215/checkpoint-25/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c75a8887772161fcaf05e71462515a12934d4d7c --- /dev/null +++ b/exp1_original_data_rerun_20251215/checkpoint-25/trainer_state.json @@ -0,0 +1,48 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.3333333333333333, + "eval_steps": 100, + "global_step": 25, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.13333333333333333, + "grad_norm": 0.7970004081726074, + "learning_rate": 0.00019989008914857116, + "loss": 0.5896, + "step": 10 + }, + { + "epoch": 0.26666666666666666, + "grad_norm": 0.5293543934822083, + "learning_rate": 0.00018699063724087904, + "loss": 0.4794, + "step": 20 + } + ], + "logging_steps": 10, + "max_steps": 75, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 25, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.44163505668096e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/exp1_original_data_rerun_20251215/checkpoint-25/training_args.bin b/exp1_original_data_rerun_20251215/checkpoint-25/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..1a708cd653bb0fd870e02a3cd2b92e79b2530fde --- /dev/null +++ b/exp1_original_data_rerun_20251215/checkpoint-25/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff11aa937808576e06d64e88c71a90de51b18fbb50367206f5df16539755319c +size 5777 diff --git a/exp1_original_data_rerun_20251215/checkpoint-50/README.md b/exp1_original_data_rerun_20251215/checkpoint-50/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d283785603342a12561751304db4c120bbd293fc --- /dev/null +++ b/exp1_original_data_rerun_20251215/checkpoint-50/README.md @@ -0,0 +1,207 @@ +--- +base_model: Qwen/Qwen2.5-Coder-14B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-Coder-14B-Instruct +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/exp1_original_data_rerun_20251215/checkpoint-50/adapter_config.json b/exp1_original_data_rerun_20251215/checkpoint-50/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6e1b97d58cc89e1ba143a0341bc76f020124b949 --- /dev/null +++ b/exp1_original_data_rerun_20251215/checkpoint-50/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-Coder-14B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "q_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/exp1_original_data_rerun_20251215/checkpoint-50/adapter_model.safetensors b/exp1_original_data_rerun_20251215/checkpoint-50/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bd6b352ec048be49d0abd6c9ebff45690c6201b9 --- /dev/null +++ b/exp1_original_data_rerun_20251215/checkpoint-50/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90a693a1856aa80d52479a0c100e3dd22b9f2cf3350bf0504b521f8666df4aaa +size 50357488 diff --git a/exp1_original_data_rerun_20251215/checkpoint-50/optimizer.pt b/exp1_original_data_rerun_20251215/checkpoint-50/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..2ff102073865deca240a6abaa95b5bfdac3724b1 --- /dev/null +++ b/exp1_original_data_rerun_20251215/checkpoint-50/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2bdbbd887d2d89e2ed760d3ba062c14a89979e75d195c409f833e128ce7672f +size 100828235 diff --git a/exp1_original_data_rerun_20251215/checkpoint-50/rng_state.pth b/exp1_original_data_rerun_20251215/checkpoint-50/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..96ffff0324a89bb899f85c1af2016ce1fa7b4ffe --- /dev/null +++ b/exp1_original_data_rerun_20251215/checkpoint-50/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:057c45ca36a011105360316599658ccb9adeb32ddf48eb0a08af1c7fab0219ff +size 14645 diff --git a/exp1_original_data_rerun_20251215/checkpoint-50/scheduler.pt b/exp1_original_data_rerun_20251215/checkpoint-50/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..da61222504cd7cf8ee20fde4cb68d64954a37cc6 --- /dev/null +++ b/exp1_original_data_rerun_20251215/checkpoint-50/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bddad8914229c8e7dbd757aa8426289cb9a42c09ea70df2b48d8beeefe5bbdf3 +size 1465 diff --git a/exp1_original_data_rerun_20251215/checkpoint-50/trainer_state.json b/exp1_original_data_rerun_20251215/checkpoint-50/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..5895d61e46ebe3d160d6e9128917945efa09bdb3 --- /dev/null +++ b/exp1_original_data_rerun_20251215/checkpoint-50/trainer_state.json @@ -0,0 +1,69 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.6666666666666666, + "eval_steps": 100, + "global_step": 50, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.13333333333333333, + "grad_norm": 0.7970004081726074, + "learning_rate": 0.00019989008914857116, + "loss": 0.5896, + "step": 10 + }, + { + "epoch": 0.26666666666666666, + "grad_norm": 0.5293543934822083, + "learning_rate": 0.00018699063724087904, + "loss": 0.4794, + "step": 20 + }, + { + "epoch": 0.4, + "grad_norm": 4.553859233856201, + "learning_rate": 0.00015531311541251995, + "loss": 0.5135, + "step": 30 + }, + { + "epoch": 0.5333333333333333, + "grad_norm": 1.5555204153060913, + "learning_rate": 0.00011169553219720828, + "loss": 0.4293, + "step": 40 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 1.5811039209365845, + "learning_rate": 6.555331497156672e-05, + "loss": 0.3595, + "step": 50 + } + ], + "logging_steps": 10, + "max_steps": 75, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 25, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 6.88327011336192e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/exp1_original_data_rerun_20251215/checkpoint-50/training_args.bin b/exp1_original_data_rerun_20251215/checkpoint-50/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..1a708cd653bb0fd870e02a3cd2b92e79b2530fde --- /dev/null +++ b/exp1_original_data_rerun_20251215/checkpoint-50/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff11aa937808576e06d64e88c71a90de51b18fbb50367206f5df16539755319c +size 5777 diff --git a/exp1_original_data_rerun_20251215/checkpoint-75/README.md b/exp1_original_data_rerun_20251215/checkpoint-75/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d283785603342a12561751304db4c120bbd293fc --- /dev/null +++ b/exp1_original_data_rerun_20251215/checkpoint-75/README.md @@ -0,0 +1,207 @@ +--- +base_model: Qwen/Qwen2.5-Coder-14B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-Coder-14B-Instruct +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/exp1_original_data_rerun_20251215/checkpoint-75/adapter_config.json b/exp1_original_data_rerun_20251215/checkpoint-75/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6e1b97d58cc89e1ba143a0341bc76f020124b949 --- /dev/null +++ b/exp1_original_data_rerun_20251215/checkpoint-75/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-Coder-14B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "q_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/exp1_original_data_rerun_20251215/checkpoint-75/adapter_model.safetensors b/exp1_original_data_rerun_20251215/checkpoint-75/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..31d4ba961ff4fefbcad9ede198aeaf117ac5b842 --- /dev/null +++ b/exp1_original_data_rerun_20251215/checkpoint-75/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e1198a351782806e601c4636f9b985371f37f957f7b10fc80464c9854482920 +size 50357488 diff --git a/exp1_original_data_rerun_20251215/checkpoint-75/optimizer.pt b/exp1_original_data_rerun_20251215/checkpoint-75/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..34391f325e30a47ea8b885887d4474102d2b9bd2 --- /dev/null +++ b/exp1_original_data_rerun_20251215/checkpoint-75/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a29f74315e2284a0f0cafef87a50967127cdf5f6a56952b38534d09486e349ce +size 100828235 diff --git a/exp1_original_data_rerun_20251215/checkpoint-75/rng_state.pth b/exp1_original_data_rerun_20251215/checkpoint-75/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..f1ccd07015ba9a7f143ecf3381804bec4063994a --- /dev/null +++ b/exp1_original_data_rerun_20251215/checkpoint-75/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0fba1844830d45a8a34c4724eaf74319c2c1d5554c04fdd04358ce4d50fd957 +size 14645 diff --git a/exp1_original_data_rerun_20251215/checkpoint-75/scheduler.pt b/exp1_original_data_rerun_20251215/checkpoint-75/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c480b248346ea1e19108e9c980f2968a083c212e --- /dev/null +++ b/exp1_original_data_rerun_20251215/checkpoint-75/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f05c28e789afe22134e5b011ace0c3a78d4d599d812231bc42d48ca46cfc515b +size 1465 diff --git a/exp1_original_data_rerun_20251215/checkpoint-75/trainer_state.json b/exp1_original_data_rerun_20251215/checkpoint-75/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..5afe5e7300b14ad7d669eb67a3d4c58908a21409 --- /dev/null +++ b/exp1_original_data_rerun_20251215/checkpoint-75/trainer_state.json @@ -0,0 +1,83 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 100, + "global_step": 75, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.13333333333333333, + "grad_norm": 0.7970004081726074, + "learning_rate": 0.00019989008914857116, + "loss": 0.5896, + "step": 10 + }, + { + "epoch": 0.26666666666666666, + "grad_norm": 0.5293543934822083, + "learning_rate": 0.00018699063724087904, + "loss": 0.4794, + "step": 20 + }, + { + "epoch": 0.4, + "grad_norm": 4.553859233856201, + "learning_rate": 0.00015531311541251995, + "loss": 0.5135, + "step": 30 + }, + { + "epoch": 0.5333333333333333, + "grad_norm": 1.5555204153060913, + "learning_rate": 0.00011169553219720828, + "loss": 0.4293, + "step": 40 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 1.5811039209365845, + "learning_rate": 6.555331497156672e-05, + "loss": 0.3595, + "step": 50 + }, + { + "epoch": 0.8, + "grad_norm": 1.2460683584213257, + "learning_rate": 2.6846866473633125e-05, + "loss": 0.491, + "step": 60 + }, + { + "epoch": 0.9333333333333333, + "grad_norm": 1.010131597518921, + "learning_rate": 3.931481242315993e-06, + "loss": 0.4156, + "step": 70 + } + ], + "logging_steps": 10, + "max_steps": 75, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 25, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.032490517004288e+17, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/exp1_original_data_rerun_20251215/checkpoint-75/training_args.bin b/exp1_original_data_rerun_20251215/checkpoint-75/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..1a708cd653bb0fd870e02a3cd2b92e79b2530fde --- /dev/null +++ b/exp1_original_data_rerun_20251215/checkpoint-75/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff11aa937808576e06d64e88c71a90de51b18fbb50367206f5df16539755319c +size 5777 diff --git a/exp2_signalc_20251215/checkpoint-147/README.md b/exp2_signalc_20251215/checkpoint-147/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d283785603342a12561751304db4c120bbd293fc --- /dev/null +++ b/exp2_signalc_20251215/checkpoint-147/README.md @@ -0,0 +1,207 @@ +--- +base_model: Qwen/Qwen2.5-Coder-14B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-Coder-14B-Instruct +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/exp2_signalc_20251215/checkpoint-147/adapter_config.json b/exp2_signalc_20251215/checkpoint-147/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b4d747200220490dff0f82aa49e63f702a859ccc --- /dev/null +++ b/exp2_signalc_20251215/checkpoint-147/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-Coder-14B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "q_proj", + "k_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/exp2_signalc_20251215/checkpoint-147/adapter_model.safetensors b/exp2_signalc_20251215/checkpoint-147/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cb910bb99c5ee985feb758c7c7097128700d930c --- /dev/null +++ b/exp2_signalc_20251215/checkpoint-147/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72ab7d9d0780cf84e34fa0868b9e4c618ea308485f0d6ee931a63502516bac74 +size 201378736 diff --git a/exp2_signalc_20251215/checkpoint-147/optimizer.pt b/exp2_signalc_20251215/checkpoint-147/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..50d666425a9fc823e15c2d3b599580760449989e --- /dev/null +++ b/exp2_signalc_20251215/checkpoint-147/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44aea868f1ec2ea45779236f07660fd3db7beae8c91ee30e0467f39b07a30ba5 +size 402982627 diff --git a/exp2_signalc_20251215/checkpoint-147/rng_state.pth b/exp2_signalc_20251215/checkpoint-147/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0178e697ee9dfc09593374e91de3e5c7e0217aee --- /dev/null +++ b/exp2_signalc_20251215/checkpoint-147/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:283c4259167c1de6fc9b0d70fb38093d1c85343ca9ff0f2e605bd2011addd045 +size 14645 diff --git a/exp2_signalc_20251215/checkpoint-147/scheduler.pt b/exp2_signalc_20251215/checkpoint-147/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..5b196c8e7a4834975b028188c1c8c2559af111ec --- /dev/null +++ b/exp2_signalc_20251215/checkpoint-147/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e6e49a0a1dc514c3062fdc71df85d50b768febd62f2e9b412554326fd48eb9c +size 1465 diff --git a/exp2_signalc_20251215/checkpoint-147/trainer_state.json b/exp2_signalc_20251215/checkpoint-147/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3edb0fb9bacc48e25ba37e437cd6157fa733e7f6 --- /dev/null +++ b/exp2_signalc_20251215/checkpoint-147/trainer_state.json @@ -0,0 +1,1071 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.98, + "eval_steps": 100, + "global_step": 147, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.006666666666666667, + "grad_norm": 2.007404327392578, + "learning_rate": 0.0, + "loss": 0.6595, + "step": 1 + }, + { + "epoch": 0.013333333333333334, + "grad_norm": 1.8502237796783447, + "learning_rate": 3.3333333333333333e-06, + "loss": 1.1139, + "step": 2 + }, + { + "epoch": 0.02, + "grad_norm": 1.2331303358078003, + "learning_rate": 6.666666666666667e-06, + "loss": 0.578, + "step": 3 + }, + { + "epoch": 0.02666666666666667, + "grad_norm": 1.2300695180892944, + "learning_rate": 1e-05, + "loss": 0.776, + "step": 4 + }, + { + "epoch": 0.03333333333333333, + "grad_norm": 3.3759779930114746, + "learning_rate": 1.3333333333333333e-05, + "loss": 0.7998, + "step": 5 + }, + { + "epoch": 0.04, + "grad_norm": 3.43403697013855, + "learning_rate": 1.6666666666666667e-05, + "loss": 1.4564, + "step": 6 + }, + { + "epoch": 0.04666666666666667, + "grad_norm": 3.449822187423706, + "learning_rate": 2e-05, + "loss": 0.899, + "step": 7 + }, + { + "epoch": 0.05333333333333334, + "grad_norm": 1.4381998777389526, + "learning_rate": 2.3333333333333336e-05, + "loss": 0.2811, + "step": 8 + }, + { + "epoch": 0.06, + "grad_norm": 1.234094500541687, + "learning_rate": 2.6666666666666667e-05, + "loss": 0.1841, + "step": 9 + }, + { + "epoch": 0.06666666666666667, + "grad_norm": 0.9779098033905029, + "learning_rate": 3e-05, + "loss": 0.2704, + "step": 10 + }, + { + "epoch": 0.07333333333333333, + "grad_norm": 1.1498095989227295, + "learning_rate": 3.3333333333333335e-05, + "loss": 0.2534, + "step": 11 + }, + { + "epoch": 0.08, + "grad_norm": 1.928687334060669, + "learning_rate": 3.6666666666666666e-05, + "loss": 0.5162, + "step": 12 + }, + { + "epoch": 0.08666666666666667, + "grad_norm": 1.8929930925369263, + "learning_rate": 4e-05, + "loss": 0.9282, + "step": 13 + }, + { + "epoch": 0.09333333333333334, + "grad_norm": 3.466400146484375, + "learning_rate": 4.3333333333333334e-05, + "loss": 0.8342, + "step": 14 + }, + { + "epoch": 0.1, + "grad_norm": 2.0776634216308594, + "learning_rate": 4.666666666666667e-05, + "loss": 0.7976, + "step": 15 + }, + { + "epoch": 0.10666666666666667, + "grad_norm": 2.7737135887145996, + "learning_rate": 5e-05, + "loss": 1.1665, + "step": 16 + }, + { + "epoch": 0.11333333333333333, + "grad_norm": 1.192466139793396, + "learning_rate": 4.9993231029486544e-05, + "loss": 0.5163, + "step": 17 + }, + { + "epoch": 0.12, + "grad_norm": 1.5976535081863403, + "learning_rate": 4.997292778346312e-05, + "loss": 0.5035, + "step": 18 + }, + { + "epoch": 0.12666666666666668, + "grad_norm": 1.513372540473938, + "learning_rate": 4.993910125649561e-05, + "loss": 0.6089, + "step": 19 + }, + { + "epoch": 0.13333333333333333, + "grad_norm": 1.6169334650039673, + "learning_rate": 4.989176976624511e-05, + "loss": 0.6053, + "step": 20 + }, + { + "epoch": 0.14, + "grad_norm": 1.9578418731689453, + "learning_rate": 4.983095894354858e-05, + "loss": 0.8322, + "step": 21 + }, + { + "epoch": 0.14666666666666667, + "grad_norm": 2.1667706966400146, + "learning_rate": 4.975670171853926e-05, + "loss": 0.8027, + "step": 22 + }, + { + "epoch": 0.15333333333333332, + "grad_norm": 1.4784480333328247, + "learning_rate": 4.966903830281449e-05, + "loss": 0.4951, + "step": 23 + }, + { + "epoch": 0.16, + "grad_norm": 2.7624332904815674, + "learning_rate": 4.9568016167660334e-05, + "loss": 0.6269, + "step": 24 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 1.6521016359329224, + "learning_rate": 4.9453690018345144e-05, + "loss": 0.3331, + "step": 25 + }, + { + "epoch": 0.17333333333333334, + "grad_norm": 1.1457144021987915, + "learning_rate": 4.9326121764495596e-05, + "loss": 0.284, + "step": 26 + }, + { + "epoch": 0.18, + "grad_norm": 1.4980254173278809, + "learning_rate": 4.9185380486571595e-05, + "loss": 0.6191, + "step": 27 + }, + { + "epoch": 0.18666666666666668, + "grad_norm": 1.3955901861190796, + "learning_rate": 4.9031542398457974e-05, + "loss": 0.662, + "step": 28 + }, + { + "epoch": 0.19333333333333333, + "grad_norm": 1.2383167743682861, + "learning_rate": 4.88646908061933e-05, + "loss": 0.79, + "step": 29 + }, + { + "epoch": 0.2, + "grad_norm": 1.2159212827682495, + "learning_rate": 4.868491606285823e-05, + "loss": 0.4194, + "step": 30 + }, + { + "epoch": 0.20666666666666667, + "grad_norm": 2.2957308292388916, + "learning_rate": 4.849231551964771e-05, + "loss": 0.5185, + "step": 31 + }, + { + "epoch": 0.21333333333333335, + "grad_norm": 0.6989065408706665, + "learning_rate": 4.828699347315356e-05, + "loss": 0.2625, + "step": 32 + }, + { + "epoch": 0.22, + "grad_norm": 2.02852463722229, + "learning_rate": 4.806906110888606e-05, + "loss": 0.6494, + "step": 33 + }, + { + "epoch": 0.22666666666666666, + "grad_norm": 1.653140902519226, + "learning_rate": 4.783863644106502e-05, + "loss": 0.5486, + "step": 34 + }, + { + "epoch": 0.23333333333333334, + "grad_norm": 1.7378313541412354, + "learning_rate": 4.759584424871302e-05, + "loss": 0.6112, + "step": 35 + }, + { + "epoch": 0.24, + "grad_norm": 1.8570767641067505, + "learning_rate": 4.734081600808531e-05, + "loss": 0.8117, + "step": 36 + }, + { + "epoch": 0.24666666666666667, + "grad_norm": 0.6243714094161987, + "learning_rate": 4.707368982147318e-05, + "loss": 0.284, + "step": 37 + }, + { + "epoch": 0.25333333333333335, + "grad_norm": 1.335660457611084, + "learning_rate": 4.679461034241906e-05, + "loss": 0.5556, + "step": 38 + }, + { + "epoch": 0.26, + "grad_norm": 3.071352958679199, + "learning_rate": 4.650372869738414e-05, + "loss": 0.6837, + "step": 39 + }, + { + "epoch": 0.26666666666666666, + "grad_norm": 1.6223986148834229, + "learning_rate": 4.620120240391065e-05, + "loss": 0.6445, + "step": 40 + }, + { + "epoch": 0.2733333333333333, + "grad_norm": 1.174696445465088, + "learning_rate": 4.588719528532342e-05, + "loss": 0.353, + "step": 41 + }, + { + "epoch": 0.28, + "grad_norm": 1.129868507385254, + "learning_rate": 4.556187738201656e-05, + "loss": 0.4056, + "step": 42 + }, + { + "epoch": 0.2866666666666667, + "grad_norm": 1.917558193206787, + "learning_rate": 4.522542485937369e-05, + "loss": 1.23, + "step": 43 + }, + { + "epoch": 0.29333333333333333, + "grad_norm": 1.72930908203125, + "learning_rate": 4.48780199123712e-05, + "loss": 0.6128, + "step": 44 + }, + { + "epoch": 0.3, + "grad_norm": 1.7508667707443237, + "learning_rate": 4.4519850666916484e-05, + "loss": 0.8213, + "step": 45 + }, + { + "epoch": 0.30666666666666664, + "grad_norm": 0.9080920815467834, + "learning_rate": 4.415111107797445e-05, + "loss": 0.2085, + "step": 46 + }, + { + "epoch": 0.31333333333333335, + "grad_norm": 1.75723135471344, + "learning_rate": 4.377200082453749e-05, + "loss": 0.5145, + "step": 47 + }, + { + "epoch": 0.32, + "grad_norm": 1.450878620147705, + "learning_rate": 4.3382725201495723e-05, + "loss": 0.6667, + "step": 48 + }, + { + "epoch": 0.32666666666666666, + "grad_norm": 0.8492142558097839, + "learning_rate": 4.2983495008466276e-05, + "loss": 0.2838, + "step": 49 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 1.2550184726715088, + "learning_rate": 4.257452643564155e-05, + "loss": 0.3858, + "step": 50 + }, + { + "epoch": 0.34, + "grad_norm": 2.2642593383789062, + "learning_rate": 4.215604094671835e-05, + "loss": 0.6531, + "step": 51 + }, + { + "epoch": 0.3466666666666667, + "grad_norm": 3.926060199737549, + "learning_rate": 4.172826515897146e-05, + "loss": 0.4836, + "step": 52 + }, + { + "epoch": 0.35333333333333333, + "grad_norm": 1.8190771341323853, + "learning_rate": 4.129143072053638e-05, + "loss": 0.3158, + "step": 53 + }, + { + "epoch": 0.36, + "grad_norm": 1.4268187284469604, + "learning_rate": 4.0845774184967754e-05, + "loss": 0.5298, + "step": 54 + }, + { + "epoch": 0.36666666666666664, + "grad_norm": 1.9656697511672974, + "learning_rate": 4.039153688314145e-05, + "loss": 0.4887, + "step": 55 + }, + { + "epoch": 0.37333333333333335, + "grad_norm": 1.1335246562957764, + "learning_rate": 3.9928964792569655e-05, + "loss": 0.1837, + "step": 56 + }, + { + "epoch": 0.38, + "grad_norm": 3.525275707244873, + "learning_rate": 3.945830840419966e-05, + "loss": 0.8998, + "step": 57 + }, + { + "epoch": 0.38666666666666666, + "grad_norm": 2.12524676322937, + "learning_rate": 3.897982258676867e-05, + "loss": 0.3827, + "step": 58 + }, + { + "epoch": 0.3933333333333333, + "grad_norm": 1.9228296279907227, + "learning_rate": 3.8493766448787825e-05, + "loss": 0.5591, + "step": 59 + }, + { + "epoch": 0.4, + "grad_norm": 1.3674192428588867, + "learning_rate": 3.8000403198230387e-05, + "loss": 0.4872, + "step": 60 + }, + { + "epoch": 0.4066666666666667, + "grad_norm": 1.965265154838562, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.5459, + "step": 61 + }, + { + "epoch": 0.41333333333333333, + "grad_norm": 2.329216241836548, + "learning_rate": 3.699282783125616e-05, + "loss": 0.7548, + "step": 62 + }, + { + "epoch": 0.42, + "grad_norm": 2.118192434310913, + "learning_rate": 3.6479161334675296e-05, + "loss": 0.6821, + "step": 63 + }, + { + "epoch": 0.4266666666666667, + "grad_norm": 1.9981805086135864, + "learning_rate": 3.5959278669726935e-05, + "loss": 0.3079, + "step": 64 + }, + { + "epoch": 0.43333333333333335, + "grad_norm": 1.0037565231323242, + "learning_rate": 3.543346136204545e-05, + "loss": 0.3274, + "step": 65 + }, + { + "epoch": 0.44, + "grad_norm": 1.8539446592330933, + "learning_rate": 3.490199415097892e-05, + "loss": 0.3298, + "step": 66 + }, + { + "epoch": 0.44666666666666666, + "grad_norm": 1.1413832902908325, + "learning_rate": 3.436516483539781e-05, + "loss": 0.3062, + "step": 67 + }, + { + "epoch": 0.4533333333333333, + "grad_norm": 2.7091305255889893, + "learning_rate": 3.382326411784672e-05, + "loss": 0.4897, + "step": 68 + }, + { + "epoch": 0.46, + "grad_norm": 1.9014657735824585, + "learning_rate": 3.327658544712395e-05, + "loss": 0.422, + "step": 69 + }, + { + "epoch": 0.4666666666666667, + "grad_norm": 1.7714844942092896, + "learning_rate": 3.272542485937369e-05, + "loss": 0.2594, + "step": 70 + }, + { + "epoch": 0.47333333333333333, + "grad_norm": 0.7899397611618042, + "learning_rate": 3.217008081777726e-05, + "loss": 0.1847, + "step": 71 + }, + { + "epoch": 0.48, + "grad_norm": 1.2437670230865479, + "learning_rate": 3.161085405093006e-05, + "loss": 0.2063, + "step": 72 + }, + { + "epoch": 0.4866666666666667, + "grad_norm": 1.4275377988815308, + "learning_rate": 3.104804738999169e-05, + "loss": 0.7009, + "step": 73 + }, + { + "epoch": 0.49333333333333335, + "grad_norm": 1.6646203994750977, + "learning_rate": 3.048196560469758e-05, + "loss": 0.2215, + "step": 74 + }, + { + "epoch": 0.5, + "grad_norm": 2.504795551300049, + "learning_rate": 2.9912915238320754e-05, + "loss": 0.5467, + "step": 75 + }, + { + "epoch": 0.5066666666666667, + "grad_norm": 1.1971930265426636, + "learning_rate": 2.9341204441673266e-05, + "loss": 0.1757, + "step": 76 + }, + { + "epoch": 0.5133333333333333, + "grad_norm": 1.2171498537063599, + "learning_rate": 2.876714280623708e-05, + "loss": 0.1898, + "step": 77 + }, + { + "epoch": 0.52, + "grad_norm": 1.5181541442871094, + "learning_rate": 2.8191041196514873e-05, + "loss": 0.2299, + "step": 78 + }, + { + "epoch": 0.5266666666666666, + "grad_norm": 1.1093655824661255, + "learning_rate": 2.761321158169134e-05, + "loss": 0.1639, + "step": 79 + }, + { + "epoch": 0.5333333333333333, + "grad_norm": 4.772696495056152, + "learning_rate": 2.7033966866696457e-05, + "loss": 0.9071, + "step": 80 + }, + { + "epoch": 0.54, + "grad_norm": 4.373753547668457, + "learning_rate": 2.6453620722761896e-05, + "loss": 1.315, + "step": 81 + }, + { + "epoch": 0.5466666666666666, + "grad_norm": 0.4806381165981293, + "learning_rate": 2.587248741756253e-05, + "loss": 0.0859, + "step": 82 + }, + { + "epoch": 0.5533333333333333, + "grad_norm": 2.003674030303955, + "learning_rate": 2.5290881645034932e-05, + "loss": 0.5889, + "step": 83 + }, + { + "epoch": 0.56, + "grad_norm": 3.2717702388763428, + "learning_rate": 2.470911835496508e-05, + "loss": 0.2551, + "step": 84 + }, + { + "epoch": 0.5666666666666667, + "grad_norm": 3.508592367172241, + "learning_rate": 2.4127512582437485e-05, + "loss": 0.7869, + "step": 85 + }, + { + "epoch": 0.5733333333333334, + "grad_norm": 1.1418286561965942, + "learning_rate": 2.3546379277238107e-05, + "loss": 0.1538, + "step": 86 + }, + { + "epoch": 0.58, + "grad_norm": 0.9381025433540344, + "learning_rate": 2.2966033133303545e-05, + "loss": 0.0973, + "step": 87 + }, + { + "epoch": 0.5866666666666667, + "grad_norm": 2.441453218460083, + "learning_rate": 2.238678841830867e-05, + "loss": 0.86, + "step": 88 + }, + { + "epoch": 0.5933333333333334, + "grad_norm": 0.7263301610946655, + "learning_rate": 2.1808958803485136e-05, + "loss": 0.1041, + "step": 89 + }, + { + "epoch": 0.6, + "grad_norm": 1.5983655452728271, + "learning_rate": 2.1232857193762924e-05, + "loss": 0.4031, + "step": 90 + }, + { + "epoch": 0.6066666666666667, + "grad_norm": 2.9620091915130615, + "learning_rate": 2.0658795558326743e-05, + "loss": 0.7484, + "step": 91 + }, + { + "epoch": 0.6133333333333333, + "grad_norm": 1.8278090953826904, + "learning_rate": 2.0087084761679245e-05, + "loss": 0.4001, + "step": 92 + }, + { + "epoch": 0.62, + "grad_norm": 1.282475233078003, + "learning_rate": 1.9518034395302414e-05, + "loss": 0.4047, + "step": 93 + }, + { + "epoch": 0.6266666666666667, + "grad_norm": 1.196440577507019, + "learning_rate": 1.895195261000831e-05, + "loss": 0.1718, + "step": 94 + }, + { + "epoch": 0.6333333333333333, + "grad_norm": 3.1636035442352295, + "learning_rate": 1.838914594906995e-05, + "loss": 0.773, + "step": 95 + }, + { + "epoch": 0.64, + "grad_norm": 0.34068605303764343, + "learning_rate": 1.7829919182222752e-05, + "loss": 0.0501, + "step": 96 + }, + { + "epoch": 0.6466666666666666, + "grad_norm": 2.373602867126465, + "learning_rate": 1.7274575140626318e-05, + "loss": 0.6528, + "step": 97 + }, + { + "epoch": 0.6533333333333333, + "grad_norm": 3.809894561767578, + "learning_rate": 1.672341455287605e-05, + "loss": 0.4916, + "step": 98 + }, + { + "epoch": 0.66, + "grad_norm": 2.143580913543701, + "learning_rate": 1.617673588215328e-05, + "loss": 0.5245, + "step": 99 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.8998221158981323, + "learning_rate": 1.56348351646022e-05, + "loss": 0.1163, + "step": 100 + }, + { + "epoch": 0.6666666666666666, + "eval_loss": 0.3062487244606018, + "eval_runtime": 165.5432, + "eval_samples_per_second": 2.259, + "eval_steps_per_second": 1.13, + "step": 100 + }, + { + "epoch": 0.6733333333333333, + "grad_norm": 1.3396159410476685, + "learning_rate": 1.509800584902108e-05, + "loss": 0.2617, + "step": 101 + }, + { + "epoch": 0.68, + "grad_norm": 1.4432024955749512, + "learning_rate": 1.4566538637954554e-05, + "loss": 0.2208, + "step": 102 + }, + { + "epoch": 0.6866666666666666, + "grad_norm": 3.2442052364349365, + "learning_rate": 1.4040721330273062e-05, + "loss": 0.5551, + "step": 103 + }, + { + "epoch": 0.6933333333333334, + "grad_norm": 1.5302101373672485, + "learning_rate": 1.3520838665324703e-05, + "loss": 0.2195, + "step": 104 + }, + { + "epoch": 0.7, + "grad_norm": 4.544744491577148, + "learning_rate": 1.3007172168743854e-05, + "loss": 0.6881, + "step": 105 + }, + { + "epoch": 0.7066666666666667, + "grad_norm": 3.1280434131622314, + "learning_rate": 1.2500000000000006e-05, + "loss": 0.7843, + "step": 106 + }, + { + "epoch": 0.7133333333333334, + "grad_norm": 3.5932955741882324, + "learning_rate": 1.1999596801769616e-05, + "loss": 0.405, + "step": 107 + }, + { + "epoch": 0.72, + "grad_norm": 0.8064641952514648, + "learning_rate": 1.1506233551212186e-05, + "loss": 0.0927, + "step": 108 + }, + { + "epoch": 0.7266666666666667, + "grad_norm": 1.5594532489776611, + "learning_rate": 1.1020177413231334e-05, + "loss": 0.2154, + "step": 109 + }, + { + "epoch": 0.7333333333333333, + "grad_norm": 0.6679570078849792, + "learning_rate": 1.0541691595800337e-05, + "loss": 0.0839, + "step": 110 + }, + { + "epoch": 0.74, + "grad_norm": 1.8102202415466309, + "learning_rate": 1.0071035207430352e-05, + "loss": 0.5539, + "step": 111 + }, + { + "epoch": 0.7466666666666667, + "grad_norm": 1.7791885137557983, + "learning_rate": 9.608463116858542e-06, + "loss": 0.2957, + "step": 112 + }, + { + "epoch": 0.7533333333333333, + "grad_norm": 2.7545344829559326, + "learning_rate": 9.154225815032242e-06, + "loss": 0.8108, + "step": 113 + }, + { + "epoch": 0.76, + "grad_norm": 1.1660912036895752, + "learning_rate": 8.708569279463622e-06, + "loss": 0.0814, + "step": 114 + }, + { + "epoch": 0.7666666666666667, + "grad_norm": 0.9156790971755981, + "learning_rate": 8.271734841028553e-06, + "loss": 0.1564, + "step": 115 + }, + { + "epoch": 0.7733333333333333, + "grad_norm": 4.86236572265625, + "learning_rate": 7.843959053281663e-06, + "loss": 0.6666, + "step": 116 + }, + { + "epoch": 0.78, + "grad_norm": 4.032678127288818, + "learning_rate": 7.4254735643584564e-06, + "loss": 0.4821, + "step": 117 + }, + { + "epoch": 0.7866666666666666, + "grad_norm": 2.2866246700286865, + "learning_rate": 7.016504991533726e-06, + "loss": 0.2771, + "step": 118 + }, + { + "epoch": 0.7933333333333333, + "grad_norm": 2.304269790649414, + "learning_rate": 6.617274798504286e-06, + "loss": 0.302, + "step": 119 + }, + { + "epoch": 0.8, + "grad_norm": 1.9873311519622803, + "learning_rate": 6.22799917546252e-06, + "loss": 0.3067, + "step": 120 + }, + { + "epoch": 0.8066666666666666, + "grad_norm": 3.1299631595611572, + "learning_rate": 5.848888922025553e-06, + "loss": 0.6448, + "step": 121 + }, + { + "epoch": 0.8133333333333334, + "grad_norm": 1.7106144428253174, + "learning_rate": 5.48014933308352e-06, + "loss": 0.3074, + "step": 122 + }, + { + "epoch": 0.82, + "grad_norm": 1.6227457523345947, + "learning_rate": 5.121980087628803e-06, + "loss": 0.1963, + "step": 123 + }, + { + "epoch": 0.8266666666666667, + "grad_norm": 4.482200622558594, + "learning_rate": 4.7745751406263165e-06, + "loss": 0.7424, + "step": 124 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 2.32167911529541, + "learning_rate": 4.438122617983443e-06, + "loss": 0.5608, + "step": 125 + }, + { + "epoch": 0.84, + "grad_norm": 0.7932979464530945, + "learning_rate": 4.112804714676594e-06, + "loss": 0.0947, + "step": 126 + }, + { + "epoch": 0.8466666666666667, + "grad_norm": 0.28471994400024414, + "learning_rate": 3.798797596089351e-06, + "loss": 0.0437, + "step": 127 + }, + { + "epoch": 0.8533333333333334, + "grad_norm": 2.5803308486938477, + "learning_rate": 3.4962713026158694e-06, + "loss": 0.2781, + "step": 128 + }, + { + "epoch": 0.86, + "grad_norm": 2.4017975330352783, + "learning_rate": 3.205389657580943e-06, + "loss": 0.5838, + "step": 129 + }, + { + "epoch": 0.8666666666666667, + "grad_norm": 3.2663633823394775, + "learning_rate": 2.9263101785268254e-06, + "loss": 0.473, + "step": 130 + }, + { + "epoch": 0.8733333333333333, + "grad_norm": 2.067620277404785, + "learning_rate": 2.659183991914696e-06, + "loss": 0.2994, + "step": 131 + }, + { + "epoch": 0.88, + "grad_norm": 1.4989666938781738, + "learning_rate": 2.4041557512869878e-06, + "loss": 0.1795, + "step": 132 + }, + { + "epoch": 0.8866666666666667, + "grad_norm": 1.2955349683761597, + "learning_rate": 2.1613635589349756e-06, + "loss": 0.193, + "step": 133 + }, + { + "epoch": 0.8933333333333333, + "grad_norm": 0.9127891659736633, + "learning_rate": 1.9309388911139426e-06, + "loss": 0.1546, + "step": 134 + }, + { + "epoch": 0.9, + "grad_norm": 2.5144217014312744, + "learning_rate": 1.713006526846439e-06, + "loss": 0.2405, + "step": 135 + }, + { + "epoch": 0.9066666666666666, + "grad_norm": 3.5872600078582764, + "learning_rate": 1.5076844803522922e-06, + "loss": 0.8543, + "step": 136 + }, + { + "epoch": 0.9133333333333333, + "grad_norm": 1.16169011592865, + "learning_rate": 1.31508393714177e-06, + "loss": 0.1759, + "step": 137 + }, + { + "epoch": 0.92, + "grad_norm": 1.221109390258789, + "learning_rate": 1.1353091938067023e-06, + "loss": 0.1709, + "step": 138 + }, + { + "epoch": 0.9266666666666666, + "grad_norm": 1.7916854619979858, + "learning_rate": 9.684576015420278e-07, + "loss": 0.4347, + "step": 139 + }, + { + "epoch": 0.9333333333333333, + "grad_norm": 2.9777231216430664, + "learning_rate": 8.146195134284052e-07, + "loss": 0.3878, + "step": 140 + }, + { + "epoch": 0.94, + "grad_norm": 4.271992206573486, + "learning_rate": 6.738782355044049e-07, + "loss": 0.3858, + "step": 141 + }, + { + "epoch": 0.9466666666666667, + "grad_norm": 1.1091471910476685, + "learning_rate": 5.463099816548579e-07, + "loss": 0.1942, + "step": 142 + }, + { + "epoch": 0.9533333333333334, + "grad_norm": 3.5003552436828613, + "learning_rate": 4.319838323396691e-07, + "loss": 0.4267, + "step": 143 + }, + { + "epoch": 0.96, + "grad_norm": 4.55532169342041, + "learning_rate": 3.309616971855195e-07, + "loss": 0.3795, + "step": 144 + }, + { + "epoch": 0.9666666666666667, + "grad_norm": 2.3853328227996826, + "learning_rate": 2.4329828146074095e-07, + "loss": 0.1976, + "step": 145 + }, + { + "epoch": 0.9733333333333334, + "grad_norm": 1.388493537902832, + "learning_rate": 1.6904105645142444e-07, + "loss": 0.1673, + "step": 146 + }, + { + "epoch": 0.98, + "grad_norm": 2.8709473609924316, + "learning_rate": 1.0823023375489127e-07, + "loss": 0.3265, + "step": 147 + } + ], + "logging_steps": 1, + "max_steps": 150, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 49, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.014568182695854e+17, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/exp2_signalc_20251215/checkpoint-147/training_args.bin b/exp2_signalc_20251215/checkpoint-147/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..591bbc618a96850d2fc0f24befec3b8cdb986d33 --- /dev/null +++ b/exp2_signalc_20251215/checkpoint-147/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50702002f1fdf6d5e91c92ea8225d6a002d69ced0f5b2ce1f30eb32ce7dad475 +size 5841 diff --git a/exp2_signalc_20251215/checkpoint-150/README.md b/exp2_signalc_20251215/checkpoint-150/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d283785603342a12561751304db4c120bbd293fc --- /dev/null +++ b/exp2_signalc_20251215/checkpoint-150/README.md @@ -0,0 +1,207 @@ +--- +base_model: Qwen/Qwen2.5-Coder-14B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-Coder-14B-Instruct +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/exp2_signalc_20251215/checkpoint-150/adapter_config.json b/exp2_signalc_20251215/checkpoint-150/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b4d747200220490dff0f82aa49e63f702a859ccc --- /dev/null +++ b/exp2_signalc_20251215/checkpoint-150/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-Coder-14B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "q_proj", + "k_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/exp2_signalc_20251215/checkpoint-150/adapter_model.safetensors b/exp2_signalc_20251215/checkpoint-150/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..33672935cb7c3d885ecc8c264c0061e5437e8b82 --- /dev/null +++ b/exp2_signalc_20251215/checkpoint-150/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b952f9f010e213946ed7983bb5affbcb494295dc69e15839f0f632edc00432e +size 201378736 diff --git a/exp2_signalc_20251215/checkpoint-150/optimizer.pt b/exp2_signalc_20251215/checkpoint-150/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a4bdf0063177dcb555b7eaaa840926049aa2294b --- /dev/null +++ b/exp2_signalc_20251215/checkpoint-150/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4902575f7c13703f23c635f85725d77ad1e0464817b798ca89ef732b30a97c58 +size 402982627 diff --git a/exp2_signalc_20251215/checkpoint-150/rng_state.pth b/exp2_signalc_20251215/checkpoint-150/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..b6e5ebccc9e6a700fd9d1abbdf919ba33a76cbed --- /dev/null +++ b/exp2_signalc_20251215/checkpoint-150/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a47d1ead692de2bce57a9ae7ba59450da50376926e3a3a2a0ebc83b70d9ba03 +size 14645 diff --git a/exp2_signalc_20251215/checkpoint-150/scheduler.pt b/exp2_signalc_20251215/checkpoint-150/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..3b259f6f75e583e4febfc080b5b8331e00665a2f --- /dev/null +++ b/exp2_signalc_20251215/checkpoint-150/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30781be5b5c4396cc620b6281ac1b634d4288d62bfab3224155044b29d42cdb1 +size 1465 diff --git a/exp2_signalc_20251215/checkpoint-150/trainer_state.json b/exp2_signalc_20251215/checkpoint-150/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..bb62e24cd2755b7ec51a9a2e10d2228b608f4c0c --- /dev/null +++ b/exp2_signalc_20251215/checkpoint-150/trainer_state.json @@ -0,0 +1,1092 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 100, + "global_step": 150, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.006666666666666667, + "grad_norm": 2.007404327392578, + "learning_rate": 0.0, + "loss": 0.6595, + "step": 1 + }, + { + "epoch": 0.013333333333333334, + "grad_norm": 1.8502237796783447, + "learning_rate": 3.3333333333333333e-06, + "loss": 1.1139, + "step": 2 + }, + { + "epoch": 0.02, + "grad_norm": 1.2331303358078003, + "learning_rate": 6.666666666666667e-06, + "loss": 0.578, + "step": 3 + }, + { + "epoch": 0.02666666666666667, + "grad_norm": 1.2300695180892944, + "learning_rate": 1e-05, + "loss": 0.776, + "step": 4 + }, + { + "epoch": 0.03333333333333333, + "grad_norm": 3.3759779930114746, + "learning_rate": 1.3333333333333333e-05, + "loss": 0.7998, + "step": 5 + }, + { + "epoch": 0.04, + "grad_norm": 3.43403697013855, + "learning_rate": 1.6666666666666667e-05, + "loss": 1.4564, + "step": 6 + }, + { + "epoch": 0.04666666666666667, + "grad_norm": 3.449822187423706, + "learning_rate": 2e-05, + "loss": 0.899, + "step": 7 + }, + { + "epoch": 0.05333333333333334, + "grad_norm": 1.4381998777389526, + "learning_rate": 2.3333333333333336e-05, + "loss": 0.2811, + "step": 8 + }, + { + "epoch": 0.06, + "grad_norm": 1.234094500541687, + "learning_rate": 2.6666666666666667e-05, + "loss": 0.1841, + "step": 9 + }, + { + "epoch": 0.06666666666666667, + "grad_norm": 0.9779098033905029, + "learning_rate": 3e-05, + "loss": 0.2704, + "step": 10 + }, + { + "epoch": 0.07333333333333333, + "grad_norm": 1.1498095989227295, + "learning_rate": 3.3333333333333335e-05, + "loss": 0.2534, + "step": 11 + }, + { + "epoch": 0.08, + "grad_norm": 1.928687334060669, + "learning_rate": 3.6666666666666666e-05, + "loss": 0.5162, + "step": 12 + }, + { + "epoch": 0.08666666666666667, + "grad_norm": 1.8929930925369263, + "learning_rate": 4e-05, + "loss": 0.9282, + "step": 13 + }, + { + "epoch": 0.09333333333333334, + "grad_norm": 3.466400146484375, + "learning_rate": 4.3333333333333334e-05, + "loss": 0.8342, + "step": 14 + }, + { + "epoch": 0.1, + "grad_norm": 2.0776634216308594, + "learning_rate": 4.666666666666667e-05, + "loss": 0.7976, + "step": 15 + }, + { + "epoch": 0.10666666666666667, + "grad_norm": 2.7737135887145996, + "learning_rate": 5e-05, + "loss": 1.1665, + "step": 16 + }, + { + "epoch": 0.11333333333333333, + "grad_norm": 1.192466139793396, + "learning_rate": 4.9993231029486544e-05, + "loss": 0.5163, + "step": 17 + }, + { + "epoch": 0.12, + "grad_norm": 1.5976535081863403, + "learning_rate": 4.997292778346312e-05, + "loss": 0.5035, + "step": 18 + }, + { + "epoch": 0.12666666666666668, + "grad_norm": 1.513372540473938, + "learning_rate": 4.993910125649561e-05, + "loss": 0.6089, + "step": 19 + }, + { + "epoch": 0.13333333333333333, + "grad_norm": 1.6169334650039673, + "learning_rate": 4.989176976624511e-05, + "loss": 0.6053, + "step": 20 + }, + { + "epoch": 0.14, + "grad_norm": 1.9578418731689453, + "learning_rate": 4.983095894354858e-05, + "loss": 0.8322, + "step": 21 + }, + { + "epoch": 0.14666666666666667, + "grad_norm": 2.1667706966400146, + "learning_rate": 4.975670171853926e-05, + "loss": 0.8027, + "step": 22 + }, + { + "epoch": 0.15333333333333332, + "grad_norm": 1.4784480333328247, + "learning_rate": 4.966903830281449e-05, + "loss": 0.4951, + "step": 23 + }, + { + "epoch": 0.16, + "grad_norm": 2.7624332904815674, + "learning_rate": 4.9568016167660334e-05, + "loss": 0.6269, + "step": 24 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 1.6521016359329224, + "learning_rate": 4.9453690018345144e-05, + "loss": 0.3331, + "step": 25 + }, + { + "epoch": 0.17333333333333334, + "grad_norm": 1.1457144021987915, + "learning_rate": 4.9326121764495596e-05, + "loss": 0.284, + "step": 26 + }, + { + "epoch": 0.18, + "grad_norm": 1.4980254173278809, + "learning_rate": 4.9185380486571595e-05, + "loss": 0.6191, + "step": 27 + }, + { + "epoch": 0.18666666666666668, + "grad_norm": 1.3955901861190796, + "learning_rate": 4.9031542398457974e-05, + "loss": 0.662, + "step": 28 + }, + { + "epoch": 0.19333333333333333, + "grad_norm": 1.2383167743682861, + "learning_rate": 4.88646908061933e-05, + "loss": 0.79, + "step": 29 + }, + { + "epoch": 0.2, + "grad_norm": 1.2159212827682495, + "learning_rate": 4.868491606285823e-05, + "loss": 0.4194, + "step": 30 + }, + { + "epoch": 0.20666666666666667, + "grad_norm": 2.2957308292388916, + "learning_rate": 4.849231551964771e-05, + "loss": 0.5185, + "step": 31 + }, + { + "epoch": 0.21333333333333335, + "grad_norm": 0.6989065408706665, + "learning_rate": 4.828699347315356e-05, + "loss": 0.2625, + "step": 32 + }, + { + "epoch": 0.22, + "grad_norm": 2.02852463722229, + "learning_rate": 4.806906110888606e-05, + "loss": 0.6494, + "step": 33 + }, + { + "epoch": 0.22666666666666666, + "grad_norm": 1.653140902519226, + "learning_rate": 4.783863644106502e-05, + "loss": 0.5486, + "step": 34 + }, + { + "epoch": 0.23333333333333334, + "grad_norm": 1.7378313541412354, + "learning_rate": 4.759584424871302e-05, + "loss": 0.6112, + "step": 35 + }, + { + "epoch": 0.24, + "grad_norm": 1.8570767641067505, + "learning_rate": 4.734081600808531e-05, + "loss": 0.8117, + "step": 36 + }, + { + "epoch": 0.24666666666666667, + "grad_norm": 0.6243714094161987, + "learning_rate": 4.707368982147318e-05, + "loss": 0.284, + "step": 37 + }, + { + "epoch": 0.25333333333333335, + "grad_norm": 1.335660457611084, + "learning_rate": 4.679461034241906e-05, + "loss": 0.5556, + "step": 38 + }, + { + "epoch": 0.26, + "grad_norm": 3.071352958679199, + "learning_rate": 4.650372869738414e-05, + "loss": 0.6837, + "step": 39 + }, + { + "epoch": 0.26666666666666666, + "grad_norm": 1.6223986148834229, + "learning_rate": 4.620120240391065e-05, + "loss": 0.6445, + "step": 40 + }, + { + "epoch": 0.2733333333333333, + "grad_norm": 1.174696445465088, + "learning_rate": 4.588719528532342e-05, + "loss": 0.353, + "step": 41 + }, + { + "epoch": 0.28, + "grad_norm": 1.129868507385254, + "learning_rate": 4.556187738201656e-05, + "loss": 0.4056, + "step": 42 + }, + { + "epoch": 0.2866666666666667, + "grad_norm": 1.917558193206787, + "learning_rate": 4.522542485937369e-05, + "loss": 1.23, + "step": 43 + }, + { + "epoch": 0.29333333333333333, + "grad_norm": 1.72930908203125, + "learning_rate": 4.48780199123712e-05, + "loss": 0.6128, + "step": 44 + }, + { + "epoch": 0.3, + "grad_norm": 1.7508667707443237, + "learning_rate": 4.4519850666916484e-05, + "loss": 0.8213, + "step": 45 + }, + { + "epoch": 0.30666666666666664, + "grad_norm": 0.9080920815467834, + "learning_rate": 4.415111107797445e-05, + "loss": 0.2085, + "step": 46 + }, + { + "epoch": 0.31333333333333335, + "grad_norm": 1.75723135471344, + "learning_rate": 4.377200082453749e-05, + "loss": 0.5145, + "step": 47 + }, + { + "epoch": 0.32, + "grad_norm": 1.450878620147705, + "learning_rate": 4.3382725201495723e-05, + "loss": 0.6667, + "step": 48 + }, + { + "epoch": 0.32666666666666666, + "grad_norm": 0.8492142558097839, + "learning_rate": 4.2983495008466276e-05, + "loss": 0.2838, + "step": 49 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 1.2550184726715088, + "learning_rate": 4.257452643564155e-05, + "loss": 0.3858, + "step": 50 + }, + { + "epoch": 0.34, + "grad_norm": 2.2642593383789062, + "learning_rate": 4.215604094671835e-05, + "loss": 0.6531, + "step": 51 + }, + { + "epoch": 0.3466666666666667, + "grad_norm": 3.926060199737549, + "learning_rate": 4.172826515897146e-05, + "loss": 0.4836, + "step": 52 + }, + { + "epoch": 0.35333333333333333, + "grad_norm": 1.8190771341323853, + "learning_rate": 4.129143072053638e-05, + "loss": 0.3158, + "step": 53 + }, + { + "epoch": 0.36, + "grad_norm": 1.4268187284469604, + "learning_rate": 4.0845774184967754e-05, + "loss": 0.5298, + "step": 54 + }, + { + "epoch": 0.36666666666666664, + "grad_norm": 1.9656697511672974, + "learning_rate": 4.039153688314145e-05, + "loss": 0.4887, + "step": 55 + }, + { + "epoch": 0.37333333333333335, + "grad_norm": 1.1335246562957764, + "learning_rate": 3.9928964792569655e-05, + "loss": 0.1837, + "step": 56 + }, + { + "epoch": 0.38, + "grad_norm": 3.525275707244873, + "learning_rate": 3.945830840419966e-05, + "loss": 0.8998, + "step": 57 + }, + { + "epoch": 0.38666666666666666, + "grad_norm": 2.12524676322937, + "learning_rate": 3.897982258676867e-05, + "loss": 0.3827, + "step": 58 + }, + { + "epoch": 0.3933333333333333, + "grad_norm": 1.9228296279907227, + "learning_rate": 3.8493766448787825e-05, + "loss": 0.5591, + "step": 59 + }, + { + "epoch": 0.4, + "grad_norm": 1.3674192428588867, + "learning_rate": 3.8000403198230387e-05, + "loss": 0.4872, + "step": 60 + }, + { + "epoch": 0.4066666666666667, + "grad_norm": 1.965265154838562, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.5459, + "step": 61 + }, + { + "epoch": 0.41333333333333333, + "grad_norm": 2.329216241836548, + "learning_rate": 3.699282783125616e-05, + "loss": 0.7548, + "step": 62 + }, + { + "epoch": 0.42, + "grad_norm": 2.118192434310913, + "learning_rate": 3.6479161334675296e-05, + "loss": 0.6821, + "step": 63 + }, + { + "epoch": 0.4266666666666667, + "grad_norm": 1.9981805086135864, + "learning_rate": 3.5959278669726935e-05, + "loss": 0.3079, + "step": 64 + }, + { + "epoch": 0.43333333333333335, + "grad_norm": 1.0037565231323242, + "learning_rate": 3.543346136204545e-05, + "loss": 0.3274, + "step": 65 + }, + { + "epoch": 0.44, + "grad_norm": 1.8539446592330933, + "learning_rate": 3.490199415097892e-05, + "loss": 0.3298, + "step": 66 + }, + { + "epoch": 0.44666666666666666, + "grad_norm": 1.1413832902908325, + "learning_rate": 3.436516483539781e-05, + "loss": 0.3062, + "step": 67 + }, + { + "epoch": 0.4533333333333333, + "grad_norm": 2.7091305255889893, + "learning_rate": 3.382326411784672e-05, + "loss": 0.4897, + "step": 68 + }, + { + "epoch": 0.46, + "grad_norm": 1.9014657735824585, + "learning_rate": 3.327658544712395e-05, + "loss": 0.422, + "step": 69 + }, + { + "epoch": 0.4666666666666667, + "grad_norm": 1.7714844942092896, + "learning_rate": 3.272542485937369e-05, + "loss": 0.2594, + "step": 70 + }, + { + "epoch": 0.47333333333333333, + "grad_norm": 0.7899397611618042, + "learning_rate": 3.217008081777726e-05, + "loss": 0.1847, + "step": 71 + }, + { + "epoch": 0.48, + "grad_norm": 1.2437670230865479, + "learning_rate": 3.161085405093006e-05, + "loss": 0.2063, + "step": 72 + }, + { + "epoch": 0.4866666666666667, + "grad_norm": 1.4275377988815308, + "learning_rate": 3.104804738999169e-05, + "loss": 0.7009, + "step": 73 + }, + { + "epoch": 0.49333333333333335, + "grad_norm": 1.6646203994750977, + "learning_rate": 3.048196560469758e-05, + "loss": 0.2215, + "step": 74 + }, + { + "epoch": 0.5, + "grad_norm": 2.504795551300049, + "learning_rate": 2.9912915238320754e-05, + "loss": 0.5467, + "step": 75 + }, + { + "epoch": 0.5066666666666667, + "grad_norm": 1.1971930265426636, + "learning_rate": 2.9341204441673266e-05, + "loss": 0.1757, + "step": 76 + }, + { + "epoch": 0.5133333333333333, + "grad_norm": 1.2171498537063599, + "learning_rate": 2.876714280623708e-05, + "loss": 0.1898, + "step": 77 + }, + { + "epoch": 0.52, + "grad_norm": 1.5181541442871094, + "learning_rate": 2.8191041196514873e-05, + "loss": 0.2299, + "step": 78 + }, + { + "epoch": 0.5266666666666666, + "grad_norm": 1.1093655824661255, + "learning_rate": 2.761321158169134e-05, + "loss": 0.1639, + "step": 79 + }, + { + "epoch": 0.5333333333333333, + "grad_norm": 4.772696495056152, + "learning_rate": 2.7033966866696457e-05, + "loss": 0.9071, + "step": 80 + }, + { + "epoch": 0.54, + "grad_norm": 4.373753547668457, + "learning_rate": 2.6453620722761896e-05, + "loss": 1.315, + "step": 81 + }, + { + "epoch": 0.5466666666666666, + "grad_norm": 0.4806381165981293, + "learning_rate": 2.587248741756253e-05, + "loss": 0.0859, + "step": 82 + }, + { + "epoch": 0.5533333333333333, + "grad_norm": 2.003674030303955, + "learning_rate": 2.5290881645034932e-05, + "loss": 0.5889, + "step": 83 + }, + { + "epoch": 0.56, + "grad_norm": 3.2717702388763428, + "learning_rate": 2.470911835496508e-05, + "loss": 0.2551, + "step": 84 + }, + { + "epoch": 0.5666666666666667, + "grad_norm": 3.508592367172241, + "learning_rate": 2.4127512582437485e-05, + "loss": 0.7869, + "step": 85 + }, + { + "epoch": 0.5733333333333334, + "grad_norm": 1.1418286561965942, + "learning_rate": 2.3546379277238107e-05, + "loss": 0.1538, + "step": 86 + }, + { + "epoch": 0.58, + "grad_norm": 0.9381025433540344, + "learning_rate": 2.2966033133303545e-05, + "loss": 0.0973, + "step": 87 + }, + { + "epoch": 0.5866666666666667, + "grad_norm": 2.441453218460083, + "learning_rate": 2.238678841830867e-05, + "loss": 0.86, + "step": 88 + }, + { + "epoch": 0.5933333333333334, + "grad_norm": 0.7263301610946655, + "learning_rate": 2.1808958803485136e-05, + "loss": 0.1041, + "step": 89 + }, + { + "epoch": 0.6, + "grad_norm": 1.5983655452728271, + "learning_rate": 2.1232857193762924e-05, + "loss": 0.4031, + "step": 90 + }, + { + "epoch": 0.6066666666666667, + "grad_norm": 2.9620091915130615, + "learning_rate": 2.0658795558326743e-05, + "loss": 0.7484, + "step": 91 + }, + { + "epoch": 0.6133333333333333, + "grad_norm": 1.8278090953826904, + "learning_rate": 2.0087084761679245e-05, + "loss": 0.4001, + "step": 92 + }, + { + "epoch": 0.62, + "grad_norm": 1.282475233078003, + "learning_rate": 1.9518034395302414e-05, + "loss": 0.4047, + "step": 93 + }, + { + "epoch": 0.6266666666666667, + "grad_norm": 1.196440577507019, + "learning_rate": 1.895195261000831e-05, + "loss": 0.1718, + "step": 94 + }, + { + "epoch": 0.6333333333333333, + "grad_norm": 3.1636035442352295, + "learning_rate": 1.838914594906995e-05, + "loss": 0.773, + "step": 95 + }, + { + "epoch": 0.64, + "grad_norm": 0.34068605303764343, + "learning_rate": 1.7829919182222752e-05, + "loss": 0.0501, + "step": 96 + }, + { + "epoch": 0.6466666666666666, + "grad_norm": 2.373602867126465, + "learning_rate": 1.7274575140626318e-05, + "loss": 0.6528, + "step": 97 + }, + { + "epoch": 0.6533333333333333, + "grad_norm": 3.809894561767578, + "learning_rate": 1.672341455287605e-05, + "loss": 0.4916, + "step": 98 + }, + { + "epoch": 0.66, + "grad_norm": 2.143580913543701, + "learning_rate": 1.617673588215328e-05, + "loss": 0.5245, + "step": 99 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.8998221158981323, + "learning_rate": 1.56348351646022e-05, + "loss": 0.1163, + "step": 100 + }, + { + "epoch": 0.6666666666666666, + "eval_loss": 0.3062487244606018, + "eval_runtime": 165.5432, + "eval_samples_per_second": 2.259, + "eval_steps_per_second": 1.13, + "step": 100 + }, + { + "epoch": 0.6733333333333333, + "grad_norm": 1.3396159410476685, + "learning_rate": 1.509800584902108e-05, + "loss": 0.2617, + "step": 101 + }, + { + "epoch": 0.68, + "grad_norm": 1.4432024955749512, + "learning_rate": 1.4566538637954554e-05, + "loss": 0.2208, + "step": 102 + }, + { + "epoch": 0.6866666666666666, + "grad_norm": 3.2442052364349365, + "learning_rate": 1.4040721330273062e-05, + "loss": 0.5551, + "step": 103 + }, + { + "epoch": 0.6933333333333334, + "grad_norm": 1.5302101373672485, + "learning_rate": 1.3520838665324703e-05, + "loss": 0.2195, + "step": 104 + }, + { + "epoch": 0.7, + "grad_norm": 4.544744491577148, + "learning_rate": 1.3007172168743854e-05, + "loss": 0.6881, + "step": 105 + }, + { + "epoch": 0.7066666666666667, + "grad_norm": 3.1280434131622314, + "learning_rate": 1.2500000000000006e-05, + "loss": 0.7843, + "step": 106 + }, + { + "epoch": 0.7133333333333334, + "grad_norm": 3.5932955741882324, + "learning_rate": 1.1999596801769616e-05, + "loss": 0.405, + "step": 107 + }, + { + "epoch": 0.72, + "grad_norm": 0.8064641952514648, + "learning_rate": 1.1506233551212186e-05, + "loss": 0.0927, + "step": 108 + }, + { + "epoch": 0.7266666666666667, + "grad_norm": 1.5594532489776611, + "learning_rate": 1.1020177413231334e-05, + "loss": 0.2154, + "step": 109 + }, + { + "epoch": 0.7333333333333333, + "grad_norm": 0.6679570078849792, + "learning_rate": 1.0541691595800337e-05, + "loss": 0.0839, + "step": 110 + }, + { + "epoch": 0.74, + "grad_norm": 1.8102202415466309, + "learning_rate": 1.0071035207430352e-05, + "loss": 0.5539, + "step": 111 + }, + { + "epoch": 0.7466666666666667, + "grad_norm": 1.7791885137557983, + "learning_rate": 9.608463116858542e-06, + "loss": 0.2957, + "step": 112 + }, + { + "epoch": 0.7533333333333333, + "grad_norm": 2.7545344829559326, + "learning_rate": 9.154225815032242e-06, + "loss": 0.8108, + "step": 113 + }, + { + "epoch": 0.76, + "grad_norm": 1.1660912036895752, + "learning_rate": 8.708569279463622e-06, + "loss": 0.0814, + "step": 114 + }, + { + "epoch": 0.7666666666666667, + "grad_norm": 0.9156790971755981, + "learning_rate": 8.271734841028553e-06, + "loss": 0.1564, + "step": 115 + }, + { + "epoch": 0.7733333333333333, + "grad_norm": 4.86236572265625, + "learning_rate": 7.843959053281663e-06, + "loss": 0.6666, + "step": 116 + }, + { + "epoch": 0.78, + "grad_norm": 4.032678127288818, + "learning_rate": 7.4254735643584564e-06, + "loss": 0.4821, + "step": 117 + }, + { + "epoch": 0.7866666666666666, + "grad_norm": 2.2866246700286865, + "learning_rate": 7.016504991533726e-06, + "loss": 0.2771, + "step": 118 + }, + { + "epoch": 0.7933333333333333, + "grad_norm": 2.304269790649414, + "learning_rate": 6.617274798504286e-06, + "loss": 0.302, + "step": 119 + }, + { + "epoch": 0.8, + "grad_norm": 1.9873311519622803, + "learning_rate": 6.22799917546252e-06, + "loss": 0.3067, + "step": 120 + }, + { + "epoch": 0.8066666666666666, + "grad_norm": 3.1299631595611572, + "learning_rate": 5.848888922025553e-06, + "loss": 0.6448, + "step": 121 + }, + { + "epoch": 0.8133333333333334, + "grad_norm": 1.7106144428253174, + "learning_rate": 5.48014933308352e-06, + "loss": 0.3074, + "step": 122 + }, + { + "epoch": 0.82, + "grad_norm": 1.6227457523345947, + "learning_rate": 5.121980087628803e-06, + "loss": 0.1963, + "step": 123 + }, + { + "epoch": 0.8266666666666667, + "grad_norm": 4.482200622558594, + "learning_rate": 4.7745751406263165e-06, + "loss": 0.7424, + "step": 124 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 2.32167911529541, + "learning_rate": 4.438122617983443e-06, + "loss": 0.5608, + "step": 125 + }, + { + "epoch": 0.84, + "grad_norm": 0.7932979464530945, + "learning_rate": 4.112804714676594e-06, + "loss": 0.0947, + "step": 126 + }, + { + "epoch": 0.8466666666666667, + "grad_norm": 0.28471994400024414, + "learning_rate": 3.798797596089351e-06, + "loss": 0.0437, + "step": 127 + }, + { + "epoch": 0.8533333333333334, + "grad_norm": 2.5803308486938477, + "learning_rate": 3.4962713026158694e-06, + "loss": 0.2781, + "step": 128 + }, + { + "epoch": 0.86, + "grad_norm": 2.4017975330352783, + "learning_rate": 3.205389657580943e-06, + "loss": 0.5838, + "step": 129 + }, + { + "epoch": 0.8666666666666667, + "grad_norm": 3.2663633823394775, + "learning_rate": 2.9263101785268254e-06, + "loss": 0.473, + "step": 130 + }, + { + "epoch": 0.8733333333333333, + "grad_norm": 2.067620277404785, + "learning_rate": 2.659183991914696e-06, + "loss": 0.2994, + "step": 131 + }, + { + "epoch": 0.88, + "grad_norm": 1.4989666938781738, + "learning_rate": 2.4041557512869878e-06, + "loss": 0.1795, + "step": 132 + }, + { + "epoch": 0.8866666666666667, + "grad_norm": 1.2955349683761597, + "learning_rate": 2.1613635589349756e-06, + "loss": 0.193, + "step": 133 + }, + { + "epoch": 0.8933333333333333, + "grad_norm": 0.9127891659736633, + "learning_rate": 1.9309388911139426e-06, + "loss": 0.1546, + "step": 134 + }, + { + "epoch": 0.9, + "grad_norm": 2.5144217014312744, + "learning_rate": 1.713006526846439e-06, + "loss": 0.2405, + "step": 135 + }, + { + "epoch": 0.9066666666666666, + "grad_norm": 3.5872600078582764, + "learning_rate": 1.5076844803522922e-06, + "loss": 0.8543, + "step": 136 + }, + { + "epoch": 0.9133333333333333, + "grad_norm": 1.16169011592865, + "learning_rate": 1.31508393714177e-06, + "loss": 0.1759, + "step": 137 + }, + { + "epoch": 0.92, + "grad_norm": 1.221109390258789, + "learning_rate": 1.1353091938067023e-06, + "loss": 0.1709, + "step": 138 + }, + { + "epoch": 0.9266666666666666, + "grad_norm": 1.7916854619979858, + "learning_rate": 9.684576015420278e-07, + "loss": 0.4347, + "step": 139 + }, + { + "epoch": 0.9333333333333333, + "grad_norm": 2.9777231216430664, + "learning_rate": 8.146195134284052e-07, + "loss": 0.3878, + "step": 140 + }, + { + "epoch": 0.94, + "grad_norm": 4.271992206573486, + "learning_rate": 6.738782355044049e-07, + "loss": 0.3858, + "step": 141 + }, + { + "epoch": 0.9466666666666667, + "grad_norm": 1.1091471910476685, + "learning_rate": 5.463099816548579e-07, + "loss": 0.1942, + "step": 142 + }, + { + "epoch": 0.9533333333333334, + "grad_norm": 3.5003552436828613, + "learning_rate": 4.319838323396691e-07, + "loss": 0.4267, + "step": 143 + }, + { + "epoch": 0.96, + "grad_norm": 4.55532169342041, + "learning_rate": 3.309616971855195e-07, + "loss": 0.3795, + "step": 144 + }, + { + "epoch": 0.9666666666666667, + "grad_norm": 2.3853328227996826, + "learning_rate": 2.4329828146074095e-07, + "loss": 0.1976, + "step": 145 + }, + { + "epoch": 0.9733333333333334, + "grad_norm": 1.388493537902832, + "learning_rate": 1.6904105645142444e-07, + "loss": 0.1673, + "step": 146 + }, + { + "epoch": 0.98, + "grad_norm": 2.8709473609924316, + "learning_rate": 1.0823023375489127e-07, + "loss": 0.3265, + "step": 147 + }, + { + "epoch": 0.9866666666666667, + "grad_norm": 1.2488735914230347, + "learning_rate": 6.089874350439506e-08, + "loss": 0.1556, + "step": 148 + }, + { + "epoch": 0.9933333333333333, + "grad_norm": 2.21366810798645, + "learning_rate": 2.7072216536885853e-08, + "loss": 0.7468, + "step": 149 + }, + { + "epoch": 1.0, + "grad_norm": 3.950915813446045, + "learning_rate": 6.768970513457151e-09, + "loss": 0.4149, + "step": 150 + } + ], + "logging_steps": 1, + "max_steps": 150, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 49, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.0335481997190758e+17, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/exp2_signalc_20251215/checkpoint-150/training_args.bin b/exp2_signalc_20251215/checkpoint-150/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..591bbc618a96850d2fc0f24befec3b8cdb986d33 --- /dev/null +++ b/exp2_signalc_20251215/checkpoint-150/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50702002f1fdf6d5e91c92ea8225d6a002d69ced0f5b2ce1f30eb32ce7dad475 +size 5841 diff --git a/exp2_signalc_20251215/checkpoint-49/README.md b/exp2_signalc_20251215/checkpoint-49/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d283785603342a12561751304db4c120bbd293fc --- /dev/null +++ b/exp2_signalc_20251215/checkpoint-49/README.md @@ -0,0 +1,207 @@ +--- +base_model: Qwen/Qwen2.5-Coder-14B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-Coder-14B-Instruct +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/exp2_signalc_20251215/checkpoint-49/adapter_config.json b/exp2_signalc_20251215/checkpoint-49/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b4d747200220490dff0f82aa49e63f702a859ccc --- /dev/null +++ b/exp2_signalc_20251215/checkpoint-49/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-Coder-14B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "q_proj", + "k_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/exp2_signalc_20251215/checkpoint-49/adapter_model.safetensors b/exp2_signalc_20251215/checkpoint-49/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14ed0e04686280ba0fdb16d184f2ef1c70b44cf1 --- /dev/null +++ b/exp2_signalc_20251215/checkpoint-49/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85dde52ed3e966eb9b45ba1443e51a20bca9181425fdee22b56349bf12526429 +size 201378736 diff --git a/exp2_signalc_20251215/checkpoint-49/optimizer.pt b/exp2_signalc_20251215/checkpoint-49/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..d22fe7dd20466634ecea504942ac24a0d0acc96d --- /dev/null +++ b/exp2_signalc_20251215/checkpoint-49/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:996ab8887dece8606a23346cee07263de6d5896bff9cac3880254b48f9c44eb6 +size 402982627 diff --git a/exp2_signalc_20251215/checkpoint-49/rng_state.pth b/exp2_signalc_20251215/checkpoint-49/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..807f44bba3301035e4b6efdb53a05c44d1d0b12a --- /dev/null +++ b/exp2_signalc_20251215/checkpoint-49/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:207e0116b01753d9990f17727a121db67c5ed48acd073905c6656f847aa88f26 +size 14645 diff --git a/exp2_signalc_20251215/checkpoint-49/scheduler.pt b/exp2_signalc_20251215/checkpoint-49/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e65b75fc4b2f2e3587883a6b70aab9794f70a87e --- /dev/null +++ b/exp2_signalc_20251215/checkpoint-49/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd804ec5aea3d063766af287574720b1bb6665dd62c67e945105e50ae8eca9cc +size 1465 diff --git a/exp2_signalc_20251215/checkpoint-49/trainer_state.json b/exp2_signalc_20251215/checkpoint-49/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..69258c3659d2dbd1095cd5c6b93863431a934db9 --- /dev/null +++ b/exp2_signalc_20251215/checkpoint-49/trainer_state.json @@ -0,0 +1,377 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.32666666666666666, + "eval_steps": 100, + "global_step": 49, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.006666666666666667, + "grad_norm": 2.007404327392578, + "learning_rate": 0.0, + "loss": 0.6595, + "step": 1 + }, + { + "epoch": 0.013333333333333334, + "grad_norm": 1.8502237796783447, + "learning_rate": 3.3333333333333333e-06, + "loss": 1.1139, + "step": 2 + }, + { + "epoch": 0.02, + "grad_norm": 1.2331303358078003, + "learning_rate": 6.666666666666667e-06, + "loss": 0.578, + "step": 3 + }, + { + "epoch": 0.02666666666666667, + "grad_norm": 1.2300695180892944, + "learning_rate": 1e-05, + "loss": 0.776, + "step": 4 + }, + { + "epoch": 0.03333333333333333, + "grad_norm": 3.3759779930114746, + "learning_rate": 1.3333333333333333e-05, + "loss": 0.7998, + "step": 5 + }, + { + "epoch": 0.04, + "grad_norm": 3.43403697013855, + "learning_rate": 1.6666666666666667e-05, + "loss": 1.4564, + "step": 6 + }, + { + "epoch": 0.04666666666666667, + "grad_norm": 3.449822187423706, + "learning_rate": 2e-05, + "loss": 0.899, + "step": 7 + }, + { + "epoch": 0.05333333333333334, + "grad_norm": 1.4381998777389526, + "learning_rate": 2.3333333333333336e-05, + "loss": 0.2811, + "step": 8 + }, + { + "epoch": 0.06, + "grad_norm": 1.234094500541687, + "learning_rate": 2.6666666666666667e-05, + "loss": 0.1841, + "step": 9 + }, + { + "epoch": 0.06666666666666667, + "grad_norm": 0.9779098033905029, + "learning_rate": 3e-05, + "loss": 0.2704, + "step": 10 + }, + { + "epoch": 0.07333333333333333, + "grad_norm": 1.1498095989227295, + "learning_rate": 3.3333333333333335e-05, + "loss": 0.2534, + "step": 11 + }, + { + "epoch": 0.08, + "grad_norm": 1.928687334060669, + "learning_rate": 3.6666666666666666e-05, + "loss": 0.5162, + "step": 12 + }, + { + "epoch": 0.08666666666666667, + "grad_norm": 1.8929930925369263, + "learning_rate": 4e-05, + "loss": 0.9282, + "step": 13 + }, + { + "epoch": 0.09333333333333334, + "grad_norm": 3.466400146484375, + "learning_rate": 4.3333333333333334e-05, + "loss": 0.8342, + "step": 14 + }, + { + "epoch": 0.1, + "grad_norm": 2.0776634216308594, + "learning_rate": 4.666666666666667e-05, + "loss": 0.7976, + "step": 15 + }, + { + "epoch": 0.10666666666666667, + "grad_norm": 2.7737135887145996, + "learning_rate": 5e-05, + "loss": 1.1665, + "step": 16 + }, + { + "epoch": 0.11333333333333333, + "grad_norm": 1.192466139793396, + "learning_rate": 4.9993231029486544e-05, + "loss": 0.5163, + "step": 17 + }, + { + "epoch": 0.12, + "grad_norm": 1.5976535081863403, + "learning_rate": 4.997292778346312e-05, + "loss": 0.5035, + "step": 18 + }, + { + "epoch": 0.12666666666666668, + "grad_norm": 1.513372540473938, + "learning_rate": 4.993910125649561e-05, + "loss": 0.6089, + "step": 19 + }, + { + "epoch": 0.13333333333333333, + "grad_norm": 1.6169334650039673, + "learning_rate": 4.989176976624511e-05, + "loss": 0.6053, + "step": 20 + }, + { + "epoch": 0.14, + "grad_norm": 1.9578418731689453, + "learning_rate": 4.983095894354858e-05, + "loss": 0.8322, + "step": 21 + }, + { + "epoch": 0.14666666666666667, + "grad_norm": 2.1667706966400146, + "learning_rate": 4.975670171853926e-05, + "loss": 0.8027, + "step": 22 + }, + { + "epoch": 0.15333333333333332, + "grad_norm": 1.4784480333328247, + "learning_rate": 4.966903830281449e-05, + "loss": 0.4951, + "step": 23 + }, + { + "epoch": 0.16, + "grad_norm": 2.7624332904815674, + "learning_rate": 4.9568016167660334e-05, + "loss": 0.6269, + "step": 24 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 1.6521016359329224, + "learning_rate": 4.9453690018345144e-05, + "loss": 0.3331, + "step": 25 + }, + { + "epoch": 0.17333333333333334, + "grad_norm": 1.1457144021987915, + "learning_rate": 4.9326121764495596e-05, + "loss": 0.284, + "step": 26 + }, + { + "epoch": 0.18, + "grad_norm": 1.4980254173278809, + "learning_rate": 4.9185380486571595e-05, + "loss": 0.6191, + "step": 27 + }, + { + "epoch": 0.18666666666666668, + "grad_norm": 1.3955901861190796, + "learning_rate": 4.9031542398457974e-05, + "loss": 0.662, + "step": 28 + }, + { + "epoch": 0.19333333333333333, + "grad_norm": 1.2383167743682861, + "learning_rate": 4.88646908061933e-05, + "loss": 0.79, + "step": 29 + }, + { + "epoch": 0.2, + "grad_norm": 1.2159212827682495, + "learning_rate": 4.868491606285823e-05, + "loss": 0.4194, + "step": 30 + }, + { + "epoch": 0.20666666666666667, + "grad_norm": 2.2957308292388916, + "learning_rate": 4.849231551964771e-05, + "loss": 0.5185, + "step": 31 + }, + { + "epoch": 0.21333333333333335, + "grad_norm": 0.6989065408706665, + "learning_rate": 4.828699347315356e-05, + "loss": 0.2625, + "step": 32 + }, + { + "epoch": 0.22, + "grad_norm": 2.02852463722229, + "learning_rate": 4.806906110888606e-05, + "loss": 0.6494, + "step": 33 + }, + { + "epoch": 0.22666666666666666, + "grad_norm": 1.653140902519226, + "learning_rate": 4.783863644106502e-05, + "loss": 0.5486, + "step": 34 + }, + { + "epoch": 0.23333333333333334, + "grad_norm": 1.7378313541412354, + "learning_rate": 4.759584424871302e-05, + "loss": 0.6112, + "step": 35 + }, + { + "epoch": 0.24, + "grad_norm": 1.8570767641067505, + "learning_rate": 4.734081600808531e-05, + "loss": 0.8117, + "step": 36 + }, + { + "epoch": 0.24666666666666667, + "grad_norm": 0.6243714094161987, + "learning_rate": 4.707368982147318e-05, + "loss": 0.284, + "step": 37 + }, + { + "epoch": 0.25333333333333335, + "grad_norm": 1.335660457611084, + "learning_rate": 4.679461034241906e-05, + "loss": 0.5556, + "step": 38 + }, + { + "epoch": 0.26, + "grad_norm": 3.071352958679199, + "learning_rate": 4.650372869738414e-05, + "loss": 0.6837, + "step": 39 + }, + { + "epoch": 0.26666666666666666, + "grad_norm": 1.6223986148834229, + "learning_rate": 4.620120240391065e-05, + "loss": 0.6445, + "step": 40 + }, + { + "epoch": 0.2733333333333333, + "grad_norm": 1.174696445465088, + "learning_rate": 4.588719528532342e-05, + "loss": 0.353, + "step": 41 + }, + { + "epoch": 0.28, + "grad_norm": 1.129868507385254, + "learning_rate": 4.556187738201656e-05, + "loss": 0.4056, + "step": 42 + }, + { + "epoch": 0.2866666666666667, + "grad_norm": 1.917558193206787, + "learning_rate": 4.522542485937369e-05, + "loss": 1.23, + "step": 43 + }, + { + "epoch": 0.29333333333333333, + "grad_norm": 1.72930908203125, + "learning_rate": 4.48780199123712e-05, + "loss": 0.6128, + "step": 44 + }, + { + "epoch": 0.3, + "grad_norm": 1.7508667707443237, + "learning_rate": 4.4519850666916484e-05, + "loss": 0.8213, + "step": 45 + }, + { + "epoch": 0.30666666666666664, + "grad_norm": 0.9080920815467834, + "learning_rate": 4.415111107797445e-05, + "loss": 0.2085, + "step": 46 + }, + { + "epoch": 0.31333333333333335, + "grad_norm": 1.75723135471344, + "learning_rate": 4.377200082453749e-05, + "loss": 0.5145, + "step": 47 + }, + { + "epoch": 0.32, + "grad_norm": 1.450878620147705, + "learning_rate": 4.3382725201495723e-05, + "loss": 0.6667, + "step": 48 + }, + { + "epoch": 0.32666666666666666, + "grad_norm": 0.8492142558097839, + "learning_rate": 4.2983495008466276e-05, + "loss": 0.2838, + "step": 49 + } + ], + "logging_steps": 1, + "max_steps": 150, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 49, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.3818939423195136e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/exp2_signalc_20251215/checkpoint-49/training_args.bin b/exp2_signalc_20251215/checkpoint-49/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..591bbc618a96850d2fc0f24befec3b8cdb986d33 --- /dev/null +++ b/exp2_signalc_20251215/checkpoint-49/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50702002f1fdf6d5e91c92ea8225d6a002d69ced0f5b2ce1f30eb32ce7dad475 +size 5841 diff --git a/exp2_signalc_20251215/checkpoint-98/README.md b/exp2_signalc_20251215/checkpoint-98/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d283785603342a12561751304db4c120bbd293fc --- /dev/null +++ b/exp2_signalc_20251215/checkpoint-98/README.md @@ -0,0 +1,207 @@ +--- +base_model: Qwen/Qwen2.5-Coder-14B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-Coder-14B-Instruct +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/exp2_signalc_20251215/checkpoint-98/adapter_config.json b/exp2_signalc_20251215/checkpoint-98/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b4d747200220490dff0f82aa49e63f702a859ccc --- /dev/null +++ b/exp2_signalc_20251215/checkpoint-98/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-Coder-14B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "q_proj", + "k_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/exp2_signalc_20251215/checkpoint-98/adapter_model.safetensors b/exp2_signalc_20251215/checkpoint-98/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5fe4d04f04c7a3fbf31d5ed3a24180d1a91306e2 --- /dev/null +++ b/exp2_signalc_20251215/checkpoint-98/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9ef1e7552a970c536dd30ff501bbafeba8f0dd42d17ee39727e89edbbc83bb1 +size 201378736 diff --git a/exp2_signalc_20251215/checkpoint-98/optimizer.pt b/exp2_signalc_20251215/checkpoint-98/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..fdb1d8ac24f0ec05e996cc028dd11aa133c8aee4 --- /dev/null +++ b/exp2_signalc_20251215/checkpoint-98/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57649dff299d9d1db33f1c6d06000e0d52c066dd581ab61aecc072a7656bdf2c +size 402982627 diff --git a/exp2_signalc_20251215/checkpoint-98/rng_state.pth b/exp2_signalc_20251215/checkpoint-98/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..be650a4d2795b25747861c0de85f814b80c6afcd --- /dev/null +++ b/exp2_signalc_20251215/checkpoint-98/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7cec4d872024bcdf7a65d27f3802b41fba250b5d9e8ca5c3ed586a0a74d5983 +size 14645 diff --git a/exp2_signalc_20251215/checkpoint-98/scheduler.pt b/exp2_signalc_20251215/checkpoint-98/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c0064a1b0ccc5f256504b07cf75d7b6d98eabfa1 --- /dev/null +++ b/exp2_signalc_20251215/checkpoint-98/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c54ce98de4c1db5c4e0affce15c6cbebaf4fc220706243b711ba70f85829eaf2 +size 1465 diff --git a/exp2_signalc_20251215/checkpoint-98/trainer_state.json b/exp2_signalc_20251215/checkpoint-98/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..045d92f6cb0ef155b0fb14f567325b1438e20c54 --- /dev/null +++ b/exp2_signalc_20251215/checkpoint-98/trainer_state.json @@ -0,0 +1,720 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.6533333333333333, + "eval_steps": 100, + "global_step": 98, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.006666666666666667, + "grad_norm": 2.007404327392578, + "learning_rate": 0.0, + "loss": 0.6595, + "step": 1 + }, + { + "epoch": 0.013333333333333334, + "grad_norm": 1.8502237796783447, + "learning_rate": 3.3333333333333333e-06, + "loss": 1.1139, + "step": 2 + }, + { + "epoch": 0.02, + "grad_norm": 1.2331303358078003, + "learning_rate": 6.666666666666667e-06, + "loss": 0.578, + "step": 3 + }, + { + "epoch": 0.02666666666666667, + "grad_norm": 1.2300695180892944, + "learning_rate": 1e-05, + "loss": 0.776, + "step": 4 + }, + { + "epoch": 0.03333333333333333, + "grad_norm": 3.3759779930114746, + "learning_rate": 1.3333333333333333e-05, + "loss": 0.7998, + "step": 5 + }, + { + "epoch": 0.04, + "grad_norm": 3.43403697013855, + "learning_rate": 1.6666666666666667e-05, + "loss": 1.4564, + "step": 6 + }, + { + "epoch": 0.04666666666666667, + "grad_norm": 3.449822187423706, + "learning_rate": 2e-05, + "loss": 0.899, + "step": 7 + }, + { + "epoch": 0.05333333333333334, + "grad_norm": 1.4381998777389526, + "learning_rate": 2.3333333333333336e-05, + "loss": 0.2811, + "step": 8 + }, + { + "epoch": 0.06, + "grad_norm": 1.234094500541687, + "learning_rate": 2.6666666666666667e-05, + "loss": 0.1841, + "step": 9 + }, + { + "epoch": 0.06666666666666667, + "grad_norm": 0.9779098033905029, + "learning_rate": 3e-05, + "loss": 0.2704, + "step": 10 + }, + { + "epoch": 0.07333333333333333, + "grad_norm": 1.1498095989227295, + "learning_rate": 3.3333333333333335e-05, + "loss": 0.2534, + "step": 11 + }, + { + "epoch": 0.08, + "grad_norm": 1.928687334060669, + "learning_rate": 3.6666666666666666e-05, + "loss": 0.5162, + "step": 12 + }, + { + "epoch": 0.08666666666666667, + "grad_norm": 1.8929930925369263, + "learning_rate": 4e-05, + "loss": 0.9282, + "step": 13 + }, + { + "epoch": 0.09333333333333334, + "grad_norm": 3.466400146484375, + "learning_rate": 4.3333333333333334e-05, + "loss": 0.8342, + "step": 14 + }, + { + "epoch": 0.1, + "grad_norm": 2.0776634216308594, + "learning_rate": 4.666666666666667e-05, + "loss": 0.7976, + "step": 15 + }, + { + "epoch": 0.10666666666666667, + "grad_norm": 2.7737135887145996, + "learning_rate": 5e-05, + "loss": 1.1665, + "step": 16 + }, + { + "epoch": 0.11333333333333333, + "grad_norm": 1.192466139793396, + "learning_rate": 4.9993231029486544e-05, + "loss": 0.5163, + "step": 17 + }, + { + "epoch": 0.12, + "grad_norm": 1.5976535081863403, + "learning_rate": 4.997292778346312e-05, + "loss": 0.5035, + "step": 18 + }, + { + "epoch": 0.12666666666666668, + "grad_norm": 1.513372540473938, + "learning_rate": 4.993910125649561e-05, + "loss": 0.6089, + "step": 19 + }, + { + "epoch": 0.13333333333333333, + "grad_norm": 1.6169334650039673, + "learning_rate": 4.989176976624511e-05, + "loss": 0.6053, + "step": 20 + }, + { + "epoch": 0.14, + "grad_norm": 1.9578418731689453, + "learning_rate": 4.983095894354858e-05, + "loss": 0.8322, + "step": 21 + }, + { + "epoch": 0.14666666666666667, + "grad_norm": 2.1667706966400146, + "learning_rate": 4.975670171853926e-05, + "loss": 0.8027, + "step": 22 + }, + { + "epoch": 0.15333333333333332, + "grad_norm": 1.4784480333328247, + "learning_rate": 4.966903830281449e-05, + "loss": 0.4951, + "step": 23 + }, + { + "epoch": 0.16, + "grad_norm": 2.7624332904815674, + "learning_rate": 4.9568016167660334e-05, + "loss": 0.6269, + "step": 24 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 1.6521016359329224, + "learning_rate": 4.9453690018345144e-05, + "loss": 0.3331, + "step": 25 + }, + { + "epoch": 0.17333333333333334, + "grad_norm": 1.1457144021987915, + "learning_rate": 4.9326121764495596e-05, + "loss": 0.284, + "step": 26 + }, + { + "epoch": 0.18, + "grad_norm": 1.4980254173278809, + "learning_rate": 4.9185380486571595e-05, + "loss": 0.6191, + "step": 27 + }, + { + "epoch": 0.18666666666666668, + "grad_norm": 1.3955901861190796, + "learning_rate": 4.9031542398457974e-05, + "loss": 0.662, + "step": 28 + }, + { + "epoch": 0.19333333333333333, + "grad_norm": 1.2383167743682861, + "learning_rate": 4.88646908061933e-05, + "loss": 0.79, + "step": 29 + }, + { + "epoch": 0.2, + "grad_norm": 1.2159212827682495, + "learning_rate": 4.868491606285823e-05, + "loss": 0.4194, + "step": 30 + }, + { + "epoch": 0.20666666666666667, + "grad_norm": 2.2957308292388916, + "learning_rate": 4.849231551964771e-05, + "loss": 0.5185, + "step": 31 + }, + { + "epoch": 0.21333333333333335, + "grad_norm": 0.6989065408706665, + "learning_rate": 4.828699347315356e-05, + "loss": 0.2625, + "step": 32 + }, + { + "epoch": 0.22, + "grad_norm": 2.02852463722229, + "learning_rate": 4.806906110888606e-05, + "loss": 0.6494, + "step": 33 + }, + { + "epoch": 0.22666666666666666, + "grad_norm": 1.653140902519226, + "learning_rate": 4.783863644106502e-05, + "loss": 0.5486, + "step": 34 + }, + { + "epoch": 0.23333333333333334, + "grad_norm": 1.7378313541412354, + "learning_rate": 4.759584424871302e-05, + "loss": 0.6112, + "step": 35 + }, + { + "epoch": 0.24, + "grad_norm": 1.8570767641067505, + "learning_rate": 4.734081600808531e-05, + "loss": 0.8117, + "step": 36 + }, + { + "epoch": 0.24666666666666667, + "grad_norm": 0.6243714094161987, + "learning_rate": 4.707368982147318e-05, + "loss": 0.284, + "step": 37 + }, + { + "epoch": 0.25333333333333335, + "grad_norm": 1.335660457611084, + "learning_rate": 4.679461034241906e-05, + "loss": 0.5556, + "step": 38 + }, + { + "epoch": 0.26, + "grad_norm": 3.071352958679199, + "learning_rate": 4.650372869738414e-05, + "loss": 0.6837, + "step": 39 + }, + { + "epoch": 0.26666666666666666, + "grad_norm": 1.6223986148834229, + "learning_rate": 4.620120240391065e-05, + "loss": 0.6445, + "step": 40 + }, + { + "epoch": 0.2733333333333333, + "grad_norm": 1.174696445465088, + "learning_rate": 4.588719528532342e-05, + "loss": 0.353, + "step": 41 + }, + { + "epoch": 0.28, + "grad_norm": 1.129868507385254, + "learning_rate": 4.556187738201656e-05, + "loss": 0.4056, + "step": 42 + }, + { + "epoch": 0.2866666666666667, + "grad_norm": 1.917558193206787, + "learning_rate": 4.522542485937369e-05, + "loss": 1.23, + "step": 43 + }, + { + "epoch": 0.29333333333333333, + "grad_norm": 1.72930908203125, + "learning_rate": 4.48780199123712e-05, + "loss": 0.6128, + "step": 44 + }, + { + "epoch": 0.3, + "grad_norm": 1.7508667707443237, + "learning_rate": 4.4519850666916484e-05, + "loss": 0.8213, + "step": 45 + }, + { + "epoch": 0.30666666666666664, + "grad_norm": 0.9080920815467834, + "learning_rate": 4.415111107797445e-05, + "loss": 0.2085, + "step": 46 + }, + { + "epoch": 0.31333333333333335, + "grad_norm": 1.75723135471344, + "learning_rate": 4.377200082453749e-05, + "loss": 0.5145, + "step": 47 + }, + { + "epoch": 0.32, + "grad_norm": 1.450878620147705, + "learning_rate": 4.3382725201495723e-05, + "loss": 0.6667, + "step": 48 + }, + { + "epoch": 0.32666666666666666, + "grad_norm": 0.8492142558097839, + "learning_rate": 4.2983495008466276e-05, + "loss": 0.2838, + "step": 49 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 1.2550184726715088, + "learning_rate": 4.257452643564155e-05, + "loss": 0.3858, + "step": 50 + }, + { + "epoch": 0.34, + "grad_norm": 2.2642593383789062, + "learning_rate": 4.215604094671835e-05, + "loss": 0.6531, + "step": 51 + }, + { + "epoch": 0.3466666666666667, + "grad_norm": 3.926060199737549, + "learning_rate": 4.172826515897146e-05, + "loss": 0.4836, + "step": 52 + }, + { + "epoch": 0.35333333333333333, + "grad_norm": 1.8190771341323853, + "learning_rate": 4.129143072053638e-05, + "loss": 0.3158, + "step": 53 + }, + { + "epoch": 0.36, + "grad_norm": 1.4268187284469604, + "learning_rate": 4.0845774184967754e-05, + "loss": 0.5298, + "step": 54 + }, + { + "epoch": 0.36666666666666664, + "grad_norm": 1.9656697511672974, + "learning_rate": 4.039153688314145e-05, + "loss": 0.4887, + "step": 55 + }, + { + "epoch": 0.37333333333333335, + "grad_norm": 1.1335246562957764, + "learning_rate": 3.9928964792569655e-05, + "loss": 0.1837, + "step": 56 + }, + { + "epoch": 0.38, + "grad_norm": 3.525275707244873, + "learning_rate": 3.945830840419966e-05, + "loss": 0.8998, + "step": 57 + }, + { + "epoch": 0.38666666666666666, + "grad_norm": 2.12524676322937, + "learning_rate": 3.897982258676867e-05, + "loss": 0.3827, + "step": 58 + }, + { + "epoch": 0.3933333333333333, + "grad_norm": 1.9228296279907227, + "learning_rate": 3.8493766448787825e-05, + "loss": 0.5591, + "step": 59 + }, + { + "epoch": 0.4, + "grad_norm": 1.3674192428588867, + "learning_rate": 3.8000403198230387e-05, + "loss": 0.4872, + "step": 60 + }, + { + "epoch": 0.4066666666666667, + "grad_norm": 1.965265154838562, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.5459, + "step": 61 + }, + { + "epoch": 0.41333333333333333, + "grad_norm": 2.329216241836548, + "learning_rate": 3.699282783125616e-05, + "loss": 0.7548, + "step": 62 + }, + { + "epoch": 0.42, + "grad_norm": 2.118192434310913, + "learning_rate": 3.6479161334675296e-05, + "loss": 0.6821, + "step": 63 + }, + { + "epoch": 0.4266666666666667, + "grad_norm": 1.9981805086135864, + "learning_rate": 3.5959278669726935e-05, + "loss": 0.3079, + "step": 64 + }, + { + "epoch": 0.43333333333333335, + "grad_norm": 1.0037565231323242, + "learning_rate": 3.543346136204545e-05, + "loss": 0.3274, + "step": 65 + }, + { + "epoch": 0.44, + "grad_norm": 1.8539446592330933, + "learning_rate": 3.490199415097892e-05, + "loss": 0.3298, + "step": 66 + }, + { + "epoch": 0.44666666666666666, + "grad_norm": 1.1413832902908325, + "learning_rate": 3.436516483539781e-05, + "loss": 0.3062, + "step": 67 + }, + { + "epoch": 0.4533333333333333, + "grad_norm": 2.7091305255889893, + "learning_rate": 3.382326411784672e-05, + "loss": 0.4897, + "step": 68 + }, + { + "epoch": 0.46, + "grad_norm": 1.9014657735824585, + "learning_rate": 3.327658544712395e-05, + "loss": 0.422, + "step": 69 + }, + { + "epoch": 0.4666666666666667, + "grad_norm": 1.7714844942092896, + "learning_rate": 3.272542485937369e-05, + "loss": 0.2594, + "step": 70 + }, + { + "epoch": 0.47333333333333333, + "grad_norm": 0.7899397611618042, + "learning_rate": 3.217008081777726e-05, + "loss": 0.1847, + "step": 71 + }, + { + "epoch": 0.48, + "grad_norm": 1.2437670230865479, + "learning_rate": 3.161085405093006e-05, + "loss": 0.2063, + "step": 72 + }, + { + "epoch": 0.4866666666666667, + "grad_norm": 1.4275377988815308, + "learning_rate": 3.104804738999169e-05, + "loss": 0.7009, + "step": 73 + }, + { + "epoch": 0.49333333333333335, + "grad_norm": 1.6646203994750977, + "learning_rate": 3.048196560469758e-05, + "loss": 0.2215, + "step": 74 + }, + { + "epoch": 0.5, + "grad_norm": 2.504795551300049, + "learning_rate": 2.9912915238320754e-05, + "loss": 0.5467, + "step": 75 + }, + { + "epoch": 0.5066666666666667, + "grad_norm": 1.1971930265426636, + "learning_rate": 2.9341204441673266e-05, + "loss": 0.1757, + "step": 76 + }, + { + "epoch": 0.5133333333333333, + "grad_norm": 1.2171498537063599, + "learning_rate": 2.876714280623708e-05, + "loss": 0.1898, + "step": 77 + }, + { + "epoch": 0.52, + "grad_norm": 1.5181541442871094, + "learning_rate": 2.8191041196514873e-05, + "loss": 0.2299, + "step": 78 + }, + { + "epoch": 0.5266666666666666, + "grad_norm": 1.1093655824661255, + "learning_rate": 2.761321158169134e-05, + "loss": 0.1639, + "step": 79 + }, + { + "epoch": 0.5333333333333333, + "grad_norm": 4.772696495056152, + "learning_rate": 2.7033966866696457e-05, + "loss": 0.9071, + "step": 80 + }, + { + "epoch": 0.54, + "grad_norm": 4.373753547668457, + "learning_rate": 2.6453620722761896e-05, + "loss": 1.315, + "step": 81 + }, + { + "epoch": 0.5466666666666666, + "grad_norm": 0.4806381165981293, + "learning_rate": 2.587248741756253e-05, + "loss": 0.0859, + "step": 82 + }, + { + "epoch": 0.5533333333333333, + "grad_norm": 2.003674030303955, + "learning_rate": 2.5290881645034932e-05, + "loss": 0.5889, + "step": 83 + }, + { + "epoch": 0.56, + "grad_norm": 3.2717702388763428, + "learning_rate": 2.470911835496508e-05, + "loss": 0.2551, + "step": 84 + }, + { + "epoch": 0.5666666666666667, + "grad_norm": 3.508592367172241, + "learning_rate": 2.4127512582437485e-05, + "loss": 0.7869, + "step": 85 + }, + { + "epoch": 0.5733333333333334, + "grad_norm": 1.1418286561965942, + "learning_rate": 2.3546379277238107e-05, + "loss": 0.1538, + "step": 86 + }, + { + "epoch": 0.58, + "grad_norm": 0.9381025433540344, + "learning_rate": 2.2966033133303545e-05, + "loss": 0.0973, + "step": 87 + }, + { + "epoch": 0.5866666666666667, + "grad_norm": 2.441453218460083, + "learning_rate": 2.238678841830867e-05, + "loss": 0.86, + "step": 88 + }, + { + "epoch": 0.5933333333333334, + "grad_norm": 0.7263301610946655, + "learning_rate": 2.1808958803485136e-05, + "loss": 0.1041, + "step": 89 + }, + { + "epoch": 0.6, + "grad_norm": 1.5983655452728271, + "learning_rate": 2.1232857193762924e-05, + "loss": 0.4031, + "step": 90 + }, + { + "epoch": 0.6066666666666667, + "grad_norm": 2.9620091915130615, + "learning_rate": 2.0658795558326743e-05, + "loss": 0.7484, + "step": 91 + }, + { + "epoch": 0.6133333333333333, + "grad_norm": 1.8278090953826904, + "learning_rate": 2.0087084761679245e-05, + "loss": 0.4001, + "step": 92 + }, + { + "epoch": 0.62, + "grad_norm": 1.282475233078003, + "learning_rate": 1.9518034395302414e-05, + "loss": 0.4047, + "step": 93 + }, + { + "epoch": 0.6266666666666667, + "grad_norm": 1.196440577507019, + "learning_rate": 1.895195261000831e-05, + "loss": 0.1718, + "step": 94 + }, + { + "epoch": 0.6333333333333333, + "grad_norm": 3.1636035442352295, + "learning_rate": 1.838914594906995e-05, + "loss": 0.773, + "step": 95 + }, + { + "epoch": 0.64, + "grad_norm": 0.34068605303764343, + "learning_rate": 1.7829919182222752e-05, + "loss": 0.0501, + "step": 96 + }, + { + "epoch": 0.6466666666666666, + "grad_norm": 2.373602867126465, + "learning_rate": 1.7274575140626318e-05, + "loss": 0.6528, + "step": 97 + }, + { + "epoch": 0.6533333333333333, + "grad_norm": 3.809894561767578, + "learning_rate": 1.672341455287605e-05, + "loss": 0.4916, + "step": 98 + } + ], + "logging_steps": 1, + "max_steps": 150, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 49, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 6.763787884639027e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/exp2_signalc_20251215/checkpoint-98/training_args.bin b/exp2_signalc_20251215/checkpoint-98/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..591bbc618a96850d2fc0f24befec3b8cdb986d33 --- /dev/null +++ b/exp2_signalc_20251215/checkpoint-98/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50702002f1fdf6d5e91c92ea8225d6a002d69ced0f5b2ce1f30eb32ce7dad475 +size 5841 diff --git a/exp2_signalc_20251215/final_model/README.md b/exp2_signalc_20251215/final_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d283785603342a12561751304db4c120bbd293fc --- /dev/null +++ b/exp2_signalc_20251215/final_model/README.md @@ -0,0 +1,207 @@ +--- +base_model: Qwen/Qwen2.5-Coder-14B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-Coder-14B-Instruct +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/exp2_signalc_20251215/final_model/adapter_config.json b/exp2_signalc_20251215/final_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b4d747200220490dff0f82aa49e63f702a859ccc --- /dev/null +++ b/exp2_signalc_20251215/final_model/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-Coder-14B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "q_proj", + "k_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/exp2_signalc_20251215/final_model/adapter_model.safetensors b/exp2_signalc_20251215/final_model/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..33672935cb7c3d885ecc8c264c0061e5437e8b82 --- /dev/null +++ b/exp2_signalc_20251215/final_model/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b952f9f010e213946ed7983bb5affbcb494295dc69e15839f0f632edc00432e +size 201378736 diff --git a/exp2_signalc_20251215/final_model/training_args.bin b/exp2_signalc_20251215/final_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..591bbc618a96850d2fc0f24befec3b8cdb986d33 --- /dev/null +++ b/exp2_signalc_20251215/final_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50702002f1fdf6d5e91c92ea8225d6a002d69ced0f5b2ce1f30eb32ce7dad475 +size 5841