astegaras committed (verified)
Commit 4fccf31 · Parent: 9842ec8

Upload folder using huggingface_hub

checkpoint-300/README.md CHANGED
@@ -207,4 +207,4 @@ Carbon emissions can be estimated using the [Machine Learning Impact calculator]
 [More Information Needed]
 ### Framework versions
 
-- PEFT 0.18.0
+- PEFT 0.16.0
checkpoint-300/adapter_config.json CHANGED
@@ -1,7 +1,5 @@
 {
-  "alora_invocation_tokens": null,
   "alpha_pattern": {},
-  "arrow_config": null,
   "auto_mapping": {
     "base_model_class": "LlamaForCausalLM",
     "parent_library": "transformers.models.llama.modeling_llama",
@@ -10,7 +8,6 @@
   "base_model_name_or_path": "unsloth/llama-3.2-3b-instruct-unsloth-bnb-4bit",
   "bias": "none",
   "corda_config": null,
-  "ensure_weight_tying": false,
   "eva_config": null,
   "exclude_modules": null,
   "fan_in_fan_out": false,
@@ -27,21 +24,19 @@
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
-  "peft_version": "0.18.0",
   "qalora_group_size": 16,
   "r": 16,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "v_proj",
-    "gate_proj",
-    "o_proj",
-    "down_proj",
     "k_proj",
     "up_proj",
+    "down_proj",
+    "gate_proj",
+    "o_proj",
+    "v_proj",
     "q_proj"
   ],
-  "target_parameters": null,
   "task_type": "CAUSAL_LM",
   "trainable_token_indices": null,
   "use_dora": false,
checkpoint-300/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aa56e0371c6a3c0bced56899aa3c9e69e99409ce83c8f020eced9c75deb7cc10
+oid sha256:f2604df7baf920a70fb10c6143f68b206b08ab224fc92bb6736fefc3ddd487ef
 size 97307544
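
The two files above carry the adapter itself: adapter_config.json describes a rank-16 LoRA over the attention and MLP projections of unsloth/llama-3.2-3b-instruct-unsloth-bnb-4bit, and adapter_model.safetensors holds the re-saved weights. Below is a minimal sketch of loading this checkpoint with peft and transformers; the local checkpoint path and the loading details (device placement, quantization backend) are assumptions, not part of this commit.

```python
# Minimal sketch: attach the checkpoint-300 LoRA adapter to its base model.
# Base model id and adapter hyperparameters come from adapter_config.json above;
# everything else (paths, device_map) is an assumption for illustration.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "unsloth/llama-3.2-3b-instruct-unsloth-bnb-4bit"

tokenizer = AutoTokenizer.from_pretrained(base_id)
base_model = AutoModelForCausalLM.from_pretrained(base_id, device_map="auto")

# Rank-16 LoRA over q/k/v/o and the MLP up/down/gate projections, saved at step 300.
model = PeftModel.from_pretrained(base_model, "checkpoint-300")
model.eval()
```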
checkpoint-300/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2ed4fb5b3648d3f73a00d8f11507d4538b570b85cb6ce9ef2b429bdfd7a095a4
+oid sha256:d5bc159fdb8990bd9af371d26ef1789da3ee619f8082f691c2900ba22835ea53
 size 194846331
checkpoint-300/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f1d565802a8e26c4e8a31328752b7a7fdc186d9401aa008e65697d0ad8c22e33
+oid sha256:716d83e48b4ef0ecc739db256420344d8764b568cd2fec53ea128d8653e43804
 size 14645
checkpoint-300/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:63d6a3788c539cdc306259ad5c897c2211e3e753ec371fb98ebdb21093c6b778
+oid sha256:1ff715d3693c4d6f6e799dca640277a75bef436dca0650091795c9950ba66b75
 size 1465
checkpoint-300/trainer_state.json CHANGED
@@ -2,7 +2,7 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.024,
+  "epoch": 0.048,
   "eval_steps": 500,
   "global_step": 300,
   "is_hyper_param_search": false,
@@ -10,7 +10,7 @@
   "is_world_process_zero": true,
   "log_history": [],
   "logging_steps": 9999999,
-  "max_steps": 12500,
+  "max_steps": 6250,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 150,
@@ -26,8 +26,8 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.775573797319475e+16,
-  "train_batch_size": 2,
+  "total_flos": 6.827864050910822e+16,
+  "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null
 }
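
The trainer_state.json changes are internally consistent: for a single-epoch run, doubling train_batch_size from 2 to 4 halves max_steps from 12500 to 6250, so the same global_step of 300 now lands twice as far into the epoch. A small, illustrative check (not part of the commit):

```python
# Sanity check of the trainer_state.json values above.
global_step = 300

# Old run: batch size 2, one epoch planned as 12500 optimizer steps.
print(global_step / 12_500)  # 0.024 -> matches the previous "epoch" field

# New run: batch size 4, the same epoch now fits in 6250 steps.
print(global_step / 6_250)   # 0.048 -> matches the updated "epoch" field
```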
checkpoint-300/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cd1e61689e67f92937b424f1859bcd30776e97cdda7fe9a43a40ccef44783eb7
-size 6225
+oid sha256:aa7d60f39157d6af42ddf961454a0f16be99637b24b2dc9dcfdb9f7339f17b36
+size 6161
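
training_args.bin changes hash and size because the TrainingArguments object was re-serialized under the new settings. Assuming the standard Trainer checkpoint layout, it can be inspected directly; the path below refers to the folder in this upload, and the expected value is inferred from trainer_state.json rather than read from this file.

```python
# Illustrative only: training_args.bin in a Trainer checkpoint is a pickled
# TrainingArguments object, so the new batch size can be confirmed by loading it.
import torch

args = torch.load("checkpoint-300/training_args.bin", weights_only=False)
print(args.per_device_train_batch_size)  # expected: 4, per trainer_state.json
```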