NetherQuartz committed
Commit 8c5081a · verified · 1 parent: 6f6876a

With packing and FA2

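The commit message points at two training-efficiency switches: sequence packing and FlashAttention-2 ("FA2"). Below is a minimal sketch of how both are typically enabled in a TRL 0.23 SFT run; the dataset and output path are placeholders, since the actual training script is not part of this commit.

```python
import torch
from datasets import load_dataset
from trl import SFTConfig, SFTTrainer

# Placeholder dataset -- the real training data is not in this commit.
dataset = load_dataset("trl-lib/Capybara", split="train")

config = SFTConfig(
    output_dir="ilo-toki-gemma-2-2b",
    packing=True,  # "packing": concatenate short samples into full-length sequences
    model_init_kwargs={
        "torch_dtype": torch.bfloat16,
        "attn_implementation": "flash_attention_2",  # "FA2"
    },
)

trainer = SFTTrainer(
    model="google/gemma-2-2b",  # base model, per the updated adapter_config.json
    args=config,
    train_dataset=dataset,
)
trainer.train()
```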
README.md CHANGED
```diff
@@ -1,16 +1,17 @@
 ---
+base_model: google/gemma-2-2b
 library_name: transformers
 model_name: ilo-toki-gemma-2-2b
 tags:
 - generated_from_trainer
-- trl
 - sft
+- trl
 licence: license
 ---
 
 # Model Card for ilo-toki-gemma-2-2b
 
-This model is a fine-tuned version of [None](https://huggingface.co/None).
+This model is a fine-tuned version of [google/gemma-2-2b](https://huggingface.co/google/gemma-2-2b).
 It has been trained using [TRL](https://github.com/huggingface/trl).
 
 ## Quick start
@@ -34,7 +35,7 @@ This model was trained with SFT.
 ### Framework versions
 
 - TRL: 0.23.0
-- Transformers: 4.56.1
+- Transformers: 4.56.2
 - Pytorch: 2.7.1+cu128
 - Datasets: 3.6.0
 - Tokenizers: 0.22.0
```
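With base_model now resolving to google/gemma-2-2b instead of None, the card's Quick start can load the adapter directly on top of the hub checkpoint. A hedged sketch follows; the adapter repo id NetherQuartz/ilo-toki-gemma-2-2b is inferred from the committer and model name, not stated in the diff.

```python
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base = AutoModelForCausalLM.from_pretrained(
    "google/gemma-2-2b",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
# Assumed repo id; adjust to wherever this adapter is actually hosted.
model = PeftModel.from_pretrained(base, "NetherQuartz/ilo-toki-gemma-2-2b")
tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-2b")

inputs = tokenizer("toki!", return_tensors="pt").to(model.device)
print(tokenizer.decode(model.generate(**inputs, max_new_tokens=50)[0]))
```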
adapter_config.json CHANGED
```diff
@@ -1,7 +1,7 @@
 {
   "alpha_pattern": {},
   "auto_mapping": null,
-  "base_model_name_or_path": "./ilo-toki-gemma-2-2b-merged",
+  "base_model_name_or_path": "google/gemma-2-2b",
   "bias": "none",
   "corda_config": null,
   "eva_config": null,
@@ -15,19 +15,17 @@
   "loftq_config": {},
   "lora_alpha": 16,
   "lora_bias": false,
-  "lora_dropout": 0.1,
+  "lora_dropout": 0.05,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
   "qalora_group_size": 16,
-  "r": 8,
+  "r": 16,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "o_proj",
     "v_proj",
-    "k_proj",
     "q_proj"
   ],
   "target_parameters": null,
```
adapter_model.safetensors CHANGED
```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f07ce223efd1f02ad338a64048f02f17395f9bc449524edae859d760ed03ab5e
-size 12806992
+oid sha256:1d080c1ec0026513d45661e070406b69639fa589b7fe7ec2139734fd883e7167
+size 12793376
```
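The new adapter file is almost exactly the size of the old one, which is consistent with a parameter count that did not change: doubling r from 8 to 16 while halving the number of target projections cancels out. A back-of-the-envelope check, assuming Gemma 2 2B's published shapes (26 layers, hidden size 2304, q_proj output 2048, k/v_proj output 1024) and fp32 adapter weights:

```python
# LoRA adds r * (d_in + d_out) parameters per adapted weight matrix.
hidden, q_out, kv_out, layers = 2304, 2048, 1024, 26

# Old: r=8 over q_proj, k_proj, v_proj, o_proj; new: r=16 over q_proj, v_proj.
old = 8 * ((hidden + q_out) + 2 * (hidden + kv_out) + (q_out + hidden)) * layers
new = 16 * ((hidden + q_out) + (hidden + kv_out)) * layers
print(old, new)  # 3194880 3194880 -- identical parameter counts

# 3_194_880 params * 4 bytes (fp32) = 12_779_520 bytes; the few extra KB in
# each file (~27 KB old, ~14 KB new) is the safetensors header, which shrinks
# when half the tensors disappear.
```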
training_args.bin CHANGED
```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:62ffb0ef3afe807b6d284cca0d2ab754d911010a49279a8afa96f74fd10d2cfa
+oid sha256:e3e0296f5ab1d22c575dd5537a22e0f0e5e9656e78750908b7f0da6188cc4636
 size 6161
```