jangedoo committed · verified
Commit 2b8270d · 1 Parent(s): 92b0656

Training in progress, step 50
README.md CHANGED
@@ -1,17 +1,18 @@
 ---
-base_model: google/gemma-3-270m-it
+base_model: unsloth/gemma-3-270m-it-unsloth-bnb-4bit
 library_name: transformers
 model_name: gemma-excerpt-lora
 tags:
 - generated_from_trainer
-- trl
 - sft
+- trl
+- unsloth
 licence: license
 ---
 
 # Model Card for gemma-excerpt-lora
 
-This model is a fine-tuned version of [google/gemma-3-270m-it](https://huggingface.co/google/gemma-3-270m-it).
+This model is a fine-tuned version of [unsloth/gemma-3-270m-it-unsloth-bnb-4bit](https://huggingface.co/unsloth/gemma-3-270m-it-unsloth-bnb-4bit).
 It has been trained using [TRL](https://github.com/huggingface/trl).
 
 ## Quick start
@@ -36,8 +37,8 @@ This model was trained with SFT.
 
 - TRL: 0.21.0
 - Transformers: 4.55.4
-- Pytorch: 2.8.0+cu126
-- Datasets: 4.0.0
+- Pytorch: 2.8.0
+- Datasets: 3.6.0
 - Tokenizers: 0.21.4
 
 ## Citations
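The base-model swap means the adapter now sits on top of Unsloth's 4-bit quantized Gemma 3 270M checkpoint. A minimal inference sketch with transformers + peft; the adapter repo id `jangedoo/gemma-excerpt-lora` is inferred from the model name and may differ, and loading the bnb-4bit base requires bitsandbytes:

```python
# Sketch: load the LoRA adapter on top of the 4-bit base for inference.
# "jangedoo/gemma-excerpt-lora" is a hypothetical repo id -- adjust as needed.
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base_id = "unsloth/gemma-3-270m-it-unsloth-bnb-4bit"
adapter_id = "jangedoo/gemma-excerpt-lora"  # hypothetical repo id

tokenizer = AutoTokenizer.from_pretrained(adapter_id)  # carries the <end_of_turn> eos fix below
base = AutoModelForCausalLM.from_pretrained(base_id, device_map="auto")  # needs bitsandbytes
model = PeftModel.from_pretrained(base, adapter_id)

messages = [{"role": "user", "content": "Summarize this excerpt: ..."}]
inputs = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
out = model.generate(inputs, max_new_tokens=128)
print(tokenizer.decode(out[0][inputs.shape[-1]:], skip_special_tokens=True))
```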
adapter_config.json CHANGED
@@ -1,7 +1,10 @@
 {
   "alpha_pattern": {},
-  "auto_mapping": null,
-  "base_model_name_or_path": null,
+  "auto_mapping": {
+    "base_model_class": "Gemma3ForCausalLM",
+    "parent_library": "transformers.models.gemma3.modeling_gemma3"
+  },
+  "base_model_name_or_path": "unsloth/gemma-3-270m-it-unsloth-bnb-4bit",
   "bias": "none",
   "corda_config": null,
   "eva_config": null,
@@ -13,23 +16,20 @@
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
-  "lora_alpha": 16,
+  "lora_alpha": 8,
   "lora_bias": false,
-  "lora_dropout": 0.1,
+  "lora_dropout": 0,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
   "qalora_group_size": 16,
-  "r": 64,
+  "r": 8,
   "rank_pattern": {},
   "revision": null,
-  "target_modules": [
-    "v_proj",
-    "q_proj"
-  ],
+  "target_modules": "(?:.*?(?:language|text).*?(?:self_attn|attention|attn|mlp|feed_forward|ffn|dense).*?(?:q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj).*?)|(?:\\bmodel\\.layers\\.[\\d]{1,}\\.(?:self_attn|attention|attn|mlp|feed_forward|ffn|dense)\\.(?:(?:q_proj|k_proj|v_proj|o_proj|gate_proj|up_proj|down_proj)))",
   "target_parameters": null,
-  "task_type": "CAUSAL_LM",
+  "task_type": null,
   "trainable_token_indices": null,
   "use_dora": false,
   "use_qalora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8da909ecdeb40940d0509faeaf6ea7f3e1d3ea36cedf128b5351e070d80b4f9f
-size 11807232
+oid sha256:eb736b4fa41c2a283a2346c50b9bd9cdd8783a9e8706188d0196d6e61ffb8b84
+size 7626520
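The adapter shrinks from ~11.8 MB to ~7.6 MB: dropping r from 64 to 8 outweighs targeting more modules, since LoRA parameter count scales linearly with r. Assuming 2-byte (bf16/fp16) weights, 7626520 bytes is roughly 3.8M trainable parameters; a sketch of counting them directly from the checkpoint:

```python
# Sketch: count adapter parameters from the safetensors file.
# Repo id is hypothetical; assumes 2 bytes/param (bf16 or fp16).
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file

path = hf_hub_download("jangedoo/gemma-excerpt-lora", "adapter_model.safetensors")
state = load_file(path)
n_params = sum(t.numel() for t in state.values())
print(f"{n_params:,} LoRA parameters, ~{n_params * 2 / 1e6:.1f} MB at 2 bytes/param")
```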
chat_template.jinja CHANGED
@@ -42,6 +42,6 @@
 ' }}
 {%- endfor -%}
 {%- if add_generation_prompt -%}
-{{'<start_of_turn>model
-'}}
+{{ '<start_of_turn>model
+' }}
 {%- endif -%}
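This edit only adds spaces inside the Jinja print tags; whitespace between `{{` and the string literal does not affect output, so the rendered generation prompt is unchanged. A quick sanity check, assuming the tokenizer from this repo:

```python
# Sketch: the rendered prompt still ends with "<start_of_turn>model\n".
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("jangedoo/gemma-excerpt-lora")  # hypothetical repo id
text = tok.apply_chat_template(
    [{"role": "user", "content": "hi"}],
    tokenize=False,
    add_generation_prompt=True,
)
assert text.endswith("<start_of_turn>model\n")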
special_tokens_map.json CHANGED
@@ -9,7 +9,7 @@
   },
   "eoi_token": "<end_of_image>",
   "eos_token": {
-    "content": "<eos>",
+    "content": "<end_of_turn>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
tokenizer_config.json CHANGED
@@ -51327,16 +51327,16 @@
   "bos_token": "<bos>",
   "clean_up_tokenization_spaces": false,
   "eoi_token": "<end_of_image>",
-  "eos_token": "<eos>",
+  "eos_token": "<end_of_turn>",
   "extra_special_tokens": {
     "boi_token": "<start_of_image>",
     "eoi_token": "<end_of_image>",
     "image_token": "<image_soft_token>"
   },
   "image_token": "<image_soft_token>",
-  "model_max_length": 1000000000000000019884624838656,
+  "model_max_length": 32768,
   "pad_token": "<pad>",
-  "padding_side": "left",
+  "padding_side": "right",
   "sp_model_kwargs": null,
   "spaces_between_special_tokens": false,
   "tokenizer_class": "GemmaTokenizer",
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:84e8a5513556398f75c8aefd5b4dcc4430d64439d16189e0a89c846a8491cc99
-size 6161
+oid sha256:0d3e6ed3d0f0b43e3afdafd1298fe7a448b8008b472900a7189a8b2ddf940002
+size 6225
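`training_args.bin` is a pickled `TrainingArguments` object (here presumably TRL's `SFTConfig`); the small size change just reflects changed hyperparameter fields. A minimal inspection sketch; note that `torch.load` unpickles arbitrary objects, so only do this for files you trust:

```python
# Sketch: inspect the serialized training arguments.
import torch

args = torch.load("training_args.bin", weights_only=False)
print(type(args).__name__)
print(args.learning_rate, args.per_device_train_batch_size, args.max_steps)
```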