IoakeimE committed on
Commit
938bd5c
·
verified ·
1 Parent(s): 85c3f24

Training in progress, epoch 1

Browse files
README.md CHANGED
@@ -4,9 +4,9 @@ library_name: transformers
4
  model_name: sft_normal_simplification_mini
5
  tags:
6
  - generated_from_trainer
7
- - unsloth
8
- - trl
9
  - sft
 
 
10
  licence: license
11
  ---
12
 
@@ -28,18 +28,18 @@ print(output["generated_text"])
28
 
29
  ## Training procedure
30
 
31
- [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/ioakeime-aristotle-university-of-thessaloniki/sft-normal_smiplification_mini/runs/z5w7stnv)
32
 
33
 
34
  This model was trained with SFT.
35
 
36
  ### Framework versions
37
 
38
- - TRL: 0.18.2
39
- - Transformers: 4.52.4
40
- - Pytorch: 2.6.0
41
- - Datasets: 3.6.0
42
- - Tokenizers: 0.21.1
43
 
44
  ## Citations
45
 
 
4
  model_name: sft_normal_simplification_mini
5
  tags:
6
  - generated_from_trainer
 
 
7
  - sft
8
+ - trl
9
+ - unsloth
10
  licence: license
11
  ---
12
 
 
28
 
29
  ## Training procedure
30
 
31
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/ioakeime-aristotle-university-of-thessaloniki/sft_normal_simplification_mini/runs/n956u6m7)
32
 
33
 
34
  This model was trained with SFT.
35
 
36
  ### Framework versions
37
 
38
+ - TRL: 0.24.0
39
+ - Transformers: 4.57.2
40
+ - Pytorch: 2.9.0
41
+ - Datasets: 4.3.0
42
+ - Tokenizers: 0.22.1
43
 
44
  ## Citations
45
 
adapter_config.json CHANGED
@@ -1,9 +1,16 @@
1
  {
 
2
  "alpha_pattern": {},
3
- "auto_mapping": null,
 
 
 
 
 
4
  "base_model_name_or_path": "unsloth/mistral-7b-v0.3-bnb-4bit",
5
  "bias": "none",
6
  "corda_config": null,
 
7
  "eva_config": null,
8
  "exclude_modules": null,
9
  "fan_in_fan_out": false,
@@ -15,25 +22,29 @@
15
  "loftq_config": {},
16
  "lora_alpha": 16,
17
  "lora_bias": false,
18
- "lora_dropout": 0,
19
  "megatron_config": null,
20
  "megatron_core": "megatron.core",
21
  "modules_to_save": null,
22
  "peft_type": "LORA",
 
 
23
  "r": 16,
24
  "rank_pattern": {},
25
  "revision": null,
26
  "target_modules": [
 
 
27
  "o_proj",
28
- "down_proj",
29
  "up_proj",
30
  "k_proj",
31
- "q_proj",
32
- "v_proj",
33
- "gate_proj"
34
  ],
 
35
  "task_type": "CAUSAL_LM",
36
  "trainable_token_indices": null,
37
  "use_dora": false,
 
38
  "use_rslora": false
39
  }
 
1
  {
2
+ "alora_invocation_tokens": null,
3
  "alpha_pattern": {},
4
+ "arrow_config": null,
5
+ "auto_mapping": {
6
+ "base_model_class": "MistralForCausalLM",
7
+ "parent_library": "transformers.models.mistral.modeling_mistral",
8
+ "unsloth_fixed": true
9
+ },
10
  "base_model_name_or_path": "unsloth/mistral-7b-v0.3-bnb-4bit",
11
  "bias": "none",
12
  "corda_config": null,
13
+ "ensure_weight_tying": false,
14
  "eva_config": null,
15
  "exclude_modules": null,
16
  "fan_in_fan_out": false,
 
22
  "loftq_config": {},
23
  "lora_alpha": 16,
24
  "lora_bias": false,
25
+ "lora_dropout": 0.0,
26
  "megatron_config": null,
27
  "megatron_core": "megatron.core",
28
  "modules_to_save": null,
29
  "peft_type": "LORA",
30
+ "peft_version": "0.18.0",
31
+ "qalora_group_size": 16,
32
  "r": 16,
33
  "rank_pattern": {},
34
  "revision": null,
35
  "target_modules": [
36
+ "gate_proj",
37
+ "v_proj",
38
  "o_proj",
39
+ "q_proj",
40
  "up_proj",
41
  "k_proj",
42
+ "down_proj"
 
 
43
  ],
44
+ "target_parameters": null,
45
  "task_type": "CAUSAL_LM",
46
  "trainable_token_indices": null,
47
  "use_dora": false,
48
+ "use_qalora": false,
49
  "use_rslora": false
50
  }
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8e2367b254131b3442296c0280636c52d6649eab24f940cd1f37151983e30714
3
  size 167832240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e10332ae7f216893c51f9a59e80e5bd4f814abae33f7c296b360028caf390bb4
3
  size 167832240
tokenizer.json CHANGED
@@ -1,11 +1,6 @@
1
  {
2
  "version": "1.0",
3
- "truncation": {
4
- "direction": "Right",
5
- "max_length": 4096,
6
- "strategy": "LongestFirst",
7
- "stride": 0
8
- },
9
  "padding": null,
10
  "added_tokens": [
11
  {
@@ -6969,6 +6964,12 @@
6969
  "id": "A",
6970
  "type_id": 0
6971
  }
 
 
 
 
 
 
6972
  }
6973
  ],
6974
  "pair": [
@@ -6984,6 +6985,12 @@
6984
  "type_id": 0
6985
  }
6986
  },
 
 
 
 
 
 
6987
  {
6988
  "SpecialToken": {
6989
  "id": "<s>",
@@ -6995,9 +7002,24 @@
6995
  "id": "B",
6996
  "type_id": 1
6997
  }
 
 
 
 
 
 
6998
  }
6999
  ],
7000
  "special_tokens": {
 
 
 
 
 
 
 
 
 
7001
  "<s>": {
7002
  "id": "<s>",
7003
  "ids": [
 
1
  {
2
  "version": "1.0",
3
+ "truncation": null,
 
 
 
 
 
4
  "padding": null,
5
  "added_tokens": [
6
  {
 
6964
  "id": "A",
6965
  "type_id": 0
6966
  }
6967
+ },
6968
+ {
6969
+ "SpecialToken": {
6970
+ "id": "</s>",
6971
+ "type_id": 0
6972
+ }
6973
  }
6974
  ],
6975
  "pair": [
 
6985
  "type_id": 0
6986
  }
6987
  },
6988
+ {
6989
+ "SpecialToken": {
6990
+ "id": "</s>",
6991
+ "type_id": 0
6992
+ }
6993
+ },
6994
  {
6995
  "SpecialToken": {
6996
  "id": "<s>",
 
7002
  "id": "B",
7003
  "type_id": 1
7004
  }
7005
+ },
7006
+ {
7007
+ "SpecialToken": {
7008
+ "id": "</s>",
7009
+ "type_id": 1
7010
+ }
7011
  }
7012
  ],
7013
  "special_tokens": {
7014
+ "</s>": {
7015
+ "id": "</s>",
7016
+ "ids": [
7017
+ 2
7018
+ ],
7019
+ "tokens": [
7020
+ "</s>"
7021
+ ]
7022
+ },
7023
  "<s>": {
7024
  "id": "<s>",
7025
  "ids": [
tokenizer_config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "add_bos_token": true,
3
- "add_eos_token": false,
4
  "add_prefix_space": true,
5
  "added_tokens_decoder": {
6
  "0": {
 
1
  {
2
  "add_bos_token": true,
3
+ "add_eos_token": true,
4
  "add_prefix_space": true,
5
  "added_tokens_decoder": {
6
  "0": {
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:18e663f13ea23708364fa15013aa98565dcf81812e5b406bf68a691735c12d29
3
- size 5688
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5477b9bf93fb7fdf2fd2c2a74a6b958ac506e68b7f0ab34f1294ae58c89968b
3
+ size 6353