IoakeimE committed on
Commit
62c961b
·
verified ·
1 Parent(s): d3c8f3b

Training in progress, epoch 1

Browse files
README.md CHANGED
@@ -4,9 +4,9 @@ library_name: transformers
4
  model_name: sft_normal_simplification
5
  tags:
6
  - generated_from_trainer
7
- - unsloth
8
- - trl
9
  - sft
 
 
10
  licence: license
11
  ---
12
 
@@ -28,18 +28,18 @@ print(output["generated_text"])
28
 
29
  ## Training procedure
30
 
31
- [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/ioakeime-aristotle-university-of-thessaloniki/sft-normal_smiplification/runs/rsnevvqb)
32
 
33
 
34
  This model was trained with SFT.
35
 
36
  ### Framework versions
37
 
38
- - TRL: 0.18.1
39
- - Transformers: 4.52.4
40
- - Pytorch: 2.6.0
41
- - Datasets: 3.6.0
42
- - Tokenizers: 0.21.1
43
 
44
  ## Citations
45
 
 
4
  model_name: sft_normal_simplification
5
  tags:
6
  - generated_from_trainer
 
 
7
  - sft
8
+ - trl
9
+ - unsloth
10
  licence: license
11
  ---
12
 
 
28
 
29
  ## Training procedure
30
 
31
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/ioakeime-aristotle-university-of-thessaloniki/sft_normal_simplification/runs/b77rp42v)
32
 
33
 
34
  This model was trained with SFT.
35
 
36
  ### Framework versions
37
 
38
+ - TRL: 0.24.0
39
+ - Transformers: 4.57.2
40
+ - Pytorch: 2.9.0
41
+ - Datasets: 4.3.0
42
+ - Tokenizers: 0.22.1
43
 
44
  ## Citations
45
 
adapter_config.json CHANGED
@@ -1,9 +1,16 @@
1
  {
 
2
  "alpha_pattern": {},
3
- "auto_mapping": null,
 
 
 
 
 
4
  "base_model_name_or_path": "unsloth/mistral-7b-v0.3-bnb-4bit",
5
  "bias": "none",
6
  "corda_config": null,
 
7
  "eva_config": null,
8
  "exclude_modules": null,
9
  "fan_in_fan_out": false,
@@ -15,25 +22,29 @@
15
  "loftq_config": {},
16
  "lora_alpha": 16,
17
  "lora_bias": false,
18
- "lora_dropout": 0,
19
  "megatron_config": null,
20
  "megatron_core": "megatron.core",
21
  "modules_to_save": null,
22
  "peft_type": "LORA",
 
 
23
  "r": 16,
24
  "rank_pattern": {},
25
  "revision": null,
26
  "target_modules": [
27
  "up_proj",
28
- "q_proj",
29
  "down_proj",
30
  "o_proj",
31
  "v_proj",
 
32
  "k_proj",
33
  "gate_proj"
34
  ],
 
35
  "task_type": "CAUSAL_LM",
36
  "trainable_token_indices": null,
37
  "use_dora": false,
 
38
  "use_rslora": false
39
  }
 
1
  {
2
+ "alora_invocation_tokens": null,
3
  "alpha_pattern": {},
4
+ "arrow_config": null,
5
+ "auto_mapping": {
6
+ "base_model_class": "MistralForCausalLM",
7
+ "parent_library": "transformers.models.mistral.modeling_mistral",
8
+ "unsloth_fixed": true
9
+ },
10
  "base_model_name_or_path": "unsloth/mistral-7b-v0.3-bnb-4bit",
11
  "bias": "none",
12
  "corda_config": null,
13
+ "ensure_weight_tying": false,
14
  "eva_config": null,
15
  "exclude_modules": null,
16
  "fan_in_fan_out": false,
 
22
  "loftq_config": {},
23
  "lora_alpha": 16,
24
  "lora_bias": false,
25
+ "lora_dropout": 0.0,
26
  "megatron_config": null,
27
  "megatron_core": "megatron.core",
28
  "modules_to_save": null,
29
  "peft_type": "LORA",
30
+ "peft_version": "0.18.0",
31
+ "qalora_group_size": 16,
32
  "r": 16,
33
  "rank_pattern": {},
34
  "revision": null,
35
  "target_modules": [
36
  "up_proj",
 
37
  "down_proj",
38
  "o_proj",
39
  "v_proj",
40
+ "q_proj",
41
  "k_proj",
42
  "gate_proj"
43
  ],
44
+ "target_parameters": null,
45
  "task_type": "CAUSAL_LM",
46
  "trainable_token_indices": null,
47
  "use_dora": false,
48
+ "use_qalora": false,
49
  "use_rslora": false
50
  }
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0aea5a24135ed2e25d95892aac12a4cffabf0e2b3873f2636bcf4a87f65a64bd
3
  size 167832240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aea6fd995be4a9d88ef2fe7f54e9eec76f45406d38838aed9c251dd887222a2a
3
  size 167832240
tokenizer.json CHANGED
@@ -1,11 +1,6 @@
1
  {
2
  "version": "1.0",
3
- "truncation": {
4
- "direction": "Right",
5
- "max_length": 8192,
6
- "strategy": "LongestFirst",
7
- "stride": 0
8
- },
9
  "padding": null,
10
  "added_tokens": [
11
  {
@@ -6969,6 +6964,12 @@
6969
  "id": "A",
6970
  "type_id": 0
6971
  }
 
 
 
 
 
 
6972
  }
6973
  ],
6974
  "pair": [
@@ -6984,6 +6985,12 @@
6984
  "type_id": 0
6985
  }
6986
  },
 
 
 
 
 
 
6987
  {
6988
  "SpecialToken": {
6989
  "id": "<s>",
@@ -6995,9 +7002,24 @@
6995
  "id": "B",
6996
  "type_id": 1
6997
  }
 
 
 
 
 
 
6998
  }
6999
  ],
7000
  "special_tokens": {
 
 
 
 
 
 
 
 
 
7001
  "<s>": {
7002
  "id": "<s>",
7003
  "ids": [
 
1
  {
2
  "version": "1.0",
3
+ "truncation": null,
 
 
 
 
 
4
  "padding": null,
5
  "added_tokens": [
6
  {
 
6964
  "id": "A",
6965
  "type_id": 0
6966
  }
6967
+ },
6968
+ {
6969
+ "SpecialToken": {
6970
+ "id": "</s>",
6971
+ "type_id": 0
6972
+ }
6973
  }
6974
  ],
6975
  "pair": [
 
6985
  "type_id": 0
6986
  }
6987
  },
6988
+ {
6989
+ "SpecialToken": {
6990
+ "id": "</s>",
6991
+ "type_id": 0
6992
+ }
6993
+ },
6994
  {
6995
  "SpecialToken": {
6996
  "id": "<s>",
 
7002
  "id": "B",
7003
  "type_id": 1
7004
  }
7005
+ },
7006
+ {
7007
+ "SpecialToken": {
7008
+ "id": "</s>",
7009
+ "type_id": 1
7010
+ }
7011
  }
7012
  ],
7013
  "special_tokens": {
7014
+ "</s>": {
7015
+ "id": "</s>",
7016
+ "ids": [
7017
+ 2
7018
+ ],
7019
+ "tokens": [
7020
+ "</s>"
7021
+ ]
7022
+ },
7023
  "<s>": {
7024
  "id": "<s>",
7025
  "ids": [
tokenizer_config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "add_bos_token": true,
3
- "add_eos_token": false,
4
  "add_prefix_space": true,
5
  "added_tokens_decoder": {
6
  "0": {
 
1
  {
2
  "add_bos_token": true,
3
+ "add_eos_token": true,
4
  "add_prefix_space": true,
5
  "added_tokens_decoder": {
6
  "0": {
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c3968f59b4bab6b4ce5efa8d174775796faff4737b44f1f81ac66f16f94695ff
3
- size 5688
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6c34933032ee19e8bb8880b85ca22fa8de6241d3240cc742a31e6c59f0cbe8a
3
+ size 6353