Curiousfox committed
Commit 176a604 · verified · 1 parent: efc80b2

Curiousfox/mt5_tai-lo_to_chinese_LoRA_ver1.0.e

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,72 @@
+ ---
+ library_name: peft
+ license: apache-2.0
+ base_model: google/mt5-base
+ tags:
+ - base_model:adapter:google/mt5-base
+ - lora
+ - transformers
+ model-index:
+ - name: outputs
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # outputs
+
+ This model is a fine-tuned version of [google/mt5-base](https://huggingface.co/google/mt5-base) on an unknown dataset.
+ It achieves the following results on the evaluation set:
+ - Loss: 4.1438
+ - Chrf: 0.3504
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 0.01
+ - train_batch_size: 4
+ - eval_batch_size: 8
+ - seed: 1
+ - optimizer: adamw_torch_fused with betas=(0.9, 0.999) and epsilon=1e-08 (no additional optimizer arguments)
+ - lr_scheduler_type: linear
+ - lr_scheduler_warmup_steps: 2000
+ - training_steps: 20000
+
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss | Chrf |
+ |:-------------:|:------:|:-----:|:---------------:|:------:|
+ | 6.5309 | 0.9337 | 2000 | 6.1412 | 0.0 |
+ | 5.0243 | 1.8674 | 4000 | 4.7869 | 0.0 |
+ | 5.8503 | 2.8011 | 6000 | 4.6079 | 0.0 |
+ | 4.8561 | 3.7348 | 8000 | 5.4843 | 0.3906 |
+ | 5.3422 | 4.6685 | 10000 | 4.6969 | 0.1913 |
+ | 5.1278 | 5.6022 | 12000 | 4.5267 | 0.0638 |
+ | 4.7362 | 6.5359 | 14000 | 4.4173 | 0.5746 |
+ | 4.8027 | 7.4697 | 16000 | 4.2625 | 0.1913 |
+ | 4.4404 | 8.4034 | 18000 | 4.1877 | 0.1276 |
+ | 4.3010 | 9.3371 | 20000 | 4.1438 | 0.3504 |
+
+ ### Framework versions
+
+ - PEFT 0.19.1
+ - Transformers 5.0.0
+ - Pytorch 2.10.0+cu128
+ - Datasets 4.0.0
+ - Tokenizers 0.22.2
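
The card's usage sections are still stubs. For reference, here is a minimal loading-and-generation sketch (not part of the original card): the repo id is assumed from the commit header, and the Tâi-lô input sentence is purely illustrative.

```python
# Sketch: load the mT5 base model and apply this LoRA adapter with PEFT.
# Repo id assumed from the commit header; the example sentence is illustrative.
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from peft import PeftModel

adapter_id = "Curiousfox/mt5_tai-lo_to_chinese_LoRA_ver1.0.e"
tokenizer = AutoTokenizer.from_pretrained(adapter_id)
base = AutoModelForSeq2SeqLM.from_pretrained("google/mt5-base")
model = PeftModel.from_pretrained(base, adapter_id)

# "Guá sī Tâi-uân-lâng." is Tâi-lô for "I am Taiwanese" (illustrative input).
inputs = tokenizer("Guá sī Tâi-uân-lâng.", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```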
adapter_config.json ADDED
@@ -0,0 +1,43 @@
+ {
+   "alora_invocation_tokens": null,
+   "alpha_pattern": {},
+   "arrow_config": null,
+   "auto_mapping": null,
+   "base_model_name_or_path": "google/mt5-base",
+   "bias": "none",
+   "corda_config": null,
+   "ensure_weight_tying": false,
+   "eva_config": null,
+   "exclude_modules": null,
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layer_replication": null,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "loftq_config": {},
+   "lora_alpha": 128,
+   "lora_bias": false,
+   "lora_dropout": 0.1,
+   "lora_ga_config": null,
+   "megatron_config": null,
+   "megatron_core": "megatron.core",
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "peft_version": "0.19.1",
+   "qalora_group_size": 16,
+   "r": 64,
+   "rank_pattern": {},
+   "revision": null,
+   "target_modules": [
+     "q",
+     "v"
+   ],
+   "target_parameters": null,
+   "task_type": "SEQ_2_SEQ_LM",
+   "trainable_token_indices": null,
+   "use_bdlora": null,
+   "use_dora": false,
+   "use_qalora": false,
+   "use_rslora": false
+ }
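
For readers reconstructing this setup in code, the key fields of adapter_config.json map onto a PEFT `LoraConfig` roughly as follows (a sketch using only values from the file above; fields left at their defaults are omitted):

```python
# Rough code equivalent of the adapter_config.json above (sketch; defaults omitted).
from peft import LoraConfig, TaskType

lora_config = LoraConfig(
    task_type=TaskType.SEQ_2_SEQ_LM,
    r=64,                       # LoRA rank
    lora_alpha=128,             # scaling; effective scale is alpha / r = 2
    lora_dropout=0.1,
    target_modules=["q", "v"],  # mT5 attention query and value projections
    bias="none",
)
```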
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f43bb5c6c422c6a13cda8e30d4326f60b5cc647e173834c2e20a1b0ad3496648
+ size 28331904
tokenizer.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2ce3b8c7c21f06f06bc05213fc3c4e6778fecc43747508ce408b9ed1c9d875fd
+ size 16013691
tokenizer_config.json ADDED
@@ -0,0 +1,12 @@
+ {
+   "backend": "tokenizers",
+   "eos_token": "</s>",
+   "extra_ids": 0,
+   "extra_special_tokens": [],
+   "is_local": false,
+   "model_max_length": 1000000000000000019884624838656,
+   "pad_token": "<pad>",
+   "tokenizer_class": "T5Tokenizer",
+   "unk_id": 2,
+   "unk_token": "<unk>"
+ }
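
A quick sanity check of the special tokens declared above (a sketch; assumes the adapter repo id from the commit header):

```python
# Print the special tokens declared in tokenizer_config.json.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("Curiousfox/mt5_tai-lo_to_chinese_LoRA_ver1.0.e")
print(tok.eos_token, tok.pad_token, tok.unk_token)  # expected: </s> <pad> <unk>
```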
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cd8dc6b2aeced2fc922ca26bea791b948d4673c9dec3b0ec502d2285daa203dc
+ size 5393
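
training_args.bin is a serialized `TrainingArguments` object and is not human-readable. A hedged reconstruction of its key fields, based solely on the hyperparameters listed in the model card (the actual file may contain further settings):

```python
# Sketch reconstructed from the card's "Training hyperparameters" section;
# output_dir is assumed from the model-index name "outputs".
from transformers import Seq2SeqTrainingArguments

training_args = Seq2SeqTrainingArguments(
    output_dir="outputs",
    learning_rate=0.01,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=8,
    seed=1,
    optim="adamw_torch_fused",
    lr_scheduler_type="linear",
    warmup_steps=2000,
    max_steps=20000,
)
```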