sintsh committed
Commit 90c570b · 1 Parent(s): 59b6571
peft_logs/events.out.tfevents.1744412793.nigpu.1908034.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6ec289b0759625e658ac27f7ae85681a70e9f65b287d453914f3a2dc9e237b34
+ size 88
peft_logs/events.out.tfevents.1744413155.nigpu.1913698.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8d688b3737771589c4d0d8ee3bf3d47b4db70ad2345a2963e84fcd3511b8e24b
+ size 10028
peft_models/README.md ADDED
@@ -0,0 +1,9 @@
+ ---
+ library_name: peft
+ ---
+ ## Training procedure
+
+ ### Framework versions
+
+
+ - PEFT 0.5.0
peft_models/adapter_config.json ADDED
@@ -0,0 +1,22 @@
+ {
+   "auto_mapping": null,
+   "base_model_name_or_path": "sintsh/AKPlbart",
+   "bias": "none",
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "lora_alpha": 32,
+   "lora_dropout": 0.1,
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "r": 8,
+   "revision": null,
+   "target_modules": [
+     "q_proj",
+     "k_proj",
+     "v_proj"
+   ],
+   "task_type": "SEQ_2_SEQ_LM"
+ }
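As a rough illustration only: an adapter_config.json like the one above is what `peft` reads when attaching a LoRA adapter to its base model (`sintsh/AKPlbart` here). The sketch below assumes the files from this commit are available locally under `peft_models/`; the loading flow itself is not part of this commit.

```python
# Minimal sketch (assumption): loading the LoRA adapter added in this commit
# on top of the base model named in adapter_config.json.
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from peft import PeftModel

base_model = AutoModelForSeq2SeqLM.from_pretrained("sintsh/AKPlbart")
tokenizer = AutoTokenizer.from_pretrained("sintsh/AKPlbart")

# "peft_models" is the directory in this commit that holds
# adapter_config.json and adapter_model.bin.
model = PeftModel.from_pretrained(base_model, "peft_models")
model.eval()
```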
peft_models/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f192ef21aee7271f37557abad28763d90e19c323c84bb29fc29b3ae0213f15f3
+ size 2694074
peft_models/config.json ADDED
@@ -0,0 +1,39 @@
+ {
+   "peft_type": "lora",
+   "model_name": "sintsh/AKPlbart",
+   "training_config": {
+     "model_name": "sintsh/AKPlbart",
+     "peft_type": "lora",
+     "lora_rank": 8,
+     "lora_alpha": 32,
+     "adapter_size": 128,
+     "learning_rate": 3e-05,
+     "batch_size": 32,
+     "max_length": 512,
+     "num_epochs": 8,
+     "early_stopping": 3,
+     "gradient_accumulation": 2,
+     "warmup_steps": 1000,
+     "weight_decay": 0.01,
+     "output_dir": "./peft_models/lora_20250411",
+     "logging_dir": "./peft_logs/lora_20250411",
+     "logging_steps": 500,
+     "quantization": null,
+     "target_modules": [
+       "q_proj",
+       "k_proj",
+       "v_proj"
+     ],
+     "ddp_find_unused_parameters": false,
+     "sample_size": 780944,
+     "generation": {
+       "temperature": 0.7,
+       "top_k": 50,
+       "top_p": 0.95,
+       "repetition_penalty": 1.2,
+       "no_repeat_ngram_size": 2,
+       "num_beams": 5,
+       "max_new_tokens": 256
+     }
+   }
+ }
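The nested "generation" block in this config records decoding settings; nothing in the adapter enforces them at inference time. A sketch of how they might be passed to `generate()` is shown below; the example input string and the local "peft_models" path are assumptions for illustration.

```python
# Sketch (assumption): applying the decoding settings recorded in config.json.
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from peft import PeftModel

tokenizer = AutoTokenizer.from_pretrained("sintsh/AKPlbart")
model = PeftModel.from_pretrained(
    AutoModelForSeq2SeqLM.from_pretrained("sintsh/AKPlbart"), "peft_models"
)

inputs = tokenizer("def add(a, b): return a + b", return_tensors="pt")
outputs = model.generate(
    **inputs,
    num_beams=5,
    repetition_penalty=1.2,
    no_repeat_ngram_size=2,
    max_new_tokens=256,
    # temperature / top_k / top_p from the config only take effect
    # when sampling is enabled (do_sample=True); beam search ignores them.
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```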
peft_models/sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f72f5d040a176945623a255484d24066f8c0da89a294359154e226efbe494b80
+ size 985833
peft_models/special_tokens_map.json ADDED
@@ -0,0 +1,20 @@
+ {
+   "additional_special_tokens": [
+     "__java__",
+     "__python__",
+     "__en_XX__"
+   ],
+   "bos_token": "<s>",
+   "cls_token": "<s>",
+   "eos_token": "</s>",
+   "mask_token": {
+     "content": "<mask>",
+     "lstrip": true,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "</s>",
+   "sep_token": "</s>",
+   "unk_token": "<unk>"
+ }
peft_models/tokenizer_config.json ADDED
@@ -0,0 +1,25 @@
+ {
+   "additional_special_tokens": null,
+   "bos_token": "<s>",
+   "cls_token": "<s>",
+   "eos_token": "</s>",
+   "language_codes": "base",
+   "mask_token": {
+     "__type": "AddedToken",
+     "content": "<mask>",
+     "lstrip": true,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "model_max_length": 1000000000000000019884624838656,
+   "name_or_path": "sintsh/AKPlbart",
+   "pad_token": "<pad>",
+   "sep_token": "</s>",
+   "sp_model_kwargs": {},
+   "special_tokens_map_file": null,
+   "src_lang": null,
+   "tgt_lang": null,
+   "tokenizer_class": "PLBartTokenizer",
+   "unk_token": "<unk>"
+ }
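For completeness, a hypothetical way to load the tokenizer files added here (sentencepiece.bpe.model, special_tokens_map.json, tokenizer_config.json) and inspect the language tags; the local "peft_models" path is an assumption about where this commit is checked out.

```python
# Sketch (assumption): loading the PLBartTokenizer shipped in peft_models/.
from transformers import PLBartTokenizer

tokenizer = PLBartTokenizer.from_pretrained("peft_models")
# special_tokens_map.json registers "__java__", "__python__" and "__en_XX__"
# as additional special tokens (language tags used by PLBART).
print(tokenizer.additional_special_tokens)
print(tokenizer("public static void main(String[] args) {}").input_ids)
```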