Commit 0619b8e (verified) by carlosdimare
Parent: 98efac2

Training in progress, epoch 1

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
adapter_config.json ADDED
@@ -0,0 +1,35 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "Kukedlc/Qwen2-1.5B-Spanish-1.0",
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_bias": false,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "q_proj",
+    "c_attn"
+  ],
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_rslora": false
+}
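
A standard PEFT LoRA setup: rank 8, alpha 16, dropout 0.05, targeting attention projections of the Spanish Qwen2 base model. One oddity worth flagging: `c_attn` is a GPT-2-style module name that does not exist in Qwen2 blocks (which use `q_proj`/`k_proj`/`v_proj`/`o_proj`), so only the `q_proj` and `v_proj` patterns should actually match here. A minimal sketch of attaching this adapter for inference; the adapter repo id is a placeholder for whichever repository this commit lives in:

```python
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# Base model named in adapter_config.json above.
base = AutoModelForCausalLM.from_pretrained("Kukedlc/Qwen2-1.5B-Spanish-1.0")

# Placeholder id -- substitute the repository that holds this adapter.
adapter_repo = "carlosdimare/ADAPTER_REPO"
tokenizer = AutoTokenizer.from_pretrained(adapter_repo)
model = PeftModel.from_pretrained(base, adapter_repo)
model.eval()
```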
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b5f17390e971f46f31ddbf57af1c0a635cacca93209eac165a92d911ef50894f
+size 4372840
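
The weights themselves live in Git LFS, so the diff records only this pointer. A minimal stdlib-only sketch of checking a downloaded adapter_model.safetensors against the pointer's oid and size:

```python
import hashlib
from pathlib import Path

# Expected values copied from the LFS pointer above.
EXPECTED_OID = "b5f17390e971f46f31ddbf57af1c0a635cacca93209eac165a92d911ef50894f"
EXPECTED_SIZE = 4372840

data = Path("adapter_model.safetensors").read_bytes()
assert len(data) == EXPECTED_SIZE, "size mismatch"
assert hashlib.sha256(data).hexdigest() == EXPECTED_OID, "sha256 mismatch"
print("file matches the LFS pointer")
```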
added_tokens.json CHANGED
@@ -1,3 +1,5 @@
 {
-  "<|endoftext|>": 50256
+  "<|endoftext|>": 151643,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644
 }
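
The ids move from the GPT-2 vocabulary (where <|endoftext|> is 50256) to the Qwen2 vocabulary, which appends its special tokens after the ~151k BPE entries. A quick check once the tokenizer is loaded (placeholder repo id again):

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("carlosdimare/ADAPTER_REPO")  # placeholder

# Should print the ids recorded in added_tokens.json: 151643, 151644, 151645.
for token in ("<|endoftext|>", "<|im_start|>", "<|im_end|>"):
    print(token, tok.convert_tokens_to_ids(token))
```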
chat_template.jinja ADDED
@@ -0,0 +1,6 @@
+{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system
+You are a helpful assistant.<|im_end|>
+' }}{% endif %}{{'<|im_start|>' + message['role'] + '
+' + message['content'] + '<|im_end|>' + '
+'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant
+' }}{% endif %}
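
This is the standard ChatML template shipped with Qwen2 instruct models: it injects a default system turn when the first message is not a system message, wraps each turn in <|im_start|>role ... <|im_end|>, and optionally opens an assistant turn for generation. A minimal sketch of rendering it (placeholder repo id):

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("carlosdimare/ADAPTER_REPO")  # placeholder

messages = [{"role": "user", "content": "Hola, ¿cómo estás?"}]
prompt = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)
# Expected shape:
# <|im_start|>system
# You are a helpful assistant.<|im_end|>
# <|im_start|>user
# Hola, ¿cómo estás?<|im_end|>
# <|im_start|>assistant
```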
merges.txt CHANGED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json CHANGED
@@ -1,5 +1,14 @@
 {
-  "bos_token": "<|endoftext|>",
-  "eos_token": "<|endoftext|>",
-  "unk_token": "<|endoftext|>"
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>"
+  ],
+  "eos_token": {
+    "content": "<|im_end|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<|im_end|>"
 }
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1,48 +1,24 @@
 {
   "add_prefix_space": false,
   "added_tokens_decoder": {
-    "0": {
-      "content": "<s>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "1": {
-      "content": "<pad>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "2": {
-      "content": "</s>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "3": {
-      "content": "<unk>",
+    "151643": {
+      "content": "<|endoftext|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
-    "4": {
-      "content": "<mask>",
+    "151644": {
+      "content": "<|im_start|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
-    "50256": {
-      "content": "<|endoftext|>",
+    "151645": {
+      "content": "<|im_end|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -50,11 +26,22 @@
       "special": true
     }
   },
-  "bos_token": "<|endoftext|>",
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>"
+  ],
+  "bos_token": null,
   "clean_up_tokenization_spaces": false,
-  "eos_token": "<|endoftext|>",
+  "eos_token": "<|im_end|>",
+  "errors": "replace",
   "extra_special_tokens": {},
-  "model_max_length": 1000000000000000019884624838656,
-  "tokenizer_class": "GPT2Tokenizer",
-  "unk_token": "<|endoftext|>"
+  "max_length": 4024,
+  "model_max_length": 32768,
+  "pad_token": "<|im_end|>",
+  "split_special_tokens": false,
+  "stride": 0,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
+  "unk_token": null
 }
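
The tokenizer switches from GPT2Tokenizer to Qwen2Tokenizer: bos and unk are dropped (`null`), <|im_end|> serves as both eos and pad, and model_max_length becomes a real 32768 instead of the old "effectively unbounded" sentinel. A short sketch verifying the wiring after loading (placeholder repo id):

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("carlosdimare/ADAPTER_REPO")  # placeholder

# Values should reflect tokenizer_config.json / special_tokens_map.json above.
print(tok.eos_token, tok.eos_token_id)  # <|im_end|> 151645
print(tok.pad_token, tok.pad_token_id)  # <|im_end|> 151645
print(tok.bos_token, tok.unk_token)     # None None
print(tok.model_max_length)             # 32768
```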
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5e877c03a72187593e83e40b0a3422b4b07338090568ae5901e5d63c6ac390a9
-size 5688
+oid sha256:c36eec12ae0bf18fa60892319d40138a01ea638961ff14c121d0f65046abbc22
+size 5304
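
training_args.bin is another LFS pointer; the underlying file is the pickled transformers.TrainingArguments the Trainer ran with, so it can be inspected directly. A sketch assuming a recent PyTorch (weights_only=False is needed because this is a pickled object rather than a tensor file, so only load it from a source you trust):

```python
import torch

# Pickled TrainingArguments saved by transformers.Trainer.
args = torch.load("training_args.bin", weights_only=False)
print(args.num_train_epochs)
print(args.learning_rate)
print(args.per_device_train_batch_size)
```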
vocab.json CHANGED
The diff for this file is too large to render. See raw diff