samoline committed on
Commit
55e5cf1
·
verified ·
1 Parent(s): 417527b

Training in progress, step 1

Browse files
adapter_config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "alpha_pattern": {},
3
  "auto_mapping": null,
4
- "base_model_name_or_path": "facebook/opt-125m",
5
  "bias": "none",
6
  "fan_in_fan_out": null,
7
  "inference_mode": true,
@@ -20,12 +20,13 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "q_proj",
24
- "k_proj",
25
- "out_proj",
26
- "fc2",
27
  "v_proj",
28
- "fc1"
 
 
 
 
29
  ],
30
  "task_type": "CAUSAL_LM",
31
  "use_dora": false,
 
1
  {
2
  "alpha_pattern": {},
3
  "auto_mapping": null,
4
+ "base_model_name_or_path": "samoline/tensoralchemistdev01__sv9-with-tokenizer",
5
  "bias": "none",
6
  "fan_in_fan_out": null,
7
  "inference_mode": true,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
+ "up_proj",
 
 
 
24
  "v_proj",
25
+ "k_proj",
26
+ "o_proj",
27
+ "q_proj",
28
+ "down_proj",
29
+ "gate_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e05686df72045bffebf586dd27939b8096508de77909d902574b3cc3327a6d7e
3
- size 2673480
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4e1613b89c1ab22fe70a45711b65ca5ffa67b7e107a5c976ce6e9167c09ab60
3
+ size 25741592
added_tokens.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "<|endofprompt|>": 100276,
3
+ "<|im_end|>": 100265,
4
+ "<|im_start|>": 100264
5
+ }
config.json CHANGED
@@ -1,32 +1,37 @@
1
  {
2
  "_attn_implementation_autoset": true,
3
- "_name_or_path": "facebook/opt-125m",
4
- "_remove_final_layer_norm": false,
5
- "activation_dropout": 0.0,
6
- "activation_function": "relu",
7
  "architectures": [
8
- "OPTForCausalLM"
9
  ],
 
10
  "attention_dropout": 0.0,
11
- "bos_token_id": 2,
12
- "do_layer_norm_before": true,
13
- "dropout": 0.1,
14
- "enable_bias": true,
15
- "eos_token_id": 2,
16
- "ffn_dim": 3072,
17
- "hidden_size": 768,
18
- "init_std": 0.02,
19
- "layer_norm_elementwise_affine": true,
20
- "layerdrop": 0.0,
21
- "max_position_embeddings": 2048,
22
- "model_type": "opt",
23
- "num_attention_heads": 12,
24
- "num_hidden_layers": 12,
25
- "pad_token_id": 1,
26
- "prefix": "</s>",
27
- "torch_dtype": "float16",
 
 
 
 
 
 
 
 
28
  "transformers_version": "4.46.0",
29
  "use_cache": false,
30
- "vocab_size": 50272,
31
- "word_embed_proj_dim": 768
32
  }
 
1
  {
2
  "_attn_implementation_autoset": true,
3
+ "_name_or_path": "samoline/tensoralchemistdev01__sv9-with-tokenizer",
 
 
 
4
  "architectures": [
5
+ "LlamaForCausalLM"
6
  ],
7
+ "attention_bias": false,
8
  "attention_dropout": 0.0,
9
+ "bos_token_id": 100257,
10
+ "eos_token_id": 100257,
11
+ "head_dim": 128,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 3072,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 9216,
16
+ "max_position_embeddings": 131072,
17
+ "mlp_bias": false,
18
+ "model_type": "llama",
19
+ "num_attention_heads": 24,
20
+ "num_hidden_layers": 28,
21
+ "num_key_value_heads": 8,
22
+ "pretraining_tp": 1,
23
+ "rms_norm_eps": 1e-05,
24
+ "rope_scaling": {
25
+ "factor": 32.0,
26
+ "high_freq_factor": 4.0,
27
+ "low_freq_factor": 1.0,
28
+ "original_max_position_embeddings": 8192,
29
+ "rope_type": "llama3"
30
+ },
31
+ "rope_theta": 500000.0,
32
+ "tie_word_embeddings": true,
33
+ "torch_dtype": "float32",
34
  "transformers_version": "4.46.0",
35
  "use_cache": false,
36
+ "vocab_size": 100263
 
37
  }
merges.txt CHANGED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json CHANGED
@@ -1,29 +1,29 @@
1
  {
2
  "bos_token": {
3
- "content": "</s>",
4
  "lstrip": false,
5
- "normalized": true,
6
  "rstrip": false,
7
  "single_word": false
8
  },
9
  "eos_token": {
10
- "content": "</s>",
11
  "lstrip": false,
12
- "normalized": true,
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
  "pad_token": {
17
- "content": "<pad>",
18
  "lstrip": false,
19
- "normalized": true,
20
  "rstrip": false,
21
  "single_word": false
22
  },
23
  "unk_token": {
24
- "content": "</s>",
25
  "lstrip": false,
26
- "normalized": true,
27
  "rstrip": false,
28
  "single_word": false
29
  }
 
1
  {
2
  "bos_token": {
3
+ "content": "<|endoftext|>",
4
  "lstrip": false,
5
+ "normalized": false,
6
  "rstrip": false,
7
  "single_word": false
8
  },
9
  "eos_token": {
10
+ "content": "<|endoftext|>",
11
  "lstrip": false,
12
+ "normalized": false,
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
  "pad_token": {
17
+ "content": "<|endoftext|>",
18
  "lstrip": false,
19
+ "normalized": false,
20
  "rstrip": false,
21
  "single_word": false
22
  },
23
  "unk_token": {
24
+ "content": "<|endoftext|>",
25
  "lstrip": false,
26
+ "normalized": false,
27
  "rstrip": false,
28
  "single_word": false
29
  }
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1,31 +1,70 @@
1
  {
2
- "add_bos_token": true,
3
  "add_prefix_space": false,
4
  "added_tokens_decoder": {
5
- "1": {
6
- "content": "<pad>",
7
  "lstrip": false,
8
- "normalized": true,
9
  "rstrip": false,
10
  "single_word": false,
11
  "special": true
12
  },
13
- "2": {
14
- "content": "</s>",
15
  "lstrip": false,
16
- "normalized": true,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  "rstrip": false,
18
  "single_word": false,
19
  "special": true
20
  }
21
  },
22
- "bos_token": "</s>",
23
  "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
24
  "clean_up_tokenization_spaces": false,
25
- "eos_token": "</s>",
26
- "errors": "replace",
27
- "model_max_length": 1000000000000000019884624838656,
28
- "pad_token": "<pad>",
29
  "tokenizer_class": "GPT2Tokenizer",
30
- "unk_token": "</s>"
31
  }
 
1
  {
 
2
  "add_prefix_space": false,
3
  "added_tokens_decoder": {
4
+ "100257": {
5
+ "content": "<|endoftext|>",
6
  "lstrip": false,
7
+ "normalized": false,
8
  "rstrip": false,
9
  "single_word": false,
10
  "special": true
11
  },
12
+ "100258": {
13
+ "content": "<|fim_prefix|>",
14
  "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "100259": {
21
+ "content": "<|fim_middle|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "100260": {
29
+ "content": "<|fim_suffix|>",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "100264": {
37
+ "content": "<|im_start|>",
38
+ "lstrip": false,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ },
44
+ "100265": {
45
+ "content": "<|im_end|>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false,
50
+ "special": true
51
+ },
52
+ "100276": {
53
+ "content": "<|endofprompt|>",
54
+ "lstrip": false,
55
+ "normalized": false,
56
  "rstrip": false,
57
  "single_word": false,
58
  "special": true
59
  }
60
  },
61
+ "bos_token": "<|endoftext|>",
62
  "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
63
  "clean_up_tokenization_spaces": false,
64
+ "eos_token": "<|endoftext|>",
65
+ "extra_special_tokens": {},
66
+ "model_max_length": 8192,
67
+ "pad_token": "<|endoftext|>",
68
  "tokenizer_class": "GPT2Tokenizer",
69
+ "unk_token": "<|endoftext|>"
70
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9fc015c8c2031c5d355137a5eb97fe79b484bf40e500349a764a72b91951b0ed
3
  size 6712
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e8b9317c790c50187140ab6cd1817c9b63e921ac3dd381eecb663fb4dbb2f97
3
  size 6712
vocab.json CHANGED
The diff for this file is too large to render. See raw diff