kth8 commited on
Commit
a87c8fb
·
verified ·
1 Parent(s): d29b3bd

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: llama3.2
3
+ language:
4
+ - en
5
+ base_model: allura-forge/Llama-3.3-8B-Instruct
6
+ datasets:
7
+ - m-a-p/SuperGPQA
8
+ pipeline_tag: text-generation
9
+ library_name: transformers
10
+ tags:
11
+ - sft
12
+ - trl
13
+ - unsloth
14
+ - llama
15
+ - llama3
16
+ - llama3.3
17
+ ---
18
+ ![logo](https://i.imgur.com/iuUzm7L.jpeg)
19
+ A fine-tune of [allura-forge/Llama-3.3-8B-Instruct](https://huggingface.co/allura-forge/Llama-3.3-8B-Instruct) on the [m-a-p/SuperGPQA](https://huggingface.co/datasets/m-a-p/SuperGPQA) dataset.
20
+
21
+ ## Usage example
22
+ Set the temperature to 0.0 for best results.
23
+
24
+ **System prompt**
25
+ ```
26
+ You are a classifier. Categorize the following problem into discipline, field, and subfield in JSON format.
27
+ ```
28
+ **User prompt**
29
+ ```
30
+ Cotton and linen both readily catch fire. A batch of towels is composed of both cotton and linen, and is known to have caught fire. If it is known that the towels were ignited by a lit cigarette, which of the following arguments utilizes the most appropriate form of reasoning?
31
+ ```
32
+ **Assistant response**
33
+ ```
34
+ {"discipline": "Philosophy", "field": "Philosophy", "subfield": "Logic"}
35
+ ```
36
+ # Possible output options
37
+ Discipline
38
+ ```
39
+ ['Medicine', 'Literature and Arts', 'History', 'Science', 'Philosophy', 'Law', 'Engineering', 'Management', 'Agronomy', 'Economics', 'Military Science', 'Sociology', 'Education']
40
+ ```
41
+ Field
42
+ ```
43
+ ['Animal Husbandry', 'Political Science', 'Civil Engineering', 'Materials Science and Engineering', 'Weapon Science and Technology', 'History', 'Stomatology', 'Agricultural Engineering', 'Mechanical Engineering', 'Astronomy', 'Nuclear Science and Technology', 'Language and Literature', 'Forestry Engineering', 'Geology', 'Basic Medicine', 'Crop Science', 'Electronic Science and Technology', 'Military Science', 'Petroleum and Natural Gas Engineering', 'Metallurgical Engineering', 'Management Science and Engineering', 'Library, Information and Archival Management', 'Clinical Medicine', 'Art Studies', 'Food Science and Engineering', 'Systems Science', 'Aquaculture', 'Business Administration', 'Computer Science and Technology', 'Electrical Engineering', 'Forestry', 'Textile Science and Engineering', 'Physical Education', 'Oceanography', 'Musicology', 'Traditional Chinese Medicine', 'Mining Engineering', 'Psychology', 'Law', 'Control Science and Engineering', 'Chemistry', 'Hydraulic Engineering', 'Public Administration', 'Chemical Engineering and Technology', 'Geography', 'Optical Engineering', 'Applied Economics', 'Architecture', 'Power Engineering and Engineering Thermophysics', 'Education', 'Journalism and Communication', 'Aeronautical and Astronautical Science and Technology', 'Veterinary Medicine', 'Geophysics', 'Instrument Science and Technology', 'Mathematics', 'Information and Communication Engineering', 'Physical Oceanography', 'Theoretical Economics', 'Mechanics', 'Philosophy', 'Geological Resources and Geological Engineering', 'Physics', 'Pharmacy', 'Environmental Science and Engineering', 'Transportation Engineering', 'Biology', 'Naval Architecture and Ocean Engineering', 'Atmospheric Science', 'Sociology', 'Public Health and Preventive Medicine', 'Surveying and Mapping Science and Technology']
44
+ ```
45
+ Subfield
46
+ ```
47
+ ['Political Science', 'Social Medicine and Health Management', 'Preschool Education', 'Geriatric Medicine', 'Civil and Commercial Law', 'Biophysics', 'Rigid Body Mechanics', 'Cartography and Geographic Information Engineering', 'Anesthesiology', 'Stellar and Interstellar Evolution', 'Chemical Transport Engineering', 'Structural Geology', 'Contract Law', 'Obstetrics and Gynecology', 'Pathology and Pathophysiology', 'Harmony', 'Aquaculture', 'Pharmaceutics', 'Vehicle Operation Engineering', 'Circuits and Systems', 'Solid State Physics', 'Theoretical Fluid Mechanics', 'Mineral Processing Engineering', 'Functions of Real Variables', 'Signal and Information Processing', 'Pathogen Biology', 'Computer Networks', 'Optical Fiber Communication', 'Genetics', 'Architectural History', 'Oil and Gas Field Development and Storage & Transportation Engineering', 'Tourism Management and Technological Economics Management', 'Drama and Opera Studies', 'Polynomials and Series Expansions', 'Cryptography', 'Polymer Chemistry and Physics', 'Principles of Seismic Exploration', 'Fuzzy Mathematics', 'Physiology', 'Pitch and Scales', 'Heat Transfer', 'Operating Systems', 'Fluid Physics', 'Microelectronics and Solid-State Electronics', 'Non-ferrous Metallurgy', 'Environmental Science', 'Power Electronics and Electrical Drives', 'Communication and Information Systems', 'Oncology', 'Military Thought and History', 'Procedural Law', 'Group Theory', 'Fine Arts', 'Transportation Planning and Management', 'Physical Chemistry', 'Physical Oceanography', 'Sports Science and Medicine', 'Animal Nutrition and Feed Science', 'Urban Planning and Design', 'Space physics', 'Electrical Theory and New Technologies', 'Economic History', 'Geotechnical Engineering', 'Ecology', 'Theory of Curriculum and Instruction', 'Radiation Medicine', 'Information Management Science', 'Functions of Complex Variables', 'Computer Software and Theory', 'Nursing and Rehabilitation Medicine', 'Wood Science and Technology', 'Mass 
Transport and Separation Process in Chemical Engineering', 'Religious Studies', 'Mineralogy, Petrology, and Economic Geology', 'Thermodynamics and Statistical Physics', 'Structural Engineering', 'Demography and Anthropology', 'Philology and Bibliography', 'Databases', 'Textile Materials Science', 'Textile Chemistry and Dyeing Engineering', 'Physical Chemistry of Metallurgical Process', 'Ethics', 'Internal Combustion Engineering', 'Design Arts', 'Refrigeration and Cryogenic Engineering', 'Mechatronic Engineering', 'Dermatology and Venereology', 'Economic Statistics', 'Applied Optics', 'Systems Science', 'Particle and Nuclear Physics', 'Information Management and Communication', 'French Language and Literature', 'Labor Economics', 'Medicinal Chemistry', 'Literary Theory', 'Microbiology', 'Physical Education and Training', 'Internal Medicine', 'Computer Architecture', 'Operations Research and Cybernetics', 'Dynamic Meteorology', 'Industrial Economics', 'Literary History', 'Marine Engineering', 'Optoelectronic Technology', 'Combinatorial Mathematics', 'Theoretical Optics', 'Materials Processing Engineering', 'Nutrition and Food Hygiene', 'Theoretical Mechanics', 'Graph Theory', 'Quantum Mechanics', 'Materials Physics and Chemistry', 'Marine Biology', 'Forest Cultivation and Genetic Breeding', 'National and Defense Economics', 'Poromechanics and Reservoir Physics', 'Road and Railway Engineering', 'Aeronautical and Astronautical Science and Technology', 'Data Structures', 'Historical Geography', 'Analytical Chemistry', 'Military Law', 'Pharmaceutical Analysis', 'Polymer Physics', 'Atmospheric Physics and Atmospheric Environment', 'Communication Principles', 'Underwater Acoustics', 'Journalism and News Practice', 'Water conservancy and Hydropower Engineering', 'Inorganic Chemistry', 'Animal Rearing and Breeding', 'Educational Technology and Principles', 'High Voltage and Insulation Technology', 'Advanced Algebra', 'Food Biochemistry', 'Philosophy of Science and 
Technology', 'Logic', 'Film Studies', 'Military Command and Information Systems', 'Fundamentals of Dynamics and Control', 'Neurology', 'Cosmology', 'Forest Engineering', 'Ophthalmology', 'Agricultural Environment and Soil-Water Engineering', 'Crop Science', 'Human Anatomy and Histology-Embryology', 'Probability and Statistics', 'Communication and Broadcasting', 'Maternal, Child and Adolescent Health', 'Thermodynamics', 'Surgery', 'Architectural Design and Theory', 'Western Economics', 'Ordinary Differential Equations', 'Management Science and Engineering', 'Military Logistics and Equipment', 'Discrete Mathematics', 'Mathematical Analysis', 'Astrophysics', 'Linguistics and Applied Linguistics', 'Quantitative Economics', 'Cell Biology', 'Urban Infrastructure Engineering', 'Pattern Recognition', 'Forensic Medicine', 'Antenna and Radio Communication', 'Constitutional and Administrative Law', 'Laser Technology', 'Traditional Chinese Medicine Theory', 'Biochemistry and Molecular Biology', 'Mining and Safety Engineering', 'Geometry and Topology', 'Dance Studies', 'Semiconductor Physics', 'Computational Mathematics', 'Fluid Machinery and Engineering', 'Philosophical Aesthetics', 'Engineering Fluid Mechanics', 'Stochastic Processes', 'Psychology', 'Traditional Chinese Pharmacy', 'Environmental and Resource Protection', 'Physical Geography', 'Archaeology and Museology', 'Power Systems and Automation', 'Music History, Education, and Technology', 'Pediatrics', 'Modern and Contemporary Chinese Literature', 'Geodesy and Surveying Engineering', 'Zoology', 'Military Management', 'Iron and Steel Metallurgy', 'Epidemiology and Health Statistics', 'Weapon Systems Science and Engineering', 'Military Chemistry and Pyrotechnics', 'World History', 'Meteorology', 'Finance', 'Electrodynamics', 'Organic Chemistry', 'History and Theory of Journalism and Media Management', 'Solar System Science', 'Geochemistry', 'Veterinary Medicine', 'Geological Resources and Geological Engineering', 
'Psychiatry and Mental Health', 'Manufacturing Automation', 'Traditional Chinese Health Preservation', 'Business and Accounting Management', 'Atomic and Molecular Physics', 'Education Economics, Management and Social Security', 'Hydraulics and Hydrology', 'Otorhinolaryngology', 'Number Theory', 'Principles of Metallurgy', 'Radiation Protection and Nuclear Technology Applications', 'Principles of Computer Organization', 'Special Education', 'Digital Surveying and Remote Sensing Applications', 'Marine Chemistry', 'Composition', 'Traffic Information Engineering and Control', 'Emergency Medicine', 'Fluid Flow and Heat Transfer in Chemical Engineering', 'Criminal Law', 'Classical Chinese Literature', 'Numerical Analysis', 'Food Processing and Storage Engineering', 'Electrochemistry', 'Russian Language and Literature', 'Special Number Theory', 'Political Economy', 'Imaging and Nuclear Medicine', 'Fundamental Mathematics', 'Pharmacology', 'Astronomical Observation and Technology', 'Human Geography', 'Subatomic and Atomic Physics', 'Bridge and Tunnel Engineering', 'Acoustics', 'Social and Folklore Studies', 'Radiochemistry', 'Musical Forms and Analysis', 'Control Theory and Control Engineering', 'Engineering Thermophysics', 'Health Toxicology and Environmental Health', 'Legal Theory and Legal History', 'Relativity', 'Nuclear Energy and Reactor Technology', 'Guidance, Navigation and Control', 'Library and Archival Science', 'Sports Humanities and Sociology', 'Agricultural Mechanization Engineering', 'Formal Languages', 'Thermal Energy Engineering', 'Law and Social Governance', 'International Trade', 'Electromagnetic Field and Microwave Technology', 'Instrumentation and Performance', 'Broadcasting and Television Art', 'Solid Mechanics', 'Environmental Engineering', 'Advanced Programming Languages', 'Solid Earth Geophysics', 'Statistical Mechanics', 'Hydrogeology', 'Land Resource Management and Administrative Management', 'Power Machinery and Engineering', 'Paleontology and 
Stratigraphy', 'Landscape Plants and Ornamental Horticulture', 'Instrument Science and Technology', 'Immunology', 'Clinical Stomatology', 'Clinical Laboratory Diagnostics', 'Botany', 'Microbiology and Biochemical Pharmacy', 'Ship Mechanics and Design Principles', 'International Law', 'Elements of Chemical Reaction Engineering', 'Basic Stomatology', 'Public Finance']
48
+ ```
49
+ ## Model Details
50
+ - Base Model: `allura-forge/Llama-3.3-8B-Instruct`
51
+ - Parameter Count: 8,030,261,248
52
+ - Precision: torch.bfloat16
53
+
54
+ ## Hardware
55
+ - GPU: NVIDIA RTX PRO 6000 Blackwell Server Edition
56
+ - Announced: Mar 17th, 2025
57
+ - Release Date: Mar 18th, 2025
58
+ - Memory Type: GDDR7
59
+ - Bandwidth: 1.79 TB/s
60
+ - Memory Size: 96 GB
61
+ - Memory Bus: 512 bit
62
+ - Shading Units: 24064
63
+ - TDP: 600W
64
+
65
+ ## Training Settings
66
+ ### PEFT
67
+ - Rank: 32
68
+ - LoRA alpha: 64
69
+ - Modules: q_proj, k_proj, v_proj, o_proj, gate_proj, up_proj, down_proj
70
+ - Gradient checkpointing: unsloth
71
+
72
+ ### SFT
73
+ - Epoch: 2
74
+ - Batch size: 32
75
+ - Gradient Accumulation steps: 1
76
+ - Warmup ratio: 0.05
77
+ - Learning rate: 0.0002
78
+ - Optimizer: adamw_torch_fused
79
+ - Learning rate scheduler: cosine
80
+
81
+ ## Training stats
82
+ - Date: 2026-03-26T03:53:29.234881
83
+ - Peak VRAM usage: 32.135 GB
84
+ - Global step: 1576
85
+ - Training runtime (seconds): 2681.8444
86
+ - Average training loss: 0.06838441643920647
87
+ - Final validation loss: 0.0504293330013752
88
+
89
+ ## Framework versions
90
+ - Unsloth: 2026.3.15
91
+ - TRL: 0.22.2
92
+ - Transformers: 4.56.2
93
+ - Pytorch: 2.10.0+cu128
94
+ - Datasets: 4.8.4
95
+ - Tokenizers: 0.22.2
96
+
97
+ ## License
98
+ This model is released under the Llama 3.2 Community License. See the [Terms of Use](https://www.llama.com/llama3_2/license/) for details.
chat_template.jinja ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>
2
+
3
+ '+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>
4
+
5
+ ' }}{% endif %}
config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 128000,
8
+ "dtype": "bfloat16",
9
+ "eos_token_id": 128009,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 4096,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 14336,
15
+ "max_position_embeddings": 8192,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "num_attention_heads": 32,
19
+ "num_hidden_layers": 32,
20
+ "num_key_value_heads": 8,
21
+ "pretraining_tp": 1,
22
+ "rms_norm_eps": 1e-05,
23
+ "rope_scaling": null,
24
+ "rope_theta": 500000.0,
25
+ "tie_word_embeddings": false,
26
+ "transformers_version": "4.56.2",
27
+ "use_cache": true,
28
+ "vocab_size": 128256
29
+ }
generation_config.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 128000,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 128001,
6
+ 128009
7
+ ],
8
+ "max_length": 4096,
9
+ "temperature": 0.6,
10
+ "top_p": 0.9,
11
+ "transformers_version": "4.56.2"
12
+ }
model-00001-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60d42e32f551f12ce79b67b7d6a3959ae9367ab673298ecddd32778e8b8b8f75
3
+ size 4976698672
model-00002-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e86540e301b30ce4d7315cbc99b835852b14405901816fcc96b8c60e3426aab
3
+ size 4999802720
model-00003-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cc9b6bc2d9e7c7f3e8f19292c857ea68b600326ca602945a63505764bae0fd7
3
+ size 4915916176
model-00004-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce3f749a1e7d3a08afe65b6f7beeffa960918d97b4e54449ae5af1ac77a71b49
3
+ size 1168138808
model.safetensors.index.json ADDED
@@ -0,0 +1,299 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "total_parameters": 8030261248,
4
+ "total_size": 16060522496
5
+ },
6
+ "weight_map": {
7
+ "lm_head.weight": "model-00004-of-00004.safetensors",
8
+ "model.embed_tokens.weight": "model-00001-of-00004.safetensors",
9
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
10
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
11
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
12
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
13
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
14
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
15
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
16
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
17
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
18
+ "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
19
+ "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
20
+ "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
21
+ "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
22
+ "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
23
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
24
+ "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
25
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
26
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
27
+ "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
28
+ "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
29
+ "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
30
+ "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
31
+ "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
32
+ "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
33
+ "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
34
+ "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
35
+ "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
36
+ "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
37
+ "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
38
+ "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
39
+ "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
40
+ "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
41
+ "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
42
+ "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
43
+ "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
44
+ "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
45
+ "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
46
+ "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
47
+ "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
48
+ "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
49
+ "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
50
+ "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
51
+ "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
52
+ "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
53
+ "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
54
+ "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
55
+ "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
56
+ "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
57
+ "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
58
+ "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
59
+ "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
60
+ "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
61
+ "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
62
+ "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
63
+ "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
64
+ "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
65
+ "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
66
+ "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
67
+ "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
68
+ "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
69
+ "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
70
+ "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
71
+ "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
72
+ "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
73
+ "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
74
+ "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
75
+ "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
76
+ "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
77
+ "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
78
+ "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
79
+ "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
80
+ "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
81
+ "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors",
82
+ "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
83
+ "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
84
+ "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
85
+ "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
86
+ "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
87
+ "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
88
+ "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
89
+ "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
90
+ "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors",
91
+ "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
92
+ "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
93
+ "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
94
+ "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
95
+ "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
96
+ "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
97
+ "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
98
+ "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
99
+ "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors",
100
+ "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
101
+ "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
102
+ "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
103
+ "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
104
+ "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
105
+ "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
106
+ "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
107
+ "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
108
+ "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors",
109
+ "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
110
+ "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
111
+ "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
112
+ "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
113
+ "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
114
+ "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
115
+ "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
116
+ "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
117
+ "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
118
+ "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
119
+ "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
120
+ "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
121
+ "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
122
+ "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
123
+ "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
124
+ "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
125
+ "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
126
+ "model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors",
127
+ "model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
128
+ "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
129
+ "model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
130
+ "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
131
+ "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
132
+ "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
133
+ "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
134
+ "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
135
+ "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors",
136
+ "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
137
+ "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
138
+ "model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
139
+ "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
140
+ "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
141
+ "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
142
+ "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
143
+ "model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
144
+ "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
145
+ "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
146
+ "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
147
+ "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
148
+ "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
149
+ "model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
150
+ "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
151
+ "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
152
+ "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
153
+ "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors",
154
+ "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
155
+ "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
156
+ "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
157
+ "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
158
+ "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
159
+ "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
160
+ "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
161
+ "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
162
+ "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
163
+ "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
164
+ "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
165
+ "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
166
+ "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
167
+ "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
168
+ "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
169
+ "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
170
+ "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
171
+ "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
172
+ "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
173
+ "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
174
+ "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
175
+ "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
176
+ "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
177
+ "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
178
+ "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
179
+ "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
180
+ "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors",
181
+ "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
182
+ "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
183
+ "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
184
+ "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
185
+ "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
186
+ "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
187
+ "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
188
+ "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
189
+ "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors",
190
+ "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
191
+ "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
192
+ "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
193
+ "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
194
+ "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
195
+ "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
196
+ "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
197
+ "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
198
+ "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors",
199
+ "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
200
+ "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
201
+ "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
202
+ "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
203
+ "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
204
+ "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
205
+ "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
206
+ "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
207
+ "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors",
208
+ "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
209
+ "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
210
+ "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
211
+ "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
212
+ "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
213
+ "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
214
+ "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
215
+ "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
216
+ "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
217
+ "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
218
+ "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
219
+ "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
220
+ "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
221
+ "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
222
+ "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
223
+ "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
224
+ "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
225
+ "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors",
226
+ "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
227
+ "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
228
+ "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
229
+ "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
230
+ "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
231
+ "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
232
+ "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
233
+ "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
234
+ "model.layers.31.input_layernorm.weight": "model-00004-of-00004.safetensors",
235
+ "model.layers.31.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
236
+ "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
237
+ "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
238
+ "model.layers.31.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
239
+ "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
240
+ "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
241
+ "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
242
+ "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
243
+ "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
244
+ "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
245
+ "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
246
+ "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
247
+ "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
248
+ "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
249
+ "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
250
+ "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
251
+ "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
252
+ "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors",
253
+ "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
254
+ "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
255
+ "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
256
+ "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
257
+ "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
258
+ "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
259
+ "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
260
+ "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
261
+ "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors",
262
+ "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
263
+ "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
264
+ "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
265
+ "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
266
+ "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
267
+ "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
268
+ "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
269
+ "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
270
+ "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors",
271
+ "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
272
+ "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
273
+ "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
274
+ "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
275
+ "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
276
+ "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
277
+ "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
278
+ "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
279
+ "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors",
280
+ "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
281
+ "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
282
+ "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
283
+ "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
284
+ "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
285
+ "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
286
+ "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
287
+ "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
288
+ "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
289
+ "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
290
+ "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
291
+ "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
292
+ "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
293
+ "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
294
+ "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
295
+ "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
296
+ "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
297
+ "model.norm.weight": "model-00004-of-00004.safetensors"
298
+ }
299
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|begin_of_text|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|eot_id|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<|reserved_special_token_250|>"
17
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393
3
+ size 17209961
tokenizer_config.json ADDED
@@ -0,0 +1,2064 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "128000": {
4
+ "content": "<|begin_of_text|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "128001": {
12
+ "content": "<|end_of_text|>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "128002": {
20
+ "content": "<|reserved_special_token_0|>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "128003": {
28
+ "content": "<|reserved_special_token_1|>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "128004": {
36
+ "content": "<|reserved_special_token_2|>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "128005": {
44
+ "content": "<|reserved_special_token_3|>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "128006": {
52
+ "content": "<|start_header_id|>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "128007": {
60
+ "content": "<|end_header_id|>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "128008": {
68
+ "content": "<|reserved_special_token_4|>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "128009": {
76
+ "content": "<|eot_id|>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "128010": {
84
+ "content": "<|reserved_special_token_5|>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "128011": {
92
+ "content": "<|reserved_special_token_6|>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "128012": {
100
+ "content": "<|reserved_special_token_7|>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "128013": {
108
+ "content": "<|reserved_special_token_8|>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "128014": {
116
+ "content": "<|reserved_special_token_9|>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "128015": {
124
+ "content": "<|reserved_special_token_10|>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "128016": {
132
+ "content": "<|reserved_special_token_11|>",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "128017": {
140
+ "content": "<|reserved_special_token_12|>",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "128018": {
148
+ "content": "<|reserved_special_token_13|>",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "128019": {
156
+ "content": "<|reserved_special_token_14|>",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "128020": {
164
+ "content": "<|reserved_special_token_15|>",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "128021": {
172
+ "content": "<|reserved_special_token_16|>",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ },
179
+ "128022": {
180
+ "content": "<|reserved_special_token_17|>",
181
+ "lstrip": false,
182
+ "normalized": false,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": true
186
+ },
187
+ "128023": {
188
+ "content": "<|reserved_special_token_18|>",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": true
194
+ },
195
+ "128024": {
196
+ "content": "<|reserved_special_token_19|>",
197
+ "lstrip": false,
198
+ "normalized": false,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": true
202
+ },
203
+ "128025": {
204
+ "content": "<|reserved_special_token_20|>",
205
+ "lstrip": false,
206
+ "normalized": false,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": true
210
+ },
211
+ "128026": {
212
+ "content": "<|reserved_special_token_21|>",
213
+ "lstrip": false,
214
+ "normalized": false,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": true
218
+ },
219
+ "128027": {
220
+ "content": "<|reserved_special_token_22|>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "128028": {
228
+ "content": "<|reserved_special_token_23|>",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "128029": {
236
+ "content": "<|reserved_special_token_24|>",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
+ },
243
+ "128030": {
244
+ "content": "<|reserved_special_token_25|>",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": true
250
+ },
251
+ "128031": {
252
+ "content": "<|reserved_special_token_26|>",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "128032": {
260
+ "content": "<|reserved_special_token_27|>",
261
+ "lstrip": false,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": true
266
+ },
267
+ "128033": {
268
+ "content": "<|reserved_special_token_28|>",
269
+ "lstrip": false,
270
+ "normalized": false,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": true
274
+ },
275
+ "128034": {
276
+ "content": "<|reserved_special_token_29|>",
277
+ "lstrip": false,
278
+ "normalized": false,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": true
282
+ },
283
+ "128035": {
284
+ "content": "<|reserved_special_token_30|>",
285
+ "lstrip": false,
286
+ "normalized": false,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": true
290
+ },
291
+ "128036": {
292
+ "content": "<|reserved_special_token_31|>",
293
+ "lstrip": false,
294
+ "normalized": false,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": true
298
+ },
299
+ "128037": {
300
+ "content": "<|reserved_special_token_32|>",
301
+ "lstrip": false,
302
+ "normalized": false,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": true
306
+ },
307
+ "128038": {
308
+ "content": "<|reserved_special_token_33|>",
309
+ "lstrip": false,
310
+ "normalized": false,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": true
314
+ },
315
+ "128039": {
316
+ "content": "<|reserved_special_token_34|>",
317
+ "lstrip": false,
318
+ "normalized": false,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": true
322
+ },
323
+ "128040": {
324
+ "content": "<|reserved_special_token_35|>",
325
+ "lstrip": false,
326
+ "normalized": false,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": true
330
+ },
331
+ "128041": {
332
+ "content": "<|reserved_special_token_36|>",
333
+ "lstrip": false,
334
+ "normalized": false,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": true
338
+ },
339
+ "128042": {
340
+ "content": "<|reserved_special_token_37|>",
341
+ "lstrip": false,
342
+ "normalized": false,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": true
346
+ },
347
+ "128043": {
348
+ "content": "<|reserved_special_token_38|>",
349
+ "lstrip": false,
350
+ "normalized": false,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": true
354
+ },
355
+ "128044": {
356
+ "content": "<|reserved_special_token_39|>",
357
+ "lstrip": false,
358
+ "normalized": false,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": true
362
+ },
363
+ "128045": {
364
+ "content": "<|reserved_special_token_40|>",
365
+ "lstrip": false,
366
+ "normalized": false,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": true
370
+ },
371
+ "128046": {
372
+ "content": "<|reserved_special_token_41|>",
373
+ "lstrip": false,
374
+ "normalized": false,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": true
378
+ },
379
+ "128047": {
380
+ "content": "<|reserved_special_token_42|>",
381
+ "lstrip": false,
382
+ "normalized": false,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": true
386
+ },
387
+ "128048": {
388
+ "content": "<|reserved_special_token_43|>",
389
+ "lstrip": false,
390
+ "normalized": false,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": true
394
+ },
395
+ "128049": {
396
+ "content": "<|reserved_special_token_44|>",
397
+ "lstrip": false,
398
+ "normalized": false,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": true
402
+ },
403
+ "128050": {
404
+ "content": "<|reserved_special_token_45|>",
405
+ "lstrip": false,
406
+ "normalized": false,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": true
410
+ },
411
+ "128051": {
412
+ "content": "<|reserved_special_token_46|>",
413
+ "lstrip": false,
414
+ "normalized": false,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": true
418
+ },
419
+ "128052": {
420
+ "content": "<|reserved_special_token_47|>",
421
+ "lstrip": false,
422
+ "normalized": false,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": true
426
+ },
427
+ "128053": {
428
+ "content": "<|reserved_special_token_48|>",
429
+ "lstrip": false,
430
+ "normalized": false,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": true
434
+ },
435
+ "128054": {
436
+ "content": "<|reserved_special_token_49|>",
437
+ "lstrip": false,
438
+ "normalized": false,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": true
442
+ },
443
+ "128055": {
444
+ "content": "<|reserved_special_token_50|>",
445
+ "lstrip": false,
446
+ "normalized": false,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": true
450
+ },
451
+ "128056": {
452
+ "content": "<|reserved_special_token_51|>",
453
+ "lstrip": false,
454
+ "normalized": false,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": true
458
+ },
459
+ "128057": {
460
+ "content": "<|reserved_special_token_52|>",
461
+ "lstrip": false,
462
+ "normalized": false,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": true
466
+ },
467
+ "128058": {
468
+ "content": "<|reserved_special_token_53|>",
469
+ "lstrip": false,
470
+ "normalized": false,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": true
474
+ },
475
+ "128059": {
476
+ "content": "<|reserved_special_token_54|>",
477
+ "lstrip": false,
478
+ "normalized": false,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": true
482
+ },
483
+ "128060": {
484
+ "content": "<|reserved_special_token_55|>",
485
+ "lstrip": false,
486
+ "normalized": false,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": true
490
+ },
491
+ "128061": {
492
+ "content": "<|reserved_special_token_56|>",
493
+ "lstrip": false,
494
+ "normalized": false,
495
+ "rstrip": false,
496
+ "single_word": false,
497
+ "special": true
498
+ },
499
+ "128062": {
500
+ "content": "<|reserved_special_token_57|>",
501
+ "lstrip": false,
502
+ "normalized": false,
503
+ "rstrip": false,
504
+ "single_word": false,
505
+ "special": true
506
+ },
507
+ "128063": {
508
+ "content": "<|reserved_special_token_58|>",
509
+ "lstrip": false,
510
+ "normalized": false,
511
+ "rstrip": false,
512
+ "single_word": false,
513
+ "special": true
514
+ },
515
+ "128064": {
516
+ "content": "<|reserved_special_token_59|>",
517
+ "lstrip": false,
518
+ "normalized": false,
519
+ "rstrip": false,
520
+ "single_word": false,
521
+ "special": true
522
+ },
523
+ "128065": {
524
+ "content": "<|reserved_special_token_60|>",
525
+ "lstrip": false,
526
+ "normalized": false,
527
+ "rstrip": false,
528
+ "single_word": false,
529
+ "special": true
530
+ },
531
+ "128066": {
532
+ "content": "<|reserved_special_token_61|>",
533
+ "lstrip": false,
534
+ "normalized": false,
535
+ "rstrip": false,
536
+ "single_word": false,
537
+ "special": true
538
+ },
539
+ "128067": {
540
+ "content": "<|reserved_special_token_62|>",
541
+ "lstrip": false,
542
+ "normalized": false,
543
+ "rstrip": false,
544
+ "single_word": false,
545
+ "special": true
546
+ },
547
+ "128068": {
548
+ "content": "<|reserved_special_token_63|>",
549
+ "lstrip": false,
550
+ "normalized": false,
551
+ "rstrip": false,
552
+ "single_word": false,
553
+ "special": true
554
+ },
555
+ "128069": {
556
+ "content": "<|reserved_special_token_64|>",
557
+ "lstrip": false,
558
+ "normalized": false,
559
+ "rstrip": false,
560
+ "single_word": false,
561
+ "special": true
562
+ },
563
+ "128070": {
564
+ "content": "<|reserved_special_token_65|>",
565
+ "lstrip": false,
566
+ "normalized": false,
567
+ "rstrip": false,
568
+ "single_word": false,
569
+ "special": true
570
+ },
571
+ "128071": {
572
+ "content": "<|reserved_special_token_66|>",
573
+ "lstrip": false,
574
+ "normalized": false,
575
+ "rstrip": false,
576
+ "single_word": false,
577
+ "special": true
578
+ },
579
+ "128072": {
580
+ "content": "<|reserved_special_token_67|>",
581
+ "lstrip": false,
582
+ "normalized": false,
583
+ "rstrip": false,
584
+ "single_word": false,
585
+ "special": true
586
+ },
587
+ "128073": {
588
+ "content": "<|reserved_special_token_68|>",
589
+ "lstrip": false,
590
+ "normalized": false,
591
+ "rstrip": false,
592
+ "single_word": false,
593
+ "special": true
594
+ },
595
+ "128074": {
596
+ "content": "<|reserved_special_token_69|>",
597
+ "lstrip": false,
598
+ "normalized": false,
599
+ "rstrip": false,
600
+ "single_word": false,
601
+ "special": true
602
+ },
603
+ "128075": {
604
+ "content": "<|reserved_special_token_70|>",
605
+ "lstrip": false,
606
+ "normalized": false,
607
+ "rstrip": false,
608
+ "single_word": false,
609
+ "special": true
610
+ },
611
+ "128076": {
612
+ "content": "<|reserved_special_token_71|>",
613
+ "lstrip": false,
614
+ "normalized": false,
615
+ "rstrip": false,
616
+ "single_word": false,
617
+ "special": true
618
+ },
619
+ "128077": {
620
+ "content": "<|reserved_special_token_72|>",
621
+ "lstrip": false,
622
+ "normalized": false,
623
+ "rstrip": false,
624
+ "single_word": false,
625
+ "special": true
626
+ },
627
+ "128078": {
628
+ "content": "<|reserved_special_token_73|>",
629
+ "lstrip": false,
630
+ "normalized": false,
631
+ "rstrip": false,
632
+ "single_word": false,
633
+ "special": true
634
+ },
635
+ "128079": {
636
+ "content": "<|reserved_special_token_74|>",
637
+ "lstrip": false,
638
+ "normalized": false,
639
+ "rstrip": false,
640
+ "single_word": false,
641
+ "special": true
642
+ },
643
+ "128080": {
644
+ "content": "<|reserved_special_token_75|>",
645
+ "lstrip": false,
646
+ "normalized": false,
647
+ "rstrip": false,
648
+ "single_word": false,
649
+ "special": true
650
+ },
651
+ "128081": {
652
+ "content": "<|reserved_special_token_76|>",
653
+ "lstrip": false,
654
+ "normalized": false,
655
+ "rstrip": false,
656
+ "single_word": false,
657
+ "special": true
658
+ },
659
+ "128082": {
660
+ "content": "<|reserved_special_token_77|>",
661
+ "lstrip": false,
662
+ "normalized": false,
663
+ "rstrip": false,
664
+ "single_word": false,
665
+ "special": true
666
+ },
667
+ "128083": {
668
+ "content": "<|reserved_special_token_78|>",
669
+ "lstrip": false,
670
+ "normalized": false,
671
+ "rstrip": false,
672
+ "single_word": false,
673
+ "special": true
674
+ },
675
+ "128084": {
676
+ "content": "<|reserved_special_token_79|>",
677
+ "lstrip": false,
678
+ "normalized": false,
679
+ "rstrip": false,
680
+ "single_word": false,
681
+ "special": true
682
+ },
683
+ "128085": {
684
+ "content": "<|reserved_special_token_80|>",
685
+ "lstrip": false,
686
+ "normalized": false,
687
+ "rstrip": false,
688
+ "single_word": false,
689
+ "special": true
690
+ },
691
+ "128086": {
692
+ "content": "<|reserved_special_token_81|>",
693
+ "lstrip": false,
694
+ "normalized": false,
695
+ "rstrip": false,
696
+ "single_word": false,
697
+ "special": true
698
+ },
699
+ "128087": {
700
+ "content": "<|reserved_special_token_82|>",
701
+ "lstrip": false,
702
+ "normalized": false,
703
+ "rstrip": false,
704
+ "single_word": false,
705
+ "special": true
706
+ },
707
+ "128088": {
708
+ "content": "<|reserved_special_token_83|>",
709
+ "lstrip": false,
710
+ "normalized": false,
711
+ "rstrip": false,
712
+ "single_word": false,
713
+ "special": true
714
+ },
715
+ "128089": {
716
+ "content": "<|reserved_special_token_84|>",
717
+ "lstrip": false,
718
+ "normalized": false,
719
+ "rstrip": false,
720
+ "single_word": false,
721
+ "special": true
722
+ },
723
+ "128090": {
724
+ "content": "<|reserved_special_token_85|>",
725
+ "lstrip": false,
726
+ "normalized": false,
727
+ "rstrip": false,
728
+ "single_word": false,
729
+ "special": true
730
+ },
731
+ "128091": {
732
+ "content": "<|reserved_special_token_86|>",
733
+ "lstrip": false,
734
+ "normalized": false,
735
+ "rstrip": false,
736
+ "single_word": false,
737
+ "special": true
738
+ },
739
+ "128092": {
740
+ "content": "<|reserved_special_token_87|>",
741
+ "lstrip": false,
742
+ "normalized": false,
743
+ "rstrip": false,
744
+ "single_word": false,
745
+ "special": true
746
+ },
747
+ "128093": {
748
+ "content": "<|reserved_special_token_88|>",
749
+ "lstrip": false,
750
+ "normalized": false,
751
+ "rstrip": false,
752
+ "single_word": false,
753
+ "special": true
754
+ },
755
+ "128094": {
756
+ "content": "<|reserved_special_token_89|>",
757
+ "lstrip": false,
758
+ "normalized": false,
759
+ "rstrip": false,
760
+ "single_word": false,
761
+ "special": true
762
+ },
763
+ "128095": {
764
+ "content": "<|reserved_special_token_90|>",
765
+ "lstrip": false,
766
+ "normalized": false,
767
+ "rstrip": false,
768
+ "single_word": false,
769
+ "special": true
770
+ },
771
+ "128096": {
772
+ "content": "<|reserved_special_token_91|>",
773
+ "lstrip": false,
774
+ "normalized": false,
775
+ "rstrip": false,
776
+ "single_word": false,
777
+ "special": true
778
+ },
779
+ "128097": {
780
+ "content": "<|reserved_special_token_92|>",
781
+ "lstrip": false,
782
+ "normalized": false,
783
+ "rstrip": false,
784
+ "single_word": false,
785
+ "special": true
786
+ },
787
+ "128098": {
788
+ "content": "<|reserved_special_token_93|>",
789
+ "lstrip": false,
790
+ "normalized": false,
791
+ "rstrip": false,
792
+ "single_word": false,
793
+ "special": true
794
+ },
795
+ "128099": {
796
+ "content": "<|reserved_special_token_94|>",
797
+ "lstrip": false,
798
+ "normalized": false,
799
+ "rstrip": false,
800
+ "single_word": false,
801
+ "special": true
802
+ },
803
+ "128100": {
804
+ "content": "<|reserved_special_token_95|>",
805
+ "lstrip": false,
806
+ "normalized": false,
807
+ "rstrip": false,
808
+ "single_word": false,
809
+ "special": true
810
+ },
811
+ "128101": {
812
+ "content": "<|reserved_special_token_96|>",
813
+ "lstrip": false,
814
+ "normalized": false,
815
+ "rstrip": false,
816
+ "single_word": false,
817
+ "special": true
818
+ },
819
+ "128102": {
820
+ "content": "<|reserved_special_token_97|>",
821
+ "lstrip": false,
822
+ "normalized": false,
823
+ "rstrip": false,
824
+ "single_word": false,
825
+ "special": true
826
+ },
827
+ "128103": {
828
+ "content": "<|reserved_special_token_98|>",
829
+ "lstrip": false,
830
+ "normalized": false,
831
+ "rstrip": false,
832
+ "single_word": false,
833
+ "special": true
834
+ },
835
+ "128104": {
836
+ "content": "<|reserved_special_token_99|>",
837
+ "lstrip": false,
838
+ "normalized": false,
839
+ "rstrip": false,
840
+ "single_word": false,
841
+ "special": true
842
+ },
843
+ "128105": {
844
+ "content": "<|reserved_special_token_100|>",
845
+ "lstrip": false,
846
+ "normalized": false,
847
+ "rstrip": false,
848
+ "single_word": false,
849
+ "special": true
850
+ },
851
+ "128106": {
852
+ "content": "<|reserved_special_token_101|>",
853
+ "lstrip": false,
854
+ "normalized": false,
855
+ "rstrip": false,
856
+ "single_word": false,
857
+ "special": true
858
+ },
859
+ "128107": {
860
+ "content": "<|reserved_special_token_102|>",
861
+ "lstrip": false,
862
+ "normalized": false,
863
+ "rstrip": false,
864
+ "single_word": false,
865
+ "special": true
866
+ },
867
+ "128108": {
868
+ "content": "<|reserved_special_token_103|>",
869
+ "lstrip": false,
870
+ "normalized": false,
871
+ "rstrip": false,
872
+ "single_word": false,
873
+ "special": true
874
+ },
875
+ "128109": {
876
+ "content": "<|reserved_special_token_104|>",
877
+ "lstrip": false,
878
+ "normalized": false,
879
+ "rstrip": false,
880
+ "single_word": false,
881
+ "special": true
882
+ },
883
+ "128110": {
884
+ "content": "<|reserved_special_token_105|>",
885
+ "lstrip": false,
886
+ "normalized": false,
887
+ "rstrip": false,
888
+ "single_word": false,
889
+ "special": true
890
+ },
891
+ "128111": {
892
+ "content": "<|reserved_special_token_106|>",
893
+ "lstrip": false,
894
+ "normalized": false,
895
+ "rstrip": false,
896
+ "single_word": false,
897
+ "special": true
898
+ },
899
+ "128112": {
900
+ "content": "<|reserved_special_token_107|>",
901
+ "lstrip": false,
902
+ "normalized": false,
903
+ "rstrip": false,
904
+ "single_word": false,
905
+ "special": true
906
+ },
907
+ "128113": {
908
+ "content": "<|reserved_special_token_108|>",
909
+ "lstrip": false,
910
+ "normalized": false,
911
+ "rstrip": false,
912
+ "single_word": false,
913
+ "special": true
914
+ },
915
+ "128114": {
916
+ "content": "<|reserved_special_token_109|>",
917
+ "lstrip": false,
918
+ "normalized": false,
919
+ "rstrip": false,
920
+ "single_word": false,
921
+ "special": true
922
+ },
923
+ "128115": {
924
+ "content": "<|reserved_special_token_110|>",
925
+ "lstrip": false,
926
+ "normalized": false,
927
+ "rstrip": false,
928
+ "single_word": false,
929
+ "special": true
930
+ },
931
+ "128116": {
932
+ "content": "<|reserved_special_token_111|>",
933
+ "lstrip": false,
934
+ "normalized": false,
935
+ "rstrip": false,
936
+ "single_word": false,
937
+ "special": true
938
+ },
939
+ "128117": {
940
+ "content": "<|reserved_special_token_112|>",
941
+ "lstrip": false,
942
+ "normalized": false,
943
+ "rstrip": false,
944
+ "single_word": false,
945
+ "special": true
946
+ },
947
+ "128118": {
948
+ "content": "<|reserved_special_token_113|>",
949
+ "lstrip": false,
950
+ "normalized": false,
951
+ "rstrip": false,
952
+ "single_word": false,
953
+ "special": true
954
+ },
955
+ "128119": {
956
+ "content": "<|reserved_special_token_114|>",
957
+ "lstrip": false,
958
+ "normalized": false,
959
+ "rstrip": false,
960
+ "single_word": false,
961
+ "special": true
962
+ },
963
+ "128120": {
964
+ "content": "<|reserved_special_token_115|>",
965
+ "lstrip": false,
966
+ "normalized": false,
967
+ "rstrip": false,
968
+ "single_word": false,
969
+ "special": true
970
+ },
971
+ "128121": {
972
+ "content": "<|reserved_special_token_116|>",
973
+ "lstrip": false,
974
+ "normalized": false,
975
+ "rstrip": false,
976
+ "single_word": false,
977
+ "special": true
978
+ },
979
+ "128122": {
980
+ "content": "<|reserved_special_token_117|>",
981
+ "lstrip": false,
982
+ "normalized": false,
983
+ "rstrip": false,
984
+ "single_word": false,
985
+ "special": true
986
+ },
987
+ "128123": {
988
+ "content": "<|reserved_special_token_118|>",
989
+ "lstrip": false,
990
+ "normalized": false,
991
+ "rstrip": false,
992
+ "single_word": false,
993
+ "special": true
994
+ },
995
+ "128124": {
996
+ "content": "<|reserved_special_token_119|>",
997
+ "lstrip": false,
998
+ "normalized": false,
999
+ "rstrip": false,
1000
+ "single_word": false,
1001
+ "special": true
1002
+ },
1003
+ "128125": {
1004
+ "content": "<|reserved_special_token_120|>",
1005
+ "lstrip": false,
1006
+ "normalized": false,
1007
+ "rstrip": false,
1008
+ "single_word": false,
1009
+ "special": true
1010
+ },
1011
+ "128126": {
1012
+ "content": "<|reserved_special_token_121|>",
1013
+ "lstrip": false,
1014
+ "normalized": false,
1015
+ "rstrip": false,
1016
+ "single_word": false,
1017
+ "special": true
1018
+ },
1019
+ "128127": {
1020
+ "content": "<|reserved_special_token_122|>",
1021
+ "lstrip": false,
1022
+ "normalized": false,
1023
+ "rstrip": false,
1024
+ "single_word": false,
1025
+ "special": true
1026
+ },
1027
+ "128128": {
1028
+ "content": "<|reserved_special_token_123|>",
1029
+ "lstrip": false,
1030
+ "normalized": false,
1031
+ "rstrip": false,
1032
+ "single_word": false,
1033
+ "special": true
1034
+ },
1035
+ "128129": {
1036
+ "content": "<|reserved_special_token_124|>",
1037
+ "lstrip": false,
1038
+ "normalized": false,
1039
+ "rstrip": false,
1040
+ "single_word": false,
1041
+ "special": true
1042
+ },
1043
+ "128130": {
1044
+ "content": "<|reserved_special_token_125|>",
1045
+ "lstrip": false,
1046
+ "normalized": false,
1047
+ "rstrip": false,
1048
+ "single_word": false,
1049
+ "special": true
1050
+ },
1051
+ "128131": {
1052
+ "content": "<|reserved_special_token_126|>",
1053
+ "lstrip": false,
1054
+ "normalized": false,
1055
+ "rstrip": false,
1056
+ "single_word": false,
1057
+ "special": true
1058
+ },
1059
+ "128132": {
1060
+ "content": "<|reserved_special_token_127|>",
1061
+ "lstrip": false,
1062
+ "normalized": false,
1063
+ "rstrip": false,
1064
+ "single_word": false,
1065
+ "special": true
1066
+ },
1067
+ "128133": {
1068
+ "content": "<|reserved_special_token_128|>",
1069
+ "lstrip": false,
1070
+ "normalized": false,
1071
+ "rstrip": false,
1072
+ "single_word": false,
1073
+ "special": true
1074
+ },
1075
+ "128134": {
1076
+ "content": "<|reserved_special_token_129|>",
1077
+ "lstrip": false,
1078
+ "normalized": false,
1079
+ "rstrip": false,
1080
+ "single_word": false,
1081
+ "special": true
1082
+ },
1083
+ "128135": {
1084
+ "content": "<|reserved_special_token_130|>",
1085
+ "lstrip": false,
1086
+ "normalized": false,
1087
+ "rstrip": false,
1088
+ "single_word": false,
1089
+ "special": true
1090
+ },
1091
+ "128136": {
1092
+ "content": "<|reserved_special_token_131|>",
1093
+ "lstrip": false,
1094
+ "normalized": false,
1095
+ "rstrip": false,
1096
+ "single_word": false,
1097
+ "special": true
1098
+ },
1099
+ "128137": {
1100
+ "content": "<|reserved_special_token_132|>",
1101
+ "lstrip": false,
1102
+ "normalized": false,
1103
+ "rstrip": false,
1104
+ "single_word": false,
1105
+ "special": true
1106
+ },
1107
+ "128138": {
1108
+ "content": "<|reserved_special_token_133|>",
1109
+ "lstrip": false,
1110
+ "normalized": false,
1111
+ "rstrip": false,
1112
+ "single_word": false,
1113
+ "special": true
1114
+ },
1115
+ "128139": {
1116
+ "content": "<|reserved_special_token_134|>",
1117
+ "lstrip": false,
1118
+ "normalized": false,
1119
+ "rstrip": false,
1120
+ "single_word": false,
1121
+ "special": true
1122
+ },
1123
+ "128140": {
1124
+ "content": "<|reserved_special_token_135|>",
1125
+ "lstrip": false,
1126
+ "normalized": false,
1127
+ "rstrip": false,
1128
+ "single_word": false,
1129
+ "special": true
1130
+ },
1131
+ "128141": {
1132
+ "content": "<|reserved_special_token_136|>",
1133
+ "lstrip": false,
1134
+ "normalized": false,
1135
+ "rstrip": false,
1136
+ "single_word": false,
1137
+ "special": true
1138
+ },
1139
+ "128142": {
1140
+ "content": "<|reserved_special_token_137|>",
1141
+ "lstrip": false,
1142
+ "normalized": false,
1143
+ "rstrip": false,
1144
+ "single_word": false,
1145
+ "special": true
1146
+ },
1147
+ "128143": {
1148
+ "content": "<|reserved_special_token_138|>",
1149
+ "lstrip": false,
1150
+ "normalized": false,
1151
+ "rstrip": false,
1152
+ "single_word": false,
1153
+ "special": true
1154
+ },
1155
+ "128144": {
1156
+ "content": "<|reserved_special_token_139|>",
1157
+ "lstrip": false,
1158
+ "normalized": false,
1159
+ "rstrip": false,
1160
+ "single_word": false,
1161
+ "special": true
1162
+ },
1163
+ "128145": {
1164
+ "content": "<|reserved_special_token_140|>",
1165
+ "lstrip": false,
1166
+ "normalized": false,
1167
+ "rstrip": false,
1168
+ "single_word": false,
1169
+ "special": true
1170
+ },
1171
+ "128146": {
1172
+ "content": "<|reserved_special_token_141|>",
1173
+ "lstrip": false,
1174
+ "normalized": false,
1175
+ "rstrip": false,
1176
+ "single_word": false,
1177
+ "special": true
1178
+ },
1179
+ "128147": {
1180
+ "content": "<|reserved_special_token_142|>",
1181
+ "lstrip": false,
1182
+ "normalized": false,
1183
+ "rstrip": false,
1184
+ "single_word": false,
1185
+ "special": true
1186
+ },
1187
+ "128148": {
1188
+ "content": "<|reserved_special_token_143|>",
1189
+ "lstrip": false,
1190
+ "normalized": false,
1191
+ "rstrip": false,
1192
+ "single_word": false,
1193
+ "special": true
1194
+ },
1195
+ "128149": {
1196
+ "content": "<|reserved_special_token_144|>",
1197
+ "lstrip": false,
1198
+ "normalized": false,
1199
+ "rstrip": false,
1200
+ "single_word": false,
1201
+ "special": true
1202
+ },
1203
+ "128150": {
1204
+ "content": "<|reserved_special_token_145|>",
1205
+ "lstrip": false,
1206
+ "normalized": false,
1207
+ "rstrip": false,
1208
+ "single_word": false,
1209
+ "special": true
1210
+ },
1211
+ "128151": {
1212
+ "content": "<|reserved_special_token_146|>",
1213
+ "lstrip": false,
1214
+ "normalized": false,
1215
+ "rstrip": false,
1216
+ "single_word": false,
1217
+ "special": true
1218
+ },
1219
+ "128152": {
1220
+ "content": "<|reserved_special_token_147|>",
1221
+ "lstrip": false,
1222
+ "normalized": false,
1223
+ "rstrip": false,
1224
+ "single_word": false,
1225
+ "special": true
1226
+ },
1227
+ "128153": {
1228
+ "content": "<|reserved_special_token_148|>",
1229
+ "lstrip": false,
1230
+ "normalized": false,
1231
+ "rstrip": false,
1232
+ "single_word": false,
1233
+ "special": true
1234
+ },
1235
+ "128154": {
1236
+ "content": "<|reserved_special_token_149|>",
1237
+ "lstrip": false,
1238
+ "normalized": false,
1239
+ "rstrip": false,
1240
+ "single_word": false,
1241
+ "special": true
1242
+ },
1243
+ "128155": {
1244
+ "content": "<|reserved_special_token_150|>",
1245
+ "lstrip": false,
1246
+ "normalized": false,
1247
+ "rstrip": false,
1248
+ "single_word": false,
1249
+ "special": true
1250
+ },
1251
+ "128156": {
1252
+ "content": "<|reserved_special_token_151|>",
1253
+ "lstrip": false,
1254
+ "normalized": false,
1255
+ "rstrip": false,
1256
+ "single_word": false,
1257
+ "special": true
1258
+ },
1259
+ "128157": {
1260
+ "content": "<|reserved_special_token_152|>",
1261
+ "lstrip": false,
1262
+ "normalized": false,
1263
+ "rstrip": false,
1264
+ "single_word": false,
1265
+ "special": true
1266
+ },
1267
+ "128158": {
1268
+ "content": "<|reserved_special_token_153|>",
1269
+ "lstrip": false,
1270
+ "normalized": false,
1271
+ "rstrip": false,
1272
+ "single_word": false,
1273
+ "special": true
1274
+ },
1275
+ "128159": {
1276
+ "content": "<|reserved_special_token_154|>",
1277
+ "lstrip": false,
1278
+ "normalized": false,
1279
+ "rstrip": false,
1280
+ "single_word": false,
1281
+ "special": true
1282
+ },
1283
+ "128160": {
1284
+ "content": "<|reserved_special_token_155|>",
1285
+ "lstrip": false,
1286
+ "normalized": false,
1287
+ "rstrip": false,
1288
+ "single_word": false,
1289
+ "special": true
1290
+ },
1291
+ "128161": {
1292
+ "content": "<|reserved_special_token_156|>",
1293
+ "lstrip": false,
1294
+ "normalized": false,
1295
+ "rstrip": false,
1296
+ "single_word": false,
1297
+ "special": true
1298
+ },
1299
+ "128162": {
1300
+ "content": "<|reserved_special_token_157|>",
1301
+ "lstrip": false,
1302
+ "normalized": false,
1303
+ "rstrip": false,
1304
+ "single_word": false,
1305
+ "special": true
1306
+ },
1307
+ "128163": {
1308
+ "content": "<|reserved_special_token_158|>",
1309
+ "lstrip": false,
1310
+ "normalized": false,
1311
+ "rstrip": false,
1312
+ "single_word": false,
1313
+ "special": true
1314
+ },
1315
+ "128164": {
1316
+ "content": "<|reserved_special_token_159|>",
1317
+ "lstrip": false,
1318
+ "normalized": false,
1319
+ "rstrip": false,
1320
+ "single_word": false,
1321
+ "special": true
1322
+ },
1323
+ "128165": {
1324
+ "content": "<|reserved_special_token_160|>",
1325
+ "lstrip": false,
1326
+ "normalized": false,
1327
+ "rstrip": false,
1328
+ "single_word": false,
1329
+ "special": true
1330
+ },
1331
+ "128166": {
1332
+ "content": "<|reserved_special_token_161|>",
1333
+ "lstrip": false,
1334
+ "normalized": false,
1335
+ "rstrip": false,
1336
+ "single_word": false,
1337
+ "special": true
1338
+ },
1339
+ "128167": {
1340
+ "content": "<|reserved_special_token_162|>",
1341
+ "lstrip": false,
1342
+ "normalized": false,
1343
+ "rstrip": false,
1344
+ "single_word": false,
1345
+ "special": true
1346
+ },
1347
+ "128168": {
1348
+ "content": "<|reserved_special_token_163|>",
1349
+ "lstrip": false,
1350
+ "normalized": false,
1351
+ "rstrip": false,
1352
+ "single_word": false,
1353
+ "special": true
1354
+ },
1355
+ "128169": {
1356
+ "content": "<|reserved_special_token_164|>",
1357
+ "lstrip": false,
1358
+ "normalized": false,
1359
+ "rstrip": false,
1360
+ "single_word": false,
1361
+ "special": true
1362
+ },
1363
+ "128170": {
1364
+ "content": "<|reserved_special_token_165|>",
1365
+ "lstrip": false,
1366
+ "normalized": false,
1367
+ "rstrip": false,
1368
+ "single_word": false,
1369
+ "special": true
1370
+ },
1371
+ "128171": {
1372
+ "content": "<|reserved_special_token_166|>",
1373
+ "lstrip": false,
1374
+ "normalized": false,
1375
+ "rstrip": false,
1376
+ "single_word": false,
1377
+ "special": true
1378
+ },
1379
+ "128172": {
1380
+ "content": "<|reserved_special_token_167|>",
1381
+ "lstrip": false,
1382
+ "normalized": false,
1383
+ "rstrip": false,
1384
+ "single_word": false,
1385
+ "special": true
1386
+ },
1387
+ "128173": {
1388
+ "content": "<|reserved_special_token_168|>",
1389
+ "lstrip": false,
1390
+ "normalized": false,
1391
+ "rstrip": false,
1392
+ "single_word": false,
1393
+ "special": true
1394
+ },
1395
+ "128174": {
1396
+ "content": "<|reserved_special_token_169|>",
1397
+ "lstrip": false,
1398
+ "normalized": false,
1399
+ "rstrip": false,
1400
+ "single_word": false,
1401
+ "special": true
1402
+ },
1403
+ "128175": {
1404
+ "content": "<|reserved_special_token_170|>",
1405
+ "lstrip": false,
1406
+ "normalized": false,
1407
+ "rstrip": false,
1408
+ "single_word": false,
1409
+ "special": true
1410
+ },
1411
+ "128176": {
1412
+ "content": "<|reserved_special_token_171|>",
1413
+ "lstrip": false,
1414
+ "normalized": false,
1415
+ "rstrip": false,
1416
+ "single_word": false,
1417
+ "special": true
1418
+ },
1419
+ "128177": {
1420
+ "content": "<|reserved_special_token_172|>",
1421
+ "lstrip": false,
1422
+ "normalized": false,
1423
+ "rstrip": false,
1424
+ "single_word": false,
1425
+ "special": true
1426
+ },
1427
+ "128178": {
1428
+ "content": "<|reserved_special_token_173|>",
1429
+ "lstrip": false,
1430
+ "normalized": false,
1431
+ "rstrip": false,
1432
+ "single_word": false,
1433
+ "special": true
1434
+ },
1435
+ "128179": {
1436
+ "content": "<|reserved_special_token_174|>",
1437
+ "lstrip": false,
1438
+ "normalized": false,
1439
+ "rstrip": false,
1440
+ "single_word": false,
1441
+ "special": true
1442
+ },
1443
+ "128180": {
1444
+ "content": "<|reserved_special_token_175|>",
1445
+ "lstrip": false,
1446
+ "normalized": false,
1447
+ "rstrip": false,
1448
+ "single_word": false,
1449
+ "special": true
1450
+ },
1451
+ "128181": {
1452
+ "content": "<|reserved_special_token_176|>",
1453
+ "lstrip": false,
1454
+ "normalized": false,
1455
+ "rstrip": false,
1456
+ "single_word": false,
1457
+ "special": true
1458
+ },
1459
+ "128182": {
1460
+ "content": "<|reserved_special_token_177|>",
1461
+ "lstrip": false,
1462
+ "normalized": false,
1463
+ "rstrip": false,
1464
+ "single_word": false,
1465
+ "special": true
1466
+ },
1467
+ "128183": {
1468
+ "content": "<|reserved_special_token_178|>",
1469
+ "lstrip": false,
1470
+ "normalized": false,
1471
+ "rstrip": false,
1472
+ "single_word": false,
1473
+ "special": true
1474
+ },
1475
+ "128184": {
1476
+ "content": "<|reserved_special_token_179|>",
1477
+ "lstrip": false,
1478
+ "normalized": false,
1479
+ "rstrip": false,
1480
+ "single_word": false,
1481
+ "special": true
1482
+ },
1483
+ "128185": {
1484
+ "content": "<|reserved_special_token_180|>",
1485
+ "lstrip": false,
1486
+ "normalized": false,
1487
+ "rstrip": false,
1488
+ "single_word": false,
1489
+ "special": true
1490
+ },
1491
+ "128186": {
1492
+ "content": "<|reserved_special_token_181|>",
1493
+ "lstrip": false,
1494
+ "normalized": false,
1495
+ "rstrip": false,
1496
+ "single_word": false,
1497
+ "special": true
1498
+ },
1499
+ "128187": {
1500
+ "content": "<|reserved_special_token_182|>",
1501
+ "lstrip": false,
1502
+ "normalized": false,
1503
+ "rstrip": false,
1504
+ "single_word": false,
1505
+ "special": true
1506
+ },
1507
+ "128188": {
1508
+ "content": "<|reserved_special_token_183|>",
1509
+ "lstrip": false,
1510
+ "normalized": false,
1511
+ "rstrip": false,
1512
+ "single_word": false,
1513
+ "special": true
1514
+ },
1515
+ "128189": {
1516
+ "content": "<|reserved_special_token_184|>",
1517
+ "lstrip": false,
1518
+ "normalized": false,
1519
+ "rstrip": false,
1520
+ "single_word": false,
1521
+ "special": true
1522
+ },
1523
+ "128190": {
1524
+ "content": "<|reserved_special_token_185|>",
1525
+ "lstrip": false,
1526
+ "normalized": false,
1527
+ "rstrip": false,
1528
+ "single_word": false,
1529
+ "special": true
1530
+ },
1531
+ "128191": {
1532
+ "content": "<|reserved_special_token_186|>",
1533
+ "lstrip": false,
1534
+ "normalized": false,
1535
+ "rstrip": false,
1536
+ "single_word": false,
1537
+ "special": true
1538
+ },
1539
+ "128192": {
1540
+ "content": "<|reserved_special_token_187|>",
1541
+ "lstrip": false,
1542
+ "normalized": false,
1543
+ "rstrip": false,
1544
+ "single_word": false,
1545
+ "special": true
1546
+ },
1547
+ "128193": {
1548
+ "content": "<|reserved_special_token_188|>",
1549
+ "lstrip": false,
1550
+ "normalized": false,
1551
+ "rstrip": false,
1552
+ "single_word": false,
1553
+ "special": true
1554
+ },
1555
+ "128194": {
1556
+ "content": "<|reserved_special_token_189|>",
1557
+ "lstrip": false,
1558
+ "normalized": false,
1559
+ "rstrip": false,
1560
+ "single_word": false,
1561
+ "special": true
1562
+ },
1563
+ "128195": {
1564
+ "content": "<|reserved_special_token_190|>",
1565
+ "lstrip": false,
1566
+ "normalized": false,
1567
+ "rstrip": false,
1568
+ "single_word": false,
1569
+ "special": true
1570
+ },
1571
+ "128196": {
1572
+ "content": "<|reserved_special_token_191|>",
1573
+ "lstrip": false,
1574
+ "normalized": false,
1575
+ "rstrip": false,
1576
+ "single_word": false,
1577
+ "special": true
1578
+ },
1579
+ "128197": {
1580
+ "content": "<|reserved_special_token_192|>",
1581
+ "lstrip": false,
1582
+ "normalized": false,
1583
+ "rstrip": false,
1584
+ "single_word": false,
1585
+ "special": true
1586
+ },
1587
+ "128198": {
1588
+ "content": "<|reserved_special_token_193|>",
1589
+ "lstrip": false,
1590
+ "normalized": false,
1591
+ "rstrip": false,
1592
+ "single_word": false,
1593
+ "special": true
1594
+ },
1595
+ "128199": {
1596
+ "content": "<|reserved_special_token_194|>",
1597
+ "lstrip": false,
1598
+ "normalized": false,
1599
+ "rstrip": false,
1600
+ "single_word": false,
1601
+ "special": true
1602
+ },
1603
+ "128200": {
1604
+ "content": "<|reserved_special_token_195|>",
1605
+ "lstrip": false,
1606
+ "normalized": false,
1607
+ "rstrip": false,
1608
+ "single_word": false,
1609
+ "special": true
1610
+ },
1611
+ "128201": {
1612
+ "content": "<|reserved_special_token_196|>",
1613
+ "lstrip": false,
1614
+ "normalized": false,
1615
+ "rstrip": false,
1616
+ "single_word": false,
1617
+ "special": true
1618
+ },
1619
+ "128202": {
1620
+ "content": "<|reserved_special_token_197|>",
1621
+ "lstrip": false,
1622
+ "normalized": false,
1623
+ "rstrip": false,
1624
+ "single_word": false,
1625
+ "special": true
1626
+ },
1627
+ "128203": {
1628
+ "content": "<|reserved_special_token_198|>",
1629
+ "lstrip": false,
1630
+ "normalized": false,
1631
+ "rstrip": false,
1632
+ "single_word": false,
1633
+ "special": true
1634
+ },
1635
+ "128204": {
1636
+ "content": "<|reserved_special_token_199|>",
1637
+ "lstrip": false,
1638
+ "normalized": false,
1639
+ "rstrip": false,
1640
+ "single_word": false,
1641
+ "special": true
1642
+ },
1643
+ "128205": {
1644
+ "content": "<|reserved_special_token_200|>",
1645
+ "lstrip": false,
1646
+ "normalized": false,
1647
+ "rstrip": false,
1648
+ "single_word": false,
1649
+ "special": true
1650
+ },
1651
+ "128206": {
1652
+ "content": "<|reserved_special_token_201|>",
1653
+ "lstrip": false,
1654
+ "normalized": false,
1655
+ "rstrip": false,
1656
+ "single_word": false,
1657
+ "special": true
1658
+ },
1659
+ "128207": {
1660
+ "content": "<|reserved_special_token_202|>",
1661
+ "lstrip": false,
1662
+ "normalized": false,
1663
+ "rstrip": false,
1664
+ "single_word": false,
1665
+ "special": true
1666
+ },
1667
+ "128208": {
1668
+ "content": "<|reserved_special_token_203|>",
1669
+ "lstrip": false,
1670
+ "normalized": false,
1671
+ "rstrip": false,
1672
+ "single_word": false,
1673
+ "special": true
1674
+ },
1675
+ "128209": {
1676
+ "content": "<|reserved_special_token_204|>",
1677
+ "lstrip": false,
1678
+ "normalized": false,
1679
+ "rstrip": false,
1680
+ "single_word": false,
1681
+ "special": true
1682
+ },
1683
+ "128210": {
1684
+ "content": "<|reserved_special_token_205|>",
1685
+ "lstrip": false,
1686
+ "normalized": false,
1687
+ "rstrip": false,
1688
+ "single_word": false,
1689
+ "special": true
1690
+ },
1691
+ "128211": {
1692
+ "content": "<|reserved_special_token_206|>",
1693
+ "lstrip": false,
1694
+ "normalized": false,
1695
+ "rstrip": false,
1696
+ "single_word": false,
1697
+ "special": true
1698
+ },
1699
+ "128212": {
1700
+ "content": "<|reserved_special_token_207|>",
1701
+ "lstrip": false,
1702
+ "normalized": false,
1703
+ "rstrip": false,
1704
+ "single_word": false,
1705
+ "special": true
1706
+ },
1707
+ "128213": {
1708
+ "content": "<|reserved_special_token_208|>",
1709
+ "lstrip": false,
1710
+ "normalized": false,
1711
+ "rstrip": false,
1712
+ "single_word": false,
1713
+ "special": true
1714
+ },
1715
+ "128214": {
1716
+ "content": "<|reserved_special_token_209|>",
1717
+ "lstrip": false,
1718
+ "normalized": false,
1719
+ "rstrip": false,
1720
+ "single_word": false,
1721
+ "special": true
1722
+ },
1723
+ "128215": {
1724
+ "content": "<|reserved_special_token_210|>",
1725
+ "lstrip": false,
1726
+ "normalized": false,
1727
+ "rstrip": false,
1728
+ "single_word": false,
1729
+ "special": true
1730
+ },
1731
+ "128216": {
1732
+ "content": "<|reserved_special_token_211|>",
1733
+ "lstrip": false,
1734
+ "normalized": false,
1735
+ "rstrip": false,
1736
+ "single_word": false,
1737
+ "special": true
1738
+ },
1739
+ "128217": {
1740
+ "content": "<|reserved_special_token_212|>",
1741
+ "lstrip": false,
1742
+ "normalized": false,
1743
+ "rstrip": false,
1744
+ "single_word": false,
1745
+ "special": true
1746
+ },
1747
+ "128218": {
1748
+ "content": "<|reserved_special_token_213|>",
1749
+ "lstrip": false,
1750
+ "normalized": false,
1751
+ "rstrip": false,
1752
+ "single_word": false,
1753
+ "special": true
1754
+ },
1755
+ "128219": {
1756
+ "content": "<|reserved_special_token_214|>",
1757
+ "lstrip": false,
1758
+ "normalized": false,
1759
+ "rstrip": false,
1760
+ "single_word": false,
1761
+ "special": true
1762
+ },
1763
+ "128220": {
1764
+ "content": "<|reserved_special_token_215|>",
1765
+ "lstrip": false,
1766
+ "normalized": false,
1767
+ "rstrip": false,
1768
+ "single_word": false,
1769
+ "special": true
1770
+ },
1771
+ "128221": {
1772
+ "content": "<|reserved_special_token_216|>",
1773
+ "lstrip": false,
1774
+ "normalized": false,
1775
+ "rstrip": false,
1776
+ "single_word": false,
1777
+ "special": true
1778
+ },
1779
+ "128222": {
1780
+ "content": "<|reserved_special_token_217|>",
1781
+ "lstrip": false,
1782
+ "normalized": false,
1783
+ "rstrip": false,
1784
+ "single_word": false,
1785
+ "special": true
1786
+ },
1787
+ "128223": {
1788
+ "content": "<|reserved_special_token_218|>",
1789
+ "lstrip": false,
1790
+ "normalized": false,
1791
+ "rstrip": false,
1792
+ "single_word": false,
1793
+ "special": true
1794
+ },
1795
+ "128224": {
1796
+ "content": "<|reserved_special_token_219|>",
1797
+ "lstrip": false,
1798
+ "normalized": false,
1799
+ "rstrip": false,
1800
+ "single_word": false,
1801
+ "special": true
1802
+ },
1803
+ "128225": {
1804
+ "content": "<|reserved_special_token_220|>",
1805
+ "lstrip": false,
1806
+ "normalized": false,
1807
+ "rstrip": false,
1808
+ "single_word": false,
1809
+ "special": true
1810
+ },
1811
+ "128226": {
1812
+ "content": "<|reserved_special_token_221|>",
1813
+ "lstrip": false,
1814
+ "normalized": false,
1815
+ "rstrip": false,
1816
+ "single_word": false,
1817
+ "special": true
1818
+ },
1819
+ "128227": {
1820
+ "content": "<|reserved_special_token_222|>",
1821
+ "lstrip": false,
1822
+ "normalized": false,
1823
+ "rstrip": false,
1824
+ "single_word": false,
1825
+ "special": true
1826
+ },
1827
+ "128228": {
1828
+ "content": "<|reserved_special_token_223|>",
1829
+ "lstrip": false,
1830
+ "normalized": false,
1831
+ "rstrip": false,
1832
+ "single_word": false,
1833
+ "special": true
1834
+ },
1835
+ "128229": {
1836
+ "content": "<|reserved_special_token_224|>",
1837
+ "lstrip": false,
1838
+ "normalized": false,
1839
+ "rstrip": false,
1840
+ "single_word": false,
1841
+ "special": true
1842
+ },
1843
+ "128230": {
1844
+ "content": "<|reserved_special_token_225|>",
1845
+ "lstrip": false,
1846
+ "normalized": false,
1847
+ "rstrip": false,
1848
+ "single_word": false,
1849
+ "special": true
1850
+ },
1851
+ "128231": {
1852
+ "content": "<|reserved_special_token_226|>",
1853
+ "lstrip": false,
1854
+ "normalized": false,
1855
+ "rstrip": false,
1856
+ "single_word": false,
1857
+ "special": true
1858
+ },
1859
+ "128232": {
1860
+ "content": "<|reserved_special_token_227|>",
1861
+ "lstrip": false,
1862
+ "normalized": false,
1863
+ "rstrip": false,
1864
+ "single_word": false,
1865
+ "special": true
1866
+ },
1867
+ "128233": {
1868
+ "content": "<|reserved_special_token_228|>",
1869
+ "lstrip": false,
1870
+ "normalized": false,
1871
+ "rstrip": false,
1872
+ "single_word": false,
1873
+ "special": true
1874
+ },
1875
+ "128234": {
1876
+ "content": "<|reserved_special_token_229|>",
1877
+ "lstrip": false,
1878
+ "normalized": false,
1879
+ "rstrip": false,
1880
+ "single_word": false,
1881
+ "special": true
1882
+ },
1883
+ "128235": {
1884
+ "content": "<|reserved_special_token_230|>",
1885
+ "lstrip": false,
1886
+ "normalized": false,
1887
+ "rstrip": false,
1888
+ "single_word": false,
1889
+ "special": true
1890
+ },
1891
+ "128236": {
1892
+ "content": "<|reserved_special_token_231|>",
1893
+ "lstrip": false,
1894
+ "normalized": false,
1895
+ "rstrip": false,
1896
+ "single_word": false,
1897
+ "special": true
1898
+ },
1899
+ "128237": {
1900
+ "content": "<|reserved_special_token_232|>",
1901
+ "lstrip": false,
1902
+ "normalized": false,
1903
+ "rstrip": false,
1904
+ "single_word": false,
1905
+ "special": true
1906
+ },
1907
+ "128238": {
1908
+ "content": "<|reserved_special_token_233|>",
1909
+ "lstrip": false,
1910
+ "normalized": false,
1911
+ "rstrip": false,
1912
+ "single_word": false,
1913
+ "special": true
1914
+ },
1915
+ "128239": {
1916
+ "content": "<|reserved_special_token_234|>",
1917
+ "lstrip": false,
1918
+ "normalized": false,
1919
+ "rstrip": false,
1920
+ "single_word": false,
1921
+ "special": true
1922
+ },
1923
+ "128240": {
1924
+ "content": "<|reserved_special_token_235|>",
1925
+ "lstrip": false,
1926
+ "normalized": false,
1927
+ "rstrip": false,
1928
+ "single_word": false,
1929
+ "special": true
1930
+ },
1931
+ "128241": {
1932
+ "content": "<|reserved_special_token_236|>",
1933
+ "lstrip": false,
1934
+ "normalized": false,
1935
+ "rstrip": false,
1936
+ "single_word": false,
1937
+ "special": true
1938
+ },
1939
+ "128242": {
1940
+ "content": "<|reserved_special_token_237|>",
1941
+ "lstrip": false,
1942
+ "normalized": false,
1943
+ "rstrip": false,
1944
+ "single_word": false,
1945
+ "special": true
1946
+ },
1947
+ "128243": {
1948
+ "content": "<|reserved_special_token_238|>",
1949
+ "lstrip": false,
1950
+ "normalized": false,
1951
+ "rstrip": false,
1952
+ "single_word": false,
1953
+ "special": true
1954
+ },
1955
+ "128244": {
1956
+ "content": "<|reserved_special_token_239|>",
1957
+ "lstrip": false,
1958
+ "normalized": false,
1959
+ "rstrip": false,
1960
+ "single_word": false,
1961
+ "special": true
1962
+ },
1963
+ "128245": {
1964
+ "content": "<|reserved_special_token_240|>",
1965
+ "lstrip": false,
1966
+ "normalized": false,
1967
+ "rstrip": false,
1968
+ "single_word": false,
1969
+ "special": true
1970
+ },
1971
+ "128246": {
1972
+ "content": "<|reserved_special_token_241|>",
1973
+ "lstrip": false,
1974
+ "normalized": false,
1975
+ "rstrip": false,
1976
+ "single_word": false,
1977
+ "special": true
1978
+ },
1979
+ "128247": {
1980
+ "content": "<|reserved_special_token_242|>",
1981
+ "lstrip": false,
1982
+ "normalized": false,
1983
+ "rstrip": false,
1984
+ "single_word": false,
1985
+ "special": true
1986
+ },
1987
+ "128248": {
1988
+ "content": "<|reserved_special_token_243|>",
1989
+ "lstrip": false,
1990
+ "normalized": false,
1991
+ "rstrip": false,
1992
+ "single_word": false,
1993
+ "special": true
1994
+ },
1995
+ "128249": {
1996
+ "content": "<|reserved_special_token_244|>",
1997
+ "lstrip": false,
1998
+ "normalized": false,
1999
+ "rstrip": false,
2000
+ "single_word": false,
2001
+ "special": true
2002
+ },
2003
+ "128250": {
2004
+ "content": "<|reserved_special_token_245|>",
2005
+ "lstrip": false,
2006
+ "normalized": false,
2007
+ "rstrip": false,
2008
+ "single_word": false,
2009
+ "special": true
2010
+ },
2011
+ "128251": {
2012
+ "content": "<|reserved_special_token_246|>",
2013
+ "lstrip": false,
2014
+ "normalized": false,
2015
+ "rstrip": false,
2016
+ "single_word": false,
2017
+ "special": true
2018
+ },
2019
+ "128252": {
2020
+ "content": "<|reserved_special_token_247|>",
2021
+ "lstrip": false,
2022
+ "normalized": false,
2023
+ "rstrip": false,
2024
+ "single_word": false,
2025
+ "special": true
2026
+ },
2027
+ "128253": {
2028
+ "content": "<|reserved_special_token_248|>",
2029
+ "lstrip": false,
2030
+ "normalized": false,
2031
+ "rstrip": false,
2032
+ "single_word": false,
2033
+ "special": true
2034
+ },
2035
+ "128254": {
2036
+ "content": "<|reserved_special_token_249|>",
2037
+ "lstrip": false,
2038
+ "normalized": false,
2039
+ "rstrip": false,
2040
+ "single_word": false,
2041
+ "special": true
2042
+ },
2043
+ "128255": {
2044
+ "content": "<|reserved_special_token_250|>",
2045
+ "lstrip": false,
2046
+ "normalized": false,
2047
+ "rstrip": false,
2048
+ "single_word": false,
2049
+ "special": true
2050
+ }
2051
+ },
2052
+ "bos_token": "<|begin_of_text|>",
2053
+ "clean_up_tokenization_spaces": true,
2054
+ "eos_token": "<|eot_id|>",
2055
+ "extra_special_tokens": {},
2056
+ "model_input_names": [
2057
+ "input_ids",
2058
+ "attention_mask"
2059
+ ],
2060
+ "model_max_length": 1000000000000000019884624838656,
2061
+ "pad_token": "<|reserved_special_token_250|>",
2062
+ "padding_side": "left",
2063
+ "tokenizer_class": "PreTrainedTokenizerFast"
2064
+ }
train/log.json ADDED
@@ -0,0 +1,1190 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "loss": 1.8996,
4
+ "grad_norm": 5.035277843475342,
5
+ "learning_rate": 2.278481012658228e-05,
6
+ "epoch": 0.012690355329949238,
7
+ "step": 10
8
+ },
9
+ {
10
+ "loss": 0.5315,
11
+ "grad_norm": 1.102072834968567,
12
+ "learning_rate": 4.810126582278481e-05,
13
+ "epoch": 0.025380710659898477,
14
+ "step": 20
15
+ },
16
+ {
17
+ "loss": 0.3353,
18
+ "grad_norm": 0.7798988819122314,
19
+ "learning_rate": 7.341772151898734e-05,
20
+ "epoch": 0.03807106598984772,
21
+ "step": 30
22
+ },
23
+ {
24
+ "loss": 0.2226,
25
+ "grad_norm": 0.8653473854064941,
26
+ "learning_rate": 9.873417721518988e-05,
27
+ "epoch": 0.050761421319796954,
28
+ "step": 40
29
+ },
30
+ {
31
+ "loss": 0.164,
32
+ "grad_norm": 0.7569780349731445,
33
+ "learning_rate": 0.0001240506329113924,
34
+ "epoch": 0.06345177664974619,
35
+ "step": 50
36
+ },
37
+ {
38
+ "loss": 0.1394,
39
+ "grad_norm": 1.0211968421936035,
40
+ "learning_rate": 0.00014936708860759494,
41
+ "epoch": 0.07614213197969544,
42
+ "step": 60
43
+ },
44
+ {
45
+ "loss": 0.1201,
46
+ "grad_norm": 0.5370887517929077,
47
+ "learning_rate": 0.00017468354430379748,
48
+ "epoch": 0.08883248730964467,
49
+ "step": 70
50
+ },
51
+ {
52
+ "loss": 0.122,
53
+ "grad_norm": 0.49917498230934143,
54
+ "learning_rate": 0.0002,
55
+ "epoch": 0.10152284263959391,
56
+ "step": 80
57
+ },
58
+ {
59
+ "loss": 0.1217,
60
+ "grad_norm": 0.4577413499355316,
61
+ "learning_rate": 0.0001999779803602204,
62
+ "epoch": 0.11421319796954314,
63
+ "step": 90
64
+ },
65
+ {
66
+ "loss": 0.0965,
67
+ "grad_norm": 0.48522070050239563,
68
+ "learning_rate": 0.00019991193113817244,
69
+ "epoch": 0.12690355329949238,
70
+ "step": 100
71
+ },
72
+ {
73
+ "loss": 0.11,
74
+ "grad_norm": 0.41902250051498413,
75
+ "learning_rate": 0.00019980188142145754,
76
+ "epoch": 0.13959390862944163,
77
+ "step": 110
78
+ },
79
+ {
80
+ "loss": 0.0823,
81
+ "grad_norm": 0.5561641454696655,
82
+ "learning_rate": 0.00019964787967517817,
83
+ "epoch": 0.15228426395939088,
84
+ "step": 120
85
+ },
86
+ {
87
+ "loss": 0.0856,
88
+ "grad_norm": 0.3316971957683563,
89
+ "learning_rate": 0.00019944999372059388,
90
+ "epoch": 0.1649746192893401,
91
+ "step": 130
92
+ },
93
+ {
94
+ "loss": 0.0849,
95
+ "grad_norm": 0.372153639793396,
96
+ "learning_rate": 0.00019920831070525342,
97
+ "epoch": 0.17766497461928935,
98
+ "step": 140
99
+ },
100
+ {
101
+ "loss": 0.0929,
102
+ "grad_norm": 0.33250877261161804,
103
+ "learning_rate": 0.00019892293706461555,
104
+ "epoch": 0.19035532994923857,
105
+ "step": 150
106
+ },
107
+ {
108
+ "eval_loss": 0.08791538327932358,
109
+ "eval_runtime": 29.62,
110
+ "eval_samples_per_second": 44.801,
111
+ "eval_steps_per_second": 11.209,
112
+ "epoch": 0.19923857868020303,
113
+ "step": 157
114
+ },
115
+ {
116
+ "loss": 0.0824,
117
+ "grad_norm": 0.4130192995071411,
118
+ "learning_rate": 0.00019859399847517567,
119
+ "epoch": 0.20304568527918782,
120
+ "step": 160
121
+ },
122
+ {
123
+ "loss": 0.0902,
124
+ "grad_norm": 0.3217241168022156,
125
+ "learning_rate": 0.0001982216397991188,
126
+ "epoch": 0.21573604060913706,
127
+ "step": 170
128
+ },
129
+ {
130
+ "loss": 0.0766,
131
+ "grad_norm": 0.4728490710258484,
132
+ "learning_rate": 0.0001978060250205232,
133
+ "epoch": 0.22842639593908629,
134
+ "step": 180
135
+ },
136
+ {
137
+ "loss": 0.0844,
138
+ "grad_norm": 0.5730077028274536,
139
+ "learning_rate": 0.0001973473371731431,
140
+ "epoch": 0.24111675126903553,
141
+ "step": 190
142
+ },
143
+ {
144
+ "loss": 0.0841,
145
+ "grad_norm": 0.5745298862457275,
146
+ "learning_rate": 0.00019684577825980192,
147
+ "epoch": 0.25380710659898476,
148
+ "step": 200
149
+ },
150
+ {
151
+ "loss": 0.0797,
152
+ "grad_norm": 0.3141058683395386,
153
+ "learning_rate": 0.0001963015691634317,
154
+ "epoch": 0.26649746192893403,
155
+ "step": 210
156
+ },
157
+ {
158
+ "loss": 0.0822,
159
+ "grad_norm": 0.3730680048465729,
160
+ "learning_rate": 0.00019571494954979775,
161
+ "epoch": 0.27918781725888325,
162
+ "step": 220
163
+ },
164
+ {
165
+ "loss": 0.0677,
166
+ "grad_norm": 0.3915182650089264,
167
+ "learning_rate": 0.00019508617776195167,
168
+ "epoch": 0.2918781725888325,
169
+ "step": 230
170
+ },
171
+ {
172
+ "loss": 0.08,
173
+ "grad_norm": 0.3052193820476532,
174
+ "learning_rate": 0.00019441553070645887,
175
+ "epoch": 0.30456852791878175,
176
+ "step": 240
177
+ },
178
+ {
179
+ "loss": 0.0744,
180
+ "grad_norm": 0.3673352003097534,
181
+ "learning_rate": 0.000193703303731451,
182
+ "epoch": 0.31725888324873097,
183
+ "step": 250
184
+ },
185
+ {
186
+ "loss": 0.0821,
187
+ "grad_norm": 0.39443644881248474,
188
+ "learning_rate": 0.00019294981049655668,
189
+ "epoch": 0.3299492385786802,
190
+ "step": 260
191
+ },
192
+ {
193
+ "loss": 0.073,
194
+ "grad_norm": 0.44178199768066406,
195
+ "learning_rate": 0.0001921553828347681,
196
+ "epoch": 0.3426395939086294,
197
+ "step": 270
198
+ },
199
+ {
200
+ "loss": 0.0784,
201
+ "grad_norm": 0.4202715754508972,
202
+ "learning_rate": 0.00019132037060630409,
203
+ "epoch": 0.3553299492385787,
204
+ "step": 280
205
+ },
206
+ {
207
+ "loss": 0.0646,
208
+ "grad_norm": 0.23640507459640503,
209
+ "learning_rate": 0.00019044514154453434,
210
+ "epoch": 0.3680203045685279,
211
+ "step": 290
212
+ },
213
+ {
214
+ "loss": 0.0785,
215
+ "grad_norm": 0.4354120194911957,
216
+ "learning_rate": 0.0001895300810940321,
217
+ "epoch": 0.38071065989847713,
218
+ "step": 300
219
+ },
220
+ {
221
+ "loss": 0.0656,
222
+ "grad_norm": 0.2467317283153534,
223
+ "learning_rate": 0.00018857559224082736,
224
+ "epoch": 0.3934010152284264,
225
+ "step": 310
226
+ },
227
+ {
228
+ "eval_loss": 0.0728072002530098,
229
+ "eval_runtime": 19.9827,
230
+ "eval_samples_per_second": 66.407,
231
+ "eval_steps_per_second": 16.614,
232
+ "epoch": 0.39847715736040606,
233
+ "step": 314
234
+ },
235
+ {
236
+ "loss": 0.0738,
237
+ "grad_norm": 0.2969267666339874,
238
+ "learning_rate": 0.00018758209533493444,
239
+ "epoch": 0.40609137055837563,
240
+ "step": 320
241
+ },
242
+ {
243
+ "loss": 0.067,
244
+ "grad_norm": 0.3527528643608093,
245
+ "learning_rate": 0.00018655002790523328,
246
+ "epoch": 0.41878172588832485,
247
+ "step": 330
248
+ },
249
+ {
250
+ "loss": 0.0714,
251
+ "grad_norm": 0.2732889950275421,
252
+ "learning_rate": 0.00018547984446678437,
253
+ "epoch": 0.43147208121827413,
254
+ "step": 340
255
+ },
256
+ {
257
+ "loss": 0.0602,
258
+ "grad_norm": 0.25770312547683716,
259
+ "learning_rate": 0.000184372016320664,
260
+ "epoch": 0.44416243654822335,
261
+ "step": 350
262
+ },
263
+ {
264
+ "loss": 0.0624,
265
+ "grad_norm": 0.22473905980587006,
266
+ "learning_rate": 0.00018322703134640654,
267
+ "epoch": 0.45685279187817257,
268
+ "step": 360
269
+ },
270
+ {
271
+ "loss": 0.0709,
272
+ "grad_norm": 0.3180300295352936,
273
+ "learning_rate": 0.00018204539378714561,
274
+ "epoch": 0.46954314720812185,
275
+ "step": 370
276
+ },
277
+ {
278
+ "loss": 0.0698,
279
+ "grad_norm": 0.2796868085861206,
280
+ "learning_rate": 0.00018082762402754936,
281
+ "epoch": 0.48223350253807107,
282
+ "step": 380
283
+ },
284
+ {
285
+ "loss": 0.0658,
286
+ "grad_norm": 0.3655967712402344,
287
+ "learning_rate": 0.0001795742583646466,
288
+ "epoch": 0.4949238578680203,
289
+ "step": 390
290
+ },
291
+ {
292
+ "loss": 0.0682,
293
+ "grad_norm": 0.2886195182800293,
294
+ "learning_rate": 0.0001782858487716455,
295
+ "epoch": 0.5076142131979695,
296
+ "step": 400
297
+ },
298
+ {
299
+ "loss": 0.071,
300
+ "grad_norm": 0.27021610736846924,
301
+ "learning_rate": 0.00017696296265484862,
302
+ "epoch": 0.5203045685279187,
303
+ "step": 410
304
+ },
305
+ {
306
+ "loss": 0.0636,
307
+ "grad_norm": 0.28307008743286133,
308
+ "learning_rate": 0.00017560618260377116,
309
+ "epoch": 0.5329949238578681,
310
+ "step": 420
311
+ },
312
+ {
313
+ "loss": 0.0546,
314
+ "grad_norm": 0.28294482827186584,
315
+ "learning_rate": 0.00017421610613457282,
316
+ "epoch": 0.5456852791878173,
317
+ "step": 430
318
+ },
319
+ {
320
+ "loss": 0.0612,
321
+ "grad_norm": 0.2255251258611679,
322
+ "learning_rate": 0.00017279334542691596,
323
+ "epoch": 0.5583756345177665,
324
+ "step": 440
325
+ },
326
+ {
327
+ "loss": 0.0629,
328
+ "grad_norm": 0.22404751181602478,
329
+ "learning_rate": 0.0001713385270543661,
330
+ "epoch": 0.5710659898477157,
331
+ "step": 450
332
+ },
333
+ {
334
+ "loss": 0.0596,
335
+ "grad_norm": 0.2632795572280884,
336
+ "learning_rate": 0.00016985229170845339,
337
+ "epoch": 0.583756345177665,
338
+ "step": 460
339
+ },
340
+ {
341
+ "loss": 0.0717,
342
+ "grad_norm": 0.3002878427505493,
343
+ "learning_rate": 0.0001683352939165167,
344
+ "epoch": 0.5964467005076142,
345
+ "step": 470
346
+ },
347
+ {
348
+ "eval_loss": 0.06722872704267502,
349
+ "eval_runtime": 20.1214,
350
+ "eval_samples_per_second": 65.95,
351
+ "eval_steps_per_second": 16.5,
352
+ "epoch": 0.5977157360406091,
353
+ "step": 471
354
+ },
355
+ {
356
+ "loss": 0.0618,
357
+ "grad_norm": 0.15326248109340668,
358
+ "learning_rate": 0.00016678820175345454,
359
+ "epoch": 0.6091370558375635,
360
+ "step": 480
361
+ },
362
+ {
363
+ "loss": 0.0718,
364
+ "grad_norm": 0.27122628688812256,
365
+ "learning_rate": 0.00016521169654750968,
366
+ "epoch": 0.6218274111675127,
367
+ "step": 490
368
+ },
369
+ {
370
+ "loss": 0.0636,
371
+ "grad_norm": 0.29509711265563965,
372
+ "learning_rate": 0.00016360647258021696,
373
+ "epoch": 0.6345177664974619,
374
+ "step": 500
375
+ },
376
+ {
377
+ "loss": 0.0655,
378
+ "grad_norm": 0.4090014100074768,
379
+ "learning_rate": 0.00016197323678064697,
380
+ "epoch": 0.6472081218274112,
381
+ "step": 510
382
+ },
383
+ {
384
+ "loss": 0.0606,
385
+ "grad_norm": 0.2687474191188812,
386
+ "learning_rate": 0.00016031270841407926,
387
+ "epoch": 0.6598984771573604,
388
+ "step": 520
389
+ },
390
+ {
391
+ "loss": 0.0519,
392
+ "grad_norm": 0.25125357508659363,
393
+ "learning_rate": 0.00015862561876524338,
394
+ "epoch": 0.6725888324873096,
395
+ "step": 530
396
+ },
397
+ {
398
+ "loss": 0.0623,
399
+ "grad_norm": 0.21579739451408386,
400
+ "learning_rate": 0.0001569127108162662,
401
+ "epoch": 0.6852791878172588,
402
+ "step": 540
403
+ },
404
+ {
405
+ "loss": 0.0612,
406
+ "grad_norm": 0.24012021720409393,
407
+ "learning_rate": 0.000155174738919468,
408
+ "epoch": 0.6979695431472082,
409
+ "step": 550
410
+ },
411
+ {
412
+ "loss": 0.0617,
413
+ "grad_norm": 0.22273781895637512,
414
+ "learning_rate": 0.00015341246846515096,
415
+ "epoch": 0.7106598984771574,
416
+ "step": 560
417
+ },
418
+ {
419
+ "loss": 0.0627,
420
+ "grad_norm": 0.29965269565582275,
421
+ "learning_rate": 0.0001516266755445271,
422
+ "epoch": 0.7233502538071066,
423
+ "step": 570
424
+ },
425
+ {
426
+ "loss": 0.0649,
427
+ "grad_norm": 0.2375640720129013,
428
+ "learning_rate": 0.00014981814660793314,
429
+ "epoch": 0.7360406091370558,
430
+ "step": 580
431
+ },
432
+ {
433
+ "loss": 0.0653,
434
+ "grad_norm": 0.2595769166946411,
435
+ "learning_rate": 0.0001479876781184833,
436
+ "epoch": 0.748730964467005,
437
+ "step": 590
438
+ },
439
+ {
440
+ "loss": 0.0634,
441
+ "grad_norm": 0.28185659646987915,
442
+ "learning_rate": 0.00014613607620131294,
443
+ "epoch": 0.7614213197969543,
444
+ "step": 600
445
+ },
446
+ {
447
+ "loss": 0.0601,
448
+ "grad_norm": 0.20655085146427155,
449
+ "learning_rate": 0.00014426415628856663,
450
+ "epoch": 0.7741116751269036,
451
+ "step": 610
452
+ },
453
+ {
454
+ "loss": 0.0632,
455
+ "grad_norm": 0.4992614686489105,
456
+ "learning_rate": 0.0001423727427602879,
457
+ "epoch": 0.7868020304568528,
458
+ "step": 620
459
+ },
460
+ {
461
+ "eval_loss": 0.05841095373034477,
462
+ "eval_runtime": 20.0018,
463
+ "eval_samples_per_second": 66.344,
464
+ "eval_steps_per_second": 16.599,
465
+ "epoch": 0.7969543147208121,
466
+ "step": 628
467
+ },
468
+ {
469
+ "loss": 0.0522,
470
+ "grad_norm": 0.2023015171289444,
471
+ "learning_rate": 0.0001404626685813681,
472
+ "epoch": 0.799492385786802,
473
+ "step": 630
474
+ },
475
+ {
476
+ "loss": 0.0567,
477
+ "grad_norm": 0.20891991257667542,
478
+ "learning_rate": 0.00013853477493471468,
479
+ "epoch": 0.8121827411167513,
480
+ "step": 640
481
+ },
482
+ {
483
+ "loss": 0.0555,
484
+ "grad_norm": 0.27132412791252136,
485
+ "learning_rate": 0.00013658991085080025,
486
+ "epoch": 0.8248730964467005,
487
+ "step": 650
488
+ },
489
+ {
490
+ "loss": 0.0594,
491
+ "grad_norm": 0.22256866097450256,
492
+ "learning_rate": 0.0001346289328337558,
493
+ "epoch": 0.8375634517766497,
494
+ "step": 660
495
+ },
496
+ {
497
+ "loss": 0.0556,
498
+ "grad_norm": 0.20859505236148834,
499
+ "learning_rate": 0.00013265270448417234,
500
+ "epoch": 0.850253807106599,
501
+ "step": 670
502
+ },
503
+ {
504
+ "loss": 0.0557,
505
+ "grad_norm": 0.2204328030347824,
506
+ "learning_rate": 0.00013066209611877746,
507
+ "epoch": 0.8629441624365483,
508
+ "step": 680
509
+ },
510
+ {
511
+ "loss": 0.059,
512
+ "grad_norm": 0.2515346109867096,
513
+ "learning_rate": 0.00012865798438715413,
514
+ "epoch": 0.8756345177664975,
515
+ "step": 690
516
+ },
517
+ {
518
+ "loss": 0.0546,
519
+ "grad_norm": 0.3130325376987457,
520
+ "learning_rate": 0.00012664125188567056,
521
+ "epoch": 0.8883248730964467,
522
+ "step": 700
523
+ },
524
+ {
525
+ "loss": 0.0475,
526
+ "grad_norm": 0.2509436011314392,
527
+ "learning_rate": 0.00012461278676879098,
528
+ "epoch": 0.9010152284263959,
529
+ "step": 710
530
+ },
531
+ {
532
+ "loss": 0.0561,
533
+ "grad_norm": 0.23676852881908417,
534
+ "learning_rate": 0.00012257348235793897,
535
+ "epoch": 0.9137055837563451,
536
+ "step": 720
537
+ },
538
+ {
539
+ "loss": 0.0536,
540
+ "grad_norm": 0.20894668996334076,
541
+ "learning_rate": 0.00012052423674808513,
542
+ "epoch": 0.9263959390862944,
543
+ "step": 730
544
+ },
545
+ {
546
+ "loss": 0.0517,
547
+ "grad_norm": 0.18107716739177704,
548
+ "learning_rate": 0.00011846595241223247,
549
+ "epoch": 0.9390862944162437,
550
+ "step": 740
551
+ },
552
+ {
553
+ "loss": 0.0623,
554
+ "grad_norm": 0.3013327717781067,
555
+ "learning_rate": 0.00011639953580397367,
556
+ "epoch": 0.9517766497461929,
557
+ "step": 750
558
+ },
559
+ {
560
+ "loss": 0.0579,
561
+ "grad_norm": 0.19317802786827087,
562
+ "learning_rate": 0.00011432589695829576,
563
+ "epoch": 0.9644670050761421,
564
+ "step": 760
565
+ },
566
+ {
567
+ "loss": 0.0559,
568
+ "grad_norm": 0.26291170716285706,
569
+ "learning_rate": 0.00011224594909080704,
570
+ "epoch": 0.9771573604060914,
571
+ "step": 770
572
+ },
573
+ {
574
+ "loss": 0.0537,
575
+ "grad_norm": 0.28403881192207336,
576
+ "learning_rate": 0.00011016060819556353,
577
+ "epoch": 0.9898477157360406,
578
+ "step": 780
579
+ },
580
+ {
581
+ "eval_loss": 0.05360769107937813,
582
+ "eval_runtime": 20.0465,
583
+ "eval_samples_per_second": 66.196,
584
+ "eval_steps_per_second": 16.562,
585
+ "epoch": 0.9961928934010152,
586
+ "step": 785
587
+ },
588
+ {
589
+ "loss": 0.0502,
590
+ "grad_norm": 0.1471383273601532,
591
+ "learning_rate": 0.0001080707926416719,
592
+ "epoch": 1.00253807106599,
593
+ "step": 790
594
+ },
595
+ {
596
+ "loss": 0.038,
597
+ "grad_norm": 0.17716127634048462,
598
+ "learning_rate": 0.00010597742276884614,
599
+ "epoch": 1.015228426395939,
600
+ "step": 800
601
+ },
602
+ {
603
+ "loss": 0.0351,
604
+ "grad_norm": 0.2006382942199707,
605
+ "learning_rate": 0.00010388142048209676,
606
+ "epoch": 1.0279187817258884,
607
+ "step": 810
608
+ },
609
+ {
610
+ "loss": 0.0375,
611
+ "grad_norm": 0.2539692521095276,
612
+ "learning_rate": 0.00010178370884573046,
613
+ "epoch": 1.0406091370558375,
614
+ "step": 820
615
+ },
616
+ {
617
+ "loss": 0.0422,
618
+ "grad_norm": 0.2615308165550232,
619
+ "learning_rate": 9.968521167683905e-05,
620
+ "epoch": 1.0532994923857868,
621
+ "step": 830
622
+ },
623
+ {
624
+ "loss": 0.0406,
625
+ "grad_norm": 0.23757147789001465,
626
+ "learning_rate": 9.758685313845727e-05,
627
+ "epoch": 1.0659898477157361,
628
+ "step": 840
629
+ },
630
+ {
631
+ "loss": 0.0387,
632
+ "grad_norm": 0.16979315876960754,
633
+ "learning_rate": 9.548955733256803e-05,
634
+ "epoch": 1.0786802030456852,
635
+ "step": 850
636
+ },
637
+ {
638
+ "loss": 0.0352,
639
+ "grad_norm": 0.1853126734495163,
640
+ "learning_rate": 9.339424789313445e-05,
641
+ "epoch": 1.0913705583756346,
642
+ "step": 860
643
+ },
644
+ {
645
+ "loss": 0.0356,
646
+ "grad_norm": 0.15106192231178284,
647
+ "learning_rate": 9.13018475793382e-05,
648
+ "epoch": 1.1040609137055837,
649
+ "step": 870
650
+ },
651
+ {
652
+ "loss": 0.037,
653
+ "grad_norm": 0.20427311956882477,
654
+ "learning_rate": 8.921327786920294e-05,
655
+ "epoch": 1.116751269035533,
656
+ "step": 880
657
+ },
658
+ {
659
+ "loss": 0.0324,
660
+ "grad_norm": 0.1580514758825302,
661
+ "learning_rate": 8.712945855378218e-05,
662
+ "epoch": 1.1294416243654823,
663
+ "step": 890
664
+ },
665
+ {
666
+ "loss": 0.0301,
667
+ "grad_norm": 0.2191898375749588,
668
+ "learning_rate": 8.505130733208968e-05,
669
+ "epoch": 1.1421319796954315,
670
+ "step": 900
671
+ },
672
+ {
673
+ "loss": 0.0355,
674
+ "grad_norm": 0.16614247858524323,
675
+ "learning_rate": 8.297973940695163e-05,
676
+ "epoch": 1.1548223350253808,
677
+ "step": 910
678
+ },
679
+ {
680
+ "loss": 0.0349,
681
+ "grad_norm": 0.18907427787780762,
682
+ "learning_rate": 8.091566708195786e-05,
683
+ "epoch": 1.16751269035533,
684
+ "step": 920
685
+ },
686
+ {
687
+ "loss": 0.0336,
688
+ "grad_norm": 0.24296258389949799,
689
+ "learning_rate": 7.885999935968982e-05,
690
+ "epoch": 1.1802030456852792,
691
+ "step": 930
692
+ },
693
+ {
694
+ "loss": 0.0372,
695
+ "grad_norm": 0.1817648708820343,
696
+ "learning_rate": 7.681364154140264e-05,
697
+ "epoch": 1.1928934010152283,
698
+ "step": 940
699
+ },
700
+ {
701
+ "eval_loss": 0.057017017155885696,
702
+ "eval_runtime": 19.9628,
703
+ "eval_samples_per_second": 66.474,
704
+ "eval_steps_per_second": 16.631,
705
+ "epoch": 1.1954314720812182,
706
+ "step": 942
707
+ },
708
+ {
709
+ "loss": 0.03,
710
+ "grad_norm": 0.19095705449581146,
711
+ "learning_rate": 7.47774948283366e-05,
712
+ "epoch": 1.2055837563451777,
713
+ "step": 950
714
+ },
715
+ {
716
+ "loss": 0.035,
717
+ "grad_norm": 0.33682745695114136,
718
+ "learning_rate": 7.275245592483492e-05,
719
+ "epoch": 1.218274111675127,
720
+ "step": 960
721
+ },
722
+ {
723
+ "loss": 0.0384,
724
+ "grad_norm": 0.2646084427833557,
725
+ "learning_rate": 7.073941664344152e-05,
726
+ "epoch": 1.2309644670050761,
727
+ "step": 970
728
+ },
729
+ {
730
+ "loss": 0.0287,
731
+ "grad_norm": 0.1980791836977005,
732
+ "learning_rate": 6.873926351215312e-05,
733
+ "epoch": 1.2436548223350254,
734
+ "step": 980
735
+ },
736
+ {
737
+ "loss": 0.0342,
738
+ "grad_norm": 0.18797655403614044,
739
+ "learning_rate": 6.67528773839989e-05,
740
+ "epoch": 1.2563451776649746,
741
+ "step": 990
742
+ },
743
+ {
744
+ "loss": 0.0337,
745
+ "grad_norm": 0.24009937047958374,
746
+ "learning_rate": 6.478113304911886e-05,
747
+ "epoch": 1.2690355329949239,
748
+ "step": 1000
749
+ },
750
+ {
751
+ "loss": 0.0272,
752
+ "grad_norm": 0.29159170389175415,
753
+ "learning_rate": 6.282489884951295e-05,
754
+ "epoch": 1.281725888324873,
755
+ "step": 1010
756
+ },
757
+ {
758
+ "loss": 0.036,
759
+ "grad_norm": 0.16352516412734985,
760
+ "learning_rate": 6.0885036296629064e-05,
761
+ "epoch": 1.2944162436548223,
762
+ "step": 1020
763
+ },
764
+ {
765
+ "loss": 0.0292,
766
+ "grad_norm": 0.17807820439338684,
767
+ "learning_rate": 5.896239969195994e-05,
768
+ "epoch": 1.3071065989847717,
769
+ "step": 1030
770
+ },
771
+ {
772
+ "loss": 0.0332,
773
+ "grad_norm": 0.2500491738319397,
774
+ "learning_rate": 5.7057835750814867e-05,
775
+ "epoch": 1.3197969543147208,
776
+ "step": 1040
777
+ },
778
+ {
779
+ "loss": 0.0294,
780
+ "grad_norm": 0.2208271473646164,
781
+ "learning_rate": 5.517218322943224e-05,
782
+ "epoch": 1.33248730964467,
783
+ "step": 1050
784
+ },
785
+ {
786
+ "loss": 0.0342,
787
+ "grad_norm": 0.23927471041679382,
788
+ "learning_rate": 5.3306272555597504e-05,
789
+ "epoch": 1.3451776649746192,
790
+ "step": 1060
791
+ },
792
+ {
793
+ "loss": 0.0307,
794
+ "grad_norm": 0.20309758186340332,
795
+ "learning_rate": 5.1460925462928546e-05,
796
+ "epoch": 1.3578680203045685,
797
+ "step": 1070
798
+ },
799
+ {
800
+ "loss": 0.0314,
801
+ "grad_norm": 0.23275193572044373,
802
+ "learning_rate": 4.96369546289904e-05,
803
+ "epoch": 1.3705583756345177,
804
+ "step": 1080
805
+ },
806
+ {
807
+ "loss": 0.0333,
808
+ "grad_norm": 0.2078331708908081,
809
+ "learning_rate": 4.783516331739769e-05,
810
+ "epoch": 1.383248730964467,
811
+ "step": 1090
812
+ },
813
+ {
814
+ "eval_loss": 0.05335332825779915,
815
+ "eval_runtime": 19.9859,
816
+ "eval_samples_per_second": 66.397,
817
+ "eval_steps_per_second": 16.612,
818
+ "epoch": 1.3946700507614214,
819
+ "step": 1099
820
+ },
821
+ {
822
+ "loss": 0.0309,
823
+ "grad_norm": 0.18032079935073853,
824
+ "learning_rate": 4.605634502406321e-05,
825
+ "epoch": 1.3959390862944163,
826
+ "step": 1100
827
+ },
828
+ {
829
+ "loss": 0.0328,
830
+ "grad_norm": 0.20803005993366241,
831
+ "learning_rate": 4.430128312774804e-05,
832
+ "epoch": 1.4086294416243654,
833
+ "step": 1110
834
+ },
835
+ {
836
+ "loss": 0.027,
837
+ "grad_norm": 0.1680465191602707,
838
+ "learning_rate": 4.2570750545067076e-05,
839
+ "epoch": 1.4213197969543148,
840
+ "step": 1120
841
+ },
842
+ {
843
+ "loss": 0.0317,
844
+ "grad_norm": 0.2528463900089264,
845
+ "learning_rate": 4.086550939010227e-05,
846
+ "epoch": 1.434010152284264,
847
+ "step": 1130
848
+ },
849
+ {
850
+ "loss": 0.0313,
851
+ "grad_norm": 0.19024434685707092,
852
+ "learning_rate": 3.9186310638773047e-05,
853
+ "epoch": 1.4467005076142132,
854
+ "step": 1140
855
+ },
856
+ {
857
+ "loss": 0.0287,
858
+ "grad_norm": 0.20934472978115082,
859
+ "learning_rate": 3.753389379811185e-05,
860
+ "epoch": 1.4593908629441623,
861
+ "step": 1150
862
+ },
863
+ {
864
+ "loss": 0.0265,
865
+ "grad_norm": 0.29412180185317993,
866
+ "learning_rate": 3.590898658059062e-05,
867
+ "epoch": 1.4720812182741116,
868
+ "step": 1160
869
+ },
870
+ {
871
+ "loss": 0.0298,
872
+ "grad_norm": 0.3268195390701294,
873
+ "learning_rate": 3.4312304583641484e-05,
874
+ "epoch": 1.484771573604061,
875
+ "step": 1170
876
+ },
877
+ {
878
+ "loss": 0.0251,
879
+ "grad_norm": 0.17332251369953156,
880
+ "learning_rate": 3.274455097451269e-05,
881
+ "epoch": 1.49746192893401,
882
+ "step": 1180
883
+ },
884
+ {
885
+ "loss": 0.0318,
886
+ "grad_norm": 0.3481772541999817,
887
+ "learning_rate": 3.1206416180598995e-05,
888
+ "epoch": 1.5101522842639594,
889
+ "step": 1190
890
+ },
891
+ {
892
+ "loss": 0.0335,
893
+ "grad_norm": 0.24047453701496124,
894
+ "learning_rate": 2.9698577585382282e-05,
895
+ "epoch": 1.5228426395939088,
896
+ "step": 1200
897
+ },
898
+ {
899
+ "loss": 0.0339,
900
+ "grad_norm": 0.21146714687347412,
901
+ "learning_rate": 2.8221699230116793e-05,
902
+ "epoch": 1.5355329949238579,
903
+ "step": 1210
904
+ },
905
+ {
906
+ "loss": 0.0308,
907
+ "grad_norm": 0.140832781791687,
908
+ "learning_rate": 2.67764315213902e-05,
909
+ "epoch": 1.548223350253807,
910
+ "step": 1220
911
+ },
912
+ {
913
+ "loss": 0.026,
914
+ "grad_norm": 0.1721792370080948,
915
+ "learning_rate": 2.536341094468906e-05,
916
+ "epoch": 1.5609137055837563,
917
+ "step": 1230
918
+ },
919
+ {
920
+ "loss": 0.0277,
921
+ "grad_norm": 0.14980490505695343,
922
+ "learning_rate": 2.398325978409539e-05,
923
+ "epoch": 1.5736040609137056,
924
+ "step": 1240
925
+ },
926
+ {
927
+ "loss": 0.028,
928
+ "grad_norm": 0.18908673524856567,
929
+ "learning_rate": 2.263658584823717e-05,
930
+ "epoch": 1.5862944162436547,
931
+ "step": 1250
932
+ },
933
+ {
934
+ "eval_loss": 0.052472274750471115,
935
+ "eval_runtime": 19.9786,
936
+ "eval_samples_per_second": 66.421,
937
+ "eval_steps_per_second": 16.618,
938
+ "epoch": 1.5939086294416245,
939
+ "step": 1256
940
+ },
941
+ {
942
+ "loss": 0.0272,
943
+ "grad_norm": 0.12164825201034546,
944
+ "learning_rate": 2.1323982202613735e-05,
945
+ "epoch": 1.598984771573604,
946
+ "step": 1260
947
+ },
948
+ {
949
+ "loss": 0.0245,
950
+ "grad_norm": 0.2658851146697998,
951
+ "learning_rate": 2.004602690841414e-05,
952
+ "epoch": 1.6116751269035534,
953
+ "step": 1270
954
+ },
955
+ {
956
+ "loss": 0.0304,
957
+ "grad_norm": 0.2891974151134491,
958
+ "learning_rate": 1.8803282767942954e-05,
959
+ "epoch": 1.6243654822335025,
960
+ "step": 1280
961
+ },
962
+ {
963
+ "loss": 0.0292,
964
+ "grad_norm": 0.2979351580142975,
965
+ "learning_rate": 1.7596297076766455e-05,
966
+ "epoch": 1.6370558375634516,
967
+ "step": 1290
968
+ },
969
+ {
970
+ "loss": 0.0284,
971
+ "grad_norm": 0.20141719281673431,
972
+ "learning_rate": 1.6425601382687405e-05,
973
+ "epoch": 1.649746192893401,
974
+ "step": 1300
975
+ },
976
+ {
977
+ "loss": 0.0254,
978
+ "grad_norm": 0.1950131356716156,
979
+ "learning_rate": 1.5291711251655316e-05,
980
+ "epoch": 1.6624365482233503,
981
+ "step": 1310
982
+ },
983
+ {
984
+ "loss": 0.0282,
985
+ "grad_norm": 0.21205022931098938,
986
+ "learning_rate": 1.41951260407149e-05,
987
+ "epoch": 1.6751269035532994,
988
+ "step": 1320
989
+ },
990
+ {
991
+ "loss": 0.0247,
992
+ "grad_norm": 0.2470894753932953,
993
+ "learning_rate": 1.3136328678092746e-05,
994
+ "epoch": 1.6878172588832487,
995
+ "step": 1330
996
+ },
997
+ {
998
+ "loss": 0.0257,
999
+ "grad_norm": 0.26378998160362244,
1000
+ "learning_rate": 1.2115785450519434e-05,
1001
+ "epoch": 1.700507614213198,
1002
+ "step": 1340
1003
+ },
1004
+ {
1005
+ "loss": 0.0282,
1006
+ "grad_norm": 0.12680888175964355,
1007
+ "learning_rate": 1.1133945797879908e-05,
1008
+ "epoch": 1.7131979695431472,
1009
+ "step": 1350
1010
+ },
1011
+ {
1012
+ "loss": 0.0251,
1013
+ "grad_norm": 0.19744935631752014,
1014
+ "learning_rate": 1.019124211528365e-05,
1015
+ "epoch": 1.7258883248730963,
1016
+ "step": 1360
1017
+ },
1018
+ {
1019
+ "loss": 0.0327,
1020
+ "grad_norm": 0.18419434130191803,
1021
+ "learning_rate": 9.288089562640844e-06,
1022
+ "epoch": 1.7385786802030458,
1023
+ "step": 1370
1024
+ },
1025
+ {
1026
+ "loss": 0.0282,
1027
+ "grad_norm": 0.19115136563777924,
1028
+ "learning_rate": 8.42488588182897e-06,
1029
+ "epoch": 1.751269035532995,
1030
+ "step": 1380
1031
+ },
1032
+ {
1033
+ "loss": 0.0245,
1034
+ "grad_norm": 0.17252641916275024,
1035
+ "learning_rate": 7.602011221530236e-06,
1036
+ "epoch": 1.763959390862944,
1037
+ "step": 1390
1038
+ },
1039
+ {
1040
+ "loss": 0.029,
1041
+ "grad_norm": 0.22253695130348206,
1042
+ "learning_rate": 6.819827969816661e-06,
1043
+ "epoch": 1.7766497461928934,
1044
+ "step": 1400
1045
+ },
1046
+ {
1047
+ "loss": 0.0269,
1048
+ "grad_norm": 0.21938475966453552,
1049
+ "learning_rate": 6.078680594557163e-06,
1050
+ "epoch": 1.7893401015228427,
1051
+ "step": 1410
1052
+ },
1053
+ {
1054
+ "eval_loss": 0.05091211572289467,
1055
+ "eval_runtime": 20.0145,
1056
+ "eval_samples_per_second": 66.302,
1057
+ "eval_steps_per_second": 16.588,
1058
+ "epoch": 1.7931472081218274,
1059
+ "step": 1413
1060
+ },
1061
+ {
1062
+ "loss": 0.0305,
1063
+ "grad_norm": 0.2024271935224533,
1064
+ "learning_rate": 5.378895491716285e-06,
1065
+ "epoch": 1.8020304568527918,
1066
+ "step": 1420
1067
+ },
1068
+ {
1069
+ "loss": 0.029,
1070
+ "grad_norm": 0.22723488509655,
1071
+ "learning_rate": 4.720780841611738e-06,
1072
+ "epoch": 1.8147208121827412,
1073
+ "step": 1430
1074
+ },
1075
+ {
1076
+ "loss": 0.0266,
1077
+ "grad_norm": 0.2747625410556793,
1078
+ "learning_rate": 4.104626473194151e-06,
1079
+ "epoch": 1.8274111675126905,
1080
+ "step": 1440
1081
+ },
1082
+ {
1083
+ "loss": 0.0262,
1084
+ "grad_norm": 0.18593831360340118,
1085
+ "learning_rate": 3.5307037364083253e-06,
1086
+ "epoch": 1.8401015228426396,
1087
+ "step": 1450
1088
+ },
1089
+ {
1090
+ "loss": 0.0291,
1091
+ "grad_norm": 0.2651998996734619,
1092
+ "learning_rate": 2.9992653826927508e-06,
1093
+ "epoch": 1.8527918781725887,
1094
+ "step": 1460
1095
+ },
1096
+ {
1097
+ "loss": 0.026,
1098
+ "grad_norm": 0.19439752399921417,
1099
+ "learning_rate": 2.510545453669744e-06,
1100
+ "epoch": 1.865482233502538,
1101
+ "step": 1470
1102
+ },
1103
+ {
1104
+ "loss": 0.03,
1105
+ "grad_norm": 0.17483021318912506,
1106
+ "learning_rate": 2.06475917807506e-06,
1107
+ "epoch": 1.8781725888324874,
1108
+ "step": 1480
1109
+ },
1110
+ {
1111
+ "loss": 0.029,
1112
+ "grad_norm": 0.22444817423820496,
1113
+ "learning_rate": 1.662102876972882e-06,
1114
+ "epoch": 1.8908629441624365,
1115
+ "step": 1490
1116
+ },
1117
+ {
1118
+ "loss": 0.0243,
1119
+ "grad_norm": 0.17885605990886688,
1120
+ "learning_rate": 1.3027538772973026e-06,
1121
+ "epoch": 1.9035532994923858,
1122
+ "step": 1500
1123
+ },
1124
+ {
1125
+ "loss": 0.0272,
1126
+ "grad_norm": 0.19312232732772827,
1127
+ "learning_rate": 9.868704337588797e-07,
1128
+ "epoch": 1.9162436548223352,
1129
+ "step": 1510
1130
+ },
1131
+ {
1132
+ "loss": 0.0254,
1133
+ "grad_norm": 0.1709776520729065,
1134
+ "learning_rate": 7.145916591504098e-07,
1135
+ "epoch": 1.9289340101522843,
1136
+ "step": 1520
1137
+ },
1138
+ {
1139
+ "loss": 0.0252,
1140
+ "grad_norm": 0.18656505644321442,
1141
+ "learning_rate": 4.860374630826004e-07,
1142
+ "epoch": 1.9416243654822334,
1143
+ "step": 1530
1144
+ },
1145
+ {
1146
+ "loss": 0.0267,
1147
+ "grad_norm": 0.11956395953893661,
1148
+ "learning_rate": 3.0130849917681114e-07,
1149
+ "epoch": 1.9543147208121827,
1150
+ "step": 1540
1151
+ },
1152
+ {
1153
+ "loss": 0.0305,
1154
+ "grad_norm": 0.25038954615592957,
1155
+ "learning_rate": 1.604861207378794e-07,
1156
+ "epoch": 1.967005076142132,
1157
+ "step": 1550
1158
+ },
1159
+ {
1160
+ "loss": 0.025,
1161
+ "grad_norm": 0.19318363070487976,
1162
+ "learning_rate": 6.363234492674507e-08,
1163
+ "epoch": 1.9796954314720812,
1164
+ "step": 1560
1165
+ },
1166
+ {
1167
+ "loss": 0.0276,
1168
+ "grad_norm": 0.26012641191482544,
1169
+ "learning_rate": 1.0789825448476177e-08,
1170
+ "epoch": 1.9923857868020305,
1171
+ "step": 1570
1172
+ },
1173
+ {
1174
+ "eval_loss": 0.0504293330013752,
1175
+ "eval_runtime": 19.8337,
1176
+ "eval_samples_per_second": 66.906,
1177
+ "eval_steps_per_second": 16.739,
1178
+ "epoch": 1.9923857868020305,
1179
+ "step": 1570
1180
+ },
1181
+ {
1182
+ "train_runtime": 2681.8444,
1183
+ "train_samples_per_second": 18.795,
1184
+ "train_steps_per_second": 0.588,
1185
+ "total_flos": 6.800278675429786e+17,
1186
+ "train_loss": 0.06838441643920647,
1187
+ "epoch": 2.0,
1188
+ "step": 1576
1189
+ }
1190
+ ]
train/training_loss.png ADDED
train/validation_loss.png ADDED