kth8 committed on
Commit
0699faf
·
verified ·
1 Parent(s): 06cb196

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: llama3.2
3
+ language:
4
+ - en
5
+ base_model: unsloth/Llama-3.2-1B-Instruct
6
+ datasets:
7
+ - m-a-p/SuperGPQA
8
+ pipeline_tag: text-generation
9
+ library_name: transformers
10
+ tags:
11
+ - sft
12
+ - trl
13
+ - unsloth
14
+ - llama
15
+ - llama3
16
+ - llama3.1
17
+ ---
18
+ ![logo](https://i.imgur.com/iuUzm7L.jpeg)
19
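+ A fine-tune of [unsloth/Llama-3.2-1B-Instruct](https://huggingface.co/unsloth/Llama-3.2-1B-Instruct) on the [m-a-p/SuperGPQA](https://huggingface.co/datasets/m-a-p/SuperGPQA) dataset. Given a problem statement, the model classifies it into a discipline, field, and subfield and returns the result as JSON.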
20
+
21
+ ## Usage example
22
+ **System prompt**
23
+ ```
24
+ You are a classifier. Categorize the following problem into discipline, field, and subfield in JSON format.
25
+ ```
26
+ **User prompt**
27
+ ```
28
+ Cotton and linen both readily catch fire. A batch of towels is composed of both cotton and linen, and is known to have caught fire. If it is known that the towels were ignited by a lit cigarette, which of the following arguments utilizes the most appropriate form of reasoning?
29
+ ```
30
+ **Assistant response**
31
+ ```
32
+ {"discipline": "Philosophy", "field": "Philosophy", "subfield": "Logic"}
33
+ ```
34
+ ## Possible output options
35
+ Discipline
36
+ ```
37
+ ['Management', 'Philosophy', 'History', 'Engineering', 'Medicine', 'Literature and Arts', 'Economics', 'Science', 'Sociology', 'Agronomy', 'Education', 'Law', 'Military Science']
38
+ ```
39
+ Field
40
+ ```
41
+ ['Geological Resources and Geological Engineering', 'Control Science and Engineering', 'Animal Husbandry', 'Chemistry', 'Agricultural Engineering', 'Veterinary Medicine', 'Systems Science', 'Petroleum and Natural Gas Engineering', 'Geography', 'Weapon Science and Technology', 'Surveying and Mapping Science and Technology', 'Atmospheric Science', 'Architecture', 'Transportation Engineering', 'Nuclear Science and Technology', 'Forestry Engineering', 'Business Administration', 'Optical Engineering', 'Traditional Chinese Medicine', 'Musicology', 'Philosophy', 'Electronic Science and Technology', 'Physical Education', 'Language and Literature', 'Library, Information and Archival Management', 'Electrical Engineering', 'Art Studies', 'Civil Engineering', 'Stomatology', 'Crop Science', 'Instrument Science and Technology', 'Sociology', 'Education', 'Chemical Engineering and Technology', 'Aquaculture', 'Clinical Medicine', 'Computer Science and Technology', 'Physics', 'Aeronautical and Astronautical Science and Technology', 'Military Science', 'Mechanics', 'Environmental Science and Engineering', 'Food Science and Engineering', 'Management Science and Engineering', 'Psychology', 'History', 'Mining Engineering', 'Power Engineering and Engineering Thermophysics', 'Hydraulic Engineering', 'Mechanical Engineering', 'Physical Oceanography', 'Applied Economics', 'Metallurgical Engineering', 'Mathematics', 'Political Science', 'Basic Medicine', 'Geophysics', 'Public Administration', 'Geology', 'Forestry', 'Oceanography', 'Theoretical Economics', 'Journalism and Communication', 'Textile Science and Engineering', 'Information and Communication Engineering', 'Materials Science and Engineering', 'Biology', 'Naval Architecture and Ocean Engineering', 'Public Health and Preventive Medicine', 'Law', 'Pharmacy', 'Astronomy']
42
+ ```
43
+ Subfield
44
+ ```
45
+ ['Mineral Processing Engineering', 'Labor Economics', 'Instrumentation and Performance', 'Otorhinolaryngology', 'Analytical Chemistry', 'Guidance, Navigation and Control', 'Linguistics and Applied Linguistics', 'Military Command and Information Systems', 'Mining and Safety Engineering', 'Forest Cultivation and Genetic Breeding', 'Obstetrics and Gynecology', 'Physical Chemistry', 'Signal and Information Processing', 'Marine Engineering', 'Nutrition and Food Hygiene', 'Theoretical Mechanics', 'Education Economics, Management and Social Security', 'Transportation Planning and Management', 'Quantitative Economics', 'Atomic and Molecular Physics', 'Fundamental Mathematics', 'Imaging and Nuclear Medicine', 'Subatomic and Atomic Physics', 'Rigid Body Mechanics', 'Electrodynamics', 'Crop Science', 'Radiochemistry', 'Group Theory', 'National and Defense Economics', 'Civil and Commercial Law', 'Logic', 'Internal Combustion Engineering', 'Aquaculture', 'Sports Science and Medicine', 'Dance Studies', 'Aeronautical and Astronautical Science and Technology', 'Ordinary Differential Equations', 'Materials Physics and Chemistry', 'Educational Technology and Principles', 'Theoretical Optics', 'Physical Chemistry of Metallurgical Process', 'Solid Mechanics', 'Urban Planning and Design', 'Botany', 'Databases', 'Wood Science and Technology', 'Maternal, Child and Adolescent Health', 'Atmospheric Physics and Atmospheric Environment', 'Literary History', 'Polynomials and Series Expansions', 'Constitutional and Administrative Law', 'Political Science', 'Discrete Mathematics', 'Vehicle Operation Engineering', 'Fluid Machinery and Engineering', 'Harmony', 'Philosophy of Science and Technology', 'Functions of Complex Variables', 'Electrochemistry', 'Internal Medicine', 'Statistical Mechanics', 'Pathogen Biology', 'Hydrogeology', 'Agricultural Mechanization Engineering', 'Fundamentals of Dynamics and Control', 'Clinical Stomatology', 'Antenna and Radio Communication', 'Military Management', 
'Electrical Theory and New Technologies', 'Power Systems and Automation', 'Food Biochemistry', 'Formal Languages', 'Water conservancy and Hydropower Engineering', 'Legal Theory and Legal History', 'Optical Fiber Communication', 'Astrophysics', 'Contract Law', 'Preschool Education', 'Power Machinery and Engineering', 'Advanced Programming Languages', 'Principles of Metallurgy', 'Astronomical Observation and Technology', 'Physiology', 'Advanced Algebra', 'Economic Statistics', 'Veterinary Medicine', 'Anesthesiology', 'Psychiatry and Mental Health', 'Military Logistics and Equipment', 'Electromagnetic Field and Microwave Technology', 'Music History, Education, and Technology', 'Meteorology', 'Engineering Fluid Mechanics', 'Road and Railway Engineering', 'Composition', 'Forensic Medicine', 'Geotechnical Engineering', 'Pitch and Scales', 'Environmental and Resource Protection', 'Historical Geography', 'Communication Principles', 'Dynamic Meteorology', 'Mechatronic Engineering', 'Environmental Science', 'Theoretical Fluid Mechanics', 'Circuits and Systems', 'Archaeology and Museology', 'Geriatric Medicine', 'Land Resource Management and Administrative Management', 'Weapon Systems Science and Engineering', 'Marine Biology', 'Particle and Nuclear Physics', 'Biochemistry and Molecular Biology', 'Procedural Law', 'High Voltage and Insulation Technology', 'Philology and Bibliography', 'Design Arts', 'Optoelectronic Technology', 'Poromechanics and Reservoir Physics', 'Engineering Thermophysics', 'Broadcasting and Television Art', 'Library and Archival Science', 'Ethics', 'Physical Oceanography', 'Solar System Science', 'Thermal Energy Engineering', 'Fuzzy Mathematics', 'Agricultural Environment and Soil-Water Engineering', 'Cartography and Geographic Information Engineering', 'Genetics', 'Chemical Transport Engineering', 'Film Studies', 'Information Management Science', 'Architectural Design and Theory', 'Pharmacology', 'Computer Software and Theory', 'Inorganic Chemistry', 
'Microbiology', 'Philosophical Aesthetics', 'Non-ferrous Metallurgy', 'Stellar and Interstellar Evolution', 'Computer Networks', 'Marine Chemistry', 'Thermodynamics and Statistical Physics', 'Operating Systems', 'Elements of Chemical Reaction Engineering', 'Thermodynamics', 'Literary Theory', 'Musical Forms and Analysis', 'Traditional Chinese Health Preservation', 'Geological Resources and Geological Engineering', 'Surgery', 'Operations Research and Cybernetics', 'Journalism and News Practice', 'Drama and Opera Studies', 'Human Geography', 'Systems Science', 'Industrial Economics', 'Organic Chemistry', 'Social and Folklore Studies', 'Bridge and Tunnel Engineering', 'Military Chemistry and Pyrotechnics', 'Principles of Seismic Exploration', 'Landscape Plants and Ornamental Horticulture', 'Communication and Information Systems', 'Traditional Chinese Pharmacy', 'Finance', 'Computational Mathematics', 'History and Theory of Journalism and Media Management', 'Emergency Medicine', 'Digital Surveying and Remote Sensing Applications', 'Quantum Mechanics', 'Nuclear Energy and Reactor Technology', 'Mathematical Analysis', 'Biophysics', 'Microelectronics and Solid-State Electronics', 'International Law', 'Mass Transport and Separation Process in Chemical Engineering', 'Medicinal Chemistry', 'Physical Education and Training', 'Forest Engineering', 'Instrument Science and Technology', 'Laser Technology', 'Information Management and Communication', 'Russian Language and Literature', 'Theory of Curriculum and Instruction', 'Power Electronics and Electrical Drives', 'Cryptography', 'Communication and Broadcasting', 'Functions of Real Variables', 'International Trade', 'Materials Processing Engineering', 'Economic History', 'Traffic Information Engineering and Control', 'Textile Chemistry and Dyeing Engineering', 'Urban Infrastructure Engineering', 'Semiconductor Physics', 'Ship Mechanics and Design Principles', 'Neurology', 'Immunology', 'Ophthalmology', 'Ecology', 'Heat 
Transfer', 'Graph Theory', 'Epidemiology and Health Statistics', 'Military Thought and History', 'Radiation Protection and Nuclear Technology Applications', 'Computer Architecture', 'Control Theory and Control Engineering', 'Polymer Physics', 'Underwater Acoustics', 'Nursing and Rehabilitation Medicine', 'Radiation Medicine', 'Geochemistry', 'Traditional Chinese Medicine Theory', 'Oncology', 'Physical Geography', 'Tourism Management and Technological Economics Management', 'Number Theory', 'Social Medicine and Health Management', 'Pediatrics', 'French Language and Literature', 'Demography and Anthropology', 'Oil and Gas Field Development and Storage & Transportation Engineering', 'Zoology', 'Sports Humanities and Sociology', 'Architectural History', 'Environmental Engineering', 'Paleontology and Stratigraphy', 'Space physics', 'Public Finance', 'Structural Geology', 'Combinatorial Mathematics', 'Applied Optics', 'Food Processing and Storage Engineering', 'Iron and Steel Metallurgy', 'Fluid Physics', 'Hydraulics and Hydrology', 'World History', 'Classical Chinese Literature', 'Solid State Physics', 'Textile Materials Science', 'Dermatology and Venereology', 'Geometry and Topology', 'Data Structures', 'Military Law', 'Health Toxicology and Environmental Health', 'Religious Studies', 'Management Science and Engineering', 'Relativity', 'Psychology', 'Criminal Law', 'Refrigeration and Cryogenic Engineering', 'Political Economy', 'Stochastic Processes', 'Clinical Laboratory Diagnostics', 'Cell Biology', 'Modern and Contemporary Chinese Literature', 'Western Economics', 'Basic Stomatology', 'Solid Earth Geophysics', 'Geodesy and Surveying Engineering', 'Microbiology and Biochemical Pharmacy', 'Pattern Recognition', 'Animal Rearing and Breeding', 'Polymer Chemistry and Physics', 'Acoustics', 'Business and Accounting Management', 'Fluid Flow and Heat Transfer in Chemical Engineering', 'Cosmology', 'Mineralogy, Petrology, and Economic Geology', 'Special Education', 
'Principles of Computer Organization', 'Fine Arts', 'Structural Engineering', 'Pharmaceutical Analysis', 'Probability and Statistics', 'Special Number Theory', 'Animal Nutrition and Feed Science', 'Pharmaceutics', 'Manufacturing Automation', 'Human Anatomy and Histology-Embryology', 'Law and Social Governance', 'Pathology and Pathophysiology', 'Numerical Analysis']
46
+ ```
47
+ ## Model Details
48
+ - Base Model: `unsloth/Llama-3.2-1B-Instruct`
49
+ - Parameter Count: 1235814400
50
+ - Precision: torch.bfloat16
51
+
52
+ ## Training Settings
53
+ ### Hardware
54
+ - GPU: NVIDIA A100-SXM4-40GB
55
+
56
+ ### PEFT
57
+ - Rank: 32
58
+ - LoRA alpha: 64
59
+ - Modules: q_proj, k_proj, v_proj, o_proj, gate_proj, up_proj, down_proj
60
+
61
+ ### SFT
62
+ - Epochs: 2
63
+ - Batch size: 16
64
+ - Gradient accumulation steps: 1
65
+ - Warmup ratio: 0.1
66
+ - Learning rate: 0.0002
67
+ - Optimizer: adamw_torch_fused
68
+ - Learning rate scheduler: cosine
69
+
70
+ ## Training stats
71
+ - Global step: 3152
72
+ - Training runtime (seconds): 1163.1262
73
+ - Average training loss: 0.0777996052304746
74
+ - Final validation loss: 0.051274195313453674
75
+
76
+ ## Framework versions
77
+ - Unsloth: 2026.3.8
78
+ - TRL: 0.22.2
79
+ - Transformers: 4.56.2
80
+ - PyTorch: 2.10.0+cu128
81
+ - Datasets: 4.3.0
82
+ - Tokenizers: 0.22.2
83
+
84
+ ## License
85
+ This model is released under the Llama 3.2 Community License. See the [license agreement](https://www.llama.com/llama3_2/license/) for details.
chat_template.jinja ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{- bos_token }}
2
+ {%- if custom_tools is defined %}
3
+ {%- set tools = custom_tools %}
4
+ {%- endif %}
5
+ {%- if not tools_in_user_message is defined %}
6
+ {%- set tools_in_user_message = true %}
7
+ {%- endif %}
8
+ {%- if not date_string is defined %}
9
+ {%- if strftime_now is defined %}
10
+ {%- set date_string = strftime_now("%d %b %Y") %}
11
+ {%- else %}
12
+ {%- set date_string = "26 Jul 2024" %}
13
+ {%- endif %}
14
+ {%- endif %}
15
+ {%- if not tools is defined %}
16
+ {%- set tools = none %}
17
+ {%- endif %}
18
+
19
+ {#- This block extracts the system message, so we can slot it into the right place. #}
20
+ {%- if messages[0]['role'] == 'system' %}
21
+ {%- set system_message = messages[0]['content']|trim %}
22
+ {%- set messages = messages[1:] %}
23
+ {%- else %}
24
+ {%- set system_message = "" %}
25
+ {%- endif %}
26
+
27
+ {#- System message #}
28
+ {{- "<|start_header_id|>system<|end_header_id|>\n\n" }}
29
+ {%- if tools is not none %}
30
+ {{- "Environment: ipython\n" }}
31
+ {%- endif %}
32
+ {{- "Cutting Knowledge Date: December 2023\n" }}
33
+ {{- "Today Date: " + date_string + "\n\n" }}
34
+ {%- if tools is not none and not tools_in_user_message %}
35
+ {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }}
36
+ {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
37
+ {{- "Do not use variables.\n\n" }}
38
+ {%- for t in tools %}
39
+ {{- t | tojson(indent=4) }}
40
+ {{- "\n\n" }}
41
+ {%- endfor %}
42
+ {%- endif %}
43
+ {{- system_message }}
44
+ {{- "<|eot_id|>" }}
45
+
46
+ {#- Custom tools are passed in a user message with some extra guidance #}
47
+ {%- if tools_in_user_message and not tools is none %}
48
+ {#- Extract the first user message so we can plug it in here #}
49
+ {%- if messages | length != 0 %}
50
+ {%- set first_user_message = messages[0]['content']|trim %}
51
+ {%- set messages = messages[1:] %}
52
+ {%- else %}
53
+ {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }}
54
+ {%- endif %}
55
+ {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}}
56
+ {{- "Given the following functions, please respond with a JSON for a function call " }}
57
+ {{- "with its proper arguments that best answers the given prompt.\n\n" }}
58
+ {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
59
+ {{- "Do not use variables.\n\n" }}
60
+ {%- for t in tools %}
61
+ {{- t | tojson(indent=4) }}
62
+ {{- "\n\n" }}
63
+ {%- endfor %}
64
+ {{- first_user_message + "<|eot_id|>"}}
65
+ {%- endif %}
66
+
67
+ {%- for message in messages %}
68
+ {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}
69
+ {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }}
70
+ {%- elif 'tool_calls' in message %}
71
+ {%- if not message.tool_calls|length == 1 %}
72
+ {{- raise_exception("This model only supports single tool-calls at once!") }}
73
+ {%- endif %}
74
+ {%- set tool_call = message.tool_calls[0].function %}
75
+ {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}
76
+ {{- '{"name": "' + tool_call.name + '", ' }}
77
+ {{- '"parameters": ' }}
78
+ {{- tool_call.arguments | tojson }}
79
+ {{- "}" }}
80
+ {{- "<|eot_id|>" }}
81
+ {%- elif message.role == "tool" or message.role == "ipython" %}
82
+ {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }}
83
+ {%- if message.content is mapping or message.content is iterable %}
84
+ {{- message.content | tojson }}
85
+ {%- else %}
86
+ {{- message.content }}
87
+ {%- endif %}
88
+ {{- "<|eot_id|>" }}
89
+ {%- endif %}
90
+ {%- endfor %}
91
+ {%- if add_generation_prompt %}
92
+ {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}
93
+ {%- endif %}
config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 128000,
8
+ "dtype": "bfloat16",
9
+ "eos_token_id": 128009,
10
+ "head_dim": 64,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 8192,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "num_attention_heads": 32,
19
+ "num_hidden_layers": 16,
20
+ "num_key_value_heads": 8,
21
+ "pad_token_id": 128004,
22
+ "pretraining_tp": 1,
23
+ "rms_norm_eps": 1e-05,
24
+ "rope_scaling": {
25
+ "factor": 32.0,
26
+ "high_freq_factor": 4.0,
27
+ "low_freq_factor": 1.0,
28
+ "original_max_position_embeddings": 8192,
29
+ "rope_type": "llama3"
30
+ },
31
+ "rope_theta": 500000.0,
32
+ "tie_word_embeddings": true,
33
+ "transformers_version": "4.56.2",
34
+ "unsloth_fixed": true,
35
+ "use_cache": true,
36
+ "vocab_size": 128256
37
+ }
generation_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 128000,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 128001,
6
+ 128008,
7
+ 128009
8
+ ],
9
+ "max_length": 131072,
10
+ "pad_token_id": 128004,
11
+ "temperature": 0.6,
12
+ "top_p": 0.9,
13
+ "transformers_version": "4.56.2"
14
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57401189f8100bbc8a83e3731582721ac23c46c964245a881f675c3858b61324
3
+ size 2471645608
special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|begin_of_text|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|eot_id|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<|finetune_right_pad_id|>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ }
23
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b
3
+ size 17209920
tokenizer_config.json ADDED
@@ -0,0 +1,2066 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "added_tokens_decoder": {
4
+ "128000": {
5
+ "content": "<|begin_of_text|>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "128001": {
13
+ "content": "<|end_of_text|>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "128002": {
21
+ "content": "<|reserved_special_token_0|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "128003": {
29
+ "content": "<|reserved_special_token_1|>",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "128004": {
37
+ "content": "<|finetune_right_pad_id|>",
38
+ "lstrip": false,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ },
44
+ "128005": {
45
+ "content": "<|reserved_special_token_2|>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false,
50
+ "special": true
51
+ },
52
+ "128006": {
53
+ "content": "<|start_header_id|>",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false,
58
+ "special": true
59
+ },
60
+ "128007": {
61
+ "content": "<|end_header_id|>",
62
+ "lstrip": false,
63
+ "normalized": false,
64
+ "rstrip": false,
65
+ "single_word": false,
66
+ "special": true
67
+ },
68
+ "128008": {
69
+ "content": "<|eom_id|>",
70
+ "lstrip": false,
71
+ "normalized": false,
72
+ "rstrip": false,
73
+ "single_word": false,
74
+ "special": true
75
+ },
76
+ "128009": {
77
+ "content": "<|eot_id|>",
78
+ "lstrip": false,
79
+ "normalized": false,
80
+ "rstrip": false,
81
+ "single_word": false,
82
+ "special": true
83
+ },
84
+ "128010": {
85
+ "content": "<|python_tag|>",
86
+ "lstrip": false,
87
+ "normalized": false,
88
+ "rstrip": false,
89
+ "single_word": false,
90
+ "special": true
91
+ },
92
+ "128011": {
93
+ "content": "<|reserved_special_token_3|>",
94
+ "lstrip": false,
95
+ "normalized": false,
96
+ "rstrip": false,
97
+ "single_word": false,
98
+ "special": true
99
+ },
100
+ "128012": {
101
+ "content": "<|reserved_special_token_4|>",
102
+ "lstrip": false,
103
+ "normalized": false,
104
+ "rstrip": false,
105
+ "single_word": false,
106
+ "special": true
107
+ },
108
+ "128013": {
109
+ "content": "<|reserved_special_token_5|>",
110
+ "lstrip": false,
111
+ "normalized": false,
112
+ "rstrip": false,
113
+ "single_word": false,
114
+ "special": true
115
+ },
116
+ "128014": {
117
+ "content": "<|reserved_special_token_6|>",
118
+ "lstrip": false,
119
+ "normalized": false,
120
+ "rstrip": false,
121
+ "single_word": false,
122
+ "special": true
123
+ },
124
+ "128015": {
125
+ "content": "<|reserved_special_token_7|>",
126
+ "lstrip": false,
127
+ "normalized": false,
128
+ "rstrip": false,
129
+ "single_word": false,
130
+ "special": true
131
+ },
132
+ "128016": {
133
+ "content": "<|reserved_special_token_8|>",
134
+ "lstrip": false,
135
+ "normalized": false,
136
+ "rstrip": false,
137
+ "single_word": false,
138
+ "special": true
139
+ },
140
+ "128017": {
141
+ "content": "<|reserved_special_token_9|>",
142
+ "lstrip": false,
143
+ "normalized": false,
144
+ "rstrip": false,
145
+ "single_word": false,
146
+ "special": true
147
+ },
148
+ "128018": {
149
+ "content": "<|reserved_special_token_10|>",
150
+ "lstrip": false,
151
+ "normalized": false,
152
+ "rstrip": false,
153
+ "single_word": false,
154
+ "special": true
155
+ },
156
+ "128019": {
157
+ "content": "<|reserved_special_token_11|>",
158
+ "lstrip": false,
159
+ "normalized": false,
160
+ "rstrip": false,
161
+ "single_word": false,
162
+ "special": true
163
+ },
164
+ "128020": {
165
+ "content": "<|reserved_special_token_12|>",
166
+ "lstrip": false,
167
+ "normalized": false,
168
+ "rstrip": false,
169
+ "single_word": false,
170
+ "special": true
171
+ },
172
+ "128021": {
173
+ "content": "<|reserved_special_token_13|>",
174
+ "lstrip": false,
175
+ "normalized": false,
176
+ "rstrip": false,
177
+ "single_word": false,
178
+ "special": true
179
+ },
180
+ "128022": {
181
+ "content": "<|reserved_special_token_14|>",
182
+ "lstrip": false,
183
+ "normalized": false,
184
+ "rstrip": false,
185
+ "single_word": false,
186
+ "special": true
187
+ },
188
+ "128023": {
189
+ "content": "<|reserved_special_token_15|>",
190
+ "lstrip": false,
191
+ "normalized": false,
192
+ "rstrip": false,
193
+ "single_word": false,
194
+ "special": true
195
+ },
196
+ "128024": {
197
+ "content": "<|reserved_special_token_16|>",
198
+ "lstrip": false,
199
+ "normalized": false,
200
+ "rstrip": false,
201
+ "single_word": false,
202
+ "special": true
203
+ },
204
+ "128025": {
205
+ "content": "<|reserved_special_token_17|>",
206
+ "lstrip": false,
207
+ "normalized": false,
208
+ "rstrip": false,
209
+ "single_word": false,
210
+ "special": true
211
+ },
212
+ "128026": {
213
+ "content": "<|reserved_special_token_18|>",
214
+ "lstrip": false,
215
+ "normalized": false,
216
+ "rstrip": false,
217
+ "single_word": false,
218
+ "special": true
219
+ },
220
+ "128027": {
221
+ "content": "<|reserved_special_token_19|>",
222
+ "lstrip": false,
223
+ "normalized": false,
224
+ "rstrip": false,
225
+ "single_word": false,
226
+ "special": true
227
+ },
228
+ "128028": {
229
+ "content": "<|reserved_special_token_20|>",
230
+ "lstrip": false,
231
+ "normalized": false,
232
+ "rstrip": false,
233
+ "single_word": false,
234
+ "special": true
235
+ },
236
+ "128029": {
237
+ "content": "<|reserved_special_token_21|>",
238
+ "lstrip": false,
239
+ "normalized": false,
240
+ "rstrip": false,
241
+ "single_word": false,
242
+ "special": true
243
+ },
244
+ "128030": {
245
+ "content": "<|reserved_special_token_22|>",
246
+ "lstrip": false,
247
+ "normalized": false,
248
+ "rstrip": false,
249
+ "single_word": false,
250
+ "special": true
251
+ },
252
+ "128031": {
253
+ "content": "<|reserved_special_token_23|>",
254
+ "lstrip": false,
255
+ "normalized": false,
256
+ "rstrip": false,
257
+ "single_word": false,
258
+ "special": true
259
+ },
260
+ "128032": {
261
+ "content": "<|reserved_special_token_24|>",
262
+ "lstrip": false,
263
+ "normalized": false,
264
+ "rstrip": false,
265
+ "single_word": false,
266
+ "special": true
267
+ },
268
+ "128033": {
269
+ "content": "<|reserved_special_token_25|>",
270
+ "lstrip": false,
271
+ "normalized": false,
272
+ "rstrip": false,
273
+ "single_word": false,
274
+ "special": true
275
+ },
276
+ "128034": {
277
+ "content": "<|reserved_special_token_26|>",
278
+ "lstrip": false,
279
+ "normalized": false,
280
+ "rstrip": false,
281
+ "single_word": false,
282
+ "special": true
283
+ },
284
+ "128035": {
285
+ "content": "<|reserved_special_token_27|>",
286
+ "lstrip": false,
287
+ "normalized": false,
288
+ "rstrip": false,
289
+ "single_word": false,
290
+ "special": true
291
+ },
292
+ "128036": {
293
+ "content": "<|reserved_special_token_28|>",
294
+ "lstrip": false,
295
+ "normalized": false,
296
+ "rstrip": false,
297
+ "single_word": false,
298
+ "special": true
299
+ },
300
+ "128037": {
301
+ "content": "<|reserved_special_token_29|>",
302
+ "lstrip": false,
303
+ "normalized": false,
304
+ "rstrip": false,
305
+ "single_word": false,
306
+ "special": true
307
+ },
308
+ "128038": {
309
+ "content": "<|reserved_special_token_30|>",
310
+ "lstrip": false,
311
+ "normalized": false,
312
+ "rstrip": false,
313
+ "single_word": false,
314
+ "special": true
315
+ },
316
+ "128039": {
317
+ "content": "<|reserved_special_token_31|>",
318
+ "lstrip": false,
319
+ "normalized": false,
320
+ "rstrip": false,
321
+ "single_word": false,
322
+ "special": true
323
+ },
324
+ "128040": {
325
+ "content": "<|reserved_special_token_32|>",
326
+ "lstrip": false,
327
+ "normalized": false,
328
+ "rstrip": false,
329
+ "single_word": false,
330
+ "special": true
331
+ },
332
+ "128041": {
333
+ "content": "<|reserved_special_token_33|>",
334
+ "lstrip": false,
335
+ "normalized": false,
336
+ "rstrip": false,
337
+ "single_word": false,
338
+ "special": true
339
+ },
340
+ "128042": {
341
+ "content": "<|reserved_special_token_34|>",
342
+ "lstrip": false,
343
+ "normalized": false,
344
+ "rstrip": false,
345
+ "single_word": false,
346
+ "special": true
347
+ },
348
+ "128043": {
349
+ "content": "<|reserved_special_token_35|>",
350
+ "lstrip": false,
351
+ "normalized": false,
352
+ "rstrip": false,
353
+ "single_word": false,
354
+ "special": true
355
+ },
356
+ "128044": {
357
+ "content": "<|reserved_special_token_36|>",
358
+ "lstrip": false,
359
+ "normalized": false,
360
+ "rstrip": false,
361
+ "single_word": false,
362
+ "special": true
363
+ },
364
+ "128045": {
365
+ "content": "<|reserved_special_token_37|>",
366
+ "lstrip": false,
367
+ "normalized": false,
368
+ "rstrip": false,
369
+ "single_word": false,
370
+ "special": true
371
+ },
372
+ "128046": {
373
+ "content": "<|reserved_special_token_38|>",
374
+ "lstrip": false,
375
+ "normalized": false,
376
+ "rstrip": false,
377
+ "single_word": false,
378
+ "special": true
379
+ },
380
+ "128047": {
381
+ "content": "<|reserved_special_token_39|>",
382
+ "lstrip": false,
383
+ "normalized": false,
384
+ "rstrip": false,
385
+ "single_word": false,
386
+ "special": true
387
+ },
388
+ "128048": {
389
+ "content": "<|reserved_special_token_40|>",
390
+ "lstrip": false,
391
+ "normalized": false,
392
+ "rstrip": false,
393
+ "single_word": false,
394
+ "special": true
395
+ },
396
+ "128049": {
397
+ "content": "<|reserved_special_token_41|>",
398
+ "lstrip": false,
399
+ "normalized": false,
400
+ "rstrip": false,
401
+ "single_word": false,
402
+ "special": true
403
+ },
404
+ "128050": {
405
+ "content": "<|reserved_special_token_42|>",
406
+ "lstrip": false,
407
+ "normalized": false,
408
+ "rstrip": false,
409
+ "single_word": false,
410
+ "special": true
411
+ },
412
+ "128051": {
413
+ "content": "<|reserved_special_token_43|>",
414
+ "lstrip": false,
415
+ "normalized": false,
416
+ "rstrip": false,
417
+ "single_word": false,
418
+ "special": true
419
+ },
420
+ "128052": {
421
+ "content": "<|reserved_special_token_44|>",
422
+ "lstrip": false,
423
+ "normalized": false,
424
+ "rstrip": false,
425
+ "single_word": false,
426
+ "special": true
427
+ },
428
+ "128053": {
429
+ "content": "<|reserved_special_token_45|>",
430
+ "lstrip": false,
431
+ "normalized": false,
432
+ "rstrip": false,
433
+ "single_word": false,
434
+ "special": true
435
+ },
436
+ "128054": {
437
+ "content": "<|reserved_special_token_46|>",
438
+ "lstrip": false,
439
+ "normalized": false,
440
+ "rstrip": false,
441
+ "single_word": false,
442
+ "special": true
443
+ },
444
+ "128055": {
445
+ "content": "<|reserved_special_token_47|>",
446
+ "lstrip": false,
447
+ "normalized": false,
448
+ "rstrip": false,
449
+ "single_word": false,
450
+ "special": true
451
+ },
452
+ "128056": {
453
+ "content": "<|reserved_special_token_48|>",
454
+ "lstrip": false,
455
+ "normalized": false,
456
+ "rstrip": false,
457
+ "single_word": false,
458
+ "special": true
459
+ },
460
+ "128057": {
461
+ "content": "<|reserved_special_token_49|>",
462
+ "lstrip": false,
463
+ "normalized": false,
464
+ "rstrip": false,
465
+ "single_word": false,
466
+ "special": true
467
+ },
468
+ "128058": {
469
+ "content": "<|reserved_special_token_50|>",
470
+ "lstrip": false,
471
+ "normalized": false,
472
+ "rstrip": false,
473
+ "single_word": false,
474
+ "special": true
475
+ },
476
+ "128059": {
477
+ "content": "<|reserved_special_token_51|>",
478
+ "lstrip": false,
479
+ "normalized": false,
480
+ "rstrip": false,
481
+ "single_word": false,
482
+ "special": true
483
+ },
484
+ "128060": {
485
+ "content": "<|reserved_special_token_52|>",
486
+ "lstrip": false,
487
+ "normalized": false,
488
+ "rstrip": false,
489
+ "single_word": false,
490
+ "special": true
491
+ },
492
+ "128061": {
493
+ "content": "<|reserved_special_token_53|>",
494
+ "lstrip": false,
495
+ "normalized": false,
496
+ "rstrip": false,
497
+ "single_word": false,
498
+ "special": true
499
+ },
500
+ "128062": {
501
+ "content": "<|reserved_special_token_54|>",
502
+ "lstrip": false,
503
+ "normalized": false,
504
+ "rstrip": false,
505
+ "single_word": false,
506
+ "special": true
507
+ },
508
+ "128063": {
509
+ "content": "<|reserved_special_token_55|>",
510
+ "lstrip": false,
511
+ "normalized": false,
512
+ "rstrip": false,
513
+ "single_word": false,
514
+ "special": true
515
+ },
516
+ "128064": {
517
+ "content": "<|reserved_special_token_56|>",
518
+ "lstrip": false,
519
+ "normalized": false,
520
+ "rstrip": false,
521
+ "single_word": false,
522
+ "special": true
523
+ },
524
+ "128065": {
525
+ "content": "<|reserved_special_token_57|>",
526
+ "lstrip": false,
527
+ "normalized": false,
528
+ "rstrip": false,
529
+ "single_word": false,
530
+ "special": true
531
+ },
532
+ "128066": {
533
+ "content": "<|reserved_special_token_58|>",
534
+ "lstrip": false,
535
+ "normalized": false,
536
+ "rstrip": false,
537
+ "single_word": false,
538
+ "special": true
539
+ },
540
+ "128067": {
541
+ "content": "<|reserved_special_token_59|>",
542
+ "lstrip": false,
543
+ "normalized": false,
544
+ "rstrip": false,
545
+ "single_word": false,
546
+ "special": true
547
+ },
548
+ "128068": {
549
+ "content": "<|reserved_special_token_60|>",
550
+ "lstrip": false,
551
+ "normalized": false,
552
+ "rstrip": false,
553
+ "single_word": false,
554
+ "special": true
555
+ },
556
+ "128069": {
557
+ "content": "<|reserved_special_token_61|>",
558
+ "lstrip": false,
559
+ "normalized": false,
560
+ "rstrip": false,
561
+ "single_word": false,
562
+ "special": true
563
+ },
564
+ "128070": {
565
+ "content": "<|reserved_special_token_62|>",
566
+ "lstrip": false,
567
+ "normalized": false,
568
+ "rstrip": false,
569
+ "single_word": false,
570
+ "special": true
571
+ },
572
+ "128071": {
573
+ "content": "<|reserved_special_token_63|>",
574
+ "lstrip": false,
575
+ "normalized": false,
576
+ "rstrip": false,
577
+ "single_word": false,
578
+ "special": true
579
+ },
580
+ "128072": {
581
+ "content": "<|reserved_special_token_64|>",
582
+ "lstrip": false,
583
+ "normalized": false,
584
+ "rstrip": false,
585
+ "single_word": false,
586
+ "special": true
587
+ },
588
+ "128073": {
589
+ "content": "<|reserved_special_token_65|>",
590
+ "lstrip": false,
591
+ "normalized": false,
592
+ "rstrip": false,
593
+ "single_word": false,
594
+ "special": true
595
+ },
596
+ "128074": {
597
+ "content": "<|reserved_special_token_66|>",
598
+ "lstrip": false,
599
+ "normalized": false,
600
+ "rstrip": false,
601
+ "single_word": false,
602
+ "special": true
603
+ },
604
+ "128075": {
605
+ "content": "<|reserved_special_token_67|>",
606
+ "lstrip": false,
607
+ "normalized": false,
608
+ "rstrip": false,
609
+ "single_word": false,
610
+ "special": true
611
+ },
612
+ "128076": {
613
+ "content": "<|reserved_special_token_68|>",
614
+ "lstrip": false,
615
+ "normalized": false,
616
+ "rstrip": false,
617
+ "single_word": false,
618
+ "special": true
619
+ },
620
+ "128077": {
621
+ "content": "<|reserved_special_token_69|>",
622
+ "lstrip": false,
623
+ "normalized": false,
624
+ "rstrip": false,
625
+ "single_word": false,
626
+ "special": true
627
+ },
628
+ "128078": {
629
+ "content": "<|reserved_special_token_70|>",
630
+ "lstrip": false,
631
+ "normalized": false,
632
+ "rstrip": false,
633
+ "single_word": false,
634
+ "special": true
635
+ },
636
+ "128079": {
637
+ "content": "<|reserved_special_token_71|>",
638
+ "lstrip": false,
639
+ "normalized": false,
640
+ "rstrip": false,
641
+ "single_word": false,
642
+ "special": true
643
+ },
644
+ "128080": {
645
+ "content": "<|reserved_special_token_72|>",
646
+ "lstrip": false,
647
+ "normalized": false,
648
+ "rstrip": false,
649
+ "single_word": false,
650
+ "special": true
651
+ },
652
+ "128081": {
653
+ "content": "<|reserved_special_token_73|>",
654
+ "lstrip": false,
655
+ "normalized": false,
656
+ "rstrip": false,
657
+ "single_word": false,
658
+ "special": true
659
+ },
660
+ "128082": {
661
+ "content": "<|reserved_special_token_74|>",
662
+ "lstrip": false,
663
+ "normalized": false,
664
+ "rstrip": false,
665
+ "single_word": false,
666
+ "special": true
667
+ },
668
+ "128083": {
669
+ "content": "<|reserved_special_token_75|>",
670
+ "lstrip": false,
671
+ "normalized": false,
672
+ "rstrip": false,
673
+ "single_word": false,
674
+ "special": true
675
+ },
676
+ "128084": {
677
+ "content": "<|reserved_special_token_76|>",
678
+ "lstrip": false,
679
+ "normalized": false,
680
+ "rstrip": false,
681
+ "single_word": false,
682
+ "special": true
683
+ },
684
+ "128085": {
685
+ "content": "<|reserved_special_token_77|>",
686
+ "lstrip": false,
687
+ "normalized": false,
688
+ "rstrip": false,
689
+ "single_word": false,
690
+ "special": true
691
+ },
692
+ "128086": {
693
+ "content": "<|reserved_special_token_78|>",
694
+ "lstrip": false,
695
+ "normalized": false,
696
+ "rstrip": false,
697
+ "single_word": false,
698
+ "special": true
699
+ },
700
+ "128087": {
701
+ "content": "<|reserved_special_token_79|>",
702
+ "lstrip": false,
703
+ "normalized": false,
704
+ "rstrip": false,
705
+ "single_word": false,
706
+ "special": true
707
+ },
708
+ "128088": {
709
+ "content": "<|reserved_special_token_80|>",
710
+ "lstrip": false,
711
+ "normalized": false,
712
+ "rstrip": false,
713
+ "single_word": false,
714
+ "special": true
715
+ },
716
+ "128089": {
717
+ "content": "<|reserved_special_token_81|>",
718
+ "lstrip": false,
719
+ "normalized": false,
720
+ "rstrip": false,
721
+ "single_word": false,
722
+ "special": true
723
+ },
724
+ "128090": {
725
+ "content": "<|reserved_special_token_82|>",
726
+ "lstrip": false,
727
+ "normalized": false,
728
+ "rstrip": false,
729
+ "single_word": false,
730
+ "special": true
731
+ },
732
+ "128091": {
733
+ "content": "<|reserved_special_token_83|>",
734
+ "lstrip": false,
735
+ "normalized": false,
736
+ "rstrip": false,
737
+ "single_word": false,
738
+ "special": true
739
+ },
740
+ "128092": {
741
+ "content": "<|reserved_special_token_84|>",
742
+ "lstrip": false,
743
+ "normalized": false,
744
+ "rstrip": false,
745
+ "single_word": false,
746
+ "special": true
747
+ },
748
+ "128093": {
749
+ "content": "<|reserved_special_token_85|>",
750
+ "lstrip": false,
751
+ "normalized": false,
752
+ "rstrip": false,
753
+ "single_word": false,
754
+ "special": true
755
+ },
756
+ "128094": {
757
+ "content": "<|reserved_special_token_86|>",
758
+ "lstrip": false,
759
+ "normalized": false,
760
+ "rstrip": false,
761
+ "single_word": false,
762
+ "special": true
763
+ },
764
+ "128095": {
765
+ "content": "<|reserved_special_token_87|>",
766
+ "lstrip": false,
767
+ "normalized": false,
768
+ "rstrip": false,
769
+ "single_word": false,
770
+ "special": true
771
+ },
772
+ "128096": {
773
+ "content": "<|reserved_special_token_88|>",
774
+ "lstrip": false,
775
+ "normalized": false,
776
+ "rstrip": false,
777
+ "single_word": false,
778
+ "special": true
779
+ },
780
+ "128097": {
781
+ "content": "<|reserved_special_token_89|>",
782
+ "lstrip": false,
783
+ "normalized": false,
784
+ "rstrip": false,
785
+ "single_word": false,
786
+ "special": true
787
+ },
788
+ "128098": {
789
+ "content": "<|reserved_special_token_90|>",
790
+ "lstrip": false,
791
+ "normalized": false,
792
+ "rstrip": false,
793
+ "single_word": false,
794
+ "special": true
795
+ },
796
+ "128099": {
797
+ "content": "<|reserved_special_token_91|>",
798
+ "lstrip": false,
799
+ "normalized": false,
800
+ "rstrip": false,
801
+ "single_word": false,
802
+ "special": true
803
+ },
804
+ "128100": {
805
+ "content": "<|reserved_special_token_92|>",
806
+ "lstrip": false,
807
+ "normalized": false,
808
+ "rstrip": false,
809
+ "single_word": false,
810
+ "special": true
811
+ },
812
+ "128101": {
813
+ "content": "<|reserved_special_token_93|>",
814
+ "lstrip": false,
815
+ "normalized": false,
816
+ "rstrip": false,
817
+ "single_word": false,
818
+ "special": true
819
+ },
820
+ "128102": {
821
+ "content": "<|reserved_special_token_94|>",
822
+ "lstrip": false,
823
+ "normalized": false,
824
+ "rstrip": false,
825
+ "single_word": false,
826
+ "special": true
827
+ },
828
+ "128103": {
829
+ "content": "<|reserved_special_token_95|>",
830
+ "lstrip": false,
831
+ "normalized": false,
832
+ "rstrip": false,
833
+ "single_word": false,
834
+ "special": true
835
+ },
836
+ "128104": {
837
+ "content": "<|reserved_special_token_96|>",
838
+ "lstrip": false,
839
+ "normalized": false,
840
+ "rstrip": false,
841
+ "single_word": false,
842
+ "special": true
843
+ },
844
+ "128105": {
845
+ "content": "<|reserved_special_token_97|>",
846
+ "lstrip": false,
847
+ "normalized": false,
848
+ "rstrip": false,
849
+ "single_word": false,
850
+ "special": true
851
+ },
852
+ "128106": {
853
+ "content": "<|reserved_special_token_98|>",
854
+ "lstrip": false,
855
+ "normalized": false,
856
+ "rstrip": false,
857
+ "single_word": false,
858
+ "special": true
859
+ },
860
+ "128107": {
861
+ "content": "<|reserved_special_token_99|>",
862
+ "lstrip": false,
863
+ "normalized": false,
864
+ "rstrip": false,
865
+ "single_word": false,
866
+ "special": true
867
+ },
868
+ "128108": {
869
+ "content": "<|reserved_special_token_100|>",
870
+ "lstrip": false,
871
+ "normalized": false,
872
+ "rstrip": false,
873
+ "single_word": false,
874
+ "special": true
875
+ },
876
+ "128109": {
877
+ "content": "<|reserved_special_token_101|>",
878
+ "lstrip": false,
879
+ "normalized": false,
880
+ "rstrip": false,
881
+ "single_word": false,
882
+ "special": true
883
+ },
884
+ "128110": {
885
+ "content": "<|reserved_special_token_102|>",
886
+ "lstrip": false,
887
+ "normalized": false,
888
+ "rstrip": false,
889
+ "single_word": false,
890
+ "special": true
891
+ },
892
+ "128111": {
893
+ "content": "<|reserved_special_token_103|>",
894
+ "lstrip": false,
895
+ "normalized": false,
896
+ "rstrip": false,
897
+ "single_word": false,
898
+ "special": true
899
+ },
900
+ "128112": {
901
+ "content": "<|reserved_special_token_104|>",
902
+ "lstrip": false,
903
+ "normalized": false,
904
+ "rstrip": false,
905
+ "single_word": false,
906
+ "special": true
907
+ },
908
+ "128113": {
909
+ "content": "<|reserved_special_token_105|>",
910
+ "lstrip": false,
911
+ "normalized": false,
912
+ "rstrip": false,
913
+ "single_word": false,
914
+ "special": true
915
+ },
916
+ "128114": {
917
+ "content": "<|reserved_special_token_106|>",
918
+ "lstrip": false,
919
+ "normalized": false,
920
+ "rstrip": false,
921
+ "single_word": false,
922
+ "special": true
923
+ },
924
+ "128115": {
925
+ "content": "<|reserved_special_token_107|>",
926
+ "lstrip": false,
927
+ "normalized": false,
928
+ "rstrip": false,
929
+ "single_word": false,
930
+ "special": true
931
+ },
932
+ "128116": {
933
+ "content": "<|reserved_special_token_108|>",
934
+ "lstrip": false,
935
+ "normalized": false,
936
+ "rstrip": false,
937
+ "single_word": false,
938
+ "special": true
939
+ },
940
+ "128117": {
941
+ "content": "<|reserved_special_token_109|>",
942
+ "lstrip": false,
943
+ "normalized": false,
944
+ "rstrip": false,
945
+ "single_word": false,
946
+ "special": true
947
+ },
948
+ "128118": {
949
+ "content": "<|reserved_special_token_110|>",
950
+ "lstrip": false,
951
+ "normalized": false,
952
+ "rstrip": false,
953
+ "single_word": false,
954
+ "special": true
955
+ },
956
+ "128119": {
957
+ "content": "<|reserved_special_token_111|>",
958
+ "lstrip": false,
959
+ "normalized": false,
960
+ "rstrip": false,
961
+ "single_word": false,
962
+ "special": true
963
+ },
964
+ "128120": {
965
+ "content": "<|reserved_special_token_112|>",
966
+ "lstrip": false,
967
+ "normalized": false,
968
+ "rstrip": false,
969
+ "single_word": false,
970
+ "special": true
971
+ },
972
+ "128121": {
973
+ "content": "<|reserved_special_token_113|>",
974
+ "lstrip": false,
975
+ "normalized": false,
976
+ "rstrip": false,
977
+ "single_word": false,
978
+ "special": true
979
+ },
980
+ "128122": {
981
+ "content": "<|reserved_special_token_114|>",
982
+ "lstrip": false,
983
+ "normalized": false,
984
+ "rstrip": false,
985
+ "single_word": false,
986
+ "special": true
987
+ },
988
+ "128123": {
989
+ "content": "<|reserved_special_token_115|>",
990
+ "lstrip": false,
991
+ "normalized": false,
992
+ "rstrip": false,
993
+ "single_word": false,
994
+ "special": true
995
+ },
996
+ "128124": {
997
+ "content": "<|reserved_special_token_116|>",
998
+ "lstrip": false,
999
+ "normalized": false,
1000
+ "rstrip": false,
1001
+ "single_word": false,
1002
+ "special": true
1003
+ },
1004
+ "128125": {
1005
+ "content": "<|reserved_special_token_117|>",
1006
+ "lstrip": false,
1007
+ "normalized": false,
1008
+ "rstrip": false,
1009
+ "single_word": false,
1010
+ "special": true
1011
+ },
1012
+ "128126": {
1013
+ "content": "<|reserved_special_token_118|>",
1014
+ "lstrip": false,
1015
+ "normalized": false,
1016
+ "rstrip": false,
1017
+ "single_word": false,
1018
+ "special": true
1019
+ },
1020
+ "128127": {
1021
+ "content": "<|reserved_special_token_119|>",
1022
+ "lstrip": false,
1023
+ "normalized": false,
1024
+ "rstrip": false,
1025
+ "single_word": false,
1026
+ "special": true
1027
+ },
1028
+ "128128": {
1029
+ "content": "<|reserved_special_token_120|>",
1030
+ "lstrip": false,
1031
+ "normalized": false,
1032
+ "rstrip": false,
1033
+ "single_word": false,
1034
+ "special": true
1035
+ },
1036
+ "128129": {
1037
+ "content": "<|reserved_special_token_121|>",
1038
+ "lstrip": false,
1039
+ "normalized": false,
1040
+ "rstrip": false,
1041
+ "single_word": false,
1042
+ "special": true
1043
+ },
1044
+ "128130": {
1045
+ "content": "<|reserved_special_token_122|>",
1046
+ "lstrip": false,
1047
+ "normalized": false,
1048
+ "rstrip": false,
1049
+ "single_word": false,
1050
+ "special": true
1051
+ },
1052
+ "128131": {
1053
+ "content": "<|reserved_special_token_123|>",
1054
+ "lstrip": false,
1055
+ "normalized": false,
1056
+ "rstrip": false,
1057
+ "single_word": false,
1058
+ "special": true
1059
+ },
1060
+ "128132": {
1061
+ "content": "<|reserved_special_token_124|>",
1062
+ "lstrip": false,
1063
+ "normalized": false,
1064
+ "rstrip": false,
1065
+ "single_word": false,
1066
+ "special": true
1067
+ },
1068
+ "128133": {
1069
+ "content": "<|reserved_special_token_125|>",
1070
+ "lstrip": false,
1071
+ "normalized": false,
1072
+ "rstrip": false,
1073
+ "single_word": false,
1074
+ "special": true
1075
+ },
1076
+ "128134": {
1077
+ "content": "<|reserved_special_token_126|>",
1078
+ "lstrip": false,
1079
+ "normalized": false,
1080
+ "rstrip": false,
1081
+ "single_word": false,
1082
+ "special": true
1083
+ },
1084
+ "128135": {
1085
+ "content": "<|reserved_special_token_127|>",
1086
+ "lstrip": false,
1087
+ "normalized": false,
1088
+ "rstrip": false,
1089
+ "single_word": false,
1090
+ "special": true
1091
+ },
1092
+ "128136": {
1093
+ "content": "<|reserved_special_token_128|>",
1094
+ "lstrip": false,
1095
+ "normalized": false,
1096
+ "rstrip": false,
1097
+ "single_word": false,
1098
+ "special": true
1099
+ },
1100
+ "128137": {
1101
+ "content": "<|reserved_special_token_129|>",
1102
+ "lstrip": false,
1103
+ "normalized": false,
1104
+ "rstrip": false,
1105
+ "single_word": false,
1106
+ "special": true
1107
+ },
1108
+ "128138": {
1109
+ "content": "<|reserved_special_token_130|>",
1110
+ "lstrip": false,
1111
+ "normalized": false,
1112
+ "rstrip": false,
1113
+ "single_word": false,
1114
+ "special": true
1115
+ },
1116
+ "128139": {
1117
+ "content": "<|reserved_special_token_131|>",
1118
+ "lstrip": false,
1119
+ "normalized": false,
1120
+ "rstrip": false,
1121
+ "single_word": false,
1122
+ "special": true
1123
+ },
1124
+ "128140": {
1125
+ "content": "<|reserved_special_token_132|>",
1126
+ "lstrip": false,
1127
+ "normalized": false,
1128
+ "rstrip": false,
1129
+ "single_word": false,
1130
+ "special": true
1131
+ },
1132
+ "128141": {
1133
+ "content": "<|reserved_special_token_133|>",
1134
+ "lstrip": false,
1135
+ "normalized": false,
1136
+ "rstrip": false,
1137
+ "single_word": false,
1138
+ "special": true
1139
+ },
1140
+ "128142": {
1141
+ "content": "<|reserved_special_token_134|>",
1142
+ "lstrip": false,
1143
+ "normalized": false,
1144
+ "rstrip": false,
1145
+ "single_word": false,
1146
+ "special": true
1147
+ },
1148
+ "128143": {
1149
+ "content": "<|reserved_special_token_135|>",
1150
+ "lstrip": false,
1151
+ "normalized": false,
1152
+ "rstrip": false,
1153
+ "single_word": false,
1154
+ "special": true
1155
+ },
1156
+ "128144": {
1157
+ "content": "<|reserved_special_token_136|>",
1158
+ "lstrip": false,
1159
+ "normalized": false,
1160
+ "rstrip": false,
1161
+ "single_word": false,
1162
+ "special": true
1163
+ },
1164
+ "128145": {
1165
+ "content": "<|reserved_special_token_137|>",
1166
+ "lstrip": false,
1167
+ "normalized": false,
1168
+ "rstrip": false,
1169
+ "single_word": false,
1170
+ "special": true
1171
+ },
1172
+ "128146": {
1173
+ "content": "<|reserved_special_token_138|>",
1174
+ "lstrip": false,
1175
+ "normalized": false,
1176
+ "rstrip": false,
1177
+ "single_word": false,
1178
+ "special": true
1179
+ },
1180
+ "128147": {
1181
+ "content": "<|reserved_special_token_139|>",
1182
+ "lstrip": false,
1183
+ "normalized": false,
1184
+ "rstrip": false,
1185
+ "single_word": false,
1186
+ "special": true
1187
+ },
1188
+ "128148": {
1189
+ "content": "<|reserved_special_token_140|>",
1190
+ "lstrip": false,
1191
+ "normalized": false,
1192
+ "rstrip": false,
1193
+ "single_word": false,
1194
+ "special": true
1195
+ },
1196
+ "128149": {
1197
+ "content": "<|reserved_special_token_141|>",
1198
+ "lstrip": false,
1199
+ "normalized": false,
1200
+ "rstrip": false,
1201
+ "single_word": false,
1202
+ "special": true
1203
+ },
1204
+ "128150": {
1205
+ "content": "<|reserved_special_token_142|>",
1206
+ "lstrip": false,
1207
+ "normalized": false,
1208
+ "rstrip": false,
1209
+ "single_word": false,
1210
+ "special": true
1211
+ },
1212
+ "128151": {
1213
+ "content": "<|reserved_special_token_143|>",
1214
+ "lstrip": false,
1215
+ "normalized": false,
1216
+ "rstrip": false,
1217
+ "single_word": false,
1218
+ "special": true
1219
+ },
1220
+ "128152": {
1221
+ "content": "<|reserved_special_token_144|>",
1222
+ "lstrip": false,
1223
+ "normalized": false,
1224
+ "rstrip": false,
1225
+ "single_word": false,
1226
+ "special": true
1227
+ },
1228
+ "128153": {
1229
+ "content": "<|reserved_special_token_145|>",
1230
+ "lstrip": false,
1231
+ "normalized": false,
1232
+ "rstrip": false,
1233
+ "single_word": false,
1234
+ "special": true
1235
+ },
1236
+ "128154": {
1237
+ "content": "<|reserved_special_token_146|>",
1238
+ "lstrip": false,
1239
+ "normalized": false,
1240
+ "rstrip": false,
1241
+ "single_word": false,
1242
+ "special": true
1243
+ },
1244
+ "128155": {
1245
+ "content": "<|reserved_special_token_147|>",
1246
+ "lstrip": false,
1247
+ "normalized": false,
1248
+ "rstrip": false,
1249
+ "single_word": false,
1250
+ "special": true
1251
+ },
1252
+ "128156": {
1253
+ "content": "<|reserved_special_token_148|>",
1254
+ "lstrip": false,
1255
+ "normalized": false,
1256
+ "rstrip": false,
1257
+ "single_word": false,
1258
+ "special": true
1259
+ },
1260
+ "128157": {
1261
+ "content": "<|reserved_special_token_149|>",
1262
+ "lstrip": false,
1263
+ "normalized": false,
1264
+ "rstrip": false,
1265
+ "single_word": false,
1266
+ "special": true
1267
+ },
1268
+ "128158": {
1269
+ "content": "<|reserved_special_token_150|>",
1270
+ "lstrip": false,
1271
+ "normalized": false,
1272
+ "rstrip": false,
1273
+ "single_word": false,
1274
+ "special": true
1275
+ },
1276
+ "128159": {
1277
+ "content": "<|reserved_special_token_151|>",
1278
+ "lstrip": false,
1279
+ "normalized": false,
1280
+ "rstrip": false,
1281
+ "single_word": false,
1282
+ "special": true
1283
+ },
1284
+ "128160": {
1285
+ "content": "<|reserved_special_token_152|>",
1286
+ "lstrip": false,
1287
+ "normalized": false,
1288
+ "rstrip": false,
1289
+ "single_word": false,
1290
+ "special": true
1291
+ },
1292
+ "128161": {
1293
+ "content": "<|reserved_special_token_153|>",
1294
+ "lstrip": false,
1295
+ "normalized": false,
1296
+ "rstrip": false,
1297
+ "single_word": false,
1298
+ "special": true
1299
+ },
1300
+ "128162": {
1301
+ "content": "<|reserved_special_token_154|>",
1302
+ "lstrip": false,
1303
+ "normalized": false,
1304
+ "rstrip": false,
1305
+ "single_word": false,
1306
+ "special": true
1307
+ },
1308
+ "128163": {
1309
+ "content": "<|reserved_special_token_155|>",
1310
+ "lstrip": false,
1311
+ "normalized": false,
1312
+ "rstrip": false,
1313
+ "single_word": false,
1314
+ "special": true
1315
+ },
1316
+ "128164": {
1317
+ "content": "<|reserved_special_token_156|>",
1318
+ "lstrip": false,
1319
+ "normalized": false,
1320
+ "rstrip": false,
1321
+ "single_word": false,
1322
+ "special": true
1323
+ },
1324
+ "128165": {
1325
+ "content": "<|reserved_special_token_157|>",
1326
+ "lstrip": false,
1327
+ "normalized": false,
1328
+ "rstrip": false,
1329
+ "single_word": false,
1330
+ "special": true
1331
+ },
1332
+ "128166": {
1333
+ "content": "<|reserved_special_token_158|>",
1334
+ "lstrip": false,
1335
+ "normalized": false,
1336
+ "rstrip": false,
1337
+ "single_word": false,
1338
+ "special": true
1339
+ },
1340
+ "128167": {
1341
+ "content": "<|reserved_special_token_159|>",
1342
+ "lstrip": false,
1343
+ "normalized": false,
1344
+ "rstrip": false,
1345
+ "single_word": false,
1346
+ "special": true
1347
+ },
1348
+ "128168": {
1349
+ "content": "<|reserved_special_token_160|>",
1350
+ "lstrip": false,
1351
+ "normalized": false,
1352
+ "rstrip": false,
1353
+ "single_word": false,
1354
+ "special": true
1355
+ },
1356
+ "128169": {
1357
+ "content": "<|reserved_special_token_161|>",
1358
+ "lstrip": false,
1359
+ "normalized": false,
1360
+ "rstrip": false,
1361
+ "single_word": false,
1362
+ "special": true
1363
+ },
1364
+ "128170": {
1365
+ "content": "<|reserved_special_token_162|>",
1366
+ "lstrip": false,
1367
+ "normalized": false,
1368
+ "rstrip": false,
1369
+ "single_word": false,
1370
+ "special": true
1371
+ },
1372
+ "128171": {
1373
+ "content": "<|reserved_special_token_163|>",
1374
+ "lstrip": false,
1375
+ "normalized": false,
1376
+ "rstrip": false,
1377
+ "single_word": false,
1378
+ "special": true
1379
+ },
1380
+ "128172": {
1381
+ "content": "<|reserved_special_token_164|>",
1382
+ "lstrip": false,
1383
+ "normalized": false,
1384
+ "rstrip": false,
1385
+ "single_word": false,
1386
+ "special": true
1387
+ },
1388
+ "128173": {
1389
+ "content": "<|reserved_special_token_165|>",
1390
+ "lstrip": false,
1391
+ "normalized": false,
1392
+ "rstrip": false,
1393
+ "single_word": false,
1394
+ "special": true
1395
+ },
1396
+ "128174": {
1397
+ "content": "<|reserved_special_token_166|>",
1398
+ "lstrip": false,
1399
+ "normalized": false,
1400
+ "rstrip": false,
1401
+ "single_word": false,
1402
+ "special": true
1403
+ },
1404
+ "128175": {
1405
+ "content": "<|reserved_special_token_167|>",
1406
+ "lstrip": false,
1407
+ "normalized": false,
1408
+ "rstrip": false,
1409
+ "single_word": false,
1410
+ "special": true
1411
+ },
1412
+ "128176": {
1413
+ "content": "<|reserved_special_token_168|>",
1414
+ "lstrip": false,
1415
+ "normalized": false,
1416
+ "rstrip": false,
1417
+ "single_word": false,
1418
+ "special": true
1419
+ },
1420
+ "128177": {
1421
+ "content": "<|reserved_special_token_169|>",
1422
+ "lstrip": false,
1423
+ "normalized": false,
1424
+ "rstrip": false,
1425
+ "single_word": false,
1426
+ "special": true
1427
+ },
1428
+ "128178": {
1429
+ "content": "<|reserved_special_token_170|>",
1430
+ "lstrip": false,
1431
+ "normalized": false,
1432
+ "rstrip": false,
1433
+ "single_word": false,
1434
+ "special": true
1435
+ },
1436
+ "128179": {
1437
+ "content": "<|reserved_special_token_171|>",
1438
+ "lstrip": false,
1439
+ "normalized": false,
1440
+ "rstrip": false,
1441
+ "single_word": false,
1442
+ "special": true
1443
+ },
1444
+ "128180": {
1445
+ "content": "<|reserved_special_token_172|>",
1446
+ "lstrip": false,
1447
+ "normalized": false,
1448
+ "rstrip": false,
1449
+ "single_word": false,
1450
+ "special": true
1451
+ },
1452
+ "128181": {
1453
+ "content": "<|reserved_special_token_173|>",
1454
+ "lstrip": false,
1455
+ "normalized": false,
1456
+ "rstrip": false,
1457
+ "single_word": false,
1458
+ "special": true
1459
+ },
1460
+ "128182": {
1461
+ "content": "<|reserved_special_token_174|>",
1462
+ "lstrip": false,
1463
+ "normalized": false,
1464
+ "rstrip": false,
1465
+ "single_word": false,
1466
+ "special": true
1467
+ },
1468
+ "128183": {
1469
+ "content": "<|reserved_special_token_175|>",
1470
+ "lstrip": false,
1471
+ "normalized": false,
1472
+ "rstrip": false,
1473
+ "single_word": false,
1474
+ "special": true
1475
+ },
1476
+ "128184": {
1477
+ "content": "<|reserved_special_token_176|>",
1478
+ "lstrip": false,
1479
+ "normalized": false,
1480
+ "rstrip": false,
1481
+ "single_word": false,
1482
+ "special": true
1483
+ },
1484
+ "128185": {
1485
+ "content": "<|reserved_special_token_177|>",
1486
+ "lstrip": false,
1487
+ "normalized": false,
1488
+ "rstrip": false,
1489
+ "single_word": false,
1490
+ "special": true
1491
+ },
1492
+ "128186": {
1493
+ "content": "<|reserved_special_token_178|>",
1494
+ "lstrip": false,
1495
+ "normalized": false,
1496
+ "rstrip": false,
1497
+ "single_word": false,
1498
+ "special": true
1499
+ },
1500
+ "128187": {
1501
+ "content": "<|reserved_special_token_179|>",
1502
+ "lstrip": false,
1503
+ "normalized": false,
1504
+ "rstrip": false,
1505
+ "single_word": false,
1506
+ "special": true
1507
+ },
1508
+ "128188": {
1509
+ "content": "<|reserved_special_token_180|>",
1510
+ "lstrip": false,
1511
+ "normalized": false,
1512
+ "rstrip": false,
1513
+ "single_word": false,
1514
+ "special": true
1515
+ },
1516
+ "128189": {
1517
+ "content": "<|reserved_special_token_181|>",
1518
+ "lstrip": false,
1519
+ "normalized": false,
1520
+ "rstrip": false,
1521
+ "single_word": false,
1522
+ "special": true
1523
+ },
1524
+ "128190": {
1525
+ "content": "<|reserved_special_token_182|>",
1526
+ "lstrip": false,
1527
+ "normalized": false,
1528
+ "rstrip": false,
1529
+ "single_word": false,
1530
+ "special": true
1531
+ },
1532
+ "128191": {
1533
+ "content": "<|reserved_special_token_183|>",
1534
+ "lstrip": false,
1535
+ "normalized": false,
1536
+ "rstrip": false,
1537
+ "single_word": false,
1538
+ "special": true
1539
+ },
1540
+ "128192": {
1541
+ "content": "<|reserved_special_token_184|>",
1542
+ "lstrip": false,
1543
+ "normalized": false,
1544
+ "rstrip": false,
1545
+ "single_word": false,
1546
+ "special": true
1547
+ },
1548
+ "128193": {
1549
+ "content": "<|reserved_special_token_185|>",
1550
+ "lstrip": false,
1551
+ "normalized": false,
1552
+ "rstrip": false,
1553
+ "single_word": false,
1554
+ "special": true
1555
+ },
1556
+ "128194": {
1557
+ "content": "<|reserved_special_token_186|>",
1558
+ "lstrip": false,
1559
+ "normalized": false,
1560
+ "rstrip": false,
1561
+ "single_word": false,
1562
+ "special": true
1563
+ },
1564
+ "128195": {
1565
+ "content": "<|reserved_special_token_187|>",
1566
+ "lstrip": false,
1567
+ "normalized": false,
1568
+ "rstrip": false,
1569
+ "single_word": false,
1570
+ "special": true
1571
+ },
1572
+ "128196": {
1573
+ "content": "<|reserved_special_token_188|>",
1574
+ "lstrip": false,
1575
+ "normalized": false,
1576
+ "rstrip": false,
1577
+ "single_word": false,
1578
+ "special": true
1579
+ },
1580
+ "128197": {
1581
+ "content": "<|reserved_special_token_189|>",
1582
+ "lstrip": false,
1583
+ "normalized": false,
1584
+ "rstrip": false,
1585
+ "single_word": false,
1586
+ "special": true
1587
+ },
1588
+ "128198": {
1589
+ "content": "<|reserved_special_token_190|>",
1590
+ "lstrip": false,
1591
+ "normalized": false,
1592
+ "rstrip": false,
1593
+ "single_word": false,
1594
+ "special": true
1595
+ },
1596
+ "128199": {
1597
+ "content": "<|reserved_special_token_191|>",
1598
+ "lstrip": false,
1599
+ "normalized": false,
1600
+ "rstrip": false,
1601
+ "single_word": false,
1602
+ "special": true
1603
+ },
1604
+ "128200": {
1605
+ "content": "<|reserved_special_token_192|>",
1606
+ "lstrip": false,
1607
+ "normalized": false,
1608
+ "rstrip": false,
1609
+ "single_word": false,
1610
+ "special": true
1611
+ },
1612
+ "128201": {
1613
+ "content": "<|reserved_special_token_193|>",
1614
+ "lstrip": false,
1615
+ "normalized": false,
1616
+ "rstrip": false,
1617
+ "single_word": false,
1618
+ "special": true
1619
+ },
1620
+ "128202": {
1621
+ "content": "<|reserved_special_token_194|>",
1622
+ "lstrip": false,
1623
+ "normalized": false,
1624
+ "rstrip": false,
1625
+ "single_word": false,
1626
+ "special": true
1627
+ },
1628
+ "128203": {
1629
+ "content": "<|reserved_special_token_195|>",
1630
+ "lstrip": false,
1631
+ "normalized": false,
1632
+ "rstrip": false,
1633
+ "single_word": false,
1634
+ "special": true
1635
+ },
1636
+ "128204": {
1637
+ "content": "<|reserved_special_token_196|>",
1638
+ "lstrip": false,
1639
+ "normalized": false,
1640
+ "rstrip": false,
1641
+ "single_word": false,
1642
+ "special": true
1643
+ },
1644
+ "128205": {
1645
+ "content": "<|reserved_special_token_197|>",
1646
+ "lstrip": false,
1647
+ "normalized": false,
1648
+ "rstrip": false,
1649
+ "single_word": false,
1650
+ "special": true
1651
+ },
1652
+ "128206": {
1653
+ "content": "<|reserved_special_token_198|>",
1654
+ "lstrip": false,
1655
+ "normalized": false,
1656
+ "rstrip": false,
1657
+ "single_word": false,
1658
+ "special": true
1659
+ },
1660
+ "128207": {
1661
+ "content": "<|reserved_special_token_199|>",
1662
+ "lstrip": false,
1663
+ "normalized": false,
1664
+ "rstrip": false,
1665
+ "single_word": false,
1666
+ "special": true
1667
+ },
1668
+ "128208": {
1669
+ "content": "<|reserved_special_token_200|>",
1670
+ "lstrip": false,
1671
+ "normalized": false,
1672
+ "rstrip": false,
1673
+ "single_word": false,
1674
+ "special": true
1675
+ },
1676
+ "128209": {
1677
+ "content": "<|reserved_special_token_201|>",
1678
+ "lstrip": false,
1679
+ "normalized": false,
1680
+ "rstrip": false,
1681
+ "single_word": false,
1682
+ "special": true
1683
+ },
1684
+ "128210": {
1685
+ "content": "<|reserved_special_token_202|>",
1686
+ "lstrip": false,
1687
+ "normalized": false,
1688
+ "rstrip": false,
1689
+ "single_word": false,
1690
+ "special": true
1691
+ },
1692
+ "128211": {
1693
+ "content": "<|reserved_special_token_203|>",
1694
+ "lstrip": false,
1695
+ "normalized": false,
1696
+ "rstrip": false,
1697
+ "single_word": false,
1698
+ "special": true
1699
+ },
1700
+ "128212": {
1701
+ "content": "<|reserved_special_token_204|>",
1702
+ "lstrip": false,
1703
+ "normalized": false,
1704
+ "rstrip": false,
1705
+ "single_word": false,
1706
+ "special": true
1707
+ },
1708
+ "128213": {
1709
+ "content": "<|reserved_special_token_205|>",
1710
+ "lstrip": false,
1711
+ "normalized": false,
1712
+ "rstrip": false,
1713
+ "single_word": false,
1714
+ "special": true
1715
+ },
1716
+ "128214": {
1717
+ "content": "<|reserved_special_token_206|>",
1718
+ "lstrip": false,
1719
+ "normalized": false,
1720
+ "rstrip": false,
1721
+ "single_word": false,
1722
+ "special": true
1723
+ },
1724
+ "128215": {
1725
+ "content": "<|reserved_special_token_207|>",
1726
+ "lstrip": false,
1727
+ "normalized": false,
1728
+ "rstrip": false,
1729
+ "single_word": false,
1730
+ "special": true
1731
+ },
1732
+ "128216": {
1733
+ "content": "<|reserved_special_token_208|>",
1734
+ "lstrip": false,
1735
+ "normalized": false,
1736
+ "rstrip": false,
1737
+ "single_word": false,
1738
+ "special": true
1739
+ },
1740
+ "128217": {
1741
+ "content": "<|reserved_special_token_209|>",
1742
+ "lstrip": false,
1743
+ "normalized": false,
1744
+ "rstrip": false,
1745
+ "single_word": false,
1746
+ "special": true
1747
+ },
1748
+ "128218": {
1749
+ "content": "<|reserved_special_token_210|>",
1750
+ "lstrip": false,
1751
+ "normalized": false,
1752
+ "rstrip": false,
1753
+ "single_word": false,
1754
+ "special": true
1755
+ },
1756
+ "128219": {
1757
+ "content": "<|reserved_special_token_211|>",
1758
+ "lstrip": false,
1759
+ "normalized": false,
1760
+ "rstrip": false,
1761
+ "single_word": false,
1762
+ "special": true
1763
+ },
1764
+ "128220": {
1765
+ "content": "<|reserved_special_token_212|>",
1766
+ "lstrip": false,
1767
+ "normalized": false,
1768
+ "rstrip": false,
1769
+ "single_word": false,
1770
+ "special": true
1771
+ },
1772
+ "128221": {
1773
+ "content": "<|reserved_special_token_213|>",
1774
+ "lstrip": false,
1775
+ "normalized": false,
1776
+ "rstrip": false,
1777
+ "single_word": false,
1778
+ "special": true
1779
+ },
1780
+ "128222": {
1781
+ "content": "<|reserved_special_token_214|>",
1782
+ "lstrip": false,
1783
+ "normalized": false,
1784
+ "rstrip": false,
1785
+ "single_word": false,
1786
+ "special": true
1787
+ },
1788
+ "128223": {
1789
+ "content": "<|reserved_special_token_215|>",
1790
+ "lstrip": false,
1791
+ "normalized": false,
1792
+ "rstrip": false,
1793
+ "single_word": false,
1794
+ "special": true
1795
+ },
1796
+ "128224": {
1797
+ "content": "<|reserved_special_token_216|>",
1798
+ "lstrip": false,
1799
+ "normalized": false,
1800
+ "rstrip": false,
1801
+ "single_word": false,
1802
+ "special": true
1803
+ },
1804
+ "128225": {
1805
+ "content": "<|reserved_special_token_217|>",
1806
+ "lstrip": false,
1807
+ "normalized": false,
1808
+ "rstrip": false,
1809
+ "single_word": false,
1810
+ "special": true
1811
+ },
1812
+ "128226": {
1813
+ "content": "<|reserved_special_token_218|>",
1814
+ "lstrip": false,
1815
+ "normalized": false,
1816
+ "rstrip": false,
1817
+ "single_word": false,
1818
+ "special": true
1819
+ },
1820
+ "128227": {
1821
+ "content": "<|reserved_special_token_219|>",
1822
+ "lstrip": false,
1823
+ "normalized": false,
1824
+ "rstrip": false,
1825
+ "single_word": false,
1826
+ "special": true
1827
+ },
1828
+ "128228": {
1829
+ "content": "<|reserved_special_token_220|>",
1830
+ "lstrip": false,
1831
+ "normalized": false,
1832
+ "rstrip": false,
1833
+ "single_word": false,
1834
+ "special": true
1835
+ },
1836
+ "128229": {
1837
+ "content": "<|reserved_special_token_221|>",
1838
+ "lstrip": false,
1839
+ "normalized": false,
1840
+ "rstrip": false,
1841
+ "single_word": false,
1842
+ "special": true
1843
+ },
1844
+ "128230": {
1845
+ "content": "<|reserved_special_token_222|>",
1846
+ "lstrip": false,
1847
+ "normalized": false,
1848
+ "rstrip": false,
1849
+ "single_word": false,
1850
+ "special": true
1851
+ },
1852
+ "128231": {
1853
+ "content": "<|reserved_special_token_223|>",
1854
+ "lstrip": false,
1855
+ "normalized": false,
1856
+ "rstrip": false,
1857
+ "single_word": false,
1858
+ "special": true
1859
+ },
1860
+ "128232": {
1861
+ "content": "<|reserved_special_token_224|>",
1862
+ "lstrip": false,
1863
+ "normalized": false,
1864
+ "rstrip": false,
1865
+ "single_word": false,
1866
+ "special": true
1867
+ },
1868
+ "128233": {
1869
+ "content": "<|reserved_special_token_225|>",
1870
+ "lstrip": false,
1871
+ "normalized": false,
1872
+ "rstrip": false,
1873
+ "single_word": false,
1874
+ "special": true
1875
+ },
1876
+ "128234": {
1877
+ "content": "<|reserved_special_token_226|>",
1878
+ "lstrip": false,
1879
+ "normalized": false,
1880
+ "rstrip": false,
1881
+ "single_word": false,
1882
+ "special": true
1883
+ },
1884
+ "128235": {
1885
+ "content": "<|reserved_special_token_227|>",
1886
+ "lstrip": false,
1887
+ "normalized": false,
1888
+ "rstrip": false,
1889
+ "single_word": false,
1890
+ "special": true
1891
+ },
1892
+ "128236": {
1893
+ "content": "<|reserved_special_token_228|>",
1894
+ "lstrip": false,
1895
+ "normalized": false,
1896
+ "rstrip": false,
1897
+ "single_word": false,
1898
+ "special": true
1899
+ },
1900
+ "128237": {
1901
+ "content": "<|reserved_special_token_229|>",
1902
+ "lstrip": false,
1903
+ "normalized": false,
1904
+ "rstrip": false,
1905
+ "single_word": false,
1906
+ "special": true
1907
+ },
1908
+ "128238": {
1909
+ "content": "<|reserved_special_token_230|>",
1910
+ "lstrip": false,
1911
+ "normalized": false,
1912
+ "rstrip": false,
1913
+ "single_word": false,
1914
+ "special": true
1915
+ },
1916
+ "128239": {
1917
+ "content": "<|reserved_special_token_231|>",
1918
+ "lstrip": false,
1919
+ "normalized": false,
1920
+ "rstrip": false,
1921
+ "single_word": false,
1922
+ "special": true
1923
+ },
1924
+ "128240": {
1925
+ "content": "<|reserved_special_token_232|>",
1926
+ "lstrip": false,
1927
+ "normalized": false,
1928
+ "rstrip": false,
1929
+ "single_word": false,
1930
+ "special": true
1931
+ },
1932
+ "128241": {
1933
+ "content": "<|reserved_special_token_233|>",
1934
+ "lstrip": false,
1935
+ "normalized": false,
1936
+ "rstrip": false,
1937
+ "single_word": false,
1938
+ "special": true
1939
+ },
1940
+ "128242": {
1941
+ "content": "<|reserved_special_token_234|>",
1942
+ "lstrip": false,
1943
+ "normalized": false,
1944
+ "rstrip": false,
1945
+ "single_word": false,
1946
+ "special": true
1947
+ },
1948
+ "128243": {
1949
+ "content": "<|reserved_special_token_235|>",
1950
+ "lstrip": false,
1951
+ "normalized": false,
1952
+ "rstrip": false,
1953
+ "single_word": false,
1954
+ "special": true
1955
+ },
1956
+ "128244": {
1957
+ "content": "<|reserved_special_token_236|>",
1958
+ "lstrip": false,
1959
+ "normalized": false,
1960
+ "rstrip": false,
1961
+ "single_word": false,
1962
+ "special": true
1963
+ },
1964
+ "128245": {
1965
+ "content": "<|reserved_special_token_237|>",
1966
+ "lstrip": false,
1967
+ "normalized": false,
1968
+ "rstrip": false,
1969
+ "single_word": false,
1970
+ "special": true
1971
+ },
1972
+ "128246": {
1973
+ "content": "<|reserved_special_token_238|>",
1974
+ "lstrip": false,
1975
+ "normalized": false,
1976
+ "rstrip": false,
1977
+ "single_word": false,
1978
+ "special": true
1979
+ },
1980
+ "128247": {
1981
+ "content": "<|reserved_special_token_239|>",
1982
+ "lstrip": false,
1983
+ "normalized": false,
1984
+ "rstrip": false,
1985
+ "single_word": false,
1986
+ "special": true
1987
+ },
1988
+ "128248": {
1989
+ "content": "<|reserved_special_token_240|>",
1990
+ "lstrip": false,
1991
+ "normalized": false,
1992
+ "rstrip": false,
1993
+ "single_word": false,
1994
+ "special": true
1995
+ },
1996
+ "128249": {
1997
+ "content": "<|reserved_special_token_241|>",
1998
+ "lstrip": false,
1999
+ "normalized": false,
2000
+ "rstrip": false,
2001
+ "single_word": false,
2002
+ "special": true
2003
+ },
2004
+ "128250": {
2005
+ "content": "<|reserved_special_token_242|>",
2006
+ "lstrip": false,
2007
+ "normalized": false,
2008
+ "rstrip": false,
2009
+ "single_word": false,
2010
+ "special": true
2011
+ },
2012
+ "128251": {
2013
+ "content": "<|reserved_special_token_243|>",
2014
+ "lstrip": false,
2015
+ "normalized": false,
2016
+ "rstrip": false,
2017
+ "single_word": false,
2018
+ "special": true
2019
+ },
2020
+ "128252": {
2021
+ "content": "<|reserved_special_token_244|>",
2022
+ "lstrip": false,
2023
+ "normalized": false,
2024
+ "rstrip": false,
2025
+ "single_word": false,
2026
+ "special": true
2027
+ },
2028
+ "128253": {
2029
+ "content": "<|reserved_special_token_245|>",
2030
+ "lstrip": false,
2031
+ "normalized": false,
2032
+ "rstrip": false,
2033
+ "single_word": false,
2034
+ "special": true
2035
+ },
2036
+ "128254": {
2037
+ "content": "<|reserved_special_token_246|>",
2038
+ "lstrip": false,
2039
+ "normalized": false,
2040
+ "rstrip": false,
2041
+ "single_word": false,
2042
+ "special": true
2043
+ },
2044
+ "128255": {
2045
+ "content": "<|reserved_special_token_247|>",
2046
+ "lstrip": false,
2047
+ "normalized": false,
2048
+ "rstrip": false,
2049
+ "single_word": false,
2050
+ "special": true
2051
+ }
2052
+ },
2053
+ "bos_token": "<|begin_of_text|>",
2054
+ "clean_up_tokenization_spaces": true,
2055
+ "eos_token": "<|eot_id|>",
2056
+ "extra_special_tokens": {},
2057
+ "model_input_names": [
2058
+ "input_ids",
2059
+ "attention_mask"
2060
+ ],
2061
+ "model_max_length": 131072,
2062
+ "pad_token": "<|finetune_right_pad_id|>",
2063
+ "padding_side": "left",
2064
+ "tokenizer_class": "PreTrainedTokenizerFast",
2065
+ "unk_token": null
2066
+ }
train/log.json ADDED
@@ -0,0 +1,2296 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "loss": 1.416,
4
+ "grad_norm": 7.261270046234131,
5
+ "learning_rate": 5.69620253164557e-06,
6
+ "epoch": 0.006345177664974619,
7
+ "step": 10
8
+ },
9
+ {
10
+ "loss": 1.0241,
11
+ "grad_norm": 4.087796688079834,
12
+ "learning_rate": 1.2025316455696203e-05,
13
+ "epoch": 0.012690355329949238,
14
+ "step": 20
15
+ },
16
+ {
17
+ "loss": 0.7054,
18
+ "grad_norm": 3.7453219890594482,
19
+ "learning_rate": 1.8354430379746836e-05,
20
+ "epoch": 0.01903553299492386,
21
+ "step": 30
22
+ },
23
+ {
24
+ "loss": 0.5727,
25
+ "grad_norm": 1.5524132251739502,
26
+ "learning_rate": 2.468354430379747e-05,
27
+ "epoch": 0.025380710659898477,
28
+ "step": 40
29
+ },
30
+ {
31
+ "loss": 0.4431,
32
+ "grad_norm": 1.175743579864502,
33
+ "learning_rate": 3.10126582278481e-05,
34
+ "epoch": 0.031725888324873094,
35
+ "step": 50
36
+ },
37
+ {
38
+ "loss": 0.3589,
39
+ "grad_norm": 1.3493213653564453,
40
+ "learning_rate": 3.7341772151898736e-05,
41
+ "epoch": 0.03807106598984772,
42
+ "step": 60
43
+ },
44
+ {
45
+ "loss": 0.3363,
46
+ "grad_norm": 1.4607553482055664,
47
+ "learning_rate": 4.367088607594937e-05,
48
+ "epoch": 0.044416243654822336,
49
+ "step": 70
50
+ },
51
+ {
52
+ "loss": 0.2936,
53
+ "grad_norm": 1.7902618646621704,
54
+ "learning_rate": 5e-05,
55
+ "epoch": 0.050761421319796954,
56
+ "step": 80
57
+ },
58
+ {
59
+ "loss": 0.27,
60
+ "grad_norm": 1.5136945247650146,
61
+ "learning_rate": 5.6329113924050636e-05,
62
+ "epoch": 0.05710659898477157,
63
+ "step": 90
64
+ },
65
+ {
66
+ "loss": 0.2111,
67
+ "grad_norm": 1.2513262033462524,
68
+ "learning_rate": 6.265822784810128e-05,
69
+ "epoch": 0.06345177664974619,
70
+ "step": 100
71
+ },
72
+ {
73
+ "loss": 0.2127,
74
+ "grad_norm": 1.538354754447937,
75
+ "learning_rate": 6.89873417721519e-05,
76
+ "epoch": 0.06979695431472081,
77
+ "step": 110
78
+ },
79
+ {
80
+ "loss": 0.1974,
81
+ "grad_norm": 1.3688119649887085,
82
+ "learning_rate": 7.531645569620254e-05,
83
+ "epoch": 0.07614213197969544,
84
+ "step": 120
85
+ },
86
+ {
87
+ "loss": 0.1973,
88
+ "grad_norm": 1.1792855262756348,
89
+ "learning_rate": 8.164556962025317e-05,
90
+ "epoch": 0.08248730964467005,
91
+ "step": 130
92
+ },
93
+ {
94
+ "loss": 0.1735,
95
+ "grad_norm": 1.0454416275024414,
96
+ "learning_rate": 8.797468354430381e-05,
97
+ "epoch": 0.08883248730964467,
98
+ "step": 140
99
+ },
100
+ {
101
+ "loss": 0.1464,
102
+ "grad_norm": 1.3377323150634766,
103
+ "learning_rate": 9.430379746835444e-05,
104
+ "epoch": 0.09517766497461928,
105
+ "step": 150
106
+ },
107
+ {
108
+ "loss": 0.1812,
109
+ "grad_norm": 1.117480754852295,
110
+ "learning_rate": 0.00010063291139240508,
111
+ "epoch": 0.10152284263959391,
112
+ "step": 160
113
+ },
114
+ {
115
+ "loss": 0.1533,
116
+ "grad_norm": 1.5500177145004272,
117
+ "learning_rate": 0.00010696202531645569,
118
+ "epoch": 0.10786802030456853,
119
+ "step": 170
120
+ },
121
+ {
122
+ "loss": 0.1348,
123
+ "grad_norm": 0.986708402633667,
124
+ "learning_rate": 0.00011329113924050634,
125
+ "epoch": 0.11421319796954314,
126
+ "step": 180
127
+ },
128
+ {
129
+ "loss": 0.1342,
130
+ "grad_norm": 1.2270374298095703,
131
+ "learning_rate": 0.00011962025316455696,
132
+ "epoch": 0.12055837563451777,
133
+ "step": 190
134
+ },
135
+ {
136
+ "loss": 0.1381,
137
+ "grad_norm": 0.9935520887374878,
138
+ "learning_rate": 0.0001259493670886076,
139
+ "epoch": 0.12690355329949238,
140
+ "step": 200
141
+ },
142
+ {
143
+ "loss": 0.1513,
144
+ "grad_norm": 1.4393113851547241,
145
+ "learning_rate": 0.00013227848101265822,
146
+ "epoch": 0.13324873096446702,
147
+ "step": 210
148
+ },
149
+ {
150
+ "loss": 0.1221,
151
+ "grad_norm": 0.9539300799369812,
152
+ "learning_rate": 0.00013860759493670888,
153
+ "epoch": 0.13959390862944163,
154
+ "step": 220
155
+ },
156
+ {
157
+ "loss": 0.1414,
158
+ "grad_norm": 0.8915525078773499,
159
+ "learning_rate": 0.0001449367088607595,
160
+ "epoch": 0.14593908629441624,
161
+ "step": 230
162
+ },
163
+ {
164
+ "loss": 0.1331,
165
+ "grad_norm": 0.6808815002441406,
166
+ "learning_rate": 0.00015126582278481013,
167
+ "epoch": 0.15228426395939088,
168
+ "step": 240
169
+ },
170
+ {
171
+ "loss": 0.1371,
172
+ "grad_norm": 0.7938814759254456,
173
+ "learning_rate": 0.00015759493670886075,
174
+ "epoch": 0.15862944162436549,
175
+ "step": 250
176
+ },
177
+ {
178
+ "loss": 0.1321,
179
+ "grad_norm": 0.6890779733657837,
180
+ "learning_rate": 0.0001639240506329114,
181
+ "epoch": 0.1649746192893401,
182
+ "step": 260
183
+ },
184
+ {
185
+ "loss": 0.1158,
186
+ "grad_norm": 0.4467112720012665,
187
+ "learning_rate": 0.00017025316455696204,
188
+ "epoch": 0.1713197969543147,
189
+ "step": 270
190
+ },
191
+ {
192
+ "loss": 0.1334,
193
+ "grad_norm": 1.0458476543426514,
194
+ "learning_rate": 0.00017658227848101266,
195
+ "epoch": 0.17766497461928935,
196
+ "step": 280
197
+ },
198
+ {
199
+ "loss": 0.1183,
200
+ "grad_norm": 0.8898298144340515,
201
+ "learning_rate": 0.0001829113924050633,
202
+ "epoch": 0.18401015228426396,
203
+ "step": 290
204
+ },
205
+ {
206
+ "loss": 0.1164,
207
+ "grad_norm": 0.8841809034347534,
208
+ "learning_rate": 0.00018924050632911394,
209
+ "epoch": 0.19035532994923857,
210
+ "step": 300
211
+ },
212
+ {
213
+ "loss": 0.1178,
214
+ "grad_norm": 0.7970095276832581,
215
+ "learning_rate": 0.00019556962025316457,
216
+ "epoch": 0.1967005076142132,
217
+ "step": 310
218
+ },
219
+ {
220
+ "eval_loss": 0.11779838055372238,
221
+ "eval_runtime": 45.5288,
222
+ "eval_samples_per_second": 29.146,
223
+ "eval_steps_per_second": 7.292,
224
+ "epoch": 0.1998730964467005,
225
+ "step": 315
226
+ },
227
+ {
228
+ "loss": 0.104,
229
+ "grad_norm": 0.7851632237434387,
230
+ "learning_rate": 0.00019999944779618028,
231
+ "epoch": 0.20304568527918782,
232
+ "step": 320
233
+ },
234
+ {
235
+ "loss": 0.1279,
236
+ "grad_norm": 0.7609685659408569,
237
+ "learning_rate": 0.00019998963100903997,
238
+ "epoch": 0.20939086294416243,
239
+ "step": 330
240
+ },
241
+ {
242
+ "loss": 0.1225,
243
+ "grad_norm": 0.764049232006073,
244
+ "learning_rate": 0.00019996754441248412,
245
+ "epoch": 0.21573604060913706,
246
+ "step": 340
247
+ },
248
+ {
249
+ "loss": 0.1057,
250
+ "grad_norm": 0.7909657955169678,
251
+ "learning_rate": 0.000199933190716777,
252
+ "epoch": 0.22208121827411167,
253
+ "step": 350
254
+ },
255
+ {
256
+ "loss": 0.1036,
257
+ "grad_norm": 0.9112188816070557,
258
+ "learning_rate": 0.00019988657413748867,
259
+ "epoch": 0.22842639593908629,
260
+ "step": 360
261
+ },
262
+ {
263
+ "loss": 0.1065,
264
+ "grad_norm": 0.8363418579101562,
265
+ "learning_rate": 0.0001998277003949778,
266
+ "epoch": 0.23477157360406092,
267
+ "step": 370
268
+ },
269
+ {
270
+ "loss": 0.1095,
271
+ "grad_norm": 1.1866352558135986,
272
+ "learning_rate": 0.00019975657671368943,
273
+ "epoch": 0.24111675126903553,
274
+ "step": 380
275
+ },
276
+ {
277
+ "loss": 0.1049,
278
+ "grad_norm": 0.5500998497009277,
279
+ "learning_rate": 0.00019967321182126873,
280
+ "epoch": 0.24746192893401014,
281
+ "step": 390
282
+ },
283
+ {
284
+ "loss": 0.1064,
285
+ "grad_norm": 0.7981701493263245,
286
+ "learning_rate": 0.00019957761594748975,
287
+ "epoch": 0.25380710659898476,
288
+ "step": 400
289
+ },
290
+ {
291
+ "loss": 0.0907,
292
+ "grad_norm": 0.5598951578140259,
293
+ "learning_rate": 0.0001994698008230005,
294
+ "epoch": 0.26015228426395937,
295
+ "step": 410
296
+ },
297
+ {
298
+ "loss": 0.1064,
299
+ "grad_norm": 0.8710437417030334,
300
+ "learning_rate": 0.00019934977967788294,
301
+ "epoch": 0.26649746192893403,
302
+ "step": 420
303
+ },
304
+ {
305
+ "loss": 0.107,
306
+ "grad_norm": 0.5126726627349854,
307
+ "learning_rate": 0.00019921756724003008,
308
+ "epoch": 0.27284263959390864,
309
+ "step": 430
310
+ },
311
+ {
312
+ "loss": 0.0998,
313
+ "grad_norm": 0.7381883263587952,
314
+ "learning_rate": 0.0001990731797333383,
315
+ "epoch": 0.27918781725888325,
316
+ "step": 440
317
+ },
318
+ {
319
+ "loss": 0.1012,
320
+ "grad_norm": 0.6682422757148743,
321
+ "learning_rate": 0.00019891663487571663,
322
+ "epoch": 0.28553299492385786,
323
+ "step": 450
324
+ },
325
+ {
326
+ "loss": 0.0988,
327
+ "grad_norm": 0.5485838651657104,
328
+ "learning_rate": 0.00019874795187691267,
329
+ "epoch": 0.2918781725888325,
330
+ "step": 460
331
+ },
332
+ {
333
+ "loss": 0.0915,
334
+ "grad_norm": 0.46157270669937134,
335
+ "learning_rate": 0.00019856715143615512,
336
+ "epoch": 0.2982233502538071,
337
+ "step": 470
338
+ },
339
+ {
340
+ "loss": 0.0943,
341
+ "grad_norm": 0.7231584787368774,
342
+ "learning_rate": 0.000198374255739614,
343
+ "epoch": 0.30456852791878175,
344
+ "step": 480
345
+ },
346
+ {
347
+ "loss": 0.0884,
348
+ "grad_norm": 0.6452136039733887,
349
+ "learning_rate": 0.000198169288457678,
350
+ "epoch": 0.31091370558375636,
351
+ "step": 490
352
+ },
353
+ {
354
+ "loss": 0.0952,
355
+ "grad_norm": 0.5756458640098572,
356
+ "learning_rate": 0.00019795227474205,
357
+ "epoch": 0.31725888324873097,
358
+ "step": 500
359
+ },
360
+ {
361
+ "loss": 0.0942,
362
+ "grad_norm": 0.5166134238243103,
363
+ "learning_rate": 0.00019772324122266054,
364
+ "epoch": 0.3236040609137056,
365
+ "step": 510
366
+ },
367
+ {
368
+ "loss": 0.0981,
369
+ "grad_norm": 0.7283093333244324,
370
+ "learning_rate": 0.00019748221600440015,
371
+ "epoch": 0.3299492385786802,
372
+ "step": 520
373
+ },
374
+ {
375
+ "loss": 0.0817,
376
+ "grad_norm": 0.5151678323745728,
377
+ "learning_rate": 0.00019722922866367054,
378
+ "epoch": 0.3362944162436548,
379
+ "step": 530
380
+ },
381
+ {
382
+ "loss": 0.0855,
383
+ "grad_norm": 0.4918259382247925,
384
+ "learning_rate": 0.0001969643102447552,
385
+ "epoch": 0.3426395939086294,
386
+ "step": 540
387
+ },
388
+ {
389
+ "loss": 0.0902,
390
+ "grad_norm": 0.4107823669910431,
391
+ "learning_rate": 0.00019668749325601006,
392
+ "epoch": 0.3489847715736041,
393
+ "step": 550
394
+ },
395
+ {
396
+ "loss": 0.0815,
397
+ "grad_norm": 0.45388150215148926,
398
+ "learning_rate": 0.00019639881166587427,
399
+ "epoch": 0.3553299492385787,
400
+ "step": 560
401
+ },
402
+ {
403
+ "loss": 0.0857,
404
+ "grad_norm": 0.46063661575317383,
405
+ "learning_rate": 0.00019609830089870183,
406
+ "epoch": 0.3616751269035533,
407
+ "step": 570
408
+ },
409
+ {
410
+ "loss": 0.0892,
411
+ "grad_norm": 0.43047210574150085,
412
+ "learning_rate": 0.00019578599783041492,
413
+ "epoch": 0.3680203045685279,
414
+ "step": 580
415
+ },
416
+ {
417
+ "loss": 0.0822,
418
+ "grad_norm": 0.6658169627189636,
419
+ "learning_rate": 0.0001954619407839784,
420
+ "epoch": 0.3743654822335025,
421
+ "step": 590
422
+ },
423
+ {
424
+ "loss": 0.0865,
425
+ "grad_norm": 0.686517596244812,
426
+ "learning_rate": 0.0001951261695246976,
427
+ "epoch": 0.38071065989847713,
428
+ "step": 600
429
+ },
430
+ {
431
+ "loss": 0.0843,
432
+ "grad_norm": 0.39768409729003906,
433
+ "learning_rate": 0.00019477872525533845,
434
+ "epoch": 0.3870558375634518,
435
+ "step": 610
436
+ },
437
+ {
438
+ "loss": 0.0921,
439
+ "grad_norm": 0.5196660757064819,
440
+ "learning_rate": 0.0001944196506110714,
441
+ "epoch": 0.3934010152284264,
442
+ "step": 620
443
+ },
444
+ {
445
+ "loss": 0.0957,
446
+ "grad_norm": 0.5350348949432373,
447
+ "learning_rate": 0.0001940489896542398,
448
+ "epoch": 0.399746192893401,
449
+ "step": 630
450
+ },
451
+ {
452
+ "eval_loss": 0.08027429133653641,
453
+ "eval_runtime": 23.4642,
454
+ "eval_samples_per_second": 56.554,
455
+ "eval_steps_per_second": 14.149,
456
+ "epoch": 0.399746192893401,
457
+ "step": 630
458
+ },
459
+ {
460
+ "loss": 0.0911,
461
+ "grad_norm": 0.7363661527633667,
462
+ "learning_rate": 0.00019366678786895286,
463
+ "epoch": 0.40609137055837563,
464
+ "step": 640
465
+ },
466
+ {
467
+ "loss": 0.0789,
468
+ "grad_norm": 0.3176012933254242,
469
+ "learning_rate": 0.00019327309215550433,
470
+ "epoch": 0.41243654822335024,
471
+ "step": 650
472
+ },
473
+ {
474
+ "loss": 0.0875,
475
+ "grad_norm": 0.4339086711406708,
476
+ "learning_rate": 0.00019286795082461722,
477
+ "epoch": 0.41878172588832485,
478
+ "step": 660
479
+ },
480
+ {
481
+ "loss": 0.0934,
482
+ "grad_norm": 0.5191656351089478,
483
+ "learning_rate": 0.00019245141359151572,
484
+ "epoch": 0.4251269035532995,
485
+ "step": 670
486
+ },
487
+ {
488
+ "loss": 0.0825,
489
+ "grad_norm": 0.41910088062286377,
490
+ "learning_rate": 0.0001920235315698244,
491
+ "epoch": 0.43147208121827413,
492
+ "step": 680
493
+ },
494
+ {
495
+ "loss": 0.0823,
496
+ "grad_norm": 0.7931292057037354,
497
+ "learning_rate": 0.0001915843572652962,
498
+ "epoch": 0.43781725888324874,
499
+ "step": 690
500
+ },
501
+ {
502
+ "loss": 0.0931,
503
+ "grad_norm": 0.6371334195137024,
504
+ "learning_rate": 0.00019113394456936927,
505
+ "epoch": 0.44416243654822335,
506
+ "step": 700
507
+ },
508
+ {
509
+ "loss": 0.0844,
510
+ "grad_norm": 0.46574267745018005,
511
+ "learning_rate": 0.00019067234875255393,
512
+ "epoch": 0.45050761421319796,
513
+ "step": 710
514
+ },
515
+ {
516
+ "loss": 0.0893,
517
+ "grad_norm": 0.5641367435455322,
518
+ "learning_rate": 0.00019019962645765054,
519
+ "epoch": 0.45685279187817257,
520
+ "step": 720
521
+ },
522
+ {
523
+ "loss": 0.0758,
524
+ "grad_norm": 0.4197913408279419,
525
+ "learning_rate": 0.0001897158356927985,
526
+ "epoch": 0.4631979695431472,
527
+ "step": 730
528
+ },
529
+ {
530
+ "loss": 0.0748,
531
+ "grad_norm": 0.5938242673873901,
532
+ "learning_rate": 0.00018922103582435845,
533
+ "epoch": 0.46954314720812185,
534
+ "step": 740
535
+ },
536
+ {
537
+ "loss": 0.082,
538
+ "grad_norm": 0.4360695481300354,
539
+ "learning_rate": 0.00018871528756962696,
540
+ "epoch": 0.47588832487309646,
541
+ "step": 750
542
+ },
543
+ {
544
+ "loss": 0.0767,
545
+ "grad_norm": 0.4777214229106903,
546
+ "learning_rate": 0.00018819865298938605,
547
+ "epoch": 0.48223350253807107,
548
+ "step": 760
549
+ },
550
+ {
551
+ "loss": 0.0681,
552
+ "grad_norm": 0.7168831825256348,
553
+ "learning_rate": 0.0001876711954802877,
554
+ "epoch": 0.4885786802030457,
555
+ "step": 770
556
+ },
557
+ {
558
+ "loss": 0.0754,
559
+ "grad_norm": 0.46045243740081787,
560
+ "learning_rate": 0.00018713297976707437,
561
+ "epoch": 0.4949238578680203,
562
+ "step": 780
563
+ },
564
+ {
565
+ "loss": 0.0853,
566
+ "grad_norm": 0.6867933869361877,
567
+ "learning_rate": 0.00018658407189463643,
568
+ "epoch": 0.501269035532995,
569
+ "step": 790
570
+ },
571
+ {
572
+ "loss": 0.0831,
573
+ "grad_norm": 0.3895761966705322,
574
+ "learning_rate": 0.00018602453921990798,
575
+ "epoch": 0.5076142131979695,
576
+ "step": 800
577
+ },
578
+ {
579
+ "loss": 0.081,
580
+ "grad_norm": 0.4646884500980377,
581
+ "learning_rate": 0.00018545445040360123,
582
+ "epoch": 0.5139593908629442,
583
+ "step": 810
584
+ },
585
+ {
586
+ "loss": 0.0729,
587
+ "grad_norm": 0.4897511601448059,
588
+ "learning_rate": 0.00018487387540178118,
589
+ "epoch": 0.5203045685279187,
590
+ "step": 820
591
+ },
592
+ {
593
+ "loss": 0.0849,
594
+ "grad_norm": 0.5871896743774414,
595
+ "learning_rate": 0.00018428288545728125,
596
+ "epoch": 0.5266497461928934,
597
+ "step": 830
598
+ },
599
+ {
600
+ "loss": 0.0751,
601
+ "grad_norm": 0.4862145781517029,
602
+ "learning_rate": 0.000183681553090961,
603
+ "epoch": 0.5329949238578681,
604
+ "step": 840
605
+ },
606
+ {
607
+ "loss": 0.0643,
608
+ "grad_norm": 0.4235672950744629,
609
+ "learning_rate": 0.0001830699520928069,
610
+ "epoch": 0.5393401015228426,
611
+ "step": 850
612
+ },
613
+ {
614
+ "loss": 0.075,
615
+ "grad_norm": 0.28317147493362427,
616
+ "learning_rate": 0.00018244815751287786,
617
+ "epoch": 0.5456852791878173,
618
+ "step": 860
619
+ },
620
+ {
621
+ "loss": 0.0731,
622
+ "grad_norm": 0.4897977411746979,
623
+ "learning_rate": 0.00018181624565209535,
624
+ "epoch": 0.5520304568527918,
625
+ "step": 870
626
+ },
627
+ {
628
+ "loss": 0.09,
629
+ "grad_norm": 0.4528476595878601,
630
+ "learning_rate": 0.0001811742940528808,
631
+ "epoch": 0.5583756345177665,
632
+ "step": 880
633
+ },
634
+ {
635
+ "loss": 0.0906,
636
+ "grad_norm": 0.4155082106590271,
637
+ "learning_rate": 0.00018052238148964005,
638
+ "epoch": 0.5647208121827412,
639
+ "step": 890
640
+ },
641
+ {
642
+ "loss": 0.0794,
643
+ "grad_norm": 0.525433361530304,
644
+ "learning_rate": 0.00017986058795909702,
645
+ "epoch": 0.5710659898477157,
646
+ "step": 900
647
+ },
648
+ {
649
+ "loss": 0.0708,
650
+ "grad_norm": 0.4712257981300354,
651
+ "learning_rate": 0.00017918899467047716,
652
+ "epoch": 0.5774111675126904,
653
+ "step": 910
654
+ },
655
+ {
656
+ "loss": 0.0758,
657
+ "grad_norm": 0.4515252709388733,
658
+ "learning_rate": 0.00017850768403554238,
659
+ "epoch": 0.583756345177665,
660
+ "step": 920
661
+ },
662
+ {
663
+ "loss": 0.0773,
664
+ "grad_norm": 0.5485005378723145,
665
+ "learning_rate": 0.00017781673965847801,
666
+ "epoch": 0.5901015228426396,
667
+ "step": 930
668
+ },
669
+ {
670
+ "loss": 0.0759,
671
+ "grad_norm": 0.5751654505729675,
672
+ "learning_rate": 0.00017711624632563372,
673
+ "epoch": 0.5964467005076142,
674
+ "step": 940
675
+ },
676
+ {
677
+ "eval_loss": 0.07487394660711288,
678
+ "eval_runtime": 23.4569,
679
+ "eval_samples_per_second": 56.572,
680
+ "eval_steps_per_second": 14.154,
681
+ "epoch": 0.5996192893401016,
682
+ "step": 945
683
+ },
684
+ {
685
+ "loss": 0.0849,
686
+ "grad_norm": 0.44699952006340027,
687
+ "learning_rate": 0.00017640628999511946,
688
+ "epoch": 0.6027918781725888,
689
+ "step": 950
690
+ },
691
+ {
692
+ "loss": 0.0653,
693
+ "grad_norm": 0.2837465703487396,
694
+ "learning_rate": 0.00017568695778625727,
695
+ "epoch": 0.6091370558375635,
696
+ "step": 960
697
+ },
698
+ {
699
+ "loss": 0.0764,
700
+ "grad_norm": 0.3115606904029846,
701
+ "learning_rate": 0.00017495833796889098,
702
+ "epoch": 0.6154822335025381,
703
+ "step": 970
704
+ },
705
+ {
706
+ "loss": 0.0796,
707
+ "grad_norm": 0.7619908452033997,
708
+ "learning_rate": 0.0001742205199525544,
709
+ "epoch": 0.6218274111675127,
710
+ "step": 980
711
+ },
712
+ {
713
+ "loss": 0.0748,
714
+ "grad_norm": 0.7126716375350952,
715
+ "learning_rate": 0.00017347359427549982,
716
+ "epoch": 0.6281725888324873,
717
+ "step": 990
718
+ },
719
+ {
720
+ "loss": 0.0828,
721
+ "grad_norm": 0.42106711864471436,
722
+ "learning_rate": 0.00017271765259358816,
723
+ "epoch": 0.6345177664974619,
724
+ "step": 1000
725
+ },
726
+ {
727
+ "loss": 0.0755,
728
+ "grad_norm": 0.7738011479377747,
729
+ "learning_rate": 0.00017195278766904157,
730
+ "epoch": 0.6408629441624365,
731
+ "step": 1010
732
+ },
733
+ {
734
+ "loss": 0.0783,
735
+ "grad_norm": 0.4499462842941284,
736
+ "learning_rate": 0.00017117909335906056,
737
+ "epoch": 0.6472081218274112,
738
+ "step": 1020
739
+ },
740
+ {
741
+ "loss": 0.0868,
742
+ "grad_norm": 0.5416187047958374,
743
+ "learning_rate": 0.00017039666460430688,
744
+ "epoch": 0.6535532994923858,
745
+ "step": 1030
746
+ },
747
+ {
748
+ "loss": 0.0601,
749
+ "grad_norm": 0.5818226933479309,
750
+ "learning_rate": 0.00016960559741725304,
751
+ "epoch": 0.6598984771573604,
752
+ "step": 1040
753
+ },
754
+ {
755
+ "loss": 0.0789,
756
+ "grad_norm": 0.5406737923622131,
757
+ "learning_rate": 0.0001688059888704007,
758
+ "epoch": 0.666243654822335,
759
+ "step": 1050
760
+ },
761
+ {
762
+ "loss": 0.076,
763
+ "grad_norm": 0.449704110622406,
764
+ "learning_rate": 0.00016799793708436873,
765
+ "epoch": 0.6725888324873096,
766
+ "step": 1060
767
+ },
768
+ {
769
+ "loss": 0.0796,
770
+ "grad_norm": 0.3174469769001007,
771
+ "learning_rate": 0.00016718154121585284,
772
+ "epoch": 0.6789340101522843,
773
+ "step": 1070
774
+ },
775
+ {
776
+ "loss": 0.0771,
777
+ "grad_norm": 0.5006186962127686,
778
+ "learning_rate": 0.00016635690144545782,
779
+ "epoch": 0.6852791878172588,
780
+ "step": 1080
781
+ },
782
+ {
783
+ "loss": 0.0652,
784
+ "grad_norm": 0.45733827352523804,
785
+ "learning_rate": 0.0001655241189654045,
786
+ "epoch": 0.6916243654822335,
787
+ "step": 1090
788
+ },
789
+ {
790
+ "loss": 0.068,
791
+ "grad_norm": 0.4655713140964508,
792
+ "learning_rate": 0.00016468329596711233,
793
+ "epoch": 0.6979695431472082,
794
+ "step": 1100
795
+ },
796
+ {
797
+ "loss": 0.0819,
798
+ "grad_norm": 0.5244554281234741,
799
+ "learning_rate": 0.0001638345356286592,
800
+ "epoch": 0.7043147208121827,
801
+ "step": 1110
802
+ },
803
+ {
804
+ "loss": 0.071,
805
+ "grad_norm": 0.3699105381965637,
806
+ "learning_rate": 0.00016297794210212049,
807
+ "epoch": 0.7106598984771574,
808
+ "step": 1120
809
+ },
810
+ {
811
+ "loss": 0.069,
812
+ "grad_norm": 0.2289179265499115,
813
+ "learning_rate": 0.00016211362050078863,
814
+ "epoch": 0.7170050761421319,
815
+ "step": 1130
816
+ },
817
+ {
818
+ "loss": 0.0748,
819
+ "grad_norm": 0.43648213148117065,
820
+ "learning_rate": 0.00016124167688627434,
821
+ "epoch": 0.7233502538071066,
822
+ "step": 1140
823
+ },
824
+ {
825
+ "loss": 0.0646,
826
+ "grad_norm": 0.42901450395584106,
827
+ "learning_rate": 0.00016036221825549185,
828
+ "epoch": 0.7296954314720813,
829
+ "step": 1150
830
+ },
831
+ {
832
+ "loss": 0.067,
833
+ "grad_norm": 0.3802100121974945,
834
+ "learning_rate": 0.00015947535252752913,
835
+ "epoch": 0.7360406091370558,
836
+ "step": 1160
837
+ },
838
+ {
839
+ "loss": 0.0786,
840
+ "grad_norm": 0.49444833397865295,
841
+ "learning_rate": 0.00015858118853040516,
842
+ "epoch": 0.7423857868020305,
843
+ "step": 1170
844
+ },
845
+ {
846
+ "loss": 0.0741,
847
+ "grad_norm": 0.4874003231525421,
848
+ "learning_rate": 0.00015767983598771545,
849
+ "epoch": 0.748730964467005,
850
+ "step": 1180
851
+ },
852
+ {
853
+ "loss": 0.0691,
854
+ "grad_norm": 0.3816661238670349,
855
+ "learning_rate": 0.0001567714055051679,
856
+ "epoch": 0.7550761421319797,
857
+ "step": 1190
858
+ },
859
+ {
860
+ "loss": 0.0693,
861
+ "grad_norm": 0.3660362660884857,
862
+ "learning_rate": 0.00015585600855701011,
863
+ "epoch": 0.7614213197969543,
864
+ "step": 1200
865
+ },
866
+ {
867
+ "loss": 0.0718,
868
+ "grad_norm": 0.4611922800540924,
869
+ "learning_rate": 0.00015493375747235047,
870
+ "epoch": 0.7677664974619289,
871
+ "step": 1210
872
+ },
873
+ {
874
+ "loss": 0.0753,
875
+ "grad_norm": 0.3931231200695038,
876
+ "learning_rate": 0.00015400476542137392,
877
+ "epoch": 0.7741116751269036,
878
+ "step": 1220
879
+ },
880
+ {
881
+ "loss": 0.0725,
882
+ "grad_norm": 0.7242083549499512,
883
+ "learning_rate": 0.00015306914640145504,
884
+ "epoch": 0.7804568527918782,
885
+ "step": 1230
886
+ },
887
+ {
888
+ "loss": 0.0826,
889
+ "grad_norm": 0.36801645159721375,
890
+ "learning_rate": 0.0001521270152231691,
891
+ "epoch": 0.7868020304568528,
892
+ "step": 1240
893
+ },
894
+ {
895
+ "loss": 0.072,
896
+ "grad_norm": 0.4194789528846741,
897
+ "learning_rate": 0.00015117848749620366,
898
+ "epoch": 0.7931472081218274,
899
+ "step": 1250
900
+ },
901
+ {
902
+ "loss": 0.0663,
903
+ "grad_norm": 0.5587615370750427,
904
+ "learning_rate": 0.00015022367961517208,
905
+ "epoch": 0.799492385786802,
906
+ "step": 1260
907
+ },
908
+ {
909
+ "eval_loss": 0.06896253675222397,
910
+ "eval_runtime": 23.7164,
911
+ "eval_samples_per_second": 55.953,
912
+ "eval_steps_per_second": 13.999,
913
+ "epoch": 0.799492385786802,
914
+ "step": 1260
915
+ },
916
+ {
917
+ "loss": 0.0678,
918
+ "grad_norm": 0.26218849420547485,
919
+ "learning_rate": 0.00014926270874533043,
920
+ "epoch": 0.8058375634517766,
921
+ "step": 1270
922
+ },
923
+ {
924
+ "loss": 0.0711,
925
+ "grad_norm": 0.3901388943195343,
926
+ "learning_rate": 0.0001482956928082003,
927
+ "epoch": 0.8121827411167513,
928
+ "step": 1280
929
+ },
930
+ {
931
+ "loss": 0.0597,
932
+ "grad_norm": 0.4186069965362549,
933
+ "learning_rate": 0.0001473227504670984,
934
+ "epoch": 0.8185279187817259,
935
+ "step": 1290
936
+ },
937
+ {
938
+ "loss": 0.0624,
939
+ "grad_norm": 0.3466233015060425,
940
+ "learning_rate": 0.00014634400111257528,
941
+ "epoch": 0.8248730964467005,
942
+ "step": 1300
943
+ },
944
+ {
945
+ "loss": 0.0643,
946
+ "grad_norm": 0.7102236151695251,
947
+ "learning_rate": 0.00014535956484776482,
948
+ "epoch": 0.8312182741116751,
949
+ "step": 1310
950
+ },
951
+ {
952
+ "loss": 0.0687,
953
+ "grad_norm": 0.44185611605644226,
954
+ "learning_rate": 0.00014436956247364645,
955
+ "epoch": 0.8375634517766497,
956
+ "step": 1320
957
+ },
958
+ {
959
+ "loss": 0.0676,
960
+ "grad_norm": 0.4165748953819275,
961
+ "learning_rate": 0.00014337411547422132,
962
+ "epoch": 0.8439086294416244,
963
+ "step": 1330
964
+ },
965
+ {
966
+ "loss": 0.0767,
967
+ "grad_norm": 0.45543935894966125,
968
+ "learning_rate": 0.00014237334600160505,
969
+ "epoch": 0.850253807106599,
970
+ "step": 1340
971
+ },
972
+ {
973
+ "loss": 0.0677,
974
+ "grad_norm": 0.3414968252182007,
975
+ "learning_rate": 0.00014136737686103827,
976
+ "epoch": 0.8565989847715736,
977
+ "step": 1350
978
+ },
979
+ {
980
+ "loss": 0.0604,
981
+ "grad_norm": 0.2950080931186676,
982
+ "learning_rate": 0.00014035633149581716,
983
+ "epoch": 0.8629441624365483,
984
+ "step": 1360
985
+ },
986
+ {
987
+ "loss": 0.0767,
988
+ "grad_norm": 0.3940573036670685,
989
+ "learning_rate": 0.0001393403339721455,
990
+ "epoch": 0.8692893401015228,
991
+ "step": 1370
992
+ },
993
+ {
994
+ "loss": 0.0634,
995
+ "grad_norm": 0.39822471141815186,
996
+ "learning_rate": 0.00013831950896391054,
997
+ "epoch": 0.8756345177664975,
998
+ "step": 1380
999
+ },
1000
+ {
1001
+ "loss": 0.065,
1002
+ "grad_norm": 0.4311733543872833,
1003
+ "learning_rate": 0.00013729398173738415,
1004
+ "epoch": 0.881979695431472,
1005
+ "step": 1390
1006
+ },
1007
+ {
1008
+ "loss": 0.0555,
1009
+ "grad_norm": 0.37173038721084595,
1010
+ "learning_rate": 0.00013626387813585116,
1011
+ "epoch": 0.8883248730964467,
1012
+ "step": 1400
1013
+ },
1014
+ {
1015
+ "loss": 0.0672,
1016
+ "grad_norm": 0.419643759727478,
1017
+ "learning_rate": 0.0001352293245641672,
1018
+ "epoch": 0.8946700507614214,
1019
+ "step": 1410
1020
+ },
1021
+ {
1022
+ "loss": 0.0746,
1023
+ "grad_norm": 0.29487496614456177,
1024
+ "learning_rate": 0.00013419044797324738,
1025
+ "epoch": 0.9010152284263959,
1026
+ "step": 1420
1027
+ },
1028
+ {
1029
+ "loss": 0.0715,
1030
+ "grad_norm": 0.536567747592926,
1031
+ "learning_rate": 0.00013314737584448796,
1032
+ "epoch": 0.9073604060913706,
1033
+ "step": 1430
1034
+ },
1035
+ {
1036
+ "loss": 0.0681,
1037
+ "grad_norm": 0.3137727677822113,
1038
+ "learning_rate": 0.00013210023617412313,
1039
+ "epoch": 0.9137055837563451,
1040
+ "step": 1440
1041
+ },
1042
+ {
1043
+ "loss": 0.0754,
1044
+ "grad_norm": 0.25659045577049255,
1045
+ "learning_rate": 0.0001310491574575184,
1046
+ "epoch": 0.9200507614213198,
1047
+ "step": 1450
1048
+ },
1049
+ {
1050
+ "loss": 0.0654,
1051
+ "grad_norm": 0.6076952219009399,
1052
+ "learning_rate": 0.00012999426867340295,
1053
+ "epoch": 0.9263959390862944,
1054
+ "step": 1460
1055
+ },
1056
+ {
1057
+ "loss": 0.0619,
1058
+ "grad_norm": 0.3243611752986908,
1059
+ "learning_rate": 0.00012893569926804237,
1060
+ "epoch": 0.932741116751269,
1061
+ "step": 1470
1062
+ },
1063
+ {
1064
+ "loss": 0.0567,
1065
+ "grad_norm": 0.5209382772445679,
1066
+ "learning_rate": 0.00012787357913935437,
1067
+ "epoch": 0.9390862944162437,
1068
+ "step": 1480
1069
+ },
1070
+ {
1071
+ "loss": 0.0631,
1072
+ "grad_norm": 0.36193498969078064,
1073
+ "learning_rate": 0.00012680803862096874,
1074
+ "epoch": 0.9454314720812182,
1075
+ "step": 1490
1076
+ },
1077
+ {
1078
+ "loss": 0.0609,
1079
+ "grad_norm": 0.3028424382209778,
1080
+ "learning_rate": 0.0001257392084662342,
1081
+ "epoch": 0.9517766497461929,
1082
+ "step": 1500
1083
+ },
1084
+ {
1085
+ "loss": 0.0721,
1086
+ "grad_norm": 0.6666154861450195,
1087
+ "learning_rate": 0.00012466721983217332,
1088
+ "epoch": 0.9581218274111675,
1089
+ "step": 1510
1090
+ },
1091
+ {
1092
+ "loss": 0.0603,
1093
+ "grad_norm": 0.3871685564517975,
1094
+ "learning_rate": 0.0001235922042633883,
1095
+ "epoch": 0.9644670050761421,
1096
+ "step": 1520
1097
+ },
1098
+ {
1099
+ "loss": 0.0644,
1100
+ "grad_norm": 0.25319841504096985,
1101
+ "learning_rate": 0.00012251429367591903,
1102
+ "epoch": 0.9708121827411168,
1103
+ "step": 1530
1104
+ },
1105
+ {
1106
+ "loss": 0.0582,
1107
+ "grad_norm": 0.34290412068367004,
1108
+ "learning_rate": 0.00012143362034105541,
1109
+ "epoch": 0.9771573604060914,
1110
+ "step": 1540
1111
+ },
1112
+ {
1113
+ "loss": 0.0612,
1114
+ "grad_norm": 0.39071834087371826,
1115
+ "learning_rate": 0.0001203503168691064,
1116
+ "epoch": 0.983502538071066,
1117
+ "step": 1550
1118
+ },
1119
+ {
1120
+ "loss": 0.0609,
1121
+ "grad_norm": 0.3652092516422272,
1122
+ "learning_rate": 0.00011926451619312724,
1123
+ "epoch": 0.9898477157360406,
1124
+ "step": 1560
1125
+ },
1126
+ {
1127
+ "loss": 0.0721,
1128
+ "grad_norm": 0.29865705966949463,
1129
+ "learning_rate": 0.00011817635155260707,
1130
+ "epoch": 0.9961928934010152,
1131
+ "step": 1570
1132
+ },
1133
+ {
1134
+ "eval_loss": 0.061239708214998245,
1135
+ "eval_runtime": 23.5194,
1136
+ "eval_samples_per_second": 56.421,
1137
+ "eval_steps_per_second": 14.116,
1138
+ "epoch": 0.9993654822335025,
1139
+ "step": 1575
1140
+ },
1141
+ {
1142
+ "loss": 0.0582,
1143
+ "grad_norm": 0.2990482449531555,
1144
+ "learning_rate": 0.00011708595647711909,
1145
+ "epoch": 1.00253807106599,
1146
+ "step": 1580
1147
+ },
1148
+ {
1149
+ "loss": 0.0416,
1150
+ "grad_norm": 0.3069770932197571,
1151
+ "learning_rate": 0.00011599346476993495,
1152
+ "epoch": 1.0088832487309645,
1153
+ "step": 1590
1154
+ },
1155
+ {
1156
+ "loss": 0.0434,
1157
+ "grad_norm": 0.29351532459259033,
1158
+ "learning_rate": 0.0001148990104916058,
1159
+ "epoch": 1.015228426395939,
1160
+ "step": 1600
1161
+ },
1162
+ {
1163
+ "loss": 0.0359,
1164
+ "grad_norm": 0.2513970136642456,
1165
+ "learning_rate": 0.00011380272794351138,
1166
+ "epoch": 1.0215736040609138,
1167
+ "step": 1610
1168
+ },
1169
+ {
1170
+ "loss": 0.0542,
1171
+ "grad_norm": 0.3737475574016571,
1172
+ "learning_rate": 0.00011270475165138,
1173
+ "epoch": 1.0279187817258884,
1174
+ "step": 1620
1175
+ },
1176
+ {
1177
+ "loss": 0.0444,
1178
+ "grad_norm": 0.43037623167037964,
1179
+ "learning_rate": 0.00011160521634878058,
1180
+ "epoch": 1.034263959390863,
1181
+ "step": 1630
1182
+ },
1183
+ {
1184
+ "loss": 0.0393,
1185
+ "grad_norm": 0.5113381147384644,
1186
+ "learning_rate": 0.00011050425696058963,
1187
+ "epoch": 1.0406091370558375,
1188
+ "step": 1640
1189
+ },
1190
+ {
1191
+ "loss": 0.0447,
1192
+ "grad_norm": 0.40034595131874084,
1193
+ "learning_rate": 0.00010940200858643424,
1194
+ "epoch": 1.0469543147208122,
1195
+ "step": 1650
1196
+ },
1197
+ {
1198
+ "loss": 0.0378,
1199
+ "grad_norm": 0.3174040615558624,
1200
+ "learning_rate": 0.00010829860648411407,
1201
+ "epoch": 1.0532994923857868,
1202
+ "step": 1660
1203
+ },
1204
+ {
1205
+ "loss": 0.0391,
1206
+ "grad_norm": 0.4041596055030823,
1207
+ "learning_rate": 0.00010719418605300374,
1208
+ "epoch": 1.0596446700507614,
1209
+ "step": 1670
1210
+ },
1211
+ {
1212
+ "loss": 0.0379,
1213
+ "grad_norm": 0.3625265061855316,
1214
+ "learning_rate": 0.00010608888281743778,
1215
+ "epoch": 1.0659898477157361,
1216
+ "step": 1680
1217
+ },
1218
+ {
1219
+ "loss": 0.031,
1220
+ "grad_norm": 0.4304555058479309,
1221
+ "learning_rate": 0.0001049828324100804,
1222
+ "epoch": 1.0723350253807107,
1223
+ "step": 1690
1224
+ },
1225
+ {
1226
+ "loss": 0.0487,
1227
+ "grad_norm": 0.2591390311717987,
1228
+ "learning_rate": 0.0001038761705552819,
1229
+ "epoch": 1.0786802030456852,
1230
+ "step": 1700
1231
+ },
1232
+ {
1233
+ "loss": 0.0394,
1234
+ "grad_norm": 0.3746626377105713,
1235
+ "learning_rate": 0.00010276903305242372,
1236
+ "epoch": 1.0850253807106598,
1237
+ "step": 1710
1238
+ },
1239
+ {
1240
+ "loss": 0.0411,
1241
+ "grad_norm": 0.3750106990337372,
1242
+ "learning_rate": 0.00010166155575925454,
1243
+ "epoch": 1.0913705583756346,
1244
+ "step": 1720
1245
+ },
1246
+ {
1247
+ "loss": 0.0373,
1248
+ "grad_norm": 0.45232951641082764,
1249
+ "learning_rate": 0.00010055387457521892,
1250
+ "epoch": 1.0977157360406091,
1251
+ "step": 1730
1252
+ },
1253
+ {
1254
+ "loss": 0.0409,
1255
+ "grad_norm": 0.40455761551856995,
1256
+ "learning_rate": 9.944612542478109e-05,
1257
+ "epoch": 1.1040609137055837,
1258
+ "step": 1740
1259
+ },
1260
+ {
1261
+ "loss": 0.0404,
1262
+ "grad_norm": 0.45171070098876953,
1263
+ "learning_rate": 9.833844424074546e-05,
1264
+ "epoch": 1.1104060913705585,
1265
+ "step": 1750
1266
+ },
1267
+ {
1268
+ "loss": 0.0354,
1269
+ "grad_norm": 0.16944268345832825,
1270
+ "learning_rate": 9.723096694757629e-05,
1271
+ "epoch": 1.116751269035533,
1272
+ "step": 1760
1273
+ },
1274
+ {
1275
+ "loss": 0.0449,
1276
+ "grad_norm": 0.3960643410682678,
1277
+ "learning_rate": 9.612382944471811e-05,
1278
+ "epoch": 1.1230964467005076,
1279
+ "step": 1770
1280
+ },
1281
+ {
1282
+ "loss": 0.0383,
1283
+ "grad_norm": 0.39104658365249634,
1284
+ "learning_rate": 9.50171675899196e-05,
1285
+ "epoch": 1.1294416243654823,
1286
+ "step": 1780
1287
+ },
1288
+ {
1289
+ "loss": 0.0401,
1290
+ "grad_norm": 0.3628897964954376,
1291
+ "learning_rate": 9.391111718256225e-05,
1292
+ "epoch": 1.135786802030457,
1293
+ "step": 1790
1294
+ },
1295
+ {
1296
+ "loss": 0.038,
1297
+ "grad_norm": 0.46199262142181396,
1298
+ "learning_rate": 9.28058139469963e-05,
1299
+ "epoch": 1.1421319796954315,
1300
+ "step": 1800
1301
+ },
1302
+ {
1303
+ "loss": 0.0404,
1304
+ "grad_norm": 0.5942744612693787,
1305
+ "learning_rate": 9.170139351588594e-05,
1306
+ "epoch": 1.148477157360406,
1307
+ "step": 1810
1308
+ },
1309
+ {
1310
+ "loss": 0.0359,
1311
+ "grad_norm": 0.46782055497169495,
1312
+ "learning_rate": 9.059799141356579e-05,
1313
+ "epoch": 1.1548223350253808,
1314
+ "step": 1820
1315
+ },
1316
+ {
1317
+ "loss": 0.0413,
1318
+ "grad_norm": 0.42671218514442444,
1319
+ "learning_rate": 8.949574303941039e-05,
1320
+ "epoch": 1.1611675126903553,
1321
+ "step": 1830
1322
+ },
1323
+ {
1324
+ "loss": 0.0455,
1325
+ "grad_norm": 0.41504427790641785,
1326
+ "learning_rate": 8.839478365121943e-05,
1327
+ "epoch": 1.16751269035533,
1328
+ "step": 1840
1329
+ },
1330
+ {
1331
+ "loss": 0.0446,
1332
+ "grad_norm": 0.19736254215240479,
1333
+ "learning_rate": 8.729524834862003e-05,
1334
+ "epoch": 1.1738578680203045,
1335
+ "step": 1850
1336
+ },
1337
+ {
1338
+ "loss": 0.0435,
1339
+ "grad_norm": 0.4627068340778351,
1340
+ "learning_rate": 8.619727205648863e-05,
1341
+ "epoch": 1.1802030456852792,
1342
+ "step": 1860
1343
+ },
1344
+ {
1345
+ "loss": 0.0411,
1346
+ "grad_norm": 0.4953509569168091,
1347
+ "learning_rate": 8.51009895083942e-05,
1348
+ "epoch": 1.1865482233502538,
1349
+ "step": 1870
1350
+ },
1351
+ {
1352
+ "loss": 0.0311,
1353
+ "grad_norm": 0.21544459462165833,
1354
+ "learning_rate": 8.400653523006506e-05,
1355
+ "epoch": 1.1928934010152283,
1356
+ "step": 1880
1357
+ },
1358
+ {
1359
+ "loss": 0.0469,
1360
+ "grad_norm": 0.4519999921321869,
1361
+ "learning_rate": 8.291404352288095e-05,
1362
+ "epoch": 1.1992385786802031,
1363
+ "step": 1890
1364
+ },
1365
+ {
1366
+ "eval_loss": 0.05870498716831207,
1367
+ "eval_runtime": 23.3902,
1368
+ "eval_samples_per_second": 56.733,
1369
+ "eval_steps_per_second": 14.194,
1370
+ "epoch": 1.1992385786802031,
1371
+ "step": 1890
1372
+ },
1373
+ {
1374
+ "loss": 0.0466,
1375
+ "grad_norm": 0.6195783019065857,
1376
+ "learning_rate": 8.182364844739297e-05,
1377
+ "epoch": 1.2055837563451777,
1378
+ "step": 1900
1379
+ },
1380
+ {
1381
+ "loss": 0.0363,
1382
+ "grad_norm": 0.4518129229545593,
1383
+ "learning_rate": 8.07354838068728e-05,
1384
+ "epoch": 1.2119289340101522,
1385
+ "step": 1910
1386
+ },
1387
+ {
1388
+ "loss": 0.0455,
1389
+ "grad_norm": 0.47803568840026855,
1390
+ "learning_rate": 7.964968313089363e-05,
1391
+ "epoch": 1.218274111675127,
1392
+ "step": 1920
1393
+ },
1394
+ {
1395
+ "loss": 0.0395,
1396
+ "grad_norm": 0.17739883065223694,
1397
+ "learning_rate": 7.856637965894462e-05,
1398
+ "epoch": 1.2246192893401016,
1399
+ "step": 1930
1400
+ },
1401
+ {
1402
+ "loss": 0.0415,
1403
+ "grad_norm": 0.3920941650867462,
1404
+ "learning_rate": 7.748570632408099e-05,
1405
+ "epoch": 1.2309644670050761,
1406
+ "step": 1940
1407
+ },
1408
+ {
1409
+ "loss": 0.0377,
1410
+ "grad_norm": 0.3294707238674164,
1411
+ "learning_rate": 7.640779573661171e-05,
1412
+ "epoch": 1.2373096446700507,
1413
+ "step": 1950
1414
+ },
1415
+ {
1416
+ "loss": 0.039,
1417
+ "grad_norm": 0.34252774715423584,
1418
+ "learning_rate": 7.53327801678267e-05,
1419
+ "epoch": 1.2436548223350254,
1420
+ "step": 1960
1421
+ },
1422
+ {
1423
+ "loss": 0.0403,
1424
+ "grad_norm": 0.4406156837940216,
1425
+ "learning_rate": 7.426079153376582e-05,
1426
+ "epoch": 1.25,
1427
+ "step": 1970
1428
+ },
1429
+ {
1430
+ "loss": 0.0381,
1431
+ "grad_norm": 0.33009278774261475,
1432
+ "learning_rate": 7.319196137903126e-05,
1433
+ "epoch": 1.2563451776649746,
1434
+ "step": 1980
1435
+ },
1436
+ {
1437
+ "loss": 0.0397,
1438
+ "grad_norm": 0.4772908389568329,
1439
+ "learning_rate": 7.212642086064564e-05,
1440
+ "epoch": 1.262690355329949,
1441
+ "step": 1990
1442
+ },
1443
+ {
1444
+ "loss": 0.0433,
1445
+ "grad_norm": 0.3636586666107178,
1446
+ "learning_rate": 7.106430073195764e-05,
1447
+ "epoch": 1.2690355329949239,
1448
+ "step": 2000
1449
+ },
1450
+ {
1451
+ "loss": 0.0399,
1452
+ "grad_norm": 0.37633877992630005,
1453
+ "learning_rate": 7.000573132659706e-05,
1454
+ "epoch": 1.2753807106598984,
1455
+ "step": 2010
1456
+ },
1457
+ {
1458
+ "loss": 0.0425,
1459
+ "grad_norm": 0.3575541377067566,
1460
+ "learning_rate": 6.895084254248162e-05,
1461
+ "epoch": 1.281725888324873,
1462
+ "step": 2020
1463
+ },
1464
+ {
1465
+ "loss": 0.0475,
1466
+ "grad_norm": 0.41141068935394287,
1467
+ "learning_rate": 6.78997638258769e-05,
1468
+ "epoch": 1.2880710659898478,
1469
+ "step": 2030
1470
+ },
1471
+ {
1472
+ "loss": 0.0391,
1473
+ "grad_norm": 0.26739487051963806,
1474
+ "learning_rate": 6.685262415551207e-05,
1475
+ "epoch": 1.2944162436548223,
1476
+ "step": 2040
1477
+ },
1478
+ {
1479
+ "loss": 0.0432,
1480
+ "grad_norm": 0.40416017174720764,
1481
+ "learning_rate": 6.580955202675263e-05,
1482
+ "epoch": 1.3007614213197969,
1483
+ "step": 2050
1484
+ },
1485
+ {
1486
+ "loss": 0.0428,
1487
+ "grad_norm": 0.46243521571159363,
1488
+ "learning_rate": 6.47706754358328e-05,
1489
+ "epoch": 1.3071065989847717,
1490
+ "step": 2060
1491
+ },
1492
+ {
1493
+ "loss": 0.036,
1494
+ "grad_norm": 0.4293893575668335,
1495
+ "learning_rate": 6.373612186414883e-05,
1496
+ "epoch": 1.3134517766497462,
1497
+ "step": 2070
1498
+ },
1499
+ {
1500
+ "loss": 0.0392,
1501
+ "grad_norm": 0.29024600982666016,
1502
+ "learning_rate": 6.270601826261589e-05,
1503
+ "epoch": 1.3197969543147208,
1504
+ "step": 2080
1505
+ },
1506
+ {
1507
+ "loss": 0.0376,
1508
+ "grad_norm": 0.4428802728652954,
1509
+ "learning_rate": 6.168049103608948e-05,
1510
+ "epoch": 1.3261421319796955,
1511
+ "step": 2090
1512
+ },
1513
+ {
1514
+ "loss": 0.038,
1515
+ "grad_norm": 0.38020622730255127,
1516
+ "learning_rate": 6.0659666027854555e-05,
1517
+ "epoch": 1.33248730964467,
1518
+ "step": 2100
1519
+ },
1520
+ {
1521
+ "loss": 0.0327,
1522
+ "grad_norm": 0.3990342915058136,
1523
+ "learning_rate": 5.96436685041829e-05,
1524
+ "epoch": 1.3388324873096447,
1525
+ "step": 2110
1526
+ },
1527
+ {
1528
+ "loss": 0.0382,
1529
+ "grad_norm": 0.3502098023891449,
1530
+ "learning_rate": 5.863262313896176e-05,
1531
+ "epoch": 1.3451776649746192,
1532
+ "step": 2120
1533
+ },
1534
+ {
1535
+ "loss": 0.0359,
1536
+ "grad_norm": 0.1891971081495285,
1537
+ "learning_rate": 5.7626653998394984e-05,
1538
+ "epoch": 1.351522842639594,
1539
+ "step": 2130
1540
+ },
1541
+ {
1542
+ "loss": 0.0573,
1543
+ "grad_norm": 0.5771194100379944,
1544
+ "learning_rate": 5.6625884525778706e-05,
1545
+ "epoch": 1.3578680203045685,
1546
+ "step": 2140
1547
+ },
1548
+ {
1549
+ "loss": 0.043,
1550
+ "grad_norm": 0.38253533840179443,
1551
+ "learning_rate": 5.5630437526353554e-05,
1552
+ "epoch": 1.364213197969543,
1553
+ "step": 2150
1554
+ },
1555
+ {
1556
+ "loss": 0.0462,
1557
+ "grad_norm": 0.3203031122684479,
1558
+ "learning_rate": 5.464043515223518e-05,
1559
+ "epoch": 1.3705583756345177,
1560
+ "step": 2160
1561
+ },
1562
+ {
1563
+ "loss": 0.0339,
1564
+ "grad_norm": 0.259575754404068,
1565
+ "learning_rate": 5.365599888742476e-05,
1566
+ "epoch": 1.3769035532994924,
1567
+ "step": 2170
1568
+ },
1569
+ {
1570
+ "loss": 0.039,
1571
+ "grad_norm": 0.2003287672996521,
1572
+ "learning_rate": 5.267724953290161e-05,
1573
+ "epoch": 1.383248730964467,
1574
+ "step": 2180
1575
+ },
1576
+ {
1577
+ "loss": 0.0347,
1578
+ "grad_norm": 0.38988855481147766,
1579
+ "learning_rate": 5.170430719179966e-05,
1580
+ "epoch": 1.3895939086294415,
1581
+ "step": 2190
1582
+ },
1583
+ {
1584
+ "loss": 0.0363,
1585
+ "grad_norm": 0.29437464475631714,
1586
+ "learning_rate": 5.073729125466957e-05,
1587
+ "epoch": 1.3959390862944163,
1588
+ "step": 2200
1589
+ },
1590
+ {
1591
+ "eval_loss": 0.057627856731414795,
1592
+ "eval_runtime": 23.9115,
1593
+ "eval_samples_per_second": 55.496,
1594
+ "eval_steps_per_second": 13.885,
1595
+ "epoch": 1.3991116751269035,
1596
+ "step": 2205
1597
+ },
1598
+ {
1599
+ "loss": 0.0359,
1600
+ "grad_norm": 0.4737837314605713,
1601
+ "learning_rate": 4.9776320384827926e-05,
1602
+ "epoch": 1.4022842639593909,
1603
+ "step": 2210
1604
+ },
1605
+ {
1606
+ "loss": 0.0332,
1607
+ "grad_norm": 0.3594450056552887,
1608
+ "learning_rate": 4.882151250379635e-05,
1609
+ "epoch": 1.4086294416243654,
1610
+ "step": 2220
1611
+ },
1612
+ {
1613
+ "loss": 0.0425,
1614
+ "grad_norm": 0.40817004442214966,
1615
+ "learning_rate": 4.787298477683092e-05,
1616
+ "epoch": 1.4149746192893402,
1617
+ "step": 2230
1618
+ },
1619
+ {
1620
+ "loss": 0.0364,
1621
+ "grad_norm": 0.43428879976272583,
1622
+ "learning_rate": 4.6930853598544997e-05,
1623
+ "epoch": 1.4213197969543148,
1624
+ "step": 2240
1625
+ },
1626
+ {
1627
+ "loss": 0.0357,
1628
+ "grad_norm": 0.40719398856163025,
1629
+ "learning_rate": 4.599523457862609e-05,
1630
+ "epoch": 1.4276649746192893,
1631
+ "step": 2250
1632
+ },
1633
+ {
1634
+ "loss": 0.0424,
1635
+ "grad_norm": 0.3827572762966156,
1636
+ "learning_rate": 4.506624252764954e-05,
1637
+ "epoch": 1.434010152284264,
1638
+ "step": 2260
1639
+ },
1640
+ {
1641
+ "loss": 0.0393,
1642
+ "grad_norm": 0.3325338065624237,
1643
+ "learning_rate": 4.414399144298989e-05,
1644
+ "epoch": 1.4403553299492386,
1645
+ "step": 2270
1646
+ },
1647
+ {
1648
+ "loss": 0.0406,
1649
+ "grad_norm": 0.1980651319026947,
1650
+ "learning_rate": 4.3228594494832106e-05,
1651
+ "epoch": 1.4467005076142132,
1652
+ "step": 2280
1653
+ },
1654
+ {
1655
+ "loss": 0.0336,
1656
+ "grad_norm": 0.2884531319141388,
1657
+ "learning_rate": 4.232016401228458e-05,
1658
+ "epoch": 1.4530456852791878,
1659
+ "step": 2290
1660
+ },
1661
+ {
1662
+ "loss": 0.0346,
1663
+ "grad_norm": 0.3362226188182831,
1664
+ "learning_rate": 4.1418811469594884e-05,
1665
+ "epoch": 1.4593908629441623,
1666
+ "step": 2300
1667
+ },
1668
+ {
1669
+ "loss": 0.0326,
1670
+ "grad_norm": 0.3298276364803314,
1671
+ "learning_rate": 4.05246474724709e-05,
1672
+ "epoch": 1.465736040609137,
1673
+ "step": 2310
1674
+ },
1675
+ {
1676
+ "loss": 0.0412,
1677
+ "grad_norm": 0.29744839668273926,
1678
+ "learning_rate": 3.9637781744508205e-05,
1679
+ "epoch": 1.4720812182741116,
1680
+ "step": 2320
1681
+ },
1682
+ {
1683
+ "loss": 0.0313,
1684
+ "grad_norm": 0.16129061579704285,
1685
+ "learning_rate": 3.875832311372568e-05,
1686
+ "epoch": 1.4784263959390862,
1687
+ "step": 2330
1688
+ },
1689
+ {
1690
+ "loss": 0.0392,
1691
+ "grad_norm": 0.4510703980922699,
1692
+ "learning_rate": 3.788637949921141e-05,
1693
+ "epoch": 1.484771573604061,
1694
+ "step": 2340
1695
+ },
1696
+ {
1697
+ "loss": 0.032,
1698
+ "grad_norm": 0.29818281531333923,
1699
+ "learning_rate": 3.70220578978795e-05,
1700
+ "epoch": 1.4911167512690355,
1701
+ "step": 2350
1702
+ },
1703
+ {
1704
+ "loss": 0.0427,
1705
+ "grad_norm": 0.48012402653694153,
1706
+ "learning_rate": 3.616546437134083e-05,
1707
+ "epoch": 1.49746192893401,
1708
+ "step": 2360
1709
+ },
1710
+ {
1711
+ "loss": 0.0311,
1712
+ "grad_norm": 0.29981890320777893,
1713
+ "learning_rate": 3.531670403288766e-05,
1714
+ "epoch": 1.5038071065989849,
1715
+ "step": 2370
1716
+ },
1717
+ {
1718
+ "loss": 0.0346,
1719
+ "grad_norm": 0.5202313661575317,
1720
+ "learning_rate": 3.447588103459549e-05,
1721
+ "epoch": 1.5101522842639594,
1722
+ "step": 2380
1723
+ },
1724
+ {
1725
+ "loss": 0.0395,
1726
+ "grad_norm": 0.33824649453163147,
1727
+ "learning_rate": 3.3643098554542195e-05,
1728
+ "epoch": 1.516497461928934,
1729
+ "step": 2390
1730
+ },
1731
+ {
1732
+ "loss": 0.0337,
1733
+ "grad_norm": 0.40760862827301025,
1734
+ "learning_rate": 3.281845878414719e-05,
1735
+ "epoch": 1.5228426395939088,
1736
+ "step": 2400
1737
+ },
1738
+ {
1739
+ "loss": 0.0411,
1740
+ "grad_norm": 0.41836321353912354,
1741
+ "learning_rate": 3.2002062915631306e-05,
1742
+ "epoch": 1.529187817258883,
1743
+ "step": 2410
1744
+ },
1745
+ {
1746
+ "loss": 0.0353,
1747
+ "grad_norm": 0.41234228014945984,
1748
+ "learning_rate": 3.1194011129599324e-05,
1749
+ "epoch": 1.5355329949238579,
1750
+ "step": 2420
1751
+ },
1752
+ {
1753
+ "loss": 0.0358,
1754
+ "grad_norm": 0.4182668924331665,
1755
+ "learning_rate": 3.0394402582746996e-05,
1756
+ "epoch": 1.5418781725888326,
1757
+ "step": 2430
1758
+ },
1759
+ {
1760
+ "loss": 0.0293,
1761
+ "grad_norm": 0.24028272926807404,
1762
+ "learning_rate": 2.9603335395693143e-05,
1763
+ "epoch": 1.548223350253807,
1764
+ "step": 2440
1765
+ },
1766
+ {
1767
+ "loss": 0.0373,
1768
+ "grad_norm": 0.4068399667739868,
1769
+ "learning_rate": 2.882090664093945e-05,
1770
+ "epoch": 1.5545685279187818,
1771
+ "step": 2450
1772
+ },
1773
+ {
1774
+ "loss": 0.0343,
1775
+ "grad_norm": 0.4273289442062378,
1776
+ "learning_rate": 2.8047212330958474e-05,
1777
+ "epoch": 1.5609137055837563,
1778
+ "step": 2460
1779
+ },
1780
+ {
1781
+ "loss": 0.0351,
1782
+ "grad_norm": 0.346050888299942,
1783
+ "learning_rate": 2.7282347406411836e-05,
1784
+ "epoch": 1.5672588832487309,
1785
+ "step": 2470
1786
+ },
1787
+ {
1788
+ "loss": 0.0344,
1789
+ "grad_norm": 0.19225843250751495,
1790
+ "learning_rate": 2.6526405724500193e-05,
1791
+ "epoch": 1.5736040609137056,
1792
+ "step": 2480
1793
+ },
1794
+ {
1795
+ "loss": 0.0291,
1796
+ "grad_norm": 0.23613357543945312,
1797
+ "learning_rate": 2.5779480047445627e-05,
1798
+ "epoch": 1.5799492385786802,
1799
+ "step": 2490
1800
+ },
1801
+ {
1802
+ "loss": 0.0444,
1803
+ "grad_norm": 0.26933079957962036,
1804
+ "learning_rate": 2.5041662031109047e-05,
1805
+ "epoch": 1.5862944162436547,
1806
+ "step": 2500
1807
+ },
1808
+ {
1809
+ "loss": 0.0363,
1810
+ "grad_norm": 0.3984769582748413,
1811
+ "learning_rate": 2.4313042213742744e-05,
1812
+ "epoch": 1.5926395939086295,
1813
+ "step": 2510
1814
+ },
1815
+ {
1816
+ "loss": 0.035,
1817
+ "grad_norm": 0.24797454476356506,
1818
+ "learning_rate": 2.3593710004880565e-05,
1819
+ "epoch": 1.598984771573604,
1820
+ "step": 2520
1821
+ },
1822
+ {
1823
+ "eval_loss": 0.05244370922446251,
1824
+ "eval_runtime": 23.8874,
1825
+ "eval_samples_per_second": 55.552,
1826
+ "eval_steps_per_second": 13.899,
1827
+ "epoch": 1.598984771573604,
1828
+ "step": 2520
1829
+ },
1830
+ {
1831
+ "loss": 0.0306,
1832
+ "grad_norm": 0.2291479855775833,
1833
+ "learning_rate": 2.2883753674366315e-05,
1834
+ "epoch": 1.6053299492385786,
1835
+ "step": 2530
1836
+ },
1837
+ {
1838
+ "loss": 0.037,
1839
+ "grad_norm": 0.3272293210029602,
1840
+ "learning_rate": 2.2183260341522026e-05,
1841
+ "epoch": 1.6116751269035534,
1842
+ "step": 2540
1843
+ },
1844
+ {
1845
+ "loss": 0.0297,
1846
+ "grad_norm": 0.18494157493114471,
1847
+ "learning_rate": 2.149231596445762e-05,
1848
+ "epoch": 1.618020304568528,
1849
+ "step": 2550
1850
+ },
1851
+ {
1852
+ "loss": 0.0342,
1853
+ "grad_norm": 0.25217947363853455,
1854
+ "learning_rate": 2.0811005329522825e-05,
1855
+ "epoch": 1.6243654822335025,
1856
+ "step": 2560
1857
+ },
1858
+ {
1859
+ "loss": 0.0407,
1860
+ "grad_norm": 0.305937796831131,
1861
+ "learning_rate": 2.0139412040903016e-05,
1862
+ "epoch": 1.6307106598984773,
1863
+ "step": 2570
1864
+ },
1865
+ {
1866
+ "loss": 0.0346,
1867
+ "grad_norm": 0.3220507502555847,
1868
+ "learning_rate": 1.9477618510359964e-05,
1869
+ "epoch": 1.6370558375634516,
1870
+ "step": 2580
1871
+ },
1872
+ {
1873
+ "loss": 0.0349,
1874
+ "grad_norm": 0.2863691747188568,
1875
+ "learning_rate": 1.882570594711919e-05,
1876
+ "epoch": 1.6434010152284264,
1877
+ "step": 2590
1878
+ },
1879
+ {
1880
+ "loss": 0.0333,
1881
+ "grad_norm": 0.7027552127838135,
1882
+ "learning_rate": 1.8183754347904646e-05,
1883
+ "epoch": 1.649746192893401,
1884
+ "step": 2600
1885
+ },
1886
+ {
1887
+ "loss": 0.0354,
1888
+ "grad_norm": 0.43354132771492004,
1889
+ "learning_rate": 1.755184248712215e-05,
1890
+ "epoch": 1.6560913705583755,
1891
+ "step": 2610
1892
+ },
1893
+ {
1894
+ "loss": 0.0346,
1895
+ "grad_norm": 0.3740198612213135,
1896
+ "learning_rate": 1.6930047907193114e-05,
1897
+ "epoch": 1.6624365482233503,
1898
+ "step": 2620
1899
+ },
1900
+ {
1901
+ "loss": 0.0412,
1902
+ "grad_norm": 0.40340694785118103,
1903
+ "learning_rate": 1.631844690903904e-05,
1904
+ "epoch": 1.6687817258883249,
1905
+ "step": 2630
1906
+ },
1907
+ {
1908
+ "loss": 0.0309,
1909
+ "grad_norm": 0.3262211084365845,
1910
+ "learning_rate": 1.571711454271877e-05,
1911
+ "epoch": 1.6751269035532994,
1912
+ "step": 2640
1913
+ },
1914
+ {
1915
+ "loss": 0.0333,
1916
+ "grad_norm": 0.49483418464660645,
1917
+ "learning_rate": 1.5126124598218826e-05,
1918
+ "epoch": 1.6814720812182742,
1919
+ "step": 2650
1920
+ },
1921
+ {
1922
+ "loss": 0.0347,
1923
+ "grad_norm": 0.34749022126197815,
1924
+ "learning_rate": 1.4545549596398789e-05,
1925
+ "epoch": 1.6878172588832487,
1926
+ "step": 2660
1927
+ },
1928
+ {
1929
+ "loss": 0.0256,
1930
+ "grad_norm": 0.2993925213813782,
1931
+ "learning_rate": 1.3975460780092053e-05,
1932
+ "epoch": 1.6941624365482233,
1933
+ "step": 2670
1934
+ },
1935
+ {
1936
+ "loss": 0.0432,
1937
+ "grad_norm": 0.21877335011959076,
1938
+ "learning_rate": 1.3415928105363595e-05,
1939
+ "epoch": 1.700507614213198,
1940
+ "step": 2680
1941
+ },
1942
+ {
1943
+ "loss": 0.0363,
1944
+ "grad_norm": 0.4628012180328369,
1945
+ "learning_rate": 1.2867020232925675e-05,
1946
+ "epoch": 1.7068527918781726,
1947
+ "step": 2690
1948
+ },
1949
+ {
1950
+ "loss": 0.0283,
1951
+ "grad_norm": 0.34107235074043274,
1952
+ "learning_rate": 1.2328804519712312e-05,
1953
+ "epoch": 1.7131979695431472,
1954
+ "step": 2700
1955
+ },
1956
+ {
1957
+ "loss": 0.0358,
1958
+ "grad_norm": 0.38295891880989075,
1959
+ "learning_rate": 1.1801347010613962e-05,
1960
+ "epoch": 1.719543147208122,
1961
+ "step": 2710
1962
+ },
1963
+ {
1964
+ "loss": 0.0303,
1965
+ "grad_norm": 0.3934635519981384,
1966
+ "learning_rate": 1.1284712430373056e-05,
1967
+ "epoch": 1.7258883248730963,
1968
+ "step": 2720
1969
+ },
1970
+ {
1971
+ "loss": 0.0355,
1972
+ "grad_norm": 0.4926978051662445,
1973
+ "learning_rate": 1.0778964175641548e-05,
1974
+ "epoch": 1.732233502538071,
1975
+ "step": 2730
1976
+ },
1977
+ {
1978
+ "loss": 0.0289,
1979
+ "grad_norm": 0.2570490837097168,
1980
+ "learning_rate": 1.0284164307201493e-05,
1981
+ "epoch": 1.7385786802030458,
1982
+ "step": 2740
1983
+ },
1984
+ {
1985
+ "loss": 0.0366,
1986
+ "grad_norm": 0.4256710708141327,
1987
+ "learning_rate": 9.800373542349483e-06,
1988
+ "epoch": 1.7449238578680202,
1989
+ "step": 2750
1990
+ },
1991
+ {
1992
+ "loss": 0.0303,
1993
+ "grad_norm": 0.31267115473747253,
1994
+ "learning_rate": 9.32765124744608e-06,
1995
+ "epoch": 1.751269035532995,
1996
+ "step": 2760
1997
+ },
1998
+ {
1999
+ "loss": 0.0406,
2000
+ "grad_norm": 0.2698454260826111,
2001
+ "learning_rate": 8.86605543063076e-06,
2002
+ "epoch": 1.7576142131979695,
2003
+ "step": 2770
2004
+ },
2005
+ {
2006
+ "loss": 0.0348,
2007
+ "grad_norm": 0.2938295006752014,
2008
+ "learning_rate": 8.415642734703821e-06,
2009
+ "epoch": 1.763959390862944,
2010
+ "step": 2780
2011
+ },
2012
+ {
2013
+ "loss": 0.03,
2014
+ "grad_norm": 0.3625064790248871,
2015
+ "learning_rate": 7.976468430175599e-06,
2016
+ "epoch": 1.7703045685279188,
2017
+ "step": 2790
2018
+ },
2019
+ {
2020
+ "loss": 0.0264,
2021
+ "grad_norm": 0.35078808665275574,
2022
+ "learning_rate": 7.548586408484282e-06,
2023
+ "epoch": 1.7766497461928934,
2024
+ "step": 2800
2025
+ },
2026
+ {
2027
+ "loss": 0.0287,
2028
+ "grad_norm": 0.30412939190864563,
2029
+ "learning_rate": 7.1320491753827825e-06,
2030
+ "epoch": 1.782994923857868,
2031
+ "step": 2810
2032
+ },
2033
+ {
2034
+ "loss": 0.0299,
2035
+ "grad_norm": 0.33658623695373535,
2036
+ "learning_rate": 6.726907844495689e-06,
2037
+ "epoch": 1.7893401015228427,
2038
+ "step": 2820
2039
+ },
2040
+ {
2041
+ "loss": 0.0336,
2042
+ "grad_norm": 0.34409451484680176,
2043
+ "learning_rate": 6.333212131047162e-06,
2044
+ "epoch": 1.7956852791878173,
2045
+ "step": 2830
2046
+ },
2047
+ {
2048
+ "eval_loss": 0.05158456787467003,
2049
+ "eval_runtime": 23.7884,
2050
+ "eval_samples_per_second": 55.784,
2051
+ "eval_steps_per_second": 13.956,
2052
+ "epoch": 1.7988578680203045,
2053
+ "step": 2835
2054
+ },
2055
+ {
2056
+ "loss": 0.0359,
2057
+ "grad_norm": 0.45101943612098694,
2058
+ "learning_rate": 5.951010345760221e-06,
2059
+ "epoch": 1.8020304568527918,
2060
+ "step": 2840
2061
+ },
2062
+ {
2063
+ "loss": 0.0335,
2064
+ "grad_norm": 0.2589711844921112,
2065
+ "learning_rate": 5.580349388928607e-06,
2066
+ "epoch": 1.8083756345177666,
2067
+ "step": 2850
2068
+ },
2069
+ {
2070
+ "loss": 0.038,
2071
+ "grad_norm": 0.2490888088941574,
2072
+ "learning_rate": 5.221274744661553e-06,
2073
+ "epoch": 1.8147208121827412,
2074
+ "step": 2860
2075
+ },
2076
+ {
2077
+ "loss": 0.0305,
2078
+ "grad_norm": 0.3543383777141571,
2079
+ "learning_rate": 4.873830475302377e-06,
2080
+ "epoch": 1.8210659898477157,
2081
+ "step": 2870
2082
+ },
2083
+ {
2084
+ "loss": 0.0327,
2085
+ "grad_norm": 0.26961830258369446,
2086
+ "learning_rate": 4.538059216021595e-06,
2087
+ "epoch": 1.8274111675126905,
2088
+ "step": 2880
2089
+ },
2090
+ {
2091
+ "loss": 0.031,
2092
+ "grad_norm": 0.34061428904533386,
2093
+ "learning_rate": 4.214002169585107e-06,
2094
+ "epoch": 1.8337563451776648,
2095
+ "step": 2890
2096
+ },
2097
+ {
2098
+ "loss": 0.0255,
2099
+ "grad_norm": 0.30612456798553467,
2100
+ "learning_rate": 3.901699101298173e-06,
2101
+ "epoch": 1.8401015228426396,
2102
+ "step": 2900
2103
+ },
2104
+ {
2105
+ "loss": 0.0293,
2106
+ "grad_norm": 0.34559229016304016,
2107
+ "learning_rate": 3.60118833412576e-06,
2108
+ "epoch": 1.8464467005076142,
2109
+ "step": 2910
2110
+ },
2111
+ {
2112
+ "loss": 0.0303,
2113
+ "grad_norm": 0.3501076400279999,
2114
+ "learning_rate": 3.3125067439899406e-06,
2115
+ "epoch": 1.8527918781725887,
2116
+ "step": 2920
2117
+ },
2118
+ {
2119
+ "loss": 0.0262,
2120
+ "grad_norm": 0.560280442237854,
2121
+ "learning_rate": 3.0356897552448194e-06,
2122
+ "epoch": 1.8591370558375635,
2123
+ "step": 2930
2124
+ },
2125
+ {
2126
+ "loss": 0.029,
2127
+ "grad_norm": 0.23615562915802002,
2128
+ "learning_rate": 2.7707713363294806e-06,
2129
+ "epoch": 1.865482233502538,
2130
+ "step": 2940
2131
+ },
2132
+ {
2133
+ "loss": 0.0303,
2134
+ "grad_norm": 0.22095191478729248,
2135
+ "learning_rate": 2.517783995599865e-06,
2136
+ "epoch": 1.8718274111675126,
2137
+ "step": 2950
2138
+ },
2139
+ {
2140
+ "loss": 0.0337,
2141
+ "grad_norm": 0.5112082958221436,
2142
+ "learning_rate": 2.2767587773394605e-06,
2143
+ "epoch": 1.8781725888324874,
2144
+ "step": 2960
2145
+ },
2146
+ {
2147
+ "loss": 0.0351,
2148
+ "grad_norm": 0.3294392228126526,
2149
+ "learning_rate": 2.0477252579500062e-06,
2150
+ "epoch": 1.884517766497462,
2151
+ "step": 2970
2152
+ },
2153
+ {
2154
+ "loss": 0.0348,
2155
+ "grad_norm": 0.5974733233451843,
2156
+ "learning_rate": 1.8307115423219945e-06,
2157
+ "epoch": 1.8908629441624365,
2158
+ "step": 2980
2159
+ },
2160
+ {
2161
+ "loss": 0.0303,
2162
+ "grad_norm": 0.3671284019947052,
2163
+ "learning_rate": 1.6257442603860196e-06,
2164
+ "epoch": 1.8972081218274113,
2165
+ "step": 2990
2166
+ },
2167
+ {
2168
+ "loss": 0.0341,
2169
+ "grad_norm": 0.3103559911251068,
2170
+ "learning_rate": 1.4328485638449018e-06,
2171
+ "epoch": 1.9035532994923858,
2172
+ "step": 3000
2173
+ },
2174
+ {
2175
+ "loss": 0.0322,
2176
+ "grad_norm": 0.22946634888648987,
2177
+ "learning_rate": 1.252048123087346e-06,
2178
+ "epoch": 1.9098984771573604,
2179
+ "step": 3010
2180
+ },
2181
+ {
2182
+ "loss": 0.0299,
2183
+ "grad_norm": 0.4102902114391327,
2184
+ "learning_rate": 1.0833651242833643e-06,
2185
+ "epoch": 1.9162436548223352,
2186
+ "step": 3020
2187
+ },
2188
+ {
2189
+ "loss": 0.0359,
2190
+ "grad_norm": 0.49926474690437317,
2191
+ "learning_rate": 9.268202666617099e-07,
2192
+ "epoch": 1.9225888324873095,
2193
+ "step": 3030
2194
+ },
2195
+ {
2196
+ "loss": 0.0368,
2197
+ "grad_norm": 0.48568469285964966,
2198
+ "learning_rate": 7.824327599699199e-07,
2199
+ "epoch": 1.9289340101522843,
2200
+ "step": 3040
2201
+ },
2202
+ {
2203
+ "loss": 0.0269,
2204
+ "grad_norm": 0.4492437541484833,
2205
+ "learning_rate": 6.502203221170677e-07,
2206
+ "epoch": 1.9352791878172588,
2207
+ "step": 3050
2208
+ },
2209
+ {
2210
+ "loss": 0.0396,
2211
+ "grad_norm": 0.25819239020347595,
2212
+ "learning_rate": 5.301991769995352e-07,
2213
+ "epoch": 1.9416243654822334,
2214
+ "step": 3060
2215
+ },
2216
+ {
2217
+ "loss": 0.0301,
2218
+ "grad_norm": 0.24553845822811127,
2219
+ "learning_rate": 4.223840525102385e-07,
2220
+ "epoch": 1.9479695431472082,
2221
+ "step": 3070
2222
+ },
2223
+ {
2224
+ "loss": 0.0271,
2225
+ "grad_norm": 0.4226374924182892,
2226
+ "learning_rate": 3.267881787312854e-07,
2227
+ "epoch": 1.9543147208121827,
2228
+ "step": 3080
2229
+ },
2230
+ {
2231
+ "loss": 0.0334,
2232
+ "grad_norm": 0.5136414766311646,
2233
+ "learning_rate": 2.434232863105623e-07,
2234
+ "epoch": 1.9606598984771573,
2235
+ "step": 3090
2236
+ },
2237
+ {
2238
+ "loss": 0.0258,
2239
+ "grad_norm": 0.3708105981349945,
2240
+ "learning_rate": 1.722996050222081e-07,
2241
+ "epoch": 1.967005076142132,
2242
+ "step": 3100
2243
+ },
2244
+ {
2245
+ "loss": 0.0363,
2246
+ "grad_norm": 0.28896448016166687,
2247
+ "learning_rate": 1.1342586251132936e-07,
2248
+ "epoch": 1.9733502538071066,
2249
+ "step": 3110
2250
+ },
2251
+ {
2252
+ "loss": 0.032,
2253
+ "grad_norm": 0.2560702860355377,
2254
+ "learning_rate": 6.680928322302382e-08,
2255
+ "epoch": 1.9796954314720812,
2256
+ "step": 3120
2257
+ },
2258
+ {
2259
+ "loss": 0.0312,
2260
+ "grad_norm": 0.5401180386543274,
2261
+ "learning_rate": 3.2455587515889394e-08,
2262
+ "epoch": 1.986040609137056,
2263
+ "step": 3130
2264
+ },
2265
+ {
2266
+ "loss": 0.0319,
2267
+ "grad_norm": 0.5009672045707703,
2268
+ "learning_rate": 1.0368990960019088e-08,
2269
+ "epoch": 1.9923857868020305,
2270
+ "step": 3140
2271
+ },
2272
+ {
2273
+ "loss": 0.0253,
2274
+ "grad_norm": 0.2637391984462738,
2275
+ "learning_rate": 5.522038197369739e-10,
2276
+ "epoch": 1.998730964467005,
2277
+ "step": 3150
2278
+ },
2279
+ {
2280
+ "eval_loss": 0.051274195313453674,
2281
+ "eval_runtime": 23.5282,
2282
+ "eval_samples_per_second": 56.4,
2283
+ "eval_steps_per_second": 14.111,
2284
+ "epoch": 1.998730964467005,
2285
+ "step": 3150
2286
+ },
2287
+ {
2288
+ "train_runtime": 1163.1262,
2289
+ "train_samples_per_second": 43.335,
2290
+ "train_steps_per_second": 2.71,
2291
+ "total_flos": 8.249162336833536e+16,
2292
+ "train_loss": 0.0777996052304746,
2293
+ "epoch": 2.0,
2294
+ "step": 3152
2295
+ }
2296
+ ]
train/training_loss.png ADDED
train/validation_loss.png ADDED