WavyHec committed on
Commit 2c7103e · verified · 1 Parent(s): ba441e4

Delete lora_persona_2-20251120T233231Z-1-001

Files changed (28)
  1. lora_persona_2-20251120T233231Z-1-001/lora_persona_2/README.md +0 -202
  2. lora_persona_2-20251120T233231Z-1-001/lora_persona_2/adapter_config.json +0 -36
  3. lora_persona_2-20251120T233231Z-1-001/lora_persona_2/adapter_model.safetensors +0 -3
  4. lora_persona_2-20251120T233231Z-1-001/lora_persona_2/checkpoint-556/README.md +0 -202
  5. lora_persona_2-20251120T233231Z-1-001/lora_persona_2/checkpoint-556/adapter_config.json +0 -36
  6. lora_persona_2-20251120T233231Z-1-001/lora_persona_2/checkpoint-556/adapter_model.safetensors +0 -3
  7. lora_persona_2-20251120T233231Z-1-001/lora_persona_2/checkpoint-556/optimizer.pt +0 -3
  8. lora_persona_2-20251120T233231Z-1-001/lora_persona_2/checkpoint-556/rng_state.pth +0 -3
  9. lora_persona_2-20251120T233231Z-1-001/lora_persona_2/checkpoint-556/scheduler.pt +0 -3
  10. lora_persona_2-20251120T233231Z-1-001/lora_persona_2/checkpoint-556/special_tokens_map.json +0 -30
  11. lora_persona_2-20251120T233231Z-1-001/lora_persona_2/checkpoint-556/tokenizer.json +0 -0
  12. lora_persona_2-20251120T233231Z-1-001/lora_persona_2/checkpoint-556/tokenizer_config.json +0 -44
  13. lora_persona_2-20251120T233231Z-1-001/lora_persona_2/checkpoint-556/trainer_state.json +0 -127
  14. lora_persona_2-20251120T233231Z-1-001/lora_persona_2/checkpoint-556/training_args.bin +0 -3
  15. lora_persona_2-20251120T233231Z-1-001/lora_persona_2/checkpoint-834/README.md +0 -202
  16. lora_persona_2-20251120T233231Z-1-001/lora_persona_2/checkpoint-834/adapter_config.json +0 -36
  17. lora_persona_2-20251120T233231Z-1-001/lora_persona_2/checkpoint-834/adapter_model.safetensors +0 -3
  18. lora_persona_2-20251120T233231Z-1-001/lora_persona_2/checkpoint-834/optimizer.pt +0 -3
  19. lora_persona_2-20251120T233231Z-1-001/lora_persona_2/checkpoint-834/rng_state.pth +0 -3
  20. lora_persona_2-20251120T233231Z-1-001/lora_persona_2/checkpoint-834/scheduler.pt +0 -3
  21. lora_persona_2-20251120T233231Z-1-001/lora_persona_2/checkpoint-834/special_tokens_map.json +0 -30
  22. lora_persona_2-20251120T233231Z-1-001/lora_persona_2/checkpoint-834/tokenizer.json +0 -0
  23. lora_persona_2-20251120T233231Z-1-001/lora_persona_2/checkpoint-834/tokenizer_config.json +0 -44
  24. lora_persona_2-20251120T233231Z-1-001/lora_persona_2/checkpoint-834/trainer_state.json +0 -170
  25. lora_persona_2-20251120T233231Z-1-001/lora_persona_2/checkpoint-834/training_args.bin +0 -3
  26. lora_persona_2-20251120T233231Z-1-001/lora_persona_2/special_tokens_map.json +0 -30
  27. lora_persona_2-20251120T233231Z-1-001/lora_persona_2/tokenizer.json +0 -0
  28. lora_persona_2-20251120T233231Z-1-001/lora_persona_2/tokenizer_config.json +0 -44
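
For context, a folder deletion like this one can be committed programmatically through the Hub API rather than the web UI. Below is a minimal sketch using huggingface_hub; the repo id `WavyHec/some-repo` is a placeholder, since the target repository is not named on this page.

```python
# Minimal sketch: commit a folder deletion to a Hub repo.
# "WavyHec/some-repo" is a placeholder repo id (not shown on this page).
from huggingface_hub import HfApi

api = HfApi()  # assumes a valid token via `huggingface-cli login` or HF_TOKEN
api.delete_folder(
    path_in_repo="lora_persona_2-20251120T233231Z-1-001",
    repo_id="WavyHec/some-repo",
    commit_message="Delete lora_persona_2-20251120T233231Z-1-001",
)
```
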
lora_persona_2-20251120T233231Z-1-001/lora_persona_2/README.md DELETED
@@ -1,202 +0,0 @@
- ---
- base_model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
- library_name: peft
- ---
-
- # Model Card for Model ID
-
- <!-- Provide a quick summary of what the model is/does. -->
-
-
-
- ## Model Details
-
- ### Model Description
-
- <!-- Provide a longer summary of what this model is. -->
-
-
-
- - **Developed by:** [More Information Needed]
- - **Funded by [optional]:** [More Information Needed]
- - **Shared by [optional]:** [More Information Needed]
- - **Model type:** [More Information Needed]
- - **Language(s) (NLP):** [More Information Needed]
- - **License:** [More Information Needed]
- - **Finetuned from model [optional]:** [More Information Needed]
-
- ### Model Sources [optional]
-
- <!-- Provide the basic links for the model. -->
-
- - **Repository:** [More Information Needed]
- - **Paper [optional]:** [More Information Needed]
- - **Demo [optional]:** [More Information Needed]
-
- ## Uses
-
- <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
-
- ### Direct Use
-
- <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
-
- [More Information Needed]
-
- ### Downstream Use [optional]
-
- <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
-
- [More Information Needed]
-
- ### Out-of-Scope Use
-
- <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
-
- [More Information Needed]
-
- ## Bias, Risks, and Limitations
-
- <!-- This section is meant to convey both technical and sociotechnical limitations. -->
-
- [More Information Needed]
-
- ### Recommendations
-
- <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
-
- Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
-
- ## How to Get Started with the Model
-
- Use the code below to get started with the model.
-
- [More Information Needed]
-
- ## Training Details
-
- ### Training Data
-
- <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
-
- [More Information Needed]
-
- ### Training Procedure
-
- <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
-
- #### Preprocessing [optional]
-
- [More Information Needed]
-
-
- #### Training Hyperparameters
-
- - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
-
- #### Speeds, Sizes, Times [optional]
-
- <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
-
- [More Information Needed]
-
- ## Evaluation
-
- <!-- This section describes the evaluation protocols and provides the results. -->
-
- ### Testing Data, Factors & Metrics
-
- #### Testing Data
-
- <!-- This should link to a Dataset Card if possible. -->
-
- [More Information Needed]
-
- #### Factors
-
- <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
-
- [More Information Needed]
-
- #### Metrics
-
- <!-- These are the evaluation metrics being used, ideally with a description of why. -->
-
- [More Information Needed]
-
- ### Results
-
- [More Information Needed]
-
- #### Summary
-
-
-
- ## Model Examination [optional]
-
- <!-- Relevant interpretability work for the model goes here -->
-
- [More Information Needed]
-
- ## Environmental Impact
-
- <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
-
- Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
-
- - **Hardware Type:** [More Information Needed]
- - **Hours used:** [More Information Needed]
- - **Cloud Provider:** [More Information Needed]
- - **Compute Region:** [More Information Needed]
- - **Carbon Emitted:** [More Information Needed]
-
- ## Technical Specifications [optional]
-
- ### Model Architecture and Objective
-
- [More Information Needed]
-
- ### Compute Infrastructure
-
- [More Information Needed]
-
- #### Hardware
-
- [More Information Needed]
-
- #### Software
-
- [More Information Needed]
-
- ## Citation [optional]
-
- <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
-
- **BibTeX:**
-
- [More Information Needed]
-
- **APA:**
-
- [More Information Needed]
-
- ## Glossary [optional]
-
- <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
-
- [More Information Needed]
-
- ## More Information [optional]
-
- [More Information Needed]
-
- ## Model Card Authors [optional]
-
- [More Information Needed]
-
- ## Model Card Contact
-
- [More Information Needed]
- ### Framework versions
-
- - PEFT 0.15.1
lora_persona_2-20251120T233231Z-1-001/lora_persona_2/adapter_config.json DELETED
@@ -1,36 +0,0 @@
- {
- "alpha_pattern": {},
- "auto_mapping": null,
- "base_model_name_or_path": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
- "bias": "none",
- "corda_config": null,
- "eva_config": null,
- "exclude_modules": null,
- "fan_in_fan_out": false,
- "inference_mode": true,
- "init_lora_weights": true,
- "layer_replication": null,
- "layers_pattern": null,
- "layers_to_transform": null,
- "loftq_config": {},
- "lora_alpha": 16,
- "lora_bias": false,
- "lora_dropout": 0.05,
- "megatron_config": null,
- "megatron_core": "megatron.core",
- "modules_to_save": null,
- "peft_type": "LORA",
- "r": 8,
- "rank_pattern": {},
- "revision": null,
- "target_modules": [
- "v_proj",
- "q_proj",
- "o_proj",
- "k_proj"
- ],
- "task_type": "CAUSAL_LM",
- "trainable_token_indices": null,
- "use_dora": false,
- "use_rslora": false
- }
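
This adapter_config.json describes a rank-8 LoRA (lora_alpha 16, dropout 0.05) applied to the q/k/v/o attention projections of TinyLlama-1.1B-Chat. As a minimal sketch, assuming the deleted adapter directory were still available locally under `lora_persona_2/` (hypothetical path), it would load like this with PEFT:

```python
# Minimal sketch: attach the LoRA adapter described above to its base model.
# Assumes a local copy of the deleted adapter at "lora_persona_2/" (hypothetical path).
from transformers import AutoModelForCausalLM
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")

# PeftModel reads adapter_config.json (r=8, lora_alpha=16, q/k/v/o_proj targets)
# and loads the weights from adapter_model.safetensors.
model = PeftModel.from_pretrained(base, "lora_persona_2")
model.eval()
```
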
lora_persona_2-20251120T233231Z-1-001/lora_persona_2/adapter_model.safetensors DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:e01ada8d2eedddc5bc6aeccd3bdfb612d8cef9ee18237bcf670b5c87a092c5cd
- size 9034304
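
The weights themselves are stored through Git LFS, so the diff shows only the three-line pointer file: the LFS spec version, the SHA-256 of the blob, and its size in bytes (about 9 MB here). A quick sketch for checking a downloaded blob against such a pointer, assuming a local copy of the deleted file (hypothetical path):

```python
# Sketch: verify a downloaded blob against the LFS pointer fields above.
# "adapter_model.safetensors" is a hypothetical local copy of the deleted file.
import hashlib
from pathlib import Path

data = Path("adapter_model.safetensors").read_bytes()
assert len(data) == 9034304  # the pointer's "size" field
assert hashlib.sha256(data).hexdigest() == (
    "e01ada8d2eedddc5bc6aeccd3bdfb612d8cef9ee18237bcf670b5c87a092c5cd"
)  # the pointer's "oid sha256" field
```
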
lora_persona_2-20251120T233231Z-1-001/lora_persona_2/checkpoint-556/README.md DELETED
@@ -1,202 +0,0 @@
(identical to the lora_persona_2/README.md diff above; duplicate contents omitted)
lora_persona_2-20251120T233231Z-1-001/lora_persona_2/checkpoint-556/adapter_config.json DELETED
@@ -1,36 +0,0 @@
(identical to the lora_persona_2/adapter_config.json diff above; duplicate contents omitted)
lora_persona_2-20251120T233231Z-1-001/lora_persona_2/checkpoint-556/adapter_model.safetensors DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:86bf181e150b9e2c07cb1d3dffe3b4f1d42a8438c31e620d224210b0cdedcb0d
- size 9034304
lora_persona_2-20251120T233231Z-1-001/lora_persona_2/checkpoint-556/optimizer.pt DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:60381bda5f3a69b59fe17c0f307b8999f024159891c685636714ec3138329cfe
- size 18170234
lora_persona_2-20251120T233231Z-1-001/lora_persona_2/checkpoint-556/rng_state.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:64fe9fd798e94ea754b1945d5c4f12f98570dcc0814502fa88bf914f1653f144
- size 14244
lora_persona_2-20251120T233231Z-1-001/lora_persona_2/checkpoint-556/scheduler.pt DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:b85da682d9c5423d734ae339f497c048e78cd7b04a2bc6cf407c6f9ee4f0a06e
- size 1064
lora_persona_2-20251120T233231Z-1-001/lora_persona_2/checkpoint-556/special_tokens_map.json DELETED
@@ -1,30 +0,0 @@
- {
- "bos_token": {
- "content": "<s>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false
- },
- "eos_token": {
- "content": "</s>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false
- },
- "pad_token": {
- "content": "</s>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false
- },
- "unk_token": {
- "content": "<unk>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false
- }
- }
lora_persona_2-20251120T233231Z-1-001/lora_persona_2/checkpoint-556/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
lora_persona_2-20251120T233231Z-1-001/lora_persona_2/checkpoint-556/tokenizer_config.json DELETED
@@ -1,44 +0,0 @@
- {
- "add_bos_token": true,
- "add_eos_token": false,
- "add_prefix_space": null,
- "added_tokens_decoder": {
- "0": {
- "content": "<unk>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "1": {
- "content": "<s>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "2": {
- "content": "</s>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- }
- },
- "bos_token": "<s>",
- "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
- "clean_up_tokenization_spaces": false,
- "eos_token": "</s>",
- "extra_special_tokens": {},
- "legacy": false,
- "model_max_length": 2048,
- "pad_token": "</s>",
- "padding_side": "right",
- "sp_model_kwargs": {},
- "tokenizer_class": "LlamaTokenizer",
- "unk_token": "<unk>",
- "use_default_system_prompt": false
- }
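
The chat_template above is the TinyLlama-Chat (Zephyr-style) template: each turn is wrapped in <|system|>, <|user|>, or <|assistant|> markers and terminated with the EOS token. A minimal sketch of rendering a conversation with it, using the base model's tokenizer, which ships the same template:

```python
# Sketch: render a conversation with the chat template shown above.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")

messages = [
    {"role": "system", "content": "You are a persona-tuned assistant."},
    {"role": "user", "content": "Introduce yourself."},
]

# add_generation_prompt=True appends the trailing '<|assistant|>' marker
# so generation starts at the assistant turn.
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
print(prompt)
```
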
lora_persona_2-20251120T233231Z-1-001/lora_persona_2/checkpoint-556/trainer_state.json DELETED
@@ -1,127 +0,0 @@
- {
- "best_global_step": null,
- "best_metric": null,
- "best_model_checkpoint": null,
- "epoch": 2.0,
- "eval_steps": 500,
- "global_step": 556,
- "is_hyper_param_search": false,
- "is_local_process_zero": true,
- "is_world_process_zero": true,
- "log_history": [
- {
- "epoch": 0.17985611510791366,
- "grad_norm": 0.9889341592788696,
- "learning_rate": 0.0001995649347969019,
- "loss": 3.5084,
- "step": 50
- },
- {
- "epoch": 0.3597122302158273,
- "grad_norm": 1.3072373867034912,
- "learning_rate": 0.00019588933215113926,
- "loss": 2.8647,
- "step": 100
- },
- {
- "epoch": 0.539568345323741,
- "grad_norm": 1.416175127029419,
- "learning_rate": 0.00018860113594683148,
- "loss": 2.8011,
- "step": 150
- },
- {
- "epoch": 0.7194244604316546,
- "grad_norm": 1.3511329889297485,
- "learning_rate": 0.00017797492616144256,
- "loss": 2.796,
- "step": 200
- },
- {
- "epoch": 0.8992805755395683,
- "grad_norm": 1.0564734935760498,
- "learning_rate": 0.0001644110411450398,
- "loss": 2.6583,
- "step": 250
- },
- {
- "epoch": 1.0,
- "eval_loss": 2.709125280380249,
- "eval_runtime": 4.5805,
- "eval_samples_per_second": 66.15,
- "eval_steps_per_second": 8.296,
- "step": 278
- },
- {
- "epoch": 1.079136690647482,
- "grad_norm": 1.3963377475738525,
- "learning_rate": 0.0001484204950275565,
- "loss": 2.6333,
- "step": 300
- },
- {
- "epoch": 1.2589928057553956,
- "grad_norm": 0.9653306603431702,
- "learning_rate": 0.00013060572545878875,
- "loss": 2.5746,
- "step": 350
- },
- {
- "epoch": 1.4388489208633093,
- "grad_norm": 1.270931601524353,
- "learning_rate": 0.00011163789700258655,
- "loss": 2.6636,
- "step": 400
- },
- {
- "epoch": 1.6187050359712232,
- "grad_norm": 1.5205957889556885,
- "learning_rate": 9.223161527109937e-05,
- "loss": 2.6855,
- "step": 450
- },
- {
- "epoch": 1.7985611510791366,
- "grad_norm": 1.5966553688049316,
- "learning_rate": 7.311800443430251e-05,
- "loss": 2.4911,
- "step": 500
- },
- {
- "epoch": 1.9784172661870505,
- "grad_norm": 1.0975911617279053,
- "learning_rate": 5.501716239923642e-05,
- "loss": 2.5095,
- "step": 550
- },
- {
- "epoch": 2.0,
- "eval_loss": 2.6189730167388916,
- "eval_runtime": 4.5727,
- "eval_samples_per_second": 66.262,
- "eval_steps_per_second": 8.31,
- "step": 556
- }
- ],
- "logging_steps": 50,
- "max_steps": 834,
- "num_input_tokens_seen": 0,
- "num_train_epochs": 3,
- "save_steps": 500,
- "stateful_callbacks": {
- "TrainerControl": {
- "args": {
- "should_epoch_stop": false,
- "should_evaluate": false,
- "should_log": false,
- "should_save": true,
- "should_training_stop": false
- },
- "attributes": {}
- }
- },
- "total_flos": 7070572672450560.0,
- "train_batch_size": 8,
- "trial_name": null,
- "trial_params": null
- }
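
This trainer state records training loss falling from 3.51 at step 50 to about 2.51 by step 550, with eval loss improving from 2.709 (epoch 1) to 2.619 (epoch 2) out of a planned 3 epochs (834 steps). A small sketch for pulling those curves out of a trainer_state.json, assuming a local copy of the deleted file (hypothetical path):

```python
# Sketch: extract the loss curves from a Trainer state file.
# "trainer_state.json" is a hypothetical local copy of the deleted file.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# Training entries carry "loss"; evaluation entries carry "eval_loss".
train = [(e["step"], e["loss"]) for e in state["log_history"] if "loss" in e]
evals = [(e["step"], e["eval_loss"]) for e in state["log_history"] if "eval_loss" in e]

print("train loss:", train)  # e.g. [(50, 3.5084), ..., (550, 2.5095)]
print("eval loss: ", evals)  # e.g. [(278, 2.7091...), (556, 2.6189...)]
```
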
lora_persona_2-20251120T233231Z-1-001/lora_persona_2/checkpoint-556/training_args.bin DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:6b9b3fd8e8084b31a1a949d9efdde4e692bea6a03cf5ab81951e304608d93790
- size 5304
lora_persona_2-20251120T233231Z-1-001/lora_persona_2/checkpoint-834/README.md DELETED
@@ -1,202 +0,0 @@
(identical to the lora_persona_2/README.md diff above; duplicate contents omitted)
lora_persona_2-20251120T233231Z-1-001/lora_persona_2/checkpoint-834/adapter_config.json DELETED
@@ -1,36 +0,0 @@
(identical to the lora_persona_2/adapter_config.json diff above; duplicate contents omitted)
lora_persona_2-20251120T233231Z-1-001/lora_persona_2/checkpoint-834/adapter_model.safetensors DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:e01ada8d2eedddc5bc6aeccd3bdfb612d8cef9ee18237bcf670b5c87a092c5cd
- size 9034304
lora_persona_2-20251120T233231Z-1-001/lora_persona_2/checkpoint-834/optimizer.pt DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:4707b97b83912c5fe6023019b2438b5ab9497dd8917b5d67995a27397dc43651
- size 18170234
lora_persona_2-20251120T233231Z-1-001/lora_persona_2/checkpoint-834/rng_state.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:6b2dc47077c689c3fbdfb061ad914c351315092e0b115981fd9d7048f4a614e0
- size 14244
lora_persona_2-20251120T233231Z-1-001/lora_persona_2/checkpoint-834/scheduler.pt DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:05614d7b355c8882c70599104ce4a16af1bb47f75f63be6eabf345a31748bc04
- size 1064
lora_persona_2-20251120T233231Z-1-001/lora_persona_2/checkpoint-834/special_tokens_map.json DELETED
@@ -1,30 +0,0 @@
(identical to the checkpoint-556/special_tokens_map.json diff above; duplicate contents omitted)
lora_persona_2-20251120T233231Z-1-001/lora_persona_2/checkpoint-834/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
lora_persona_2-20251120T233231Z-1-001/lora_persona_2/checkpoint-834/tokenizer_config.json DELETED
@@ -1,44 +0,0 @@
(identical to the checkpoint-556/tokenizer_config.json diff above; duplicate contents omitted)
lora_persona_2-20251120T233231Z-1-001/lora_persona_2/checkpoint-834/trainer_state.json DELETED
@@ -1,170 +0,0 @@
- {
- "best_global_step": null,
- "best_metric": null,
- "best_model_checkpoint": null,
- "epoch": 3.0,
- "eval_steps": 500,
- "global_step": 834,
- "is_hyper_param_search": false,
- "is_local_process_zero": true,
- "is_world_process_zero": true,
- "log_history": [
- {
- "epoch": 0.17985611510791366,
- "grad_norm": 0.9889341592788696,
- "learning_rate": 0.0001995649347969019,
- "loss": 3.5084,
- "step": 50
- },
- {
- "epoch": 0.3597122302158273,
- "grad_norm": 1.3072373867034912,
- "learning_rate": 0.00019588933215113926,
- "loss": 2.8647,
- "step": 100
- },
- {
- "epoch": 0.539568345323741,
- "grad_norm": 1.416175127029419,
- "learning_rate": 0.00018860113594683148,
- "loss": 2.8011,
- "step": 150
- },
- {
- "epoch": 0.7194244604316546,
- "grad_norm": 1.3511329889297485,
- "learning_rate": 0.00017797492616144256,
- "loss": 2.796,
- "step": 200
- },
- {
- "epoch": 0.8992805755395683,
- "grad_norm": 1.0564734935760498,
- "learning_rate": 0.0001644110411450398,
- "loss": 2.6583,
- "step": 250
- },
- {
- "epoch": 1.0,
- "eval_loss": 2.709125280380249,
- "eval_runtime": 4.5805,
- "eval_samples_per_second": 66.15,
- "eval_steps_per_second": 8.296,
- "step": 278
- },
- {
- "epoch": 1.079136690647482,
- "grad_norm": 1.3963377475738525,
- "learning_rate": 0.0001484204950275565,
- "loss": 2.6333,
- "step": 300
- },
- {
- "epoch": 1.2589928057553956,
- "grad_norm": 0.9653306603431702,
- "learning_rate": 0.00013060572545878875,
- "loss": 2.5746,
- "step": 350
- },
- {
- "epoch": 1.4388489208633093,
- "grad_norm": 1.270931601524353,
- "learning_rate": 0.00011163789700258655,
- "loss": 2.6636,
- "step": 400
- },
- {
- "epoch": 1.6187050359712232,
- "grad_norm": 1.5205957889556885,
- "learning_rate": 9.223161527109937e-05,
- "loss": 2.6855,
- "step": 450
- },
- {
- "epoch": 1.7985611510791366,
- "grad_norm": 1.5966553688049316,
- "learning_rate": 7.311800443430251e-05,
- "loss": 2.4911,
- "step": 500
- },
- {
- "epoch": 1.9784172661870505,
- "grad_norm": 1.0975911617279053,
- "learning_rate": 5.501716239923642e-05,
- "loss": 2.5095,
- "step": 550
- },
- {
- "epoch": 2.0,
- "eval_loss": 2.6189730167388916,
- "eval_runtime": 4.5727,
- "eval_samples_per_second": 66.262,
- "eval_steps_per_second": 8.31,
- "step": 556
- },
- {
- "epoch": 2.158273381294964,
- "grad_norm": 1.6500194072723389,
- "learning_rate": 3.861103139944449e-05,
- "loss": 2.5351,
- "step": 600
- },
- {
- "epoch": 2.338129496402878,
- "grad_norm": 1.2627792358398438,
- "learning_rate": 2.451770608467432e-05,
- "loss": 2.4278,
- "step": 650
- },
- {
- "epoch": 2.5179856115107913,
- "grad_norm": 1.2723170518875122,
- "learning_rate": 1.326814704364262e-05,
- "loss": 2.5119,
- "step": 700
- },
- {
- "epoch": 2.697841726618705,
- "grad_norm": 1.461732268333435,
- "learning_rate": 5.286177068899989e-06,
- "loss": 2.4941,
- "step": 750
- },
- {
- "epoch": 2.8776978417266186,
- "grad_norm": 1.4850504398345947,
- "learning_rate": 8.725137967920738e-07,
- "loss": 2.6372,
- "step": 800
- },
- {
- "epoch": 3.0,
- "eval_loss": 2.6041791439056396,
- "eval_runtime": 4.5702,
- "eval_samples_per_second": 66.299,
- "eval_steps_per_second": 8.315,
- "step": 834
- }
- ],
- "logging_steps": 50,
- "max_steps": 834,
- "num_input_tokens_seen": 0,
- "num_train_epochs": 3,
- "save_steps": 500,
- "stateful_callbacks": {
- "TrainerControl": {
- "args": {
- "should_epoch_stop": false,
- "should_evaluate": false,
- "should_log": false,
- "should_save": true,
- "should_training_stop": true
- },
- "attributes": {}
- }
- },
- "total_flos": 1.060585900867584e+16,
- "train_batch_size": 8,
- "trial_name": null,
- "trial_params": null
- }
lora_persona_2-20251120T233231Z-1-001/lora_persona_2/checkpoint-834/training_args.bin DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:6b9b3fd8e8084b31a1a949d9efdde4e692bea6a03cf5ab81951e304608d93790
- size 5304
lora_persona_2-20251120T233231Z-1-001/lora_persona_2/special_tokens_map.json DELETED
@@ -1,30 +0,0 @@
(identical to the checkpoint-556/special_tokens_map.json diff above; duplicate contents omitted)
lora_persona_2-20251120T233231Z-1-001/lora_persona_2/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
lora_persona_2-20251120T233231Z-1-001/lora_persona_2/tokenizer_config.json DELETED
@@ -1,44 +0,0 @@
(identical to the checkpoint-556/tokenizer_config.json diff above; duplicate contents omitted)