permutans committed on
Commit
348e0c6
·
verified ·
1 Parent(s): f6fe748

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. README.md +147 -157
  2. config.json +2 -5
  3. model.safetensors +1 -1
README.md CHANGED
@@ -31,7 +31,7 @@ This model performs span-level detection of 72 rhetorical marker types using BIO
31
  | Base model | `bert-base-uncased` |
32
  | Task | Token classification (BIO tagging) |
33
  | Labels | 145 (72 marker types × B/I + O) |
34
- | Best F1 | **0.459** (macro, markers only) |
35
  | Training | 15 epochs, batch 8, lr 2e-5 |
36
  | Loss | Focal loss (γ=1.0) for class imbalance |
37
 
@@ -119,165 +119,155 @@ Characteristics of written, analytical discourse:
119
 
120
  ## Evaluation
121
 
122
- Per-class F1 on test set (selected markers):
123
-
124
- | Marker | Precision | Recall | F1 | Support |
125
- |--------|-----------|--------|-----|---------|
126
- | oral_vocative | 0.889 | 0.593 | 0.711 | 27 |
127
- | oral_inclusive_we | 0.500 | 0.586 | 0.540 | 29 |
128
- | oral_second_person | 0.556 | 0.600 | 0.577 | 25 |
129
- | literate_conditional | 0.769 | 0.714 | 0.741 | 14 |
130
- | oral_self_correction | 1.000 | 1.000 | 1.000 | 3 |
131
- | oral_audience_response | 1.000 | 1.000 | 1.000 | 4 |
132
- | literate_citation | 0.000 | 0.000 | 0.000 | 10 |
133
 
134
  <details><summary>Click to show per-marker precision/recall/F1/support</summary>
135
 
136
  ```
137
- precision recall f1-score support
138
-
139
- O 0.730 0.850 0.785 3496
140
- B-literate_abstract_noun 0.500 0.071 0.125 14
141
- B-literate_additive_formal 1.000 0.667 0.800 3
142
- B-literate_agent_demoted 1.000 1.000 1.000 4
143
- B-literate_agentless_passive 0.297 0.458 0.361 24
144
- B-literate_aside 0.412 0.778 0.538 9
145
- B-literate_categorical_statement 0.571 1.000 0.727 4
146
- B-literate_causal_chain 0.667 0.667 0.667 3
147
- B-literate_causal_explicit 0.467 0.636 0.538 11
148
- B-literate_citation 0.000 0.000 0.000 10
149
- B-literate_conceptual_metaphor 0.500 0.333 0.400 6
150
- B-literate_concessive 1.000 1.000 1.000 2
151
- B-literate_concessive_connector 0.800 0.800 0.800 5
152
- B-literate_conditional 0.769 0.714 0.741 14
153
- B-literate_contrastive 0.455 0.625 0.526 8
154
- B-literate_definitional_move 1.000 1.000 1.000 1
155
- B-literate_enumeration 0.429 1.000 0.600 3
156
- B-literate_epistemic_hedge 0.357 0.417 0.385 24
157
- B-literate_evidential 0.000 0.000 0.000 11
158
- B-literate_footnote_reference 0.500 0.667 0.571 3
159
- B-literate_institutional_subject 0.429 1.000 0.600 3
160
- B-literate_list_structure 0.000 0.000 0.000 1
161
- B-literate_metadiscourse 0.500 0.250 0.333 4
162
- B-literate_methodological_framing 1.000 0.500 0.667 4
163
- B-literate_nested_clauses 0.345 0.455 0.392 22
164
- B-literate_nominalization 0.600 0.300 0.400 10
165
- B-literate_objectifying_stance 0.800 1.000 0.889 4
166
- B-literate_paradox 0.333 0.333 0.333 3
167
- B-literate_probability 0.333 0.200 0.250 5
168
- B-literate_qualified_assertion 0.000 0.000 0.000 5
169
- B-literate_relative_chain 0.270 0.773 0.400 22
170
- B-literate_technical_abbreviation 0.000 0.000 0.000 2
171
- B-literate_technical_term 0.333 0.667 0.444 3
172
- B-literate_temporal_embedding 1.000 0.500 0.667 4
173
- B-literate_third_person_reference 0.500 0.333 0.400 3
174
- B-oral_alliteration 0.000 0.000 0.000 3
175
- B-oral_anaphora 0.188 0.400 0.255 15
176
- B-oral_asyndeton 0.000 0.000 0.000 1
177
- B-oral_audience_response 1.000 1.000 1.000 4
178
- B-oral_binomial_expression 0.333 0.400 0.364 5
179
- B-oral_conflict_frame 0.800 0.800 0.800 5
180
- B-oral_discourse_formula 0.500 0.500 0.500 6
181
- B-oral_dramatic_pause 1.000 0.500 0.667 2
182
- B-oral_embodied_action 1.000 0.167 0.286 6
183
- B-oral_epistrophe 0.500 0.333 0.400 3
184
- B-oral_epithet 0.000 0.000 0.000 2
185
- B-oral_everyday_example 0.750 1.000 0.857 3
186
- B-oral_first_person 0.000 0.000 0.000 5
187
- B-oral_imperative 0.571 0.571 0.571 14
188
- B-oral_inclusive_we 0.500 0.586 0.540 29
189
- B-oral_intensifier_doubling 1.000 0.667 0.800 3
190
- B-oral_lexical_repetition 0.267 0.400 0.320 10
191
- B-oral_named_individual 0.474 0.450 0.462 20
192
- B-oral_parallelism 0.100 0.143 0.118 7
193
- B-oral_phatic_check 1.000 1.000 1.000 1
194
- B-oral_phatic_filler 0.429 0.600 0.500 5
195
- B-oral_polysyndeton 0.200 0.100 0.133 10
196
- B-oral_proverb 0.600 0.500 0.545 6
197
- B-oral_refrain 1.000 1.000 1.000 1
198
- B-oral_religious_formula 0.000 0.000 0.000 2
199
- B-oral_rhetorical_question 0.125 0.500 0.200 2
200
- B-oral_rhythm 0.714 0.833 0.769 6
201
- B-oral_second_person 0.556 0.600 0.577 25
202
- B-oral_self_correction 1.000 1.000 1.000 3
203
- B-oral_sensory_detail 0.500 1.000 0.667 1
204
- B-oral_simple_conjunction 0.000 0.000 0.000 3
205
- B-oral_specific_place 0.333 0.667 0.444 3
206
- B-oral_temporal_anchor 0.000 0.000 0.000 3
207
- B-oral_tricolon 0.286 1.000 0.444 2
208
- B-oral_us_them 0.667 0.667 0.667 3
209
- B-oral_vocative 0.889 0.593 0.711 27
210
- I-literate_abstract_noun 0.500 0.083 0.143 12
211
- I-literate_additive_formal 0.000 0.000 0.000 6
212
- I-literate_agent_demoted 0.583 0.933 0.718 15
213
- I-literate_agentless_passive 0.518 0.414 0.460 70
214
- I-literate_aside 0.700 0.618 0.656 102
215
- I-literate_categorical_statement 0.692 0.391 0.500 23
216
- I-literate_causal_chain 0.889 0.640 0.744 25
217
- I-literate_causal_explicit 0.593 0.762 0.667 21
218
- I-literate_citation 0.625 0.227 0.333 44
219
- I-literate_conceptual_metaphor 0.857 0.400 0.545 15
220
- I-literate_concessive 0.800 0.571 0.667 7
221
- I-literate_concessive_connector 0.667 0.667 0.667 3
222
- I-literate_conditional 0.784 0.351 0.485 114
223
- I-literate_contrastive 0.857 0.400 0.545 15
224
- I-literate_cross_reference 0.000 0.000 0.000 0
225
- I-literate_definitional_move 1.000 1.000 1.000 5
226
- I-literate_enumeration 0.680 0.436 0.531 39
227
- I-literate_epistemic_hedge 0.425 0.386 0.405 44
228
- I-literate_evidential 0.500 0.103 0.171 29
229
- I-literate_footnote_reference 1.000 0.727 0.842 11
230
- I-literate_institutional_subject 0.625 1.000 0.769 5
231
- I-literate_list_structure 0.000 0.000 0.000 3
232
- I-literate_metadiscourse 0.222 0.125 0.160 16
233
- I-literate_methodological_framing 0.857 0.500 0.632 12
234
- I-literate_nested_clauses 0.487 0.256 0.336 379
235
- I-literate_nominalization 0.000 0.000 0.000 11
236
- I-literate_objectifying_stance 0.900 0.692 0.783 13
237
- I-literate_paradox 0.250 0.062 0.100 16
238
- I-literate_probability 0.000 0.000 0.000 7
239
- I-literate_qualified_assertion 0.000 0.000 0.000 21
240
- I-literate_relative_chain 0.494 0.482 0.488 251
241
- I-literate_technical_abbreviation 0.750 0.273 0.400 11
242
- I-literate_technical_term 0.167 0.273 0.207 11
243
- I-literate_temporal_embedding 1.000 0.600 0.750 50
244
- I-literate_third_person_reference 0.714 0.833 0.769 6
245
- I-oral_alliteration 1.000 0.222 0.364 9
246
- I-oral_anaphora 0.140 0.070 0.093 100
247
- I-oral_asyndeton 0.417 0.714 0.526 7
248
- I-oral_audience_response 0.905 0.905 0.905 21
249
- I-oral_binomial_expression 0.529 0.818 0.643 11
250
- I-oral_conflict_frame 0.833 0.714 0.769 7
251
- I-oral_discourse_formula 0.625 0.833 0.714 6
252
- I-oral_dramatic_pause 0.500 0.500 0.500 4
253
- I-oral_embodied_action 1.000 0.062 0.118 16
254
- I-oral_epistrophe 0.000 0.000 0.000 3
255
- I-oral_epithet 0.200 0.200 0.200 5
256
- I-oral_everyday_example 1.000 0.950 0.974 20
257
- I-oral_first_person 0.000 0.000 0.000 2
258
- I-oral_imperative 0.692 0.333 0.450 27
259
- I-oral_inclusive_we 0.849 0.900 0.874 50
260
- I-oral_intensifier_doubling 0.800 1.000 0.889 4
261
- I-oral_lexical_repetition 0.241 0.341 0.283 41
262
- I-oral_named_individual 0.689 0.646 0.667 48
263
- I-oral_parallelism 0.518 0.319 0.394 135
264
- I-oral_phatic_check 1.000 1.000 1.000 3
265
- I-oral_phatic_filler 0.667 0.400 0.500 5
266
- I-oral_polysyndeton 1.000 0.268 0.423 82
267
- I-oral_proverb 0.955 0.568 0.712 37
268
- I-oral_refrain 1.000 1.000 1.000 4
269
- I-oral_religious_formula 0.000 0.000 0.000 16
270
- I-oral_rhetorical_question 0.312 0.333 0.323 15
271
- I-oral_rhythm 0.979 0.610 0.752 77
272
- I-oral_second_person 0.143 0.143 0.143 7
273
- I-oral_self_correction 0.857 0.900 0.878 20
274
- I-oral_sensory_detail 0.833 1.000 0.909 5
275
- I-oral_simple_conjunction 0.714 1.000 0.833 5
276
- I-oral_specific_place 0.714 0.625 0.667 8
277
- I-oral_temporal_anchor 0.059 0.100 0.074 10
278
- I-oral_tricolon 0.388 0.929 0.547 28
279
- I-oral_us_them 0.800 0.889 0.842 9
280
- I-oral_vocative 0.971 0.723 0.829 47
281
  ```
282
 
283
  </details>
@@ -383,7 +373,7 @@ Markers with <100 examples: 57 (79%)
383
 
384
  </details>
385
 
386
- **Macro F1 (all 145 labels):** 0.487
387
  **Weighted F1:** 0.645
388
  **Accuracy:** 66.5%
389
 
 
31
  | Base model | `bert-base-uncased` |
32
  | Task | Token classification (BIO tagging) |
33
  | Labels | 145 (72 marker types × B/I + O) |
34
+ | Best F1 | **0.4611** (macro, markers only) |
35
  | Training | 15 epochs, batch 8, lr 2e-5 |
36
  | Loss | Focal loss (γ=1.0) for class imbalance |
37
 
 
119
 
120
  ## Evaluation
121
 
122
+ Per-class F1 on test set:
 
 
 
 
 
 
 
 
 
 
123
 
124
  <details><summary>Click to show per-marker precision/recall/F1/support</summary>
125
 
126
  ```
127
+ precision recall f1-score support
128
+
129
+ O 0.721 0.835 0.774 3496
130
+ B-literate_abstract_noun 0.500 0.286 0.364 14
131
+ B-literate_additive_formal 0.667 0.667 0.667 3
132
+ B-literate_agent_demoted 0.800 1.000 0.889 4
133
+ B-literate_agentless_passive 0.312 0.417 0.357 24
134
+ B-literate_aside 0.438 0.778 0.560 9
135
+ B-literate_categorical_statement 0.333 0.500 0.400 4
136
+ B-literate_causal_chain 0.667 0.667 0.667 3
137
+ B-literate_causal_explicit 0.538 0.636 0.583 11
138
+ B-literate_citation 0.000 0.000 0.000 10
139
+ B-literate_conceptual_metaphor 0.500 0.167 0.250 6
140
+ B-literate_concessive 1.000 1.000 1.000 2
141
+ B-literate_concessive_connector 0.800 0.800 0.800 5
142
+ B-literate_conditional 0.667 0.714 0.690 14
143
+ B-literate_contrastive 0.333 0.375 0.353 8
144
+ B-literate_definitional_move 1.000 1.000 1.000 1
145
+ B-literate_enumeration 0.500 0.667 0.571 3
146
+ B-literate_epistemic_hedge 0.371 0.542 0.441 24
147
+ B-literate_evidential 0.000 0.000 0.000 11
148
+ B-literate_footnote_reference 0.500 0.667 0.571 3
149
+ B-literate_institutional_subject 0.600 1.000 0.750 3
150
+ B-literate_list_structure 0.000 0.000 0.000 1
151
+ B-literate_metadiscourse 0.500 0.500 0.500 4
152
+ B-literate_methodological_framing 1.000 0.500 0.667 4
153
+ B-literate_nested_clauses 0.300 0.545 0.387 22
154
+ B-literate_nominalization 0.750 0.300 0.429 10
155
+ B-literate_objectifying_stance 0.800 1.000 0.889 4
156
+ B-literate_paradox 0.500 0.333 0.400 3
157
+ B-literate_probability 0.333 0.200 0.250 5
158
+ B-literate_qualified_assertion 0.000 0.000 0.000 5
159
+ B-literate_relative_chain 0.327 0.773 0.459 22
160
+ B-literate_technical_abbreviation 0.000 0.000 0.000 2
161
+ B-literate_technical_term 0.400 0.667 0.500 3
162
+ B-literate_temporal_embedding 1.000 0.500 0.667 4
163
+ B-literate_third_person_reference 0.250 0.333 0.286 3
164
+ B-oral_alliteration 0.000 0.000 0.000 3
165
+ B-oral_anaphora 0.185 0.333 0.238 15
166
+ B-oral_asyndeton 0.000 0.000 0.000 1
167
+ B-oral_audience_response 1.000 1.000 1.000 4
168
+ B-oral_binomial_expression 0.333 0.400 0.364 5
169
+ B-oral_conflict_frame 0.800 0.800 0.800 5
170
+ B-oral_discourse_formula 0.333 0.500 0.400 6
171
+ B-oral_dramatic_pause 0.000 0.000 0.000 2
172
+ B-oral_embodied_action 1.000 0.167 0.286 6
173
+ B-oral_epistrophe 0.000 0.000 0.000 3
174
+ B-oral_epithet 0.333 0.500 0.400 2
175
+ B-oral_everyday_example 0.750 1.000 0.857 3
176
+ B-oral_first_person 0.000 0.000 0.000 5
177
+ B-oral_imperative 0.600 0.643 0.621 14
178
+ B-oral_inclusive_we 0.486 0.586 0.531 29
179
+ B-oral_intensifier_doubling 0.667 0.667 0.667 3
180
+ B-oral_lexical_repetition 0.273 0.300 0.286 10
181
+ B-oral_named_individual 0.450 0.450 0.450 20
182
+ B-oral_parallelism 0.000 0.000 0.000 7
183
+ B-oral_phatic_check 1.000 1.000 1.000 1
184
+ B-oral_phatic_filler 0.667 0.800 0.727 5
185
+ B-oral_polysyndeton 0.250 0.100 0.143 10
186
+ B-oral_proverb 1.000 0.333 0.500 6
187
+ B-oral_refrain 1.000 1.000 1.000 1
188
+ B-oral_religious_formula 0.000 0.000 0.000 2
189
+ B-oral_rhetorical_question 0.222 1.000 0.364 2
190
+ B-oral_rhythm 0.714 0.833 0.769 6
191
+ B-oral_second_person 0.533 0.640 0.582 25
192
+ B-oral_self_correction 0.600 1.000 0.750 3
193
+ B-oral_sensory_detail 1.000 1.000 1.000 1
194
+ B-oral_simple_conjunction 0.000 0.000 0.000 3
195
+ B-oral_specific_place 0.333 0.667 0.444 3
196
+ B-oral_temporal_anchor 0.000 0.000 0.000 3
197
+ B-oral_tricolon 0.200 1.000 0.333 2
198
+ B-oral_us_them 0.667 0.667 0.667 3
199
+ B-oral_vocative 0.714 0.556 0.625 27
200
+ I-literate_abstract_noun 0.000 0.000 0.000 12
201
+ I-literate_additive_formal 0.000 0.000 0.000 6
202
+ I-literate_agent_demoted 0.500 0.800 0.615 15
203
+ I-literate_agentless_passive 0.483 0.414 0.446 70
204
+ I-literate_aside 0.400 0.235 0.296 102
205
+ I-literate_categorical_statement 0.412 0.304 0.350 23
206
+ I-literate_causal_chain 0.917 0.440 0.595 25
207
+ I-literate_causal_explicit 0.444 0.762 0.561 21
208
+ I-literate_citation 0.444 0.182 0.258 44
209
+ I-literate_conceptual_metaphor 0.571 0.267 0.364 15
210
+ I-literate_concessive 0.750 0.429 0.545 7
211
+ I-literate_concessive_connector 0.400 0.667 0.500 3
212
+ I-literate_conditional 0.479 0.307 0.374 114
213
+ I-literate_contrastive 0.600 0.400 0.480 15
214
+ I-literate_cross_reference 0.000 0.000 0.000 0
215
+ I-literate_definitional_move 0.833 1.000 0.909 5
216
+ I-literate_enumeration 0.824 0.718 0.767 39
217
+ I-literate_epistemic_hedge 0.375 0.341 0.357 44
218
+ I-literate_evidential 0.333 0.034 0.062 29
219
+ I-literate_footnote_reference 0.667 0.727 0.696 11
220
+ I-literate_institutional_subject 1.000 1.000 1.000 5
221
+ I-literate_list_structure 0.000 0.000 0.000 3
222
+ I-literate_metadiscourse 0.200 0.125 0.154 16
223
+ I-literate_methodological_framing 0.750 0.500 0.600 12
224
+ I-literate_nested_clauses 0.336 0.127 0.184 379
225
+ I-literate_nominalization 0.000 0.000 0.000 11
226
+ I-literate_objectifying_stance 0.917 0.846 0.880 13
227
+ I-literate_paradox 0.100 0.062 0.077 16
228
+ I-literate_probability 0.000 0.000 0.000 7
229
+ I-literate_qualified_assertion 0.000 0.000 0.000 21
230
+ I-literate_relative_chain 0.402 0.422 0.412 251
231
+ I-literate_technical_abbreviation 0.833 0.455 0.588 11
232
+ I-literate_technical_term 0.250 0.273 0.261 11
233
+ I-literate_temporal_embedding 1.000 0.600 0.750 50
234
+ I-literate_third_person_reference 0.556 0.833 0.667 6
235
+ I-oral_alliteration 0.778 0.778 0.778 9
236
+ I-oral_anaphora 0.116 0.080 0.095 100
237
+ I-oral_asyndeton 0.000 0.000 0.000 7
238
+ I-oral_audience_response 0.864 0.905 0.884 21
239
+ I-oral_binomial_expression 0.533 0.727 0.615 11
240
+ I-oral_conflict_frame 1.000 0.714 0.833 7
241
+ I-oral_discourse_formula 0.545 1.000 0.706 6
242
+ I-oral_dramatic_pause 0.400 0.500 0.444 4
243
+ I-oral_embodied_action 0.000 0.000 0.000 16
244
+ I-oral_epistrophe 0.000 0.000 0.000 3
245
+ I-oral_epithet 0.400 0.400 0.400 5
246
+ I-oral_everyday_example 0.947 0.900 0.923 20
247
+ I-oral_first_person 0.000 0.000 0.000 2
248
+ I-oral_imperative 0.714 0.370 0.488 27
249
+ I-oral_inclusive_we 0.754 0.920 0.829 50
250
+ I-oral_intensifier_doubling 0.800 1.000 0.889 4
251
+ I-oral_lexical_repetition 0.250 0.317 0.280 41
252
+ I-oral_named_individual 0.620 0.646 0.633 48
253
+ I-oral_parallelism 0.485 0.237 0.318 135
254
+ I-oral_phatic_check 1.000 1.000 1.000 3
255
+ I-oral_phatic_filler 1.000 0.400 0.571 5
256
+ I-oral_polysyndeton 0.700 0.171 0.275 82
257
+ I-oral_proverb 0.938 0.405 0.566 37
258
+ I-oral_refrain 1.000 1.000 1.000 4
259
+ I-oral_religious_formula 1.000 0.062 0.118 16
260
+ I-oral_rhetorical_question 0.389 0.467 0.424 15
261
+ I-oral_rhythm 0.957 0.584 0.726 77
262
+ I-oral_second_person 0.250 0.143 0.182 7
263
+ I-oral_self_correction 0.889 0.800 0.842 20
264
+ I-oral_sensory_detail 0.833 1.000 0.909 5
265
+ I-oral_simple_conjunction 0.625 1.000 0.769 5
266
+ I-oral_specific_place 0.556 0.625 0.588 8
267
+ I-oral_temporal_anchor 0.056 0.100 0.071 10
268
+ I-oral_tricolon 0.329 0.964 0.491 28
269
+ I-oral_us_them 0.750 0.333 0.462 9
270
+ I-oral_vocative 0.846 0.702 0.767 47
271
  ```
272
 
273
  </details>
 
373
 
374
  </details>
375
 
376
+ **Macro F1 (all 145 labels):** 0.4611
377
  **Weighted F1:** 0.645
378
  **Accuracy:** 66.5%
379
 
config.json CHANGED
@@ -1,16 +1,13 @@
1
  {
2
  "add_cross_attention": false,
3
  "architectures": [
4
- "BertTokenClassifier"
5
  ],
6
  "attention_probs_dropout_prob": 0.1,
7
  "bos_token_id": null,
8
  "classifier_dropout": null,
9
- "dropout": 0.1,
10
  "dtype": "float32",
11
  "eos_token_id": null,
12
- "focal_alpha": 1.0,
13
- "focal_gamma": 1.0,
14
  "gradient_checkpointing": false,
15
  "hidden_act": "gelu",
16
  "hidden_dropout_prob": 0.1,
@@ -314,7 +311,7 @@
314
  },
315
  "layer_norm_eps": 1e-12,
316
  "max_position_embeddings": 512,
317
- "model_type": "bert-token-classifier",
318
  "num_attention_heads": 12,
319
  "num_hidden_layers": 12,
320
  "pad_token_id": 0,
 
1
  {
2
  "add_cross_attention": false,
3
  "architectures": [
4
+ "BertForTokenClassification"
5
  ],
6
  "attention_probs_dropout_prob": 0.1,
7
  "bos_token_id": null,
8
  "classifier_dropout": null,
 
9
  "dtype": "float32",
10
  "eos_token_id": null,
 
 
11
  "gradient_checkpointing": false,
12
  "hidden_act": "gelu",
13
  "hidden_dropout_prob": 0.1,
 
311
  },
312
  "layer_norm_eps": 1e-12,
313
  "max_position_embeddings": 512,
314
+ "model_type": "bert",
315
  "num_attention_heads": 12,
316
  "num_hidden_layers": 12,
317
  "pad_token_id": 0,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d310f9767c901ae616ffd9d2fa59addc5e10a450b3b25d44c12bdedaeab3fbeb
3
  size 436035932
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:862313c38ca9273d9d4cbb21c001ce9fbf3798c8c3601eddfc009e63303341d7
3
  size 436035932