permutans committed
Commit d64c032 · verified · 1 Parent(s): 89df35e

Upload folder using huggingface_hub

Files changed (5)
  1. README.md +69 -69
  2. config.json +73 -67
  3. head_config.json +1 -1
  4. model.safetensors +2 -2
  5. type_to_idx.json +12 -11
README.md CHANGED
@@ -22,7 +22,7 @@ datasets:
 
 BERT-based token classifier for detecting **oral and literate markers** in text, based on Walter Ong's "Orality and Literacy" (1982).
 
-This model performs multi-label span-level detection of 52 rhetorical marker types, where each token independently carries B/I/O labels per type — allowing overlapping spans (e.g. a token that is simultaneously part of a concessive and a nested clause).
+This model performs multi-label span-level detection of 53 rhetorical marker types, where each token independently carries B/I/O labels per type — allowing overlapping spans (e.g. a token that is simultaneously part of a concessive and a nested clause).
 
 ## Model Details
 
@@ -30,8 +30,8 @@ This model performs multi-label span-level detection of 52 rhetorical marker typ
 |----------|-------|
 | Base model | `bert-base-uncased` |
 | Task | Multi-label token classification (independent B/I/O per type) |
-| Marker types | 52 (21 oral, 31 literate) |
-| Test macro F1 | **0.394** (per-type detection, binary positive = B or I) |
+| Marker types | 53 (22 oral, 31 literate) |
+| Test macro F1 | **0.400** (per-type detection, binary positive = B or I) |
 | Training | 20 epochs, batch 24, lr 3e-5, fp16 |
 | Regularization | Mixout (p=0.1) — stochastic L2 anchor to pretrained weights |
 | Loss | Per-type weighted cross-entropy with inverse-frequency type weights |
@@ -80,16 +80,16 @@ for i, token in enumerate(tokens):
 - Types with fewer than 150 annotated spans are excluded from training
 - Multi-label BIO annotation: tokens can carry labels for multiple overlapping marker types simultaneously
 
-## Marker Types (52)
+## Marker Types (53)
 
-### Oral Markers (21 types)
+### Oral Markers (22 types)
 
 Characteristics of oral tradition and spoken discourse:
 
 | Category | Markers |
 |----------|---------|
 | **Address & Interaction** | vocative, imperative, second_person, inclusive_we, rhetorical_question, phatic_check, phatic_filler |
-| **Repetition & Pattern** | anaphora, tricolon, lexical_repetition, antithesis |
+| **Repetition & Pattern** | anaphora, parallelism, tricolon, lexical_repetition, antithesis |
 | **Conjunction** | simple_conjunction |
 | **Formulas** | discourse_formula, intensifier_doubling |
 | **Narrative** | named_individual, specific_place, temporal_anchor, sensory_detail, embodied_action, everyday_example |
@@ -115,75 +115,75 @@ Characteristics of written, analytical discourse:
 Per-type detection F1 on test set (binary: B or I = positive, O = negative):
 
 <details><summary>Click to show per-marker precision/recall/F1/support</summary>
-
 ```
 Type                             Prec    Rec     F1      Sup
 ========================================================================
-literate_abstract_noun           0.283   0.036   0.064   474
-literate_additive_formal         0.458   0.388   0.420   85
-literate_agent_demoted           0.495   0.569   0.530   288
-literate_agentless_passive       0.659   0.592   0.624   1285
-literate_aside                   0.468   0.524   0.494   481
-literate_categorical_statement   0.256   0.141   0.182   389
-literate_causal_explicit         0.457   0.196   0.275   382
-literate_citation                0.624   0.539   0.578   243
-literate_conceptual_metaphor     0.366   0.242   0.291   219
-literate_concessive              0.558   0.290   0.382   742
-literate_concessive_connector    0.286   0.324   0.304   37
-literate_concrete_setting        0.222   0.132   0.166   303
-literate_conditional             0.664   0.597   0.629   1642
-literate_contrastive             0.481   0.227   0.308   388
-literate_cross_reference         0.644   0.326   0.433   89
-literate_definitional_move       0.279   0.284   0.281   67
-literate_enumeration             0.507   0.580   0.541   855
-literate_epistemic_hedge         0.523   0.405   0.456   543
-literate_evidential              0.487   0.457   0.471   162
-literate_institutional_subject   0.330   0.274   0.300   248
-literate_list_structure          0.929   0.464   0.619   653
-literate_metadiscourse           0.355   0.251   0.294   355
-literate_nested_clauses          0.212   0.140   0.169   1250
-literate_nominalization          0.527   0.397   0.453   1147
-literate_objectifying_stance     0.593   0.400   0.478   200
-literate_probability             0.740   0.544   0.627   136
-literate_qualified_assertion     0.153   0.073   0.099   123
-literate_relative_chain          0.333   0.179   0.233   1717
-literate_technical_abbreviation  0.613   0.725   0.665   153
-literate_technical_term          0.490   0.311   0.381   897
-literate_temporal_embedding      0.210   0.143   0.170   553
-oral_anaphora                    0.205   0.128   0.157   141
-oral_antithesis                  0.389   0.181   0.247   453
-oral_discourse_formula           0.557   0.173   0.263   568
-oral_embodied_action             0.421   0.213   0.283   489
-oral_everyday_example            0.219   0.209   0.214   358
-oral_imperative                  0.537   0.695   0.606   200
-oral_inclusive_we                0.616   0.599   0.608   751
-oral_intensifier_doubling        0.632   0.152   0.245   79
-oral_lexical_repetition          0.406   0.468   0.435   218
-oral_named_individual            0.535   0.566   0.550   813
-oral_phatic_check                0.591   0.684   0.634   76
-oral_phatic_filler               0.469   0.524   0.495   189
-oral_rhetorical_question         0.677   0.646   0.661   1273
-oral_second_person               0.618   0.493   0.549   842
-oral_self_correction             0.582   0.205   0.303   156
-oral_sensory_detail              0.281   0.247   0.263   352
-oral_simple_conjunction          0.146   0.085   0.107   71
-oral_specific_place              0.534   0.582   0.557   373
-oral_temporal_anchor             0.518   0.510   0.514   563
-oral_tricolon                    0.247   0.185   0.212   562
-oral_vocative                    0.667   0.684   0.675   158
+literate_abstract_noun           0.211   0.319   0.254   464
+literate_additive_formal         0.263   0.506   0.346   83
+literate_agent_demoted           0.364   0.629   0.461   291
+literate_agentless_passive       0.545   0.701   0.613   1274
+literate_aside                   0.396   0.565   0.466   467
+literate_categorical_statement   0.246   0.245   0.245   388
+literate_causal_explicit         0.325   0.305   0.315   370
+literate_citation                0.500   0.551   0.524   243
+literate_conceptual_metaphor     0.168   0.297   0.215   219
+literate_concessive              0.542   0.491   0.515   731
+literate_concessive_connector    0.113   0.378   0.174   37
+literate_concrete_setting        0.174   0.279   0.214   301
+literate_conditional             0.586   0.710   0.642   1610
+literate_contrastive             0.374   0.343   0.358   382
+literate_cross_reference         0.351   0.317   0.333   82
+literate_definitional_move       0.217   0.371   0.274   62
+literate_enumeration             0.456   0.570   0.507   899
+literate_epistemic_hedge         0.415   0.511   0.458   534
+literate_evidential              0.364   0.503   0.422   175
+literate_institutional_subject   0.296   0.520   0.378   246
+literate_list_structure          0.709   0.559   0.625   653
+literate_metadiscourse           0.291   0.451   0.354   355
+literate_nested_clauses          0.105   0.266   0.151   1250
+literate_nominalization          0.475   0.554   0.511   1144
+literate_objectifying_stance     0.518   0.448   0.481   194
+literate_probability             0.612   0.548   0.578   115
+literate_qualified_assertion     0.185   0.160   0.172   125
+literate_relative_chain          0.320   0.537   0.401   1713
+literate_technical_abbreviation  0.545   0.783   0.643   161
+literate_technical_term          0.331   0.458   0.384   909
+literate_temporal_embedding      0.222   0.249   0.235   570
+oral_anaphora                    0.207   0.248   0.226   137
+oral_antithesis                  0.245   0.289   0.265   453
+oral_discourse_formula           0.353   0.384   0.368   563
+oral_embodied_action             0.263   0.374   0.309   470
+oral_everyday_example            0.160   0.164   0.162   366
+oral_imperative                  0.519   0.670   0.585   200
+oral_inclusive_we                0.587   0.672   0.626   752
+oral_intensifier_doubling        0.310   0.165   0.215   79
+oral_lexical_repetition          0.293   0.488   0.366   217
+oral_named_individual            0.428   0.676   0.524   791
+oral_parallelism                 0.654   0.048   0.089   710
+oral_phatic_check                0.465   0.882   0.609   76
+oral_phatic_filler               0.375   0.582   0.456   182
+oral_rhetorical_question         0.589   0.894   0.710   1264
+oral_second_person               0.614   0.545   0.577   833
+oral_self_correction             0.597   0.295   0.395   156
+oral_sensory_detail              0.275   0.312   0.293   352
+oral_simple_conjunction          0.096   0.211   0.132   71
+oral_specific_place              0.472   0.716   0.569   387
+oral_temporal_anchor             0.397   0.748   0.518   551
+oral_tricolon                    0.274   0.285   0.280   557
+oral_vocative                    0.634   0.761   0.692   155
 ========================================================================
-Macro avg (types w/ support)                     0.394
+Macro avg (types w/ support)                     0.400
 ```
 
 </details>
 
-**Missing labels (test set):** 0/52 — all types detected at least once.
+**Missing labels (test set):** 0/53 — all types detected at least once.
 
 Notable patterns:
-- **Strong performers** (F1 > 0.5): vocative (0.675), technical_abbreviation (0.665), rhetorical_question (0.661), phatic_check (0.634), conditional (0.629), probability (0.627), agentless_passive (0.624), list_structure (0.619), inclusive_we (0.608), imperative (0.606), citation (0.578), specific_place (0.557), named_individual (0.550), second_person (0.549), enumeration (0.541), agent_demoted (0.530), temporal_anchor (0.514)
-- **Weak performers** (F1 < 0.2): abstract_noun (0.064), qualified_assertion (0.099), simple_conjunction (0.107), anaphora (0.157), concrete_setting (0.166), nested_clauses (0.169), temporal_embedding (0.170), categorical_statement (0.182)
-- **Precision-recall tradeoff**: Most types now show higher precision than recall, indicating the model under-predicts rather than over-predicts markers (reversed from the previous release)
-- **Dropped type**: `oral_parallelism` was excluded from this training run (fell below the 150-span minimum threshold)
+- **Strong performers** (F1 > 0.5): rhetorical_question (0.710), vocative (0.692), technical_abbreviation (0.643), conditional (0.642), inclusive_we (0.626), list_structure (0.625), agentless_passive (0.613), phatic_check (0.609), imperative (0.585), probability (0.578), second_person (0.577), specific_place (0.569), citation (0.524), named_individual (0.524), temporal_anchor (0.518), concessive (0.515), nominalization (0.511), enumeration (0.507)
+- **Weak performers** (F1 < 0.2): parallelism (0.089), simple_conjunction (0.132), nested_clauses (0.151), everyday_example (0.162), qualified_assertion (0.172), concessive_connector (0.174)
+- **Precision-recall tradeoff**: Most types now show higher recall than precision, indicating the model over-predicts rather than under-predicts markers (reversed from the previous release). Notable exceptions include `parallelism` (0.654 precision / 0.048 recall), `self_correction`, and `intensifier_doubling`, which remain high-precision but low-recall.
+- **Recovered type**: `oral_parallelism` crossed the 150-span threshold and was re-included, though its near-zero recall (0.048) means it is effectively non-functional despite high precision when it does fire.
 
 ## Architecture
 
@@ -215,9 +215,9 @@ classifier.bias → randomly initialized
 ## Limitations
 
-- **Low-precision types**: Several types show precision below 0.25, meaning most predictions for those types are false positives
-- **Low-recall types**: `abstract_noun` (0.036 recall), `simple_conjunction` (0.085), and `qualified_assertion` (0.073) are near-invisible to the model despite nonzero precision
-- **Excluded type**: `oral_parallelism` fell below the 150-span minimum and was excluded; structural parallelism remains undetected
+- **Recall-dominated errors**: Most types over-predict (recall > precision), producing false positives; downstream applications may need confidence thresholding
+- **Near-zero recall types**: `parallelism` (0.048 recall), `intensifier_doubling` (0.165), and `simple_conjunction` (0.211) are rarely detected despite being present in training data
+- **Low-precision types**: `simple_conjunction` (0.096), `nested_clauses` (0.105), and `concessive_connector` (0.113) have precision below 0.15, meaning most predictions for those types are false positives
 - **Context window**: 128 tokens max; longer spans may be truncated
 - **Domain**: Trained primarily on historical/literary texts; may underperform on modern social media
 - **Subjectivity**: Some marker boundaries are inherently ambiguous
@@ -239,4 +239,4 @@ classifier.bias → randomly initialized
 
 ---
 
-*Trained: February 2026*
+*Trained: February 2026*
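The README changes above keep the same detection metric: per marker type, a token counts as positive if its tag is B or I and negative if it is O, and macro F1 averages the per-type F1 scores. A minimal sketch of that per-type computation, using hypothetical tag sequences (the repo's actual evaluation code is not shown on this page):

```python
def detection_prf(gold, pred):
    """Per-type detection metric: B or I = positive, O = negative.

    gold, pred: equal-length lists of BIO tags ('B', 'I', 'O') for ONE
    marker type over the same token sequence.
    """
    tp = sum(g != "O" and p != "O" for g, p in zip(gold, pred))
    fp = sum(g == "O" and p != "O" for g, p in zip(gold, pred))
    fn = sum(g != "O" and p == "O" for g, p in zip(gold, pred))
    prec = tp / (tp + fp) if tp + fp else 0.0
    rec = tp / (tp + fn) if tp + fn else 0.0
    f1 = 2 * prec * rec / (prec + rec) if prec + rec else 0.0
    return prec, rec, f1

# Toy example: 5 tokens, one marker type.
gold = ["O", "B", "I", "O", "B"]
pred = ["B", "B", "O", "O", "B"]
print(detection_prf(gold, pred))
```

Note that under this binary reduction a B predicted where the gold tag is I (or vice versa) still counts as a true positive, so span boundaries are not penalized, only presence/absence per token.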
config.json CHANGED
@@ -44,42 +44,45 @@
 "120": "O-oral_named_individual",
 "121": "B-oral_named_individual",
 "122": "I-oral_named_individual",
-"123": "O-oral_phatic_check",
-"124": "B-oral_phatic_check",
-"125": "I-oral_phatic_check",
-"126": "O-oral_phatic_filler",
-"127": "B-oral_phatic_filler",
-"128": "I-oral_phatic_filler",
-"129": "O-oral_rhetorical_question",
+"123": "O-oral_parallelism",
+"124": "B-oral_parallelism",
+"125": "I-oral_parallelism",
+"126": "O-oral_phatic_check",
+"127": "B-oral_phatic_check",
+"128": "I-oral_phatic_check",
+"129": "O-oral_phatic_filler",
 "13": "B-literate_aside",
-"130": "B-oral_rhetorical_question",
-"131": "I-oral_rhetorical_question",
-"132": "O-oral_second_person",
-"133": "B-oral_second_person",
-"134": "I-oral_second_person",
-"135": "O-oral_self_correction",
-"136": "B-oral_self_correction",
-"137": "I-oral_self_correction",
-"138": "O-oral_sensory_detail",
-"139": "B-oral_sensory_detail",
+"130": "B-oral_phatic_filler",
+"131": "I-oral_phatic_filler",
+"132": "O-oral_rhetorical_question",
+"133": "B-oral_rhetorical_question",
+"134": "I-oral_rhetorical_question",
+"135": "O-oral_second_person",
+"136": "B-oral_second_person",
+"137": "I-oral_second_person",
+"138": "O-oral_self_correction",
+"139": "B-oral_self_correction",
 "14": "I-literate_aside",
-"140": "I-oral_sensory_detail",
-"141": "O-oral_simple_conjunction",
-"142": "B-oral_simple_conjunction",
-"143": "I-oral_simple_conjunction",
-"144": "O-oral_specific_place",
-"145": "B-oral_specific_place",
-"146": "I-oral_specific_place",
-"147": "O-oral_temporal_anchor",
-"148": "B-oral_temporal_anchor",
-"149": "I-oral_temporal_anchor",
+"140": "I-oral_self_correction",
+"141": "O-oral_sensory_detail",
+"142": "B-oral_sensory_detail",
+"143": "I-oral_sensory_detail",
+"144": "O-oral_simple_conjunction",
+"145": "B-oral_simple_conjunction",
+"146": "I-oral_simple_conjunction",
+"147": "O-oral_specific_place",
+"148": "B-oral_specific_place",
+"149": "I-oral_specific_place",
 "15": "O-literate_categorical_statement",
-"150": "O-oral_tricolon",
-"151": "B-oral_tricolon",
-"152": "I-oral_tricolon",
-"153": "O-oral_vocative",
-"154": "B-oral_vocative",
-"155": "I-oral_vocative",
+"150": "O-oral_temporal_anchor",
+"151": "B-oral_temporal_anchor",
+"152": "I-oral_temporal_anchor",
+"153": "O-oral_tricolon",
+"154": "B-oral_tricolon",
+"155": "I-oral_tricolon",
+"156": "O-oral_vocative",
+"157": "B-oral_vocative",
+"158": "I-oral_vocative",
 "16": "B-literate_categorical_statement",
 "17": "I-literate_categorical_statement",
 "18": "O-literate_causal_explicit",
@@ -218,17 +221,18 @@
 "B-oral_intensifier_doubling": 115,
 "B-oral_lexical_repetition": 118,
 "B-oral_named_individual": 121,
-"B-oral_phatic_check": 124,
-"B-oral_phatic_filler": 127,
-"B-oral_rhetorical_question": 130,
-"B-oral_second_person": 133,
-"B-oral_self_correction": 136,
-"B-oral_sensory_detail": 139,
-"B-oral_simple_conjunction": 142,
-"B-oral_specific_place": 145,
-"B-oral_temporal_anchor": 148,
-"B-oral_tricolon": 151,
-"B-oral_vocative": 154,
+"B-oral_parallelism": 124,
+"B-oral_phatic_check": 127,
+"B-oral_phatic_filler": 130,
+"B-oral_rhetorical_question": 133,
+"B-oral_second_person": 136,
+"B-oral_self_correction": 139,
+"B-oral_sensory_detail": 142,
+"B-oral_simple_conjunction": 145,
+"B-oral_specific_place": 148,
+"B-oral_temporal_anchor": 151,
+"B-oral_tricolon": 154,
+"B-oral_vocative": 157,
 "I-literate_abstract_noun": 2,
 "I-literate_additive_formal": 5,
 "I-literate_agent_demoted": 8,
@@ -270,17 +274,18 @@
 "I-oral_intensifier_doubling": 116,
 "I-oral_lexical_repetition": 119,
 "I-oral_named_individual": 122,
-"I-oral_phatic_check": 125,
-"I-oral_phatic_filler": 128,
-"I-oral_rhetorical_question": 131,
-"I-oral_second_person": 134,
-"I-oral_self_correction": 137,
-"I-oral_sensory_detail": 140,
-"I-oral_simple_conjunction": 143,
-"I-oral_specific_place": 146,
-"I-oral_temporal_anchor": 149,
-"I-oral_tricolon": 152,
-"I-oral_vocative": 155,
+"I-oral_parallelism": 125,
+"I-oral_phatic_check": 128,
+"I-oral_phatic_filler": 131,
+"I-oral_rhetorical_question": 134,
+"I-oral_second_person": 137,
+"I-oral_self_correction": 140,
+"I-oral_sensory_detail": 143,
+"I-oral_simple_conjunction": 146,
+"I-oral_specific_place": 149,
+"I-oral_temporal_anchor": 152,
+"I-oral_tricolon": 155,
+"I-oral_vocative": 158,
 "O-literate_abstract_noun": 0,
 "O-literate_additive_formal": 3,
 "O-literate_agent_demoted": 6,
@@ -322,24 +327,25 @@
 "O-oral_intensifier_doubling": 114,
 "O-oral_lexical_repetition": 117,
 "O-oral_named_individual": 120,
-"O-oral_phatic_check": 123,
-"O-oral_phatic_filler": 126,
-"O-oral_rhetorical_question": 129,
-"O-oral_second_person": 132,
-"O-oral_self_correction": 135,
-"O-oral_sensory_detail": 138,
-"O-oral_simple_conjunction": 141,
-"O-oral_specific_place": 144,
-"O-oral_temporal_anchor": 147,
-"O-oral_tricolon": 150,
-"O-oral_vocative": 153
+"O-oral_parallelism": 123,
+"O-oral_phatic_check": 126,
+"O-oral_phatic_filler": 129,
+"O-oral_rhetorical_question": 132,
+"O-oral_second_person": 135,
+"O-oral_self_correction": 138,
+"O-oral_sensory_detail": 141,
+"O-oral_simple_conjunction": 144,
+"O-oral_specific_place": 147,
+"O-oral_temporal_anchor": 150,
+"O-oral_tricolon": 153,
+"O-oral_vocative": 156
 },
 "layer_norm_eps": 1e-12,
 "max_position_embeddings": 512,
 "model_type": "bert",
 "num_attention_heads": 12,
 "num_hidden_layers": 12,
-"num_types": 52,
+"num_types": 53,
 "pad_token_id": 0,
 "position_embedding_type": "absolute",
 "tie_word_embeddings": true,
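The `id2label`/`label2id` blocks in this config follow a regular layout: each marker type occupies three consecutive ids in the flat label space. Assuming `t` is the type index from `type_to_idx.json`, the ids are `O = 3t`, `B = 3t + 1`, `I = 3t + 2` (e.g. `oral_parallelism` at type index 41 gets ids 123/124/125, as in the diff above). A sketch of that arithmetic:

```python
def label_ids(type_idx):
    """Map a type index from type_to_idx.json to its flat O/B/I label ids.

    Layout inferred from this config: three consecutive ids per type,
    ordered O, B, I.
    """
    base = 3 * type_idx
    return {"O": base, "B": base + 1, "I": base + 2}

# oral_parallelism is type index 41 in the new type_to_idx.json:
print(label_ids(41))  # {'O': 123, 'B': 124, 'I': 125}
```

The same formula explains why adding one type grows the label space by exactly three ids (156 → 159 entries).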
head_config.json CHANGED
@@ -1,5 +1,5 @@
 {
 "model_name": "bert-base-uncased",
-"num_types": 52,
+"num_types": 53,
 "hidden_size": 768
 }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e7f65c36ddbc7fa2756a9e31ff2735c9708f2d891e4d637519a90675c6aa7088
-size 436073152
+oid sha256:41995685c78ead06fdda874b90a8bdf7b283997fa076207a33c0bd7136179ef3
+size 436082548
type_to_idx.json CHANGED
@@ -40,15 +40,16 @@
 "oral_intensifier_doubling": 38,
 "oral_lexical_repetition": 39,
 "oral_named_individual": 40,
-"oral_phatic_check": 41,
-"oral_phatic_filler": 42,
-"oral_rhetorical_question": 43,
-"oral_second_person": 44,
-"oral_self_correction": 45,
-"oral_sensory_detail": 46,
-"oral_simple_conjunction": 47,
-"oral_specific_place": 48,
-"oral_temporal_anchor": 49,
-"oral_tricolon": 50,
-"oral_vocative": 51
+"oral_parallelism": 41,
+"oral_phatic_check": 42,
+"oral_phatic_filler": 43,
+"oral_rhetorical_question": 44,
+"oral_second_person": 45,
+"oral_self_correction": 46,
+"oral_sensory_detail": 47,
+"oral_simple_conjunction": 48,
+"oral_specific_place": 49,
+"oral_temporal_anchor": 50,
+"oral_tricolon": 51,
+"oral_vocative": 52
 }
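Because `oral_parallelism` was inserted at index 41, every later oral type shifted up by one (e.g. `oral_vocative` moved from 51 to 52), which also changes the flat label ids and the classifier head shape, hence the new `model.safetensors`. Downstream code that cached type indices from the previous 52-type release could remap them with a helper like this (hypothetical, not part of the repo):

```python
def migrate_type_idx(old_idx, inserted_at=41):
    """Map a type index from the 52-type release to the 53-type release.

    oral_parallelism was inserted at index 41 in the new type_to_idx.json,
    so indices below 41 are unchanged and the rest shift up by one.
    """
    return old_idx if old_idx < inserted_at else old_idx + 1

# oral_phatic_check was 41 in the old release and is 42 now:
print(migrate_type_idx(41))  # 42
```

Cached flat label ids cannot be migrated this way reliably; reload them from the new `config.json` instead.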