browndw committed on
Commit
75bce74
·
verified ·
1 Parent(s): 369e511

Update spaCy pipeline

Browse files
.gitattributes CHANGED
@@ -34,3 +34,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
  en_docusco_spacy_cd-any-py3-none-any.whl filter=lfs diff=lfs merge=lfs -text
36
  tok2vec/model filter=lfs diff=lfs merge=lfs -text
 
 
 
 
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
  en_docusco_spacy_cd-any-py3-none-any.whl filter=lfs diff=lfs merge=lfs -text
36
  tok2vec/model filter=lfs diff=lfs merge=lfs -text
37
+ ner/model filter=lfs diff=lfs merge=lfs -text
38
+ tagger/model filter=lfs diff=lfs merge=lfs -text
39
+ vocab/strings.json filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -14,28 +14,28 @@ model-index:
14
  metrics:
15
  - name: NER Precision
16
  type: precision
17
- value: 0.8206658604
18
  - name: NER Recall
19
  type: recall
20
- value: 0.80740266
21
  - name: NER F Score
22
  type: f_score
23
- value: 0.8139802353
24
  - task:
25
  name: TAG
26
  type: token-classification
27
  metrics:
28
  - name: TAG (XPOS) Accuracy
29
  type: accuracy
30
- value: 0.9763683149
31
  ---
32
  English pipeline for part-of-speech and rhetorical tagging using a smaller 'common dictionary'.
33
 
34
  | Feature | Description |
35
  | --- | --- |
36
  | **Name** | `en_docusco_spacy_cd` |
37
- | **Version** | `1.3` |
38
- | **spaCy** | `>=3.7.4,<3.8.0` |
39
  | **Default Pipeline** | `tok2vec`, `tagger`, `ner` |
40
  | **Components** | `tok2vec`, `tagger`, `ner` |
41
  | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
@@ -47,11 +47,11 @@ English pipeline for part-of-speech and rhetorical tagging using a smaller 'comm
47
 
48
  <details>
49
 
50
- <summary>View label scheme (289 labels for 2 components)</summary>
51
 
52
  | Component | Labels |
53
  | --- | --- |
54
- | **`tagger`** | `APPGE`, `AT`, `AT1`, `BCL21`, `BCL22`, `CC`, `CCB`, `CS`, `CS21`, `CS22`, `CS31`, `CS32`, `CS33`, `CS41`, `CS42`, `CS43`, `CS44`, `CSA`, `CSN`, `CST`, `CSW`, `CSW31`, `CSW32`, `CSW33`, `DA`, `DA1`, `DA2`, `DAR`, `DAT`, `DB`, `DB2`, `DD`, `DD1`, `DD2`, `DDQ`, `DDQGE`, `DDQGE31`, `DDQGE32`, `DDQGE33`, `DDQV`, `DDQV31`, `DDQV32`, `DDQV33`, `EX`, `FO`, `FU`, `FW`, `GE`, `IF`, `II`, `II21`, `II22`, `II31`, `II32`, `II33`, `II41`, `II42`, `II43`, `II44`, `IO`, `IW`, `JJ`, `JJ21`, `JJ22`, `JJ31`, `JJ32`, `JJ33`, `JJ41`, `JJ42`, `JJ43`, `JJ44`, `JJR`, `JJT`, `JK`, `MC`, `MC1`, `MC121`, `MC122`, `MC2`, `MC221`, `MC222`, `MCMC`, `MD`, `MF`, `ND1`, `NN`, `NN1`, `NN121`, `NN122`, `NN131`, `NN132`, `NN133`, `NN141`, `NN142`, `NN143`, `NN144`, `NN2`, `NN21`, `NN22`, `NN221`, `NN222`, `NN31`, `NN32`, `NN33`, `NNA`, `NNB`, `NNL1`, `NNL2`, `NNO`, `NNO2`, `NNT1`, `NNT131`, `NNT132`, `NNT133`, `NNT2`, `NNU`, `NNU1`, `NNU2`, `NNU21`, `NNU22`, `NP`, `NP1`, `NP2`, `NPD1`, `NPD2`, `NPM1`, `NPM2`, `PN`, `PN1`, `PN121`, `PN122`, `PN21`, `PN22`, `PNQO`, `PNQS`, `PNQS31`, `PNQS32`, `PNQS33`, `PNQV`, `PNQV31`, `PNQV32`, `PNQV33`, `PNX1`, `PPGE`, `PPH1`, `PPHO1`, `PPHO2`, `PPHS1`, `PPHS2`, `PPIO1`, `PPIO2`, `PPIS1`, `PPIS2`, `PPX1`, `PPX121`, `PPX122`, `PPX2`, `PPX221`, `PPX222`, `PPY`, `RA`, `RA21`, `RA22`, `REX`, `REX21`, `REX22`, `REX41`, `REX42`, `REX43`, `REX44`, `RG`, `RG21`, `RG22`, `RG41`, `RG42`, `RG43`, `RG44`, `RGQ`, `RGQV`, `RGQV31`, `RGQV32`, `RGQV33`, `RGR`, `RGT`, `RL`, `RL21`, `RL22`, `RL31`, `RL32`, `RL33`, `RP`, `RPK`, `RR`, `RR21`, `RR22`, `RR31`, `RR32`, `RR33`, `RR41`, `RR42`, `RR43`, `RR44`, `RR51`, `RR52`, `RR53`, `RR54`, `RR55`, `RRQ`, `RRQV`, `RRQV31`, `RRQV32`, `RRQV33`, `RRR`, `RRT`, `RT`, `RT21`, `RT22`, `RT31`, `RT32`, `RT33`, `RT41`, `RT42`, `RT43`, `RT44`, `TO`, `UH`, `UH21`, `UH22`, `UH31`, `UH32`, `UH33`, `VB0`, `VBDR`, `VBDZ`, `VBG`, `VBI`, `VBM`, `VBN`, `VBR`, `VBZ`, `VD0`, `VDD`, `VDG`, `VDI`, `VDN`, `VDZ`, `VH0`, `VHD`, `VHG`, `VHI`, 
`VHN`, `VHZ`, `VM`, `VM21`, `VM22`, `VMK`, `VV0`, `VVD`, `VVG`, `VVGK`, `VVI`, `VVN`, `VVNK`, `VVZ`, `XX`, `Y`, `ZZ1`, `ZZ2`, `ZZ221`, `ZZ222` |
55
  | **`ner`** | `ActorsAbstractions`, `ActorsFirstPerson`, `ActorsPeople`, `ActorsPublicEntities`, `CitationAuthority`, `CitationControversy`, `CitationNeutral`, `ConfidenceHedged`, `ConfidenceHigh`, `OrganizationNarrative`, `OrganizationReasoning`, `PlanningFuture`, `PlanningStrategy`, `SentimentNegative`, `SentimentPositive`, `SignpostingAcademicWritingMoves`, `SignpostingMetadiscourse`, `StanceEmphatic`, `StanceModerated` |
56
 
57
  </details>
@@ -60,10 +60,10 @@ English pipeline for part-of-speech and rhetorical tagging using a smaller 'comm
60
 
61
  | Type | Score |
62
  | --- | --- |
63
- | `TAG_ACC` | 97.64 |
64
- | `ENTS_F` | 81.40 |
65
- | `ENTS_P` | 82.07 |
66
- | `ENTS_R` | 80.74 |
67
- | `TOK2VEC_LOSS` | 150973939.97 |
68
- | `TAGGER_LOSS` | 3936874.26 |
69
- | `NER_LOSS` | 12742855.43 |
 
14
  metrics:
15
  - name: NER Precision
16
  type: precision
17
+ value: 0.8259539432
18
  - name: NER Recall
19
  type: recall
20
+ value: 0.8094701467
21
  - name: NER F Score
22
  type: f_score
23
+ value: 0.8176289731
24
  - task:
25
  name: TAG
26
  type: token-classification
27
  metrics:
28
  - name: TAG (XPOS) Accuracy
29
  type: accuracy
30
+ value: 0.9767705858
31
  ---
32
  English pipeline for part-of-speech and rhetorical tagging using a smaller 'common dictionary'.
33
 
34
  | Feature | Description |
35
  | --- | --- |
36
  | **Name** | `en_docusco_spacy_cd` |
37
+ | **Version** | `1.5` |
38
+ | **spaCy** | `>=3.8.6,<3.9.0` |
39
  | **Default Pipeline** | `tok2vec`, `tagger`, `ner` |
40
  | **Components** | `tok2vec`, `tagger`, `ner` |
41
  | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
 
47
 
48
  <details>
49
 
50
+ <summary>View label scheme (293 labels for 2 components)</summary>
51
 
52
  | Component | Labels |
53
  | --- | --- |
54
+ | **`tagger`** | `APPGE`, `AT`, `AT1`, `BCL21`, `BCL22`, `CC`, `CCB`, `CS`, `CS21`, `CS22`, `CS31`, `CS32`, `CS33`, `CS41`, `CS42`, `CS43`, `CS44`, `CSA`, `CSN`, `CST`, `CSW`, `CSW31`, `CSW32`, `CSW33`, `DA`, `DA1`, `DA2`, `DAR`, `DAT`, `DB`, `DB2`, `DD`, `DD1`, `DD2`, `DDQ`, `DDQGE`, `DDQGE31`, `DDQGE32`, `DDQGE33`, `DDQV`, `DDQV31`, `DDQV32`, `DDQV33`, `EX`, `FO`, `FU`, `FW`, `GE`, `IF`, `II`, `II21`, `II22`, `II31`, `II32`, `II33`, `II41`, `II42`, `II43`, `II44`, `IO`, `IW`, `JJ`, `JJ21`, `JJ22`, `JJ31`, `JJ32`, `JJ33`, `JJ41`, `JJ42`, `JJ43`, `JJ44`, `JJR`, `JJT`, `JK`, `MC`, `MC1`, `MC2`, `MC221`, `MC222`, `MCMC`, `MD`, `MF`, `ND1`, `NN`, `NN1`, `NN121`, `NN122`, `NN131`, `NN132`, `NN133`, `NN141`, `NN142`, `NN143`, `NN144`, `NN2`, `NN21`, `NN22`, `NN221`, `NN222`, `NN231`, `NN232`, `NN233`, `NN31`, `NN32`, `NN33`, `NNA`, `NNB`, `NNL1`, `NNL2`, `NNO`, `NNO2`, `NNT1`, `NNT131`, `NNT132`, `NNT133`, `NNT2`, `NNU`, `NNU1`, `NNU2`, `NNU21`, `NNU22`, `NP`, `NP1`, `NP2`, `NPD1`, `NPD2`, `NPM1`, `NPM2`, `PN`, `PN1`, `PN121`, `PN122`, `PN21`, `PN22`, `PNQO`, `PNQS`, `PNQS31`, `PNQS32`, `PNQS33`, `PNQV`, `PNQV31`, `PNQV32`, `PNQV33`, `PNX1`, `PPGE`, `PPH1`, `PPHO1`, `PPHO2`, `PPHS1`, `PPHS2`, `PPIO1`, `PPIO2`, `PPIS1`, `PPIS2`, `PPX1`, `PPX121`, `PPX122`, `PPX2`, `PPX221`, `PPX222`, `PPY`, `RA`, `RA21`, `RA22`, `REX`, `REX21`, `REX22`, `REX41`, `REX42`, `REX43`, `REX44`, `RG`, `RG21`, `RG22`, `RG31`, `RG32`, `RG33`, `RG41`, `RG42`, `RG43`, `RG44`, `RGQ`, `RGQV`, `RGQV31`, `RGQV32`, `RGQV33`, `RGR`, `RGT`, `RL`, `RL21`, `RL22`, `RL31`, `RL32`, `RL33`, `RP`, `RPK`, `RR`, `RR21`, `RR22`, `RR31`, `RR32`, `RR33`, `RR41`, `RR42`, `RR43`, `RR44`, `RR51`, `RR52`, `RR53`, `RR54`, `RR55`, `RRQ`, `RRQV`, `RRQV31`, `RRQV32`, `RRQV33`, `RRR`, `RRT`, `RT`, `RT21`, `RT22`, `RT31`, `RT32`, `RT33`, `RT41`, `RT42`, `RT43`, `RT44`, `TO`, `UH`, `UH21`, `UH22`, `UH31`, `UH32`, `UH33`, `VB0`, `VBDR`, `VBDZ`, `VBG`, `VBI`, `VBM`, `VBN`, `VBR`, `VBZ`, `VD0`, `VDD`, `VDG`, `VDI`, `VDN`, 
`VDZ`, `VH0`, `VHD`, `VHG`, `VHI`, `VHN`, `VHZ`, `VM`, `VM21`, `VM22`, `VMK`, `VV0`, `VVD`, `VVG`, `VVGK`, `VVI`, `VVN`, `VVNK`, `VVZ`, `XX`, `Y`, `ZZ1`, `ZZ2`, `ZZ221`, `ZZ222` |
55
  | **`ner`** | `ActorsAbstractions`, `ActorsFirstPerson`, `ActorsPeople`, `ActorsPublicEntities`, `CitationAuthority`, `CitationControversy`, `CitationNeutral`, `ConfidenceHedged`, `ConfidenceHigh`, `OrganizationNarrative`, `OrganizationReasoning`, `PlanningFuture`, `PlanningStrategy`, `SentimentNegative`, `SentimentPositive`, `SignpostingAcademicWritingMoves`, `SignpostingMetadiscourse`, `StanceEmphatic`, `StanceModerated` |
56
 
57
  </details>
 
60
 
61
  | Type | Score |
62
  | --- | --- |
63
+ | `TAG_ACC` | 97.68 |
64
+ | `ENTS_F` | 81.76 |
65
+ | `ENTS_P` | 82.60 |
66
+ | `ENTS_R` | 80.95 |
67
+ | `TOK2VEC_LOSS` | 210102533.04 |
68
+ | `TAGGER_LOSS` | 4732599.11 |
69
+ | `NER_LOSS` | 15047904.69 |
config.cfg CHANGED
@@ -1,6 +1,6 @@
1
  [paths]
2
- train = "spacy_train_07.spacy"
3
- dev = "spacy_dev_07.spacy"
4
  vectors = null
5
  init_tok2vec = null
6
 
@@ -106,8 +106,8 @@ dropout = 0.1
106
  accumulate_gradient = 1
107
  patience = 20000
108
  max_epochs = -1
109
- max_steps = 80000
110
- eval_frequency = 1000
111
  frozen_components = []
112
  annotating_components = []
113
  before_to_disk = null
@@ -142,8 +142,8 @@ eps = 0.00000001
142
  learn_rate = 0.001
143
 
144
  [training.score_weights]
145
- tag_acc = 0.4
146
- ents_f = 0.6
147
  ents_p = 0.0
148
  ents_r = 0.0
149
  ents_per_type = null
 
1
  [paths]
2
+ train = "spacy_train_cd.spacy"
3
+ dev = "spacy_test_cd.spacy"
4
  vectors = null
5
  init_tok2vec = null
6
 
 
106
  accumulate_gradient = 1
107
  patience = 20000
108
  max_epochs = -1
109
+ max_steps = 96000
110
+ eval_frequency = 1200
111
  frozen_components = []
112
  annotating_components = []
113
  before_to_disk = null
 
142
  learn_rate = 0.001
143
 
144
  [training.score_weights]
145
+ tag_acc = 0.35
146
+ ents_f = 0.65
147
  ents_p = 0.0
148
  ents_r = 0.0
149
  ents_per_type = null
en_docusco_spacy_cd-any-py3-none-any.whl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c4ba27b27fa3effb8af587c05fc1d6a1a7a312ced5d884a65cbb048a84e8a93
3
- size 8394802
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1cf13c1ab62c7f84ee5a9ab32638c4e2118db61d29f0f97a3ccb97bf37f08323
3
+ size 8544562
meta.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "lang":"en",
3
  "name":"docusco_spacy_cd",
4
- "version":"1.3",
5
  "description":"English pipeline for part-of-speech and rhetorical tagging using a smaller 'common dictionary'.",
6
  "author":"David Brown",
7
  "email":"dwb2@andrew.cmu.edu",
8
  "url":"https://docuscope.github.io",
9
  "license":"MIT",
10
- "spacy_version":">=3.7.4,<3.8.0",
11
- "spacy_git_version":"bff8725f4",
12
  "vectors":{
13
  "width":0,
14
  "vectors":0,
@@ -96,8 +96,6 @@
96
  "JK",
97
  "MC",
98
  "MC1",
99
- "MC121",
100
- "MC122",
101
  "MC2",
102
  "MC221",
103
  "MC222",
@@ -121,6 +119,9 @@
121
  "NN22",
122
  "NN221",
123
  "NN222",
 
 
 
124
  "NN31",
125
  "NN32",
126
  "NN33",
@@ -193,6 +194,9 @@
193
  "RG",
194
  "RG21",
195
  "RG22",
 
 
 
196
  "RG41",
197
  "RG42",
198
  "RG43",
@@ -327,112 +331,112 @@
327
 
328
  ],
329
  "performance":{
330
- "tag_acc":0.9763683149,
331
- "ents_f":0.8139802353,
332
- "ents_p":0.8206658604,
333
- "ents_r":0.80740266,
334
  "ents_per_type":{
335
- "ActorsPeople":{
336
- "p":0.8542168374,
337
- "r":0.8696353974,
338
- "f":0.8618571637
339
  },
340
- "ActorsPublicEntities":{
341
- "p":0.8169841646,
342
- "r":0.8246103931,
343
- "f":0.8207795646
344
  },
345
- "OrganizationReasoning":{
346
- "p":0.8497536946,
347
- "r":0.8395089039,
348
- "f":0.8446002337
349
  },
350
- "ActorsFirstPerson":{
351
- "p":0.8645147555,
352
- "r":0.8759769676,
353
- "f":0.8702081187
354
  },
355
  "ConfidenceHedged":{
356
- "p":0.8414330099,
357
- "r":0.849020822,
358
- "f":0.8452098865
359
  },
360
- "SentimentPositive":{
361
- "p":0.7541410809,
362
- "r":0.6988926856,
363
- "f":0.7254665342
 
 
 
 
 
 
 
 
 
 
364
  },
365
  "SignpostingMetadiscourse":{
366
- "p":0.9222331178,
367
- "r":0.8799657453,
368
- "f":0.9006037785
369
  },
370
- "ActorsAbstractions":{
371
- "p":0.812620511,
372
- "r":0.8397741356,
373
- "f":0.825974217
374
  },
375
- "CitationAuthority":{
376
- "p":0.7421895511,
377
- "r":0.6606683805,
378
- "f":0.6990603363
379
  },
380
- "SentimentNegative":{
381
- "p":0.7569732066,
382
- "r":0.681115792,
383
- "f":0.7170438069
384
  },
385
- "OrganizationNarrative":{
386
- "p":0.8146691347,
387
- "r":0.7606297812,
388
- "f":0.7867225698
389
  },
390
- "StanceEmphatic":{
391
- "p":0.8325835219,
392
- "r":0.8587117676,
393
- "f":0.8454458216
394
  },
395
- "ConfidenceHigh":{
396
- "p":0.793492611,
397
- "r":0.7964435325,
398
- "f":0.7949653333
399
  },
400
  "PlanningFuture":{
401
- "p":0.8015720524,
402
- "r":0.7731229292,
403
- "f":0.7870905037
404
  },
405
- "SignpostingAcademicWritingMoves":{
406
- "p":0.6799470549,
407
- "r":0.6417239225,
408
- "f":0.6602827763
409
  },
410
  "PlanningStrategy":{
411
- "p":0.7405392335,
412
- "r":0.7067443605,
413
- "f":0.7232472325
414
- },
415
- "CitationNeutral":{
416
- "p":0.8012995179,
417
- "r":0.7580805076,
418
- "f":0.7790910944
419
  },
420
  "StanceModerated":{
421
- "p":0.8127539304,
422
- "r":0.8244042286,
423
- "f":0.8185376268
424
- },
425
- "CitationControversy":{
426
- "p":0.7450381679,
427
- "r":0.7160674982,
428
- "f":0.7302656192
429
  }
430
  },
431
- "tok2vec_loss":1509739.3996848087,
432
- "tagger_loss":39368.7426280975,
433
- "ner_loss":127428.554314194
434
  },
435
  "requirements":[
436
-
437
  ]
438
  }
 
1
  {
2
  "lang":"en",
3
  "name":"docusco_spacy_cd",
4
+ "version":"1.5",
5
  "description":"English pipeline for part-of-speech and rhetorical tagging using a smaller 'common dictionary'.",
6
  "author":"David Brown",
7
  "email":"dwb2@andrew.cmu.edu",
8
  "url":"https://docuscope.github.io",
9
  "license":"MIT",
10
+ "spacy_version":">=3.8.6,<3.9.0",
11
+ "spacy_git_version":"911539e",
12
  "vectors":{
13
  "width":0,
14
  "vectors":0,
 
96
  "JK",
97
  "MC",
98
  "MC1",
 
 
99
  "MC2",
100
  "MC221",
101
  "MC222",
 
119
  "NN22",
120
  "NN221",
121
  "NN222",
122
+ "NN231",
123
+ "NN232",
124
+ "NN233",
125
  "NN31",
126
  "NN32",
127
  "NN33",
 
194
  "RG",
195
  "RG21",
196
  "RG22",
197
+ "RG31",
198
+ "RG32",
199
+ "RG33",
200
  "RG41",
201
  "RG42",
202
  "RG43",
 
331
 
332
  ],
333
  "performance":{
334
+ "tag_acc":0.9767705858,
335
+ "ents_f":0.8176289731,
336
+ "ents_p":0.8259539432,
337
+ "ents_r":0.8094701467,
338
  "ents_per_type":{
339
+ "OrganizationNarrative":{
340
+ "p":0.8145934055,
341
+ "r":0.7777322915,
342
+ "f":0.7957361953
343
  },
344
+ "SentimentNegative":{
345
+ "p":0.7480783987,
346
+ "r":0.6944167217,
347
+ "f":0.720249437
348
  },
349
+ "ConfidenceHigh":{
350
+ "p":0.8115417559,
351
+ "r":0.7851297881,
352
+ "f":0.79811732
353
  },
354
+ "SignpostingAcademicWritingMoves":{
355
+ "p":0.7271968274,
356
+ "r":0.6005515973,
357
+ "f":0.6578342827
358
  },
359
  "ConfidenceHedged":{
360
+ "p":0.8557559199,
361
+ "r":0.8532536641,
362
+ "f":0.8545029601
363
  },
364
+ "ActorsAbstractions":{
365
+ "p":0.8306375536,
366
+ "r":0.8279040157,
367
+ "f":0.829268532
368
+ },
369
+ "StanceEmphatic":{
370
+ "p":0.8390007208,
371
+ "r":0.8589556074,
372
+ "f":0.8488609062
373
+ },
374
+ "CitationNeutral":{
375
+ "p":0.8226691042,
376
+ "r":0.7520193111,
377
+ "f":0.7857593248
378
  },
379
  "SignpostingMetadiscourse":{
380
+ "p":0.9362602913,
381
+ "r":0.8712781955,
382
+ "f":0.9026011707
383
  },
384
+ "OrganizationReasoning":{
385
+ "p":0.8655633706,
386
+ "r":0.8245572547,
387
+ "f":0.8445628625
388
  },
389
+ "ActorsFirstPerson":{
390
+ "p":0.8573744296,
391
+ "r":0.8892793396,
392
+ "f":0.8730354922
393
  },
394
+ "SentimentPositive":{
395
+ "p":0.7647190243,
396
+ "r":0.7007852205,
397
+ "f":0.7313575406
398
  },
399
+ "ActorsPublicEntities":{
400
+ "p":0.8314828214,
401
+ "r":0.8213995218,
402
+ "f":0.8264104154
403
  },
404
+ "ActorsPeople":{
405
+ "p":0.8479485025,
406
+ "r":0.879949668,
407
+ "f":0.8636527499
408
  },
409
+ "CitationAuthority":{
410
+ "p":0.7637069384,
411
+ "r":0.6656798477,
412
+ "f":0.7113320529
413
  },
414
  "PlanningFuture":{
415
+ "p":0.7797759737,
416
+ "r":0.8022413702,
417
+ "f":0.7908491623
418
  },
419
+ "CitationControversy":{
420
+ "p":0.7966404575,
421
+ "r":0.7434956638,
422
+ "f":0.7691511387
423
  },
424
  "PlanningStrategy":{
425
+ "p":0.7800919498,
426
+ "r":0.6847222598,
427
+ "f":0.7293024866
 
 
 
 
 
428
  },
429
  "StanceModerated":{
430
+ "p":0.8150178586,
431
+ "r":0.8149501661,
432
+ "f":0.814984011
 
 
 
 
 
433
  }
434
  },
435
+ "tok2vec_loss":2101025.3303794791,
436
+ "tagger_loss":47325.9910907745,
437
+ "ner_loss":150479.046875
438
  },
439
  "requirements":[
440
+ "spacy>=3.8.6,<3.9.0"
441
  ]
442
  }
ner/model CHANGED
Binary files a/ner/model and b/ner/model differ
 
ner/moves CHANGED
@@ -1 +1 @@
1
- ��moves�t{"0":{},"1":{"ActorsPeople":2252459,"ActorsAbstractions":2160829,"SentimentNegative":1838447,"OrganizationNarrative":1220253,"SentimentPositive":1215068,"SignpostingMetadiscourse":982819,"ActorsFirstPerson":942047,"OrganizationReasoning":603068,"StanceEmphatic":540777,"ActorsPublicEntities":488472,"ConfidenceHedged":449697,"ConfidenceHigh":422991,"PlanningFuture":318827,"PlanningStrategy":277732,"SignpostingAcademicWritingMoves":153321,"CitationNeutral":95864,"StanceModerated":85078,"CitationAuthority":80084,"CitationControversy":22589},"2":{"ActorsPeople":2252459,"ActorsAbstractions":2160829,"SentimentNegative":1838447,"OrganizationNarrative":1220253,"SentimentPositive":1215068,"SignpostingMetadiscourse":982819,"ActorsFirstPerson":942047,"OrganizationReasoning":603068,"StanceEmphatic":540777,"ActorsPublicEntities":488472,"ConfidenceHedged":449697,"ConfidenceHigh":422991,"PlanningFuture":318827,"PlanningStrategy":277732,"SignpostingAcademicWritingMoves":153321,"CitationNeutral":95864,"StanceModerated":85078,"CitationAuthority":80084,"CitationControversy":22589},"3":{"ActorsPeople":2252459,"ActorsAbstractions":2160829,"SentimentNegative":1838447,"OrganizationNarrative":1220253,"SentimentPositive":1215068,"SignpostingMetadiscourse":982819,"ActorsFirstPerson":942047,"OrganizationReasoning":603068,"StanceEmphatic":540777,"ActorsPublicEntities":488472,"ConfidenceHedged":449697,"ConfidenceHigh":422991,"PlanningFuture":318827,"PlanningStrategy":277732,"SignpostingAcademicWritingMoves":153321,"CitationNeutral":95864,"StanceModerated":85078,"CitationAuthority":80084,"CitationControversy":22589},"4":{"ActorsPeople":2252459,"ActorsAbstractions":2160829,"SentimentNegative":1838447,"OrganizationNarrative":1220253,"SentimentPositive":1215068,"SignpostingMetadiscourse":982819,"ActorsFirstPerson":942047,"OrganizationReasoning":603068,"StanceEmphatic":540777,"ActorsPublicEntities":488472,"ConfidenceHedged":449697,"ConfidenceHigh":422991,"PlanningFuture":318827,"PlanningSt
rategy":277732,"SignpostingAcademicWritingMoves":153321,"CitationNeutral":95864,"StanceModerated":85078,"CitationAuthority":80084,"CitationControversy":22589,"":1},"5":{"":1}}�cfg��neg_key�
 
1
+ ��moves�t{"0":{},"1":{"ActorsPeople":2139427,"ActorsAbstractions":2067908,"SentimentNegative":1736899,"OrganizationNarrative":1160428,"SentimentPositive":1135987,"SignpostingMetadiscourse":922408,"ActorsFirstPerson":868040,"OrganizationReasoning":565594,"StanceEmphatic":502098,"ActorsPublicEntities":464486,"ConfidenceHedged":424490,"ConfidenceHigh":391164,"PlanningFuture":296839,"PlanningStrategy":261955,"SignpostingAcademicWritingMoves":149460,"CitationNeutral":90667,"StanceModerated":80646,"CitationAuthority":76595,"CitationControversy":21685},"2":{"ActorsPeople":2139427,"ActorsAbstractions":2067908,"SentimentNegative":1736899,"OrganizationNarrative":1160428,"SentimentPositive":1135987,"SignpostingMetadiscourse":922408,"ActorsFirstPerson":868040,"OrganizationReasoning":565594,"StanceEmphatic":502098,"ActorsPublicEntities":464486,"ConfidenceHedged":424490,"ConfidenceHigh":391164,"PlanningFuture":296839,"PlanningStrategy":261955,"SignpostingAcademicWritingMoves":149460,"CitationNeutral":90667,"StanceModerated":80646,"CitationAuthority":76595,"CitationControversy":21685},"3":{"ActorsPeople":2139427,"ActorsAbstractions":2067908,"SentimentNegative":1736899,"OrganizationNarrative":1160428,"SentimentPositive":1135987,"SignpostingMetadiscourse":922408,"ActorsFirstPerson":868040,"OrganizationReasoning":565594,"StanceEmphatic":502098,"ActorsPublicEntities":464486,"ConfidenceHedged":424490,"ConfidenceHigh":391164,"PlanningFuture":296839,"PlanningStrategy":261955,"SignpostingAcademicWritingMoves":149460,"CitationNeutral":90667,"StanceModerated":80646,"CitationAuthority":76595,"CitationControversy":21685},"4":{"ActorsPeople":2139427,"ActorsAbstractions":2067908,"SentimentNegative":1736899,"OrganizationNarrative":1160428,"SentimentPositive":1135987,"SignpostingMetadiscourse":922408,"ActorsFirstPerson":868040,"OrganizationReasoning":565594,"StanceEmphatic":502098,"ActorsPublicEntities":464486,"ConfidenceHedged":424490,"ConfidenceHigh":391164,"PlanningFuture":296839,"PlanningSt
rategy":261955,"SignpostingAcademicWritingMoves":149460,"CitationNeutral":90667,"StanceModerated":80646,"CitationAuthority":76595,"CitationControversy":21685,"":1},"5":{"":1}}�cfg��neg_key�
tagger/cfg CHANGED
@@ -77,8 +77,6 @@
77
  "JK",
78
  "MC",
79
  "MC1",
80
- "MC121",
81
- "MC122",
82
  "MC2",
83
  "MC221",
84
  "MC222",
@@ -102,6 +100,9 @@
102
  "NN22",
103
  "NN221",
104
  "NN222",
 
 
 
105
  "NN31",
106
  "NN32",
107
  "NN33",
@@ -174,6 +175,9 @@
174
  "RG",
175
  "RG21",
176
  "RG22",
 
 
 
177
  "RG41",
178
  "RG42",
179
  "RG43",
 
77
  "JK",
78
  "MC",
79
  "MC1",
 
 
80
  "MC2",
81
  "MC221",
82
  "MC222",
 
100
  "NN22",
101
  "NN221",
102
  "NN222",
103
+ "NN231",
104
+ "NN232",
105
+ "NN233",
106
  "NN31",
107
  "NN32",
108
  "NN33",
 
175
  "RG",
176
  "RG21",
177
  "RG22",
178
+ "RG31",
179
+ "RG32",
180
+ "RG33",
181
  "RG41",
182
  "RG42",
183
  "RG43",
tagger/model CHANGED
Binary files a/tagger/model and b/tagger/model differ
 
tok2vec/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:58e0806e259d1699a33eb0804db3d207aea31ea5aba7826c5f32b62076f718c4
3
  size 6009091
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17036ee71299a0f23fa8ed2a91afae0d44a583c3277fec5c5d9083dcf88dd921
3
  size 6009091
vocab/strings.json CHANGED
The diff for this file is too large to render. See raw diff