skalyan91 commited on
Commit
08c1d85
·
verified ·
1 Parent(s): d11d70d

deploy at 2025-12-28 19:47:11.235918

Browse files
Files changed (2) hide show
  1. Dependency length.ipynb +244 -141
  2. main.py +17 -1
Dependency length.ipynb CHANGED
@@ -38,7 +38,7 @@
38
  },
39
  {
40
  "cell_type": "code",
41
- "execution_count": 56,
42
  "id": "cefdbf22-b747-4bea-b279-c9b01e75ff2e",
43
  "metadata": {},
44
  "outputs": [],
@@ -67,6 +67,11 @@
67
  " if token.dep_ == \"ccomp\" and any(sibling.dep_ in relations[\"comp\"] \n",
68
  " for sibling in token.head.rights if sibling.i < token.i):\n",
69
  " token.dep_ = \"mod\"\n",
 
 
 
 
 
70
  " for rel in relations.keys():\n",
71
  " if token.dep_ in relations[rel]: token.dep_ = rel\n",
72
  " for token in doc:\n",
@@ -84,12 +89,23 @@
84
  " if any(t.text == \";\" for t in doc if (token.i < t.i < token.head.i or token.head.i < t.i < token.i) and token.pos_ != \"PUNCT\"):\n",
85
  " token.head = token\n",
86
  " token.dep_ = \"root\"\n",
 
 
 
 
 
 
 
 
 
 
 
87
  " return doc"
88
  ]
89
  },
90
  {
91
  "cell_type": "code",
92
- "execution_count": 28,
93
  "id": "23efda66-9d58-4169-9fa0-05de47267b5a",
94
  "metadata": {},
95
  "outputs": [],
@@ -141,264 +157,355 @@
141
  },
142
  {
143
  "cell_type": "code",
144
- "execution_count": 57,
145
  "id": "ba90ff19-c665-49d8-8ad4-5caee885901d",
146
  "metadata": {},
147
  "outputs": [
148
- {
149
- "name": "stdout",
150
- "output_type": "stream",
151
- "text": [
152
- "In []\n",
153
- "1849 []\n",
154
- "he []\n",
155
- "began []\n",
156
- "public []\n",
157
- "readings []\n",
158
- "of []\n",
159
- "the []\n",
160
- "story []\n",
161
- ", []\n",
162
- "which []\n",
163
- "proved ['punct']\n",
164
- "so []\n",
165
- "successful []\n",
166
- "he []\n",
167
- "undertook ['comp']\n",
168
- "127 []\n",
169
- "further []\n",
170
- "performances []\n",
171
- ". ['comp']\n"
172
- ]
173
- },
174
  {
175
  "data": {
176
  "text/html": [
177
- "<span class=\"tex2jax_ignore\"><svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" xml:lang=\"en\" id=\"5232fcca8f23464a9783eb067e24fe32-0\" class=\"displacy\" width=\"3200\" height=\"399.5\" direction=\"ltr\" style=\"max-width: none; height: 399.5px; color: #000000; background: #ffffff; font-family: Arial; direction: ltr\">\n",
178
- "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"309.5\">\n",
179
- " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"50\">In</tspan>\n",
180
- " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"50\">ADP</tspan>\n",
181
  "</text>\n",
182
  "\n",
183
- "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"309.5\">\n",
184
- " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"225\">1849</tspan>\n",
185
- " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"225\">NUM</tspan>\n",
186
  "</text>\n",
187
  "\n",
188
- "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"309.5\">\n",
189
- " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"400\">he</tspan>\n",
190
- " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"400\">PRON</tspan>\n",
191
  "</text>\n",
192
  "\n",
193
- "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"309.5\">\n",
194
- " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"575\">began</tspan>\n",
195
- " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"575\">VERB</tspan>\n",
196
  "</text>\n",
197
  "\n",
198
- "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"309.5\">\n",
199
- " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"750\">public</tspan>\n",
200
- " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"750\">ADJ</tspan>\n",
201
  "</text>\n",
202
  "\n",
203
- "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"309.5\">\n",
204
- " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"925\">readings</tspan>\n",
205
- " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"925\">NOUN</tspan>\n",
206
  "</text>\n",
207
  "\n",
208
- "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"309.5\">\n",
209
- " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"1100\">of</tspan>\n",
210
- " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"1100\">ADP</tspan>\n",
211
  "</text>\n",
212
  "\n",
213
- "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"309.5\">\n",
214
- " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"1275\">the</tspan>\n",
215
- " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"1275\">DET</tspan>\n",
216
  "</text>\n",
217
  "\n",
218
- "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"309.5\">\n",
219
- " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"1450\">story,</tspan>\n",
220
  " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"1450\">NOUN</tspan>\n",
221
  "</text>\n",
222
  "\n",
223
- "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"309.5\">\n",
224
- " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"1625\">which</tspan>\n",
225
- " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"1625\">PRON</tspan>\n",
226
  "</text>\n",
227
  "\n",
228
- "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"309.5\">\n",
229
- " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"1800\">proved</tspan>\n",
230
- " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"1800\">VERB</tspan>\n",
231
  "</text>\n",
232
  "\n",
233
- "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"309.5\">\n",
234
- " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"1975\">so</tspan>\n",
235
- " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"1975\">ADV</tspan>\n",
236
  "</text>\n",
237
  "\n",
238
- "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"309.5\">\n",
239
- " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"2150\">successful</tspan>\n",
240
- " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"2150\">ADJ</tspan>\n",
241
  "</text>\n",
242
  "\n",
243
- "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"309.5\">\n",
244
- " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"2325\">he</tspan>\n",
245
- " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"2325\">PRON</tspan>\n",
246
  "</text>\n",
247
  "\n",
248
- "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"309.5\">\n",
249
- " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"2500\">undertook</tspan>\n",
250
  " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"2500\">VERB</tspan>\n",
251
  "</text>\n",
252
  "\n",
253
- "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"309.5\">\n",
254
- " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"2675\">127</tspan>\n",
255
- " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"2675\">NUM</tspan>\n",
256
  "</text>\n",
257
  "\n",
258
- "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"309.5\">\n",
259
- " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"2850\">further</tspan>\n",
260
- " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"2850\">ADJ</tspan>\n",
261
  "</text>\n",
262
  "\n",
263
- "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"309.5\">\n",
264
- " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"3025\">performances.</tspan>\n",
265
- " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"3025\">NOUN</tspan>\n",
266
  "</text>\n",
267
  "\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
268
  "<g class=\"displacy-arrow\">\n",
269
- " <path class=\"displacy-arc\" id=\"arrow-5232fcca8f23464a9783eb067e24fe32-0-0\" stroke-width=\"2px\" d=\"M70,264.5 C70,89.5 570.0,89.5 570.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
270
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
271
- " <textPath xlink:href=\"#arrow-5232fcca8f23464a9783eb067e24fe32-0-0\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">mod</textPath>\n",
272
  " </text>\n",
273
- " <path class=\"displacy-arrowhead\" d=\"M70,266.5 L62,254.5 78,254.5\" fill=\"currentColor\"/>\n",
274
  "</g>\n",
275
  "\n",
276
  "<g class=\"displacy-arrow\">\n",
277
- " <path class=\"displacy-arc\" id=\"arrow-5232fcca8f23464a9783eb067e24fe32-0-1\" stroke-width=\"2px\" d=\"M70,264.5 C70,177.0 215.0,177.0 215.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
278
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
279
- " <textPath xlink:href=\"#arrow-5232fcca8f23464a9783eb067e24fe32-0-1\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">comp</textPath>\n",
280
  " </text>\n",
281
- " <path class=\"displacy-arrowhead\" d=\"M215.0,266.5 L223.0,254.5 207.0,254.5\" fill=\"currentColor\"/>\n",
282
  "</g>\n",
283
  "\n",
284
  "<g class=\"displacy-arrow\">\n",
285
- " <path class=\"displacy-arc\" id=\"arrow-5232fcca8f23464a9783eb067e24fe32-0-2\" stroke-width=\"2px\" d=\"M420,264.5 C420,177.0 565.0,177.0 565.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
286
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
287
- " <textPath xlink:href=\"#arrow-5232fcca8f23464a9783eb067e24fe32-0-2\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">subj</textPath>\n",
288
  " </text>\n",
289
- " <path class=\"displacy-arrowhead\" d=\"M420,266.5 L412,254.5 428,254.5\" fill=\"currentColor\"/>\n",
290
  "</g>\n",
291
  "\n",
292
  "<g class=\"displacy-arrow\">\n",
293
- " <path class=\"displacy-arc\" id=\"arrow-5232fcca8f23464a9783eb067e24fe32-0-3\" stroke-width=\"2px\" d=\"M770,264.5 C770,177.0 915.0,177.0 915.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
294
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
295
- " <textPath xlink:href=\"#arrow-5232fcca8f23464a9783eb067e24fe32-0-3\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">udep</textPath>\n",
296
  " </text>\n",
297
- " <path class=\"displacy-arrowhead\" d=\"M770,266.5 L762,254.5 778,254.5\" fill=\"currentColor\"/>\n",
298
  "</g>\n",
299
  "\n",
300
  "<g class=\"displacy-arrow\">\n",
301
- " <path class=\"displacy-arc\" id=\"arrow-5232fcca8f23464a9783eb067e24fe32-0-4\" stroke-width=\"2px\" d=\"M595,264.5 C595,89.5 920.0,89.5 920.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
302
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
303
- " <textPath xlink:href=\"#arrow-5232fcca8f23464a9783eb067e24fe32-0-4\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">comp</textPath>\n",
304
  " </text>\n",
305
- " <path class=\"displacy-arrowhead\" d=\"M920.0,266.5 L928.0,254.5 912.0,254.5\" fill=\"currentColor\"/>\n",
306
  "</g>\n",
307
  "\n",
308
  "<g class=\"displacy-arrow\">\n",
309
- " <path class=\"displacy-arc\" id=\"arrow-5232fcca8f23464a9783eb067e24fe32-0-5\" stroke-width=\"2px\" d=\"M945,264.5 C945,177.0 1090.0,177.0 1090.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
310
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
311
- " <textPath xlink:href=\"#arrow-5232fcca8f23464a9783eb067e24fe32-0-5\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">udep</textPath>\n",
312
  " </text>\n",
313
- " <path class=\"displacy-arrowhead\" d=\"M1090.0,266.5 L1098.0,254.5 1082.0,254.5\" fill=\"currentColor\"/>\n",
314
  "</g>\n",
315
  "\n",
316
  "<g class=\"displacy-arrow\">\n",
317
- " <path class=\"displacy-arc\" id=\"arrow-5232fcca8f23464a9783eb067e24fe32-0-6\" stroke-width=\"2px\" d=\"M1295,264.5 C1295,177.0 1440.0,177.0 1440.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
318
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
319
- " <textPath xlink:href=\"#arrow-5232fcca8f23464a9783eb067e24fe32-0-6\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">det</textPath>\n",
320
  " </text>\n",
321
- " <path class=\"displacy-arrowhead\" d=\"M1295,266.5 L1287,254.5 1303,254.5\" fill=\"currentColor\"/>\n",
322
  "</g>\n",
323
  "\n",
324
  "<g class=\"displacy-arrow\">\n",
325
- " <path class=\"displacy-arc\" id=\"arrow-5232fcca8f23464a9783eb067e24fe32-0-7\" stroke-width=\"2px\" d=\"M1120,264.5 C1120,89.5 1445.0,89.5 1445.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
326
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
327
- " <textPath xlink:href=\"#arrow-5232fcca8f23464a9783eb067e24fe32-0-7\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">comp</textPath>\n",
328
  " </text>\n",
329
- " <path class=\"displacy-arrowhead\" d=\"M1445.0,266.5 L1453.0,254.5 1437.0,254.5\" fill=\"currentColor\"/>\n",
330
  "</g>\n",
331
  "\n",
332
  "<g class=\"displacy-arrow\">\n",
333
- " <path class=\"displacy-arc\" id=\"arrow-5232fcca8f23464a9783eb067e24fe32-0-8\" stroke-width=\"2px\" d=\"M1645,264.5 C1645,177.0 1790.0,177.0 1790.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
334
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
335
- " <textPath xlink:href=\"#arrow-5232fcca8f23464a9783eb067e24fe32-0-8\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">subj</textPath>\n",
336
  " </text>\n",
337
- " <path class=\"displacy-arrowhead\" d=\"M1645,266.5 L1637,254.5 1653,254.5\" fill=\"currentColor\"/>\n",
338
  "</g>\n",
339
  "\n",
340
  "<g class=\"displacy-arrow\">\n",
341
- " <path class=\"displacy-arc\" id=\"arrow-5232fcca8f23464a9783eb067e24fe32-0-9\" stroke-width=\"2px\" d=\"M1470,264.5 C1470,89.5 1795.0,89.5 1795.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
342
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
343
- " <textPath xlink:href=\"#arrow-5232fcca8f23464a9783eb067e24fe32-0-9\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">mod</textPath>\n",
344
  " </text>\n",
345
- " <path class=\"displacy-arrowhead\" d=\"M1795.0,266.5 L1803.0,254.5 1787.0,254.5\" fill=\"currentColor\"/>\n",
346
  "</g>\n",
347
  "\n",
348
  "<g class=\"displacy-arrow\">\n",
349
- " <path class=\"displacy-arc\" id=\"arrow-5232fcca8f23464a9783eb067e24fe32-0-10\" stroke-width=\"2px\" d=\"M1995,264.5 C1995,177.0 2140.0,177.0 2140.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
350
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
351
- " <textPath xlink:href=\"#arrow-5232fcca8f23464a9783eb067e24fe32-0-10\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">mod</textPath>\n",
352
  " </text>\n",
353
- " <path class=\"displacy-arrowhead\" d=\"M1995,266.5 L1987,254.5 2003,254.5\" fill=\"currentColor\"/>\n",
354
  "</g>\n",
355
  "\n",
356
  "<g class=\"displacy-arrow\">\n",
357
- " <path class=\"displacy-arc\" id=\"arrow-5232fcca8f23464a9783eb067e24fe32-0-11\" stroke-width=\"2px\" d=\"M1820,264.5 C1820,89.5 2145.0,89.5 2145.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
358
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
359
- " <textPath xlink:href=\"#arrow-5232fcca8f23464a9783eb067e24fe32-0-11\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">comp</textPath>\n",
360
  " </text>\n",
361
- " <path class=\"displacy-arrowhead\" d=\"M2145.0,266.5 L2153.0,254.5 2137.0,254.5\" fill=\"currentColor\"/>\n",
362
  "</g>\n",
363
  "\n",
364
  "<g class=\"displacy-arrow\">\n",
365
- " <path class=\"displacy-arc\" id=\"arrow-5232fcca8f23464a9783eb067e24fe32-0-12\" stroke-width=\"2px\" d=\"M2345,264.5 C2345,177.0 2490.0,177.0 2490.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
366
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
367
- " <textPath xlink:href=\"#arrow-5232fcca8f23464a9783eb067e24fe32-0-12\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">subj</textPath>\n",
368
  " </text>\n",
369
- " <path class=\"displacy-arrowhead\" d=\"M2345,266.5 L2337,254.5 2353,254.5\" fill=\"currentColor\"/>\n",
370
  "</g>\n",
371
  "\n",
372
  "<g class=\"displacy-arrow\">\n",
373
- " <path class=\"displacy-arc\" id=\"arrow-5232fcca8f23464a9783eb067e24fe32-0-13\" stroke-width=\"2px\" d=\"M595,264.5 C595,2.0 2500.0,2.0 2500.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
374
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
375
- " <textPath xlink:href=\"#arrow-5232fcca8f23464a9783eb067e24fe32-0-13\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">comp</textPath>\n",
376
  " </text>\n",
377
- " <path class=\"displacy-arrowhead\" d=\"M2500.0,266.5 L2508.0,254.5 2492.0,254.5\" fill=\"currentColor\"/>\n",
378
  "</g>\n",
379
  "\n",
380
  "<g class=\"displacy-arrow\">\n",
381
- " <path class=\"displacy-arc\" id=\"arrow-5232fcca8f23464a9783eb067e24fe32-0-14\" stroke-width=\"2px\" d=\"M2695,264.5 C2695,89.5 3020.0,89.5 3020.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
382
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
383
- " <textPath xlink:href=\"#arrow-5232fcca8f23464a9783eb067e24fe32-0-14\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">udep</textPath>\n",
384
  " </text>\n",
385
- " <path class=\"displacy-arrowhead\" d=\"M2695,266.5 L2687,254.5 2703,254.5\" fill=\"currentColor\"/>\n",
386
  "</g>\n",
387
  "\n",
388
  "<g class=\"displacy-arrow\">\n",
389
- " <path class=\"displacy-arc\" id=\"arrow-5232fcca8f23464a9783eb067e24fe32-0-15\" stroke-width=\"2px\" d=\"M2870,264.5 C2870,177.0 3015.0,177.0 3015.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
390
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
391
- " <textPath xlink:href=\"#arrow-5232fcca8f23464a9783eb067e24fe32-0-15\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">udep</textPath>\n",
392
  " </text>\n",
393
- " <path class=\"displacy-arrowhead\" d=\"M2870,266.5 L2862,254.5 2878,254.5\" fill=\"currentColor\"/>\n",
394
  "</g>\n",
395
  "\n",
396
  "<g class=\"displacy-arrow\">\n",
397
- " <path class=\"displacy-arc\" id=\"arrow-5232fcca8f23464a9783eb067e24fe32-0-16\" stroke-width=\"2px\" d=\"M2520,264.5 C2520,2.0 3025.0,2.0 3025.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
398
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
399
- " <textPath xlink:href=\"#arrow-5232fcca8f23464a9783eb067e24fe32-0-16\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">comp</textPath>\n",
400
  " </text>\n",
401
- " <path class=\"displacy-arrowhead\" d=\"M3025.0,266.5 L3033.0,254.5 3017.0,254.5\" fill=\"currentColor\"/>\n",
402
  "</g>\n",
403
  "</svg></span>"
404
  ],
@@ -411,7 +518,7 @@
411
  }
412
  ],
413
  "source": [
414
- "doc = ssudify(nlp(\"In 1849 he began public readings of the story, which proved so successful he undertook 127 further performances.\"))\n",
415
  "# Since this is an interactive Jupyter environment, we can use displacy.render here\n",
416
  "displacy.render(doc, style='dep')"
417
  ]
@@ -425,13 +532,9 @@
425
  {
426
  "data": {
427
  "text/plain": [
428
- "[(According to the studies, “People, 0),\n",
429
- " (who danced more than once a week, 7),\n",
430
- " (had, 0),\n",
431
- " (a 76% lower risk of getting dementia, 8),\n",
432
- " (compared to people who danced, 0),\n",
433
- " (less frequently or, 3),\n",
434
- " (didn’t dance at all,” Dr Pasricha noted., 0)]"
435
  ]
436
  },
437
  "execution_count": 7,
 
38
  },
39
  {
40
  "cell_type": "code",
41
+ "execution_count": 33,
42
  "id": "cefdbf22-b747-4bea-b279-c9b01e75ff2e",
43
  "metadata": {},
44
  "outputs": [],
 
67
  " if token.dep_ == \"ccomp\" and any(sibling.dep_ in relations[\"comp\"] \n",
68
  " for sibling in token.head.rights if sibling.i < token.i):\n",
69
  " token.dep_ = \"mod\"\n",
70
+ " dobjs = [child for child in token.children if child.dep_ == \"dobj\"]\n",
71
+ " if len(dobjs) > 1:\n",
72
+ " for i in range(1, len(dobjs)):\n",
73
+ " dobjs[i].head = dobjs[i-1]\n",
74
+ " dobjs[i].dep_ = \"appos\"\n",
75
  " for rel in relations.keys():\n",
76
  " if token.dep_ in relations[rel]: token.dep_ = rel\n",
77
  " for token in doc:\n",
 
89
  " if any(t.text == \";\" for t in doc if (token.i < t.i < token.head.i or token.head.i < t.i < token.i) and token.pos_ != \"PUNCT\"):\n",
90
  " token.head = token\n",
91
  " token.dep_ = \"root\"\n",
92
+ " if token.pos_ in [\"VERB\", \"AUX\"]:\n",
93
+ " core_children = [child for child in token.children if child.dep_ in [\"subj\", \"comp\", \"udep\"]]\n",
94
+ " core_children.append(token)\n",
95
+ " core_children.sort(key=lambda x: x.i)\n",
96
+ " right_edge = [t for t in core_children[-1].subtree if t.pos_ != \"PUNCT\"][-1]\n",
97
+ " if right_edge.i < len(doc) - 1:\n",
98
+ " if right_edge.text == \",\" or doc[right_edge.i+1].text == \",\":\n",
99
+ " for child in [child for child in token.children if child.i > right_edge.i and child.dep_ == \"conj\"]:\n",
100
+ " child.dep_ = \"mod\"\n",
101
+ " if token.pos_ in [\"VERB\", \"AUX\"] and token.head.pos_ == \"NOUN\" and token.dep_ == \"udep\":\n",
102
+ " token.dep_ = \"mod\"\n",
103
  " return doc"
104
  ]
105
  },
106
  {
107
  "cell_type": "code",
108
+ "execution_count": 4,
109
  "id": "23efda66-9d58-4169-9fa0-05de47267b5a",
110
  "metadata": {},
111
  "outputs": [],
 
157
  },
158
  {
159
  "cell_type": "code",
160
+ "execution_count": 34,
161
  "id": "ba90ff19-c665-49d8-8ad4-5caee885901d",
162
  "metadata": {},
163
  "outputs": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
  {
165
  "data": {
166
  "text/html": [
167
+ "<span class=\"tex2jax_ignore\"><svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" xml:lang=\"en\" id=\"91f44e7b72df4952ba0ee597a77e0111-0\" class=\"displacy\" width=\"4775\" height=\"487.0\" direction=\"ltr\" style=\"max-width: none; height: 487.0px; color: #000000; background: #ffffff; font-family: Arial; direction: ltr\">\n",
168
+ "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"397.0\">\n",
169
+ " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"50\">Males</tspan>\n",
170
+ " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"50\">NOUN</tspan>\n",
171
  "</text>\n",
172
  "\n",
173
+ "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"397.0\">\n",
174
+ " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"225\">have</tspan>\n",
175
+ " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"225\">VERB</tspan>\n",
176
  "</text>\n",
177
  "\n",
178
+ "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"397.0\">\n",
179
+ " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"400\">a</tspan>\n",
180
+ " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"400\">DET</tspan>\n",
181
  "</text>\n",
182
  "\n",
183
+ "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"397.0\">\n",
184
+ " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"575\">bony</tspan>\n",
185
+ " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"575\">NOUN</tspan>\n",
186
  "</text>\n",
187
  "\n",
188
+ "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"397.0\">\n",
189
+ " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"750\">protrusion</tspan>\n",
190
+ " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"750\">NOUN</tspan>\n",
191
  "</text>\n",
192
  "\n",
193
+ "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"397.0\">\n",
194
+ " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"925\">from</tspan>\n",
195
+ " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"925\">ADP</tspan>\n",
196
  "</text>\n",
197
  "\n",
198
+ "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"397.0\">\n",
199
+ " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"1100\">the</tspan>\n",
200
+ " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"1100\">DET</tspan>\n",
201
  "</text>\n",
202
  "\n",
203
+ "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"397.0\">\n",
204
+ " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"1275\">gill</tspan>\n",
205
+ " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"1275\">PROPN</tspan>\n",
206
  "</text>\n",
207
  "\n",
208
+ "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"397.0\">\n",
209
+ " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"1450\">area (</tspan>\n",
210
  " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"1450\">NOUN</tspan>\n",
211
  "</text>\n",
212
  "\n",
213
+ "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"397.0\">\n",
214
+ " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"1625\">the</tspan>\n",
215
+ " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"1625\">DET</tspan>\n",
216
  "</text>\n",
217
  "\n",
218
+ "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"397.0\">\n",
219
+ " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"1800\">opercular</tspan>\n",
220
+ " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"1800\">ADJ</tspan>\n",
221
  "</text>\n",
222
  "\n",
223
+ "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"397.0\">\n",
224
+ " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"1975\">process)</tspan>\n",
225
+ " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"1975\">NOUN</tspan>\n",
226
  "</text>\n",
227
  "\n",
228
+ "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"397.0\">\n",
229
+ " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"2150\">that</tspan>\n",
230
+ " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"2150\">PRON</tspan>\n",
231
  "</text>\n",
232
  "\n",
233
+ "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"397.0\">\n",
234
+ " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"2325\">is</tspan>\n",
235
+ " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"2325\">AUX</tspan>\n",
236
  "</text>\n",
237
  "\n",
238
+ "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"397.0\">\n",
239
+ " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"2500\">elongated</tspan>\n",
240
  " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"2500\">VERB</tspan>\n",
241
  "</text>\n",
242
  "\n",
243
+ "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"397.0\">\n",
244
+ " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"2675\">and</tspan>\n",
245
+ " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"2675\">CCONJ</tspan>\n",
246
  "</text>\n",
247
  "\n",
248
+ "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"397.0\">\n",
249
+ " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"2850\">club-</tspan>\n",
250
+ " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"2850\">NOUN</tspan>\n",
251
  "</text>\n",
252
  "\n",
253
+ "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"397.0\">\n",
254
+ " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"3025\">like,</tspan>\n",
255
+ " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"3025\">ADJ</tspan>\n",
256
  "</text>\n",
257
  "\n",
258
+ "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"397.0\">\n",
259
+ " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"3200\">a</tspan>\n",
260
+ " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"3200\">DET</tspan>\n",
261
+ "</text>\n",
262
+ "\n",
263
+ "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"397.0\">\n",
264
+ " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"3375\">feature</tspan>\n",
265
+ " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"3375\">NOUN</tspan>\n",
266
+ "</text>\n",
267
+ "\n",
268
+ "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"397.0\">\n",
269
+ " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"3550\">that</tspan>\n",
270
+ " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"3550\">PRON</tspan>\n",
271
+ "</text>\n",
272
+ "\n",
273
+ "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"397.0\">\n",
274
+ " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"3725\">inspired</tspan>\n",
275
+ " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"3725\">VERB</tspan>\n",
276
+ "</text>\n",
277
+ "\n",
278
+ "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"397.0\">\n",
279
+ " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"3900\">the</tspan>\n",
280
+ " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"3900\">DET</tspan>\n",
281
+ "</text>\n",
282
+ "\n",
283
+ "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"397.0\">\n",
284
+ " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"4075\">name</tspan>\n",
285
+ " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"4075\">NOUN</tspan>\n",
286
+ "</text>\n",
287
+ "\n",
288
+ "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"397.0\">\n",
289
+ " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"4250\">of</tspan>\n",
290
+ " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"4250\">ADP</tspan>\n",
291
+ "</text>\n",
292
+ "\n",
293
+ "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"397.0\">\n",
294
+ " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"4425\">the</tspan>\n",
295
+ " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"4425\">DET</tspan>\n",
296
+ "</text>\n",
297
+ "\n",
298
+ "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"397.0\">\n",
299
+ " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"4600\">species</tspan>\n",
300
+ " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"4600\">NOUN</tspan>\n",
301
+ "</text>\n",
302
+ "\n",
303
+ "<g class=\"displacy-arrow\">\n",
304
+ " <path class=\"displacy-arc\" id=\"arrow-91f44e7b72df4952ba0ee597a77e0111-0-0\" stroke-width=\"2px\" d=\"M70,352.0 C70,264.5 210.0,264.5 210.0,352.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
305
+ " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
306
+ " <textPath xlink:href=\"#arrow-91f44e7b72df4952ba0ee597a77e0111-0-0\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">subj</textPath>\n",
307
+ " </text>\n",
308
+ " <path class=\"displacy-arrowhead\" d=\"M70,354.0 L62,342.0 78,342.0\" fill=\"currentColor\"/>\n",
309
+ "</g>\n",
310
+ "\n",
311
+ "<g class=\"displacy-arrow\">\n",
312
+ " <path class=\"displacy-arc\" id=\"arrow-91f44e7b72df4952ba0ee597a77e0111-0-1\" stroke-width=\"2px\" d=\"M420,352.0 C420,177.0 740.0,177.0 740.0,352.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
313
+ " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
314
+ " <textPath xlink:href=\"#arrow-91f44e7b72df4952ba0ee597a77e0111-0-1\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">det</textPath>\n",
315
+ " </text>\n",
316
+ " <path class=\"displacy-arrowhead\" d=\"M420,354.0 L412,342.0 428,342.0\" fill=\"currentColor\"/>\n",
317
+ "</g>\n",
318
+ "\n",
319
+ "<g class=\"displacy-arrow\">\n",
320
+ " <path class=\"displacy-arc\" id=\"arrow-91f44e7b72df4952ba0ee597a77e0111-0-2\" stroke-width=\"2px\" d=\"M595,352.0 C595,264.5 735.0,264.5 735.0,352.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
321
+ " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
322
+ " <textPath xlink:href=\"#arrow-91f44e7b72df4952ba0ee597a77e0111-0-2\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">compound</textPath>\n",
323
+ " </text>\n",
324
+ " <path class=\"displacy-arrowhead\" d=\"M595,354.0 L587,342.0 603,342.0\" fill=\"currentColor\"/>\n",
325
+ "</g>\n",
326
+ "\n",
327
+ "<g class=\"displacy-arrow\">\n",
328
+ " <path class=\"displacy-arc\" id=\"arrow-91f44e7b72df4952ba0ee597a77e0111-0-3\" stroke-width=\"2px\" d=\"M245,352.0 C245,89.5 745.0,89.5 745.0,352.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
329
+ " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
330
+ " <textPath xlink:href=\"#arrow-91f44e7b72df4952ba0ee597a77e0111-0-3\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">comp</textPath>\n",
331
+ " </text>\n",
332
+ " <path class=\"displacy-arrowhead\" d=\"M745.0,354.0 L753.0,342.0 737.0,342.0\" fill=\"currentColor\"/>\n",
333
+ "</g>\n",
334
+ "\n",
335
+ "<g class=\"displacy-arrow\">\n",
336
+ " <path class=\"displacy-arc\" id=\"arrow-91f44e7b72df4952ba0ee597a77e0111-0-4\" stroke-width=\"2px\" d=\"M770,352.0 C770,264.5 910.0,264.5 910.0,352.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
337
+ " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
338
+ " <textPath xlink:href=\"#arrow-91f44e7b72df4952ba0ee597a77e0111-0-4\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">udep</textPath>\n",
339
+ " </text>\n",
340
+ " <path class=\"displacy-arrowhead\" d=\"M910.0,354.0 L918.0,342.0 902.0,342.0\" fill=\"currentColor\"/>\n",
341
+ "</g>\n",
342
+ "\n",
343
+ "<g class=\"displacy-arrow\">\n",
344
+ " <path class=\"displacy-arc\" id=\"arrow-91f44e7b72df4952ba0ee597a77e0111-0-5\" stroke-width=\"2px\" d=\"M1120,352.0 C1120,177.0 1440.0,177.0 1440.0,352.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
345
+ " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
346
+ " <textPath xlink:href=\"#arrow-91f44e7b72df4952ba0ee597a77e0111-0-5\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">det</textPath>\n",
347
+ " </text>\n",
348
+ " <path class=\"displacy-arrowhead\" d=\"M1120,354.0 L1112,342.0 1128,342.0\" fill=\"currentColor\"/>\n",
349
+ "</g>\n",
350
+ "\n",
351
+ "<g class=\"displacy-arrow\">\n",
352
+ " <path class=\"displacy-arc\" id=\"arrow-91f44e7b72df4952ba0ee597a77e0111-0-6\" stroke-width=\"2px\" d=\"M1295,352.0 C1295,264.5 1435.0,264.5 1435.0,352.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
353
+ " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
354
+ " <textPath xlink:href=\"#arrow-91f44e7b72df4952ba0ee597a77e0111-0-6\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">compound</textPath>\n",
355
+ " </text>\n",
356
+ " <path class=\"displacy-arrowhead\" d=\"M1295,354.0 L1287,342.0 1303,342.0\" fill=\"currentColor\"/>\n",
357
+ "</g>\n",
358
+ "\n",
359
+ "<g class=\"displacy-arrow\">\n",
360
+ " <path class=\"displacy-arc\" id=\"arrow-91f44e7b72df4952ba0ee597a77e0111-0-7\" stroke-width=\"2px\" d=\"M945,352.0 C945,89.5 1445.0,89.5 1445.0,352.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
361
+ " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
362
+ " <textPath xlink:href=\"#arrow-91f44e7b72df4952ba0ee597a77e0111-0-7\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">comp</textPath>\n",
363
+ " </text>\n",
364
+ " <path class=\"displacy-arrowhead\" d=\"M1445.0,354.0 L1453.0,342.0 1437.0,342.0\" fill=\"currentColor\"/>\n",
365
+ "</g>\n",
366
+ "\n",
367
+ "<g class=\"displacy-arrow\">\n",
368
+ " <path class=\"displacy-arc\" id=\"arrow-91f44e7b72df4952ba0ee597a77e0111-0-8\" stroke-width=\"2px\" d=\"M1645,352.0 C1645,177.0 1965.0,177.0 1965.0,352.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
369
+ " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
370
+ " <textPath xlink:href=\"#arrow-91f44e7b72df4952ba0ee597a77e0111-0-8\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">det</textPath>\n",
371
+ " </text>\n",
372
+ " <path class=\"displacy-arrowhead\" d=\"M1645,354.0 L1637,342.0 1653,342.0\" fill=\"currentColor\"/>\n",
373
+ "</g>\n",
374
+ "\n",
375
  "<g class=\"displacy-arrow\">\n",
376
+ " <path class=\"displacy-arc\" id=\"arrow-91f44e7b72df4952ba0ee597a77e0111-0-9\" stroke-width=\"2px\" d=\"M1820,352.0 C1820,264.5 1960.0,264.5 1960.0,352.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
377
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
378
+ " <textPath xlink:href=\"#arrow-91f44e7b72df4952ba0ee597a77e0111-0-9\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">udep</textPath>\n",
379
  " </text>\n",
380
+ " <path class=\"displacy-arrowhead\" d=\"M1820,354.0 L1812,342.0 1828,342.0\" fill=\"currentColor\"/>\n",
381
  "</g>\n",
382
  "\n",
383
  "<g class=\"displacy-arrow\">\n",
384
+ " <path class=\"displacy-arc\" id=\"arrow-91f44e7b72df4952ba0ee597a77e0111-0-10\" stroke-width=\"2px\" d=\"M1470,352.0 C1470,89.5 1970.0,89.5 1970.0,352.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
385
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
386
+ " <textPath xlink:href=\"#arrow-91f44e7b72df4952ba0ee597a77e0111-0-10\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">appos</textPath>\n",
387
  " </text>\n",
388
+ " <path class=\"displacy-arrowhead\" d=\"M1970.0,354.0 L1978.0,342.0 1962.0,342.0\" fill=\"currentColor\"/>\n",
389
  "</g>\n",
390
  "\n",
391
  "<g class=\"displacy-arrow\">\n",
392
+ " <path class=\"displacy-arc\" id=\"arrow-91f44e7b72df4952ba0ee597a77e0111-0-11\" stroke-width=\"2px\" d=\"M2170,352.0 C2170,264.5 2310.0,264.5 2310.0,352.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
393
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
394
+ " <textPath xlink:href=\"#arrow-91f44e7b72df4952ba0ee597a77e0111-0-11\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">subj</textPath>\n",
395
  " </text>\n",
396
+ " <path class=\"displacy-arrowhead\" d=\"M2170,354.0 L2162,342.0 2178,342.0\" fill=\"currentColor\"/>\n",
397
  "</g>\n",
398
  "\n",
399
  "<g class=\"displacy-arrow\">\n",
400
+ " <path class=\"displacy-arc\" id=\"arrow-91f44e7b72df4952ba0ee597a77e0111-0-12\" stroke-width=\"2px\" d=\"M1995,352.0 C1995,177.0 2315.0,177.0 2315.0,352.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
401
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
402
+ " <textPath xlink:href=\"#arrow-91f44e7b72df4952ba0ee597a77e0111-0-12\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">mod</textPath>\n",
403
  " </text>\n",
404
+ " <path class=\"displacy-arrowhead\" d=\"M2315.0,354.0 L2323.0,342.0 2307.0,342.0\" fill=\"currentColor\"/>\n",
405
  "</g>\n",
406
  "\n",
407
  "<g class=\"displacy-arrow\">\n",
408
+ " <path class=\"displacy-arc\" id=\"arrow-91f44e7b72df4952ba0ee597a77e0111-0-13\" stroke-width=\"2px\" d=\"M2345,352.0 C2345,264.5 2485.0,264.5 2485.0,352.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
409
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
410
+ " <textPath xlink:href=\"#arrow-91f44e7b72df4952ba0ee597a77e0111-0-13\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">comp</textPath>\n",
411
  " </text>\n",
412
+ " <path class=\"displacy-arrowhead\" d=\"M2485.0,354.0 L2493.0,342.0 2477.0,342.0\" fill=\"currentColor\"/>\n",
413
  "</g>\n",
414
  "\n",
415
  "<g class=\"displacy-arrow\">\n",
416
+ " <path class=\"displacy-arc\" id=\"arrow-91f44e7b72df4952ba0ee597a77e0111-0-14\" stroke-width=\"2px\" d=\"M2345,352.0 C2345,177.0 2665.0,177.0 2665.0,352.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
417
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
418
+ " <textPath xlink:href=\"#arrow-91f44e7b72df4952ba0ee597a77e0111-0-14\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">cc</textPath>\n",
419
  " </text>\n",
420
+ " <path class=\"displacy-arrowhead\" d=\"M2665.0,354.0 L2673.0,342.0 2657.0,342.0\" fill=\"currentColor\"/>\n",
421
  "</g>\n",
422
  "\n",
423
  "<g class=\"displacy-arrow\">\n",
424
+ " <path class=\"displacy-arc\" id=\"arrow-91f44e7b72df4952ba0ee597a77e0111-0-15\" stroke-width=\"2px\" d=\"M2870,352.0 C2870,264.5 3010.0,264.5 3010.0,352.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
425
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
426
+ " <textPath xlink:href=\"#arrow-91f44e7b72df4952ba0ee597a77e0111-0-15\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">mod</textPath>\n",
427
  " </text>\n",
428
+ " <path class=\"displacy-arrowhead\" d=\"M2870,354.0 L2862,342.0 2878,342.0\" fill=\"currentColor\"/>\n",
429
  "</g>\n",
430
  "\n",
431
  "<g class=\"displacy-arrow\">\n",
432
+ " <path class=\"displacy-arc\" id=\"arrow-91f44e7b72df4952ba0ee597a77e0111-0-16\" stroke-width=\"2px\" d=\"M2345,352.0 C2345,89.5 3020.0,89.5 3020.0,352.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
433
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
434
+ " <textPath xlink:href=\"#arrow-91f44e7b72df4952ba0ee597a77e0111-0-16\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">conj</textPath>\n",
435
  " </text>\n",
436
+ " <path class=\"displacy-arrowhead\" d=\"M3020.0,354.0 L3028.0,342.0 3012.0,342.0\" fill=\"currentColor\"/>\n",
437
  "</g>\n",
438
  "\n",
439
  "<g class=\"displacy-arrow\">\n",
440
+ " <path class=\"displacy-arc\" id=\"arrow-91f44e7b72df4952ba0ee597a77e0111-0-17\" stroke-width=\"2px\" d=\"M3220,352.0 C3220,264.5 3360.0,264.5 3360.0,352.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
441
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
442
+ " <textPath xlink:href=\"#arrow-91f44e7b72df4952ba0ee597a77e0111-0-17\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">det</textPath>\n",
443
  " </text>\n",
444
+ " <path class=\"displacy-arrowhead\" d=\"M3220,354.0 L3212,342.0 3228,342.0\" fill=\"currentColor\"/>\n",
445
  "</g>\n",
446
  "\n",
447
  "<g class=\"displacy-arrow\">\n",
448
+ " <path class=\"displacy-arc\" id=\"arrow-91f44e7b72df4952ba0ee597a77e0111-0-18\" stroke-width=\"2px\" d=\"M770,352.0 C770,2.0 3375.0,2.0 3375.0,352.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
449
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
450
+ " <textPath xlink:href=\"#arrow-91f44e7b72df4952ba0ee597a77e0111-0-18\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">appos</textPath>\n",
451
  " </text>\n",
452
+ " <path class=\"displacy-arrowhead\" d=\"M3375.0,354.0 L3383.0,342.0 3367.0,342.0\" fill=\"currentColor\"/>\n",
453
  "</g>\n",
454
  "\n",
455
  "<g class=\"displacy-arrow\">\n",
456
+ " <path class=\"displacy-arc\" id=\"arrow-91f44e7b72df4952ba0ee597a77e0111-0-19\" stroke-width=\"2px\" d=\"M3570,352.0 C3570,264.5 3710.0,264.5 3710.0,352.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
457
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
458
+ " <textPath xlink:href=\"#arrow-91f44e7b72df4952ba0ee597a77e0111-0-19\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">subj</textPath>\n",
459
  " </text>\n",
460
+ " <path class=\"displacy-arrowhead\" d=\"M3570,354.0 L3562,342.0 3578,342.0\" fill=\"currentColor\"/>\n",
461
  "</g>\n",
462
  "\n",
463
  "<g class=\"displacy-arrow\">\n",
464
+ " <path class=\"displacy-arc\" id=\"arrow-91f44e7b72df4952ba0ee597a77e0111-0-20\" stroke-width=\"2px\" d=\"M3395,352.0 C3395,177.0 3715.0,177.0 3715.0,352.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
465
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
466
+ " <textPath xlink:href=\"#arrow-91f44e7b72df4952ba0ee597a77e0111-0-20\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">mod</textPath>\n",
467
  " </text>\n",
468
+ " <path class=\"displacy-arrowhead\" d=\"M3715.0,354.0 L3723.0,342.0 3707.0,342.0\" fill=\"currentColor\"/>\n",
469
  "</g>\n",
470
  "\n",
471
  "<g class=\"displacy-arrow\">\n",
472
+ " <path class=\"displacy-arc\" id=\"arrow-91f44e7b72df4952ba0ee597a77e0111-0-21\" stroke-width=\"2px\" d=\"M3920,352.0 C3920,264.5 4060.0,264.5 4060.0,352.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
473
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
474
+ " <textPath xlink:href=\"#arrow-91f44e7b72df4952ba0ee597a77e0111-0-21\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">det</textPath>\n",
475
  " </text>\n",
476
+ " <path class=\"displacy-arrowhead\" d=\"M3920,354.0 L3912,342.0 3928,342.0\" fill=\"currentColor\"/>\n",
477
  "</g>\n",
478
  "\n",
479
  "<g class=\"displacy-arrow\">\n",
480
+ " <path class=\"displacy-arc\" id=\"arrow-91f44e7b72df4952ba0ee597a77e0111-0-22\" stroke-width=\"2px\" d=\"M3745,352.0 C3745,177.0 4065.0,177.0 4065.0,352.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
481
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
482
+ " <textPath xlink:href=\"#arrow-91f44e7b72df4952ba0ee597a77e0111-0-22\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">comp</textPath>\n",
483
  " </text>\n",
484
+ " <path class=\"displacy-arrowhead\" d=\"M4065.0,354.0 L4073.0,342.0 4057.0,342.0\" fill=\"currentColor\"/>\n",
485
  "</g>\n",
486
  "\n",
487
  "<g class=\"displacy-arrow\">\n",
488
+ " <path class=\"displacy-arc\" id=\"arrow-91f44e7b72df4952ba0ee597a77e0111-0-23\" stroke-width=\"2px\" d=\"M4095,352.0 C4095,264.5 4235.0,264.5 4235.0,352.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
489
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
490
+ " <textPath xlink:href=\"#arrow-91f44e7b72df4952ba0ee597a77e0111-0-23\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">udep</textPath>\n",
491
  " </text>\n",
492
+ " <path class=\"displacy-arrowhead\" d=\"M4235.0,354.0 L4243.0,342.0 4227.0,342.0\" fill=\"currentColor\"/>\n",
493
  "</g>\n",
494
  "\n",
495
  "<g class=\"displacy-arrow\">\n",
496
+ " <path class=\"displacy-arc\" id=\"arrow-91f44e7b72df4952ba0ee597a77e0111-0-24\" stroke-width=\"2px\" d=\"M4445,352.0 C4445,264.5 4585.0,264.5 4585.0,352.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
497
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
498
+ " <textPath xlink:href=\"#arrow-91f44e7b72df4952ba0ee597a77e0111-0-24\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">det</textPath>\n",
499
  " </text>\n",
500
+ " <path class=\"displacy-arrowhead\" d=\"M4445,354.0 L4437,342.0 4453,342.0\" fill=\"currentColor\"/>\n",
501
  "</g>\n",
502
  "\n",
503
  "<g class=\"displacy-arrow\">\n",
504
+ " <path class=\"displacy-arc\" id=\"arrow-91f44e7b72df4952ba0ee597a77e0111-0-25\" stroke-width=\"2px\" d=\"M4270,352.0 C4270,177.0 4590.0,177.0 4590.0,352.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
505
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
506
+ " <textPath xlink:href=\"#arrow-91f44e7b72df4952ba0ee597a77e0111-0-25\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">comp</textPath>\n",
507
  " </text>\n",
508
+ " <path class=\"displacy-arrowhead\" d=\"M4590.0,354.0 L4598.0,342.0 4582.0,342.0\" fill=\"currentColor\"/>\n",
509
  "</g>\n",
510
  "</svg></span>"
511
  ],
 
518
  }
519
  ],
520
  "source": [
521
+ "doc = ssudify(nlp(\"Males have a bony protrusion from the gill area (the opercular process) that is elongated and club-like, a feature that inspired the name of the species\"))\n",
522
  "# Since this is an interactive Jupyter environment, we can use displacy.render here\n",
523
  "displacy.render(doc, style='dep')"
524
  ]
 
532
  {
533
  "data": {
534
  "text/plain": [
535
+ "[(In 1849 he began, 0),\n",
536
+ " (public readings of the story, which proved so successful he, 11),\n",
537
+ " (undertook 127 further performances., 0)]"
 
 
 
 
538
  ]
539
  },
540
  "execution_count": 7,
main.py CHANGED
@@ -34,9 +34,14 @@ def ssudify(doc):
34
  if token.dep_ == "dep": token.dep_ = "unknown"
35
  if token.dep_ == "prep" and token.head.pos_ == "VERB" and token.i < token.head.i:
36
  token.dep_ = "mod"
37
- if token.dep_ == "ccomp" and any(sibling.dep_ in relations["comp"] + ["comp"]
38
  for sibling in token.head.rights if sibling.i < token.i):
39
  token.dep_ = "mod"
 
 
 
 
 
40
  for rel in relations.keys():
41
  if token.dep_ in relations[rel]: token.dep_ = rel
42
  for token in doc:
@@ -54,6 +59,17 @@ def ssudify(doc):
54
  if any(t.text == ";" for t in doc if (token.i < t.i < token.head.i or token.head.i < t.i < token.i) and token.pos_ != "PUNCT"):
55
  token.head = token
56
  token.dep_ = "root"
 
 
 
 
 
 
 
 
 
 
 
57
  return doc
58
 
59
 
 
34
  if token.dep_ == "dep": token.dep_ = "unknown"
35
  if token.dep_ == "prep" and token.head.pos_ == "VERB" and token.i < token.head.i:
36
  token.dep_ = "mod"
37
+ if token.dep_ == "ccomp" and any(sibling.dep_ in relations["comp"]
38
  for sibling in token.head.rights if sibling.i < token.i):
39
  token.dep_ = "mod"
40
+ dobjs = [child for child in token.children if child.dep_ == "dobj"]
41
+ if len(dobjs) > 1:
42
+ for i in range(1, len(dobjs)):
43
+ dobjs[i].head = dobjs[i-1]
44
+ dobjs[i].dep_ = "appos"
45
  for rel in relations.keys():
46
  if token.dep_ in relations[rel]: token.dep_ = rel
47
  for token in doc:
 
59
  if any(t.text == ";" for t in doc if (token.i < t.i < token.head.i or token.head.i < t.i < token.i) and token.pos_ != "PUNCT"):
60
  token.head = token
61
  token.dep_ = "root"
62
+ if token.pos_ in ["VERB", "AUX"]:
63
+ core_children = [child for child in token.children if child.dep_ in ["subj", "comp", "udep"]]
64
+ core_children.append(token)
65
+ core_children.sort(key=lambda x: x.i)
66
+ right_edge = [t for t in core_children[-1].subtree if t.pos_ != "PUNCT"][-1]
67
+ if right_edge.i < len(doc) - 1:
68
+ if right_edge.text == "," or doc[right_edge.i+1].text == ",":
69
+ for child in [child for child in token.children if child.i > right_edge.i and child.dep_ == "conj"]:
70
+ child.dep_ = "mod"
71
+ if token.pos_ in ["VERB", "AUX"] and token.head.pos_ == "NOUN" and token.dep_ == "udep":
72
+ token.dep_ = "mod"
73
  return doc
74
 
75