skalyan91 commited on
Commit
7037bc6
·
verified ·
1 Parent(s): 3928738

deploy at 2025-12-29 09:52:10.855787

Browse files
Files changed (2) hide show
  1. Dependency length.ipynb +65 -282
  2. main.py +10 -2
Dependency length.ipynb CHANGED
@@ -23,7 +23,7 @@
23
  },
24
  {
25
  "cell_type": "code",
26
- "execution_count": 2,
27
  "id": "c49f475d-547f-49d8-8550-2f1ad1555a14",
28
  "metadata": {},
29
  "outputs": [],
@@ -38,7 +38,7 @@
38
  },
39
  {
40
  "cell_type": "code",
41
- "execution_count": 22,
42
  "id": "cefdbf22-b747-4bea-b279-c9b01e75ff2e",
43
  "metadata": {},
44
  "outputs": [],
@@ -62,8 +62,14 @@
62
  " to_reverse[i-1].dep_ = \"comp\"\n",
63
  " for token in doc:\n",
64
  " if token.dep_ == \"dep\": token.dep_ = \"unknown\"\n",
65
- " if token.dep_ == \"prep\" and token.head.pos_ == \"VERB\" and token.i < token.head.i and token.head.dep_ not in relations[\"mod\"]:\n",
 
66
  " token.dep_ = \"mod\"\n",
 
 
 
 
 
67
  " if token.dep_ == \"ccomp\" and any(sibling.dep_ in relations[\"comp\"] \n",
68
  " for sibling in token.head.rights if sibling.i < token.i):\n",
69
  " token.dep_ = \"mod\"\n",
@@ -72,6 +78,7 @@
72
  " for i in range(1, len(dobjs)):\n",
73
  " dobjs[i].head = dobjs[i-1]\n",
74
  " dobjs[i].dep_ = \"appos\"\n",
 
75
  " for rel in relations.keys():\n",
76
  " if token.dep_ in relations[rel]: token.dep_ = rel\n",
77
  " for token in doc:\n",
@@ -86,7 +93,8 @@
86
  " if child.dep_ in [\"comp\", \"udep\"] and token.dep_ != \"mod\":\n",
87
  " child.dep_ = \"mod\"\n",
88
  " for token in doc:\n",
89
- " if any(t.text == \";\" for t in doc if (token.i < t.i < token.head.i or token.head.i < t.i < token.i) and token.pos_ != \"PUNCT\"):\n",
 
90
  " token.head = token\n",
91
  " token.dep_ = \"root\"\n",
92
  " if token.pos_ in [\"VERB\", \"AUX\"]:\n",
@@ -105,13 +113,13 @@
105
  },
106
  {
107
  "cell_type": "code",
108
- "execution_count": 4,
109
  "id": "23efda66-9d58-4169-9fa0-05de47267b5a",
110
  "metadata": {},
111
  "outputs": [],
112
  "source": [
113
  "def flyover(token):\n",
114
- " if token.dep_ in [\"subj\", \"comp\", \"udep\", \"conj\"]:\n",
115
  " dep_distance = abs(token.i - token.head.i)\n",
116
  " if token.head.i < token.i:\n",
117
  " return (token.doc[token.head.i+1:token.i], dep_distance - 1)\n",
@@ -157,334 +165,108 @@
157
  },
158
  {
159
  "cell_type": "code",
160
- "execution_count": 24,
161
  "id": "ba90ff19-c665-49d8-8ad4-5caee885901d",
162
  "metadata": {},
163
  "outputs": [
164
  {
165
  "data": {
166
  "text/html": [
167
- "<span class=\"tex2jax_ignore\"><svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" xml:lang=\"en\" id=\"9f58d58c7a8540688553bdb828bd211d-0\" class=\"displacy\" width=\"4600\" height=\"662.0\" direction=\"ltr\" style=\"max-width: none; height: 662.0px; color: #000000; background: #ffffff; font-family: Arial; direction: ltr\">\n",
168
- "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"572.0\">\n",
169
- " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"50\">to</tspan>\n",
170
- " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"50\">PART</tspan>\n",
171
  "</text>\n",
172
  "\n",
173
- "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"572.0\">\n",
174
- " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"225\">assume,</tspan>\n",
175
  " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"225\">VERB</tspan>\n",
176
  "</text>\n",
177
  "\n",
178
- "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"572.0\">\n",
179
- " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"400\">among</tspan>\n",
180
- " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"400\">ADP</tspan>\n",
181
  "</text>\n",
182
  "\n",
183
- "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"572.0\">\n",
184
- " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"575\">the</tspan>\n",
185
- " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"575\">DET</tspan>\n",
186
  "</text>\n",
187
  "\n",
188
- "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"572.0\">\n",
189
- " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"750\">powers</tspan>\n",
190
- " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"750\">NOUN</tspan>\n",
191
  "</text>\n",
192
  "\n",
193
- "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"572.0\">\n",
194
- " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"925\">of</tspan>\n",
195
- " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"925\">ADP</tspan>\n",
196
  "</text>\n",
197
  "\n",
198
- "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"572.0\">\n",
199
- " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"1100\">the</tspan>\n",
200
- " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"1100\">DET</tspan>\n",
201
  "</text>\n",
202
  "\n",
203
- "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"572.0\">\n",
204
- " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"1275\">earth,</tspan>\n",
205
  " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"1275\">NOUN</tspan>\n",
206
  "</text>\n",
207
  "\n",
208
- "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"572.0\">\n",
209
- " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"1450\">the</tspan>\n",
210
- " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"1450\">DET</tspan>\n",
211
- "</text>\n",
212
- "\n",
213
- "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"572.0\">\n",
214
- " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"1625\">separate</tspan>\n",
215
- " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"1625\">ADJ</tspan>\n",
216
- "</text>\n",
217
- "\n",
218
- "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"572.0\">\n",
219
- " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"1800\">and</tspan>\n",
220
- " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"1800\">CCONJ</tspan>\n",
221
- "</text>\n",
222
- "\n",
223
- "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"572.0\">\n",
224
- " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"1975\">equal</tspan>\n",
225
- " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"1975\">ADJ</tspan>\n",
226
- "</text>\n",
227
- "\n",
228
- "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"572.0\">\n",
229
- " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"2150\">station</tspan>\n",
230
- " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"2150\">NOUN</tspan>\n",
231
- "</text>\n",
232
- "\n",
233
- "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"572.0\">\n",
234
- " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"2325\">to</tspan>\n",
235
- " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"2325\">PART</tspan>\n",
236
- "</text>\n",
237
- "\n",
238
- "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"572.0\">\n",
239
- " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"2500\">which</tspan>\n",
240
- " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"2500\">PRON</tspan>\n",
241
- "</text>\n",
242
- "\n",
243
- "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"572.0\">\n",
244
- " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"2675\">the</tspan>\n",
245
- " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"2675\">DET</tspan>\n",
246
- "</text>\n",
247
- "\n",
248
- "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"572.0\">\n",
249
- " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"2850\">laws</tspan>\n",
250
- " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"2850\">NOUN</tspan>\n",
251
- "</text>\n",
252
- "\n",
253
- "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"572.0\">\n",
254
- " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"3025\">of</tspan>\n",
255
- " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"3025\">ADP</tspan>\n",
256
- "</text>\n",
257
- "\n",
258
- "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"572.0\">\n",
259
- " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"3200\">nature</tspan>\n",
260
- " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"3200\">NOUN</tspan>\n",
261
- "</text>\n",
262
- "\n",
263
- "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"572.0\">\n",
264
- " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"3375\">and</tspan>\n",
265
- " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"3375\">CCONJ</tspan>\n",
266
- "</text>\n",
267
- "\n",
268
- "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"572.0\">\n",
269
- " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"3550\">of</tspan>\n",
270
- " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"3550\">ADP</tspan>\n",
271
- "</text>\n",
272
- "\n",
273
- "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"572.0\">\n",
274
- " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"3725\">nature</tspan>\n",
275
- " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"3725\">NOUN</tspan>\n",
276
- "</text>\n",
277
- "\n",
278
- "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"572.0\">\n",
279
- " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"3900\">’s</tspan>\n",
280
- " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"3900\">PART</tspan>\n",
281
- "</text>\n",
282
- "\n",
283
- "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"572.0\">\n",
284
- " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"4075\">God</tspan>\n",
285
- " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"4075\">PROPN</tspan>\n",
286
- "</text>\n",
287
- "\n",
288
- "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"572.0\">\n",
289
- " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"4250\">entitle</tspan>\n",
290
- " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"4250\">VERB</tspan>\n",
291
- "</text>\n",
292
- "\n",
293
- "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"572.0\">\n",
294
- " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"4425\">them</tspan>\n",
295
- " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"4425\">PRON</tspan>\n",
296
- "</text>\n",
297
- "\n",
298
- "<g class=\"displacy-arrow\">\n",
299
- " <path class=\"displacy-arc\" id=\"arrow-9f58d58c7a8540688553bdb828bd211d-0-0\" stroke-width=\"2px\" d=\"M70,527.0 C70,439.5 200.0,439.5 200.0,527.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
300
- " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
301
- " <textPath xlink:href=\"#arrow-9f58d58c7a8540688553bdb828bd211d-0-0\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">comp</textPath>\n",
302
- " </text>\n",
303
- " <path class=\"displacy-arrowhead\" d=\"M200.0,529.0 L208.0,517.0 192.0,517.0\" fill=\"currentColor\"/>\n",
304
- "</g>\n",
305
- "\n",
306
- "<g class=\"displacy-arrow\">\n",
307
- " <path class=\"displacy-arc\" id=\"arrow-9f58d58c7a8540688553bdb828bd211d-0-1\" stroke-width=\"2px\" d=\"M245,527.0 C245,439.5 375.0,439.5 375.0,527.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
308
- " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
309
- " <textPath xlink:href=\"#arrow-9f58d58c7a8540688553bdb828bd211d-0-1\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">udep</textPath>\n",
310
- " </text>\n",
311
- " <path class=\"displacy-arrowhead\" d=\"M375.0,529.0 L383.0,517.0 367.0,517.0\" fill=\"currentColor\"/>\n",
312
- "</g>\n",
313
- "\n",
314
- "<g class=\"displacy-arrow\">\n",
315
- " <path class=\"displacy-arc\" id=\"arrow-9f58d58c7a8540688553bdb828bd211d-0-2\" stroke-width=\"2px\" d=\"M595,527.0 C595,439.5 725.0,439.5 725.0,527.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
316
- " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
317
- " <textPath xlink:href=\"#arrow-9f58d58c7a8540688553bdb828bd211d-0-2\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">det</textPath>\n",
318
- " </text>\n",
319
- " <path class=\"displacy-arrowhead\" d=\"M595,529.0 L587,517.0 603,517.0\" fill=\"currentColor\"/>\n",
320
- "</g>\n",
321
- "\n",
322
- "<g class=\"displacy-arrow\">\n",
323
- " <path class=\"displacy-arc\" id=\"arrow-9f58d58c7a8540688553bdb828bd211d-0-3\" stroke-width=\"2px\" d=\"M420,527.0 C420,352.0 730.0,352.0 730.0,527.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
324
- " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
325
- " <textPath xlink:href=\"#arrow-9f58d58c7a8540688553bdb828bd211d-0-3\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">comp</textPath>\n",
326
- " </text>\n",
327
- " <path class=\"displacy-arrowhead\" d=\"M730.0,529.0 L738.0,517.0 722.0,517.0\" fill=\"currentColor\"/>\n",
328
- "</g>\n",
329
- "\n",
330
- "<g class=\"displacy-arrow\">\n",
331
- " <path class=\"displacy-arc\" id=\"arrow-9f58d58c7a8540688553bdb828bd211d-0-4\" stroke-width=\"2px\" d=\"M770,527.0 C770,439.5 900.0,439.5 900.0,527.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
332
- " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
333
- " <textPath xlink:href=\"#arrow-9f58d58c7a8540688553bdb828bd211d-0-4\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">udep</textPath>\n",
334
- " </text>\n",
335
- " <path class=\"displacy-arrowhead\" d=\"M900.0,529.0 L908.0,517.0 892.0,517.0\" fill=\"currentColor\"/>\n",
336
- "</g>\n",
337
- "\n",
338
- "<g class=\"displacy-arrow\">\n",
339
- " <path class=\"displacy-arc\" id=\"arrow-9f58d58c7a8540688553bdb828bd211d-0-5\" stroke-width=\"2px\" d=\"M1120,527.0 C1120,439.5 1250.0,439.5 1250.0,527.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
340
- " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
341
- " <textPath xlink:href=\"#arrow-9f58d58c7a8540688553bdb828bd211d-0-5\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">det</textPath>\n",
342
- " </text>\n",
343
- " <path class=\"displacy-arrowhead\" d=\"M1120,529.0 L1112,517.0 1128,517.0\" fill=\"currentColor\"/>\n",
344
- "</g>\n",
345
- "\n",
346
- "<g class=\"displacy-arrow\">\n",
347
- " <path class=\"displacy-arc\" id=\"arrow-9f58d58c7a8540688553bdb828bd211d-0-6\" stroke-width=\"2px\" d=\"M945,527.0 C945,352.0 1255.0,352.0 1255.0,527.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
348
- " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
349
- " <textPath xlink:href=\"#arrow-9f58d58c7a8540688553bdb828bd211d-0-6\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">comp</textPath>\n",
350
- " </text>\n",
351
- " <path class=\"displacy-arrowhead\" d=\"M1255.0,529.0 L1263.0,517.0 1247.0,517.0\" fill=\"currentColor\"/>\n",
352
- "</g>\n",
353
- "\n",
354
- "<g class=\"displacy-arrow\">\n",
355
- " <path class=\"displacy-arc\" id=\"arrow-9f58d58c7a8540688553bdb828bd211d-0-7\" stroke-width=\"2px\" d=\"M1470,527.0 C1470,177.0 2140.0,177.0 2140.0,527.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
356
- " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
357
- " <textPath xlink:href=\"#arrow-9f58d58c7a8540688553bdb828bd211d-0-7\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">det</textPath>\n",
358
- " </text>\n",
359
- " <path class=\"displacy-arrowhead\" d=\"M1470,529.0 L1462,517.0 1478,517.0\" fill=\"currentColor\"/>\n",
360
- "</g>\n",
361
- "\n",
362
- "<g class=\"displacy-arrow\">\n",
363
- " <path class=\"displacy-arc\" id=\"arrow-9f58d58c7a8540688553bdb828bd211d-0-8\" stroke-width=\"2px\" d=\"M1645,527.0 C1645,264.5 2135.0,264.5 2135.0,527.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
364
- " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
365
- " <textPath xlink:href=\"#arrow-9f58d58c7a8540688553bdb828bd211d-0-8\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">udep</textPath>\n",
366
- " </text>\n",
367
- " <path class=\"displacy-arrowhead\" d=\"M1645,529.0 L1637,517.0 1653,517.0\" fill=\"currentColor\"/>\n",
368
- "</g>\n",
369
- "\n",
370
- "<g class=\"displacy-arrow\">\n",
371
- " <path class=\"displacy-arc\" id=\"arrow-9f58d58c7a8540688553bdb828bd211d-0-9\" stroke-width=\"2px\" d=\"M1645,527.0 C1645,439.5 1775.0,439.5 1775.0,527.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
372
- " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
373
- " <textPath xlink:href=\"#arrow-9f58d58c7a8540688553bdb828bd211d-0-9\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">cc</textPath>\n",
374
- " </text>\n",
375
- " <path class=\"displacy-arrowhead\" d=\"M1775.0,529.0 L1783.0,517.0 1767.0,517.0\" fill=\"currentColor\"/>\n",
376
- "</g>\n",
377
- "\n",
378
- "<g class=\"displacy-arrow\">\n",
379
- " <path class=\"displacy-arc\" id=\"arrow-9f58d58c7a8540688553bdb828bd211d-0-10\" stroke-width=\"2px\" d=\"M1645,527.0 C1645,352.0 1955.0,352.0 1955.0,527.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
380
- " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
381
- " <textPath xlink:href=\"#arrow-9f58d58c7a8540688553bdb828bd211d-0-10\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">conj</textPath>\n",
382
- " </text>\n",
383
- " <path class=\"displacy-arrowhead\" d=\"M1955.0,529.0 L1963.0,517.0 1947.0,517.0\" fill=\"currentColor\"/>\n",
384
- "</g>\n",
385
- "\n",
386
- "<g class=\"displacy-arrow\">\n",
387
- " <path class=\"displacy-arc\" id=\"arrow-9f58d58c7a8540688553bdb828bd211d-0-11\" stroke-width=\"2px\" d=\"M245,527.0 C245,89.5 2145.0,89.5 2145.0,527.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
388
- " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
389
- " <textPath xlink:href=\"#arrow-9f58d58c7a8540688553bdb828bd211d-0-11\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">comp</textPath>\n",
390
- " </text>\n",
391
- " <path class=\"displacy-arrowhead\" d=\"M2145.0,529.0 L2153.0,517.0 2137.0,517.0\" fill=\"currentColor\"/>\n",
392
- "</g>\n",
393
- "\n",
394
- "<g class=\"displacy-arrow\">\n",
395
- " <path class=\"displacy-arc\" id=\"arrow-9f58d58c7a8540688553bdb828bd211d-0-12\" stroke-width=\"2px\" d=\"M2345,527.0 C2345,352.0 2830.0,352.0 2830.0,527.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
396
- " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
397
- " <textPath xlink:href=\"#arrow-9f58d58c7a8540688553bdb828bd211d-0-12\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">udep</textPath>\n",
398
- " </text>\n",
399
- " <path class=\"displacy-arrowhead\" d=\"M2345,529.0 L2337,517.0 2353,517.0\" fill=\"currentColor\"/>\n",
400
- "</g>\n",
401
- "\n",
402
- "<g class=\"displacy-arrow\">\n",
403
- " <path class=\"displacy-arc\" id=\"arrow-9f58d58c7a8540688553bdb828bd211d-0-13\" stroke-width=\"2px\" d=\"M2345,527.0 C2345,439.5 2475.0,439.5 2475.0,527.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
404
- " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
405
- " <textPath xlink:href=\"#arrow-9f58d58c7a8540688553bdb828bd211d-0-13\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">comp</textPath>\n",
406
- " </text>\n",
407
- " <path class=\"displacy-arrowhead\" d=\"M2475.0,529.0 L2483.0,517.0 2467.0,517.0\" fill=\"currentColor\"/>\n",
408
- "</g>\n",
409
- "\n",
410
- "<g class=\"displacy-arrow\">\n",
411
- " <path class=\"displacy-arc\" id=\"arrow-9f58d58c7a8540688553bdb828bd211d-0-14\" stroke-width=\"2px\" d=\"M2695,527.0 C2695,439.5 2825.0,439.5 2825.0,527.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
412
- " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
413
- " <textPath xlink:href=\"#arrow-9f58d58c7a8540688553bdb828bd211d-0-14\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">det</textPath>\n",
414
- " </text>\n",
415
- " <path class=\"displacy-arrowhead\" d=\"M2695,529.0 L2687,517.0 2703,517.0\" fill=\"currentColor\"/>\n",
416
- "</g>\n",
417
- "\n",
418
- "<g class=\"displacy-arrow\">\n",
419
- " <path class=\"displacy-arc\" id=\"arrow-9f58d58c7a8540688553bdb828bd211d-0-15\" stroke-width=\"2px\" d=\"M2170,527.0 C2170,264.5 2835.0,264.5 2835.0,527.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
420
- " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
421
- " <textPath xlink:href=\"#arrow-9f58d58c7a8540688553bdb828bd211d-0-15\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">mod</textPath>\n",
422
- " </text>\n",
423
- " <path class=\"displacy-arrowhead\" d=\"M2835.0,529.0 L2843.0,517.0 2827.0,517.0\" fill=\"currentColor\"/>\n",
424
- "</g>\n",
425
- "\n",
426
- "<g class=\"displacy-arrow\">\n",
427
- " <path class=\"displacy-arc\" id=\"arrow-9f58d58c7a8540688553bdb828bd211d-0-16\" stroke-width=\"2px\" d=\"M2870,527.0 C2870,439.5 3000.0,439.5 3000.0,527.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
428
- " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
429
- " <textPath xlink:href=\"#arrow-9f58d58c7a8540688553bdb828bd211d-0-16\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">udep</textPath>\n",
430
- " </text>\n",
431
- " <path class=\"displacy-arrowhead\" d=\"M3000.0,529.0 L3008.0,517.0 2992.0,517.0\" fill=\"currentColor\"/>\n",
432
- "</g>\n",
433
- "\n",
434
  "<g class=\"displacy-arrow\">\n",
435
- " <path class=\"displacy-arc\" id=\"arrow-9f58d58c7a8540688553bdb828bd211d-0-17\" stroke-width=\"2px\" d=\"M3045,527.0 C3045,439.5 3175.0,439.5 3175.0,527.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
436
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
437
- " <textPath xlink:href=\"#arrow-9f58d58c7a8540688553bdb828bd211d-0-17\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">comp</textPath>\n",
438
  " </text>\n",
439
- " <path class=\"displacy-arrowhead\" d=\"M3175.0,529.0 L3183.0,517.0 3167.0,517.0\" fill=\"currentColor\"/>\n",
440
  "</g>\n",
441
  "\n",
442
  "<g class=\"displacy-arrow\">\n",
443
- " <path class=\"displacy-arc\" id=\"arrow-9f58d58c7a8540688553bdb828bd211d-0-18\" stroke-width=\"2px\" d=\"M2870,527.0 C2870,352.0 3355.0,352.0 3355.0,527.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
444
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
445
- " <textPath xlink:href=\"#arrow-9f58d58c7a8540688553bdb828bd211d-0-18\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">cc</textPath>\n",
446
  " </text>\n",
447
- " <path class=\"displacy-arrowhead\" d=\"M3355.0,529.0 L3363.0,517.0 3347.0,517.0\" fill=\"currentColor\"/>\n",
448
  "</g>\n",
449
  "\n",
450
  "<g class=\"displacy-arrow\">\n",
451
- " <path class=\"displacy-arc\" id=\"arrow-9f58d58c7a8540688553bdb828bd211d-0-19\" stroke-width=\"2px\" d=\"M2870,527.0 C2870,264.5 3535.0,264.5 3535.0,527.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
452
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
453
- " <textPath xlink:href=\"#arrow-9f58d58c7a8540688553bdb828bd211d-0-19\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">udep</textPath>\n",
454
  " </text>\n",
455
- " <path class=\"displacy-arrowhead\" d=\"M3535.0,529.0 L3543.0,517.0 3527.0,517.0\" fill=\"currentColor\"/>\n",
456
  "</g>\n",
457
  "\n",
458
  "<g class=\"displacy-arrow\">\n",
459
- " <path class=\"displacy-arc\" id=\"arrow-9f58d58c7a8540688553bdb828bd211d-0-20\" stroke-width=\"2px\" d=\"M3570,527.0 C3570,439.5 3700.0,439.5 3700.0,527.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
460
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
461
- " <textPath xlink:href=\"#arrow-9f58d58c7a8540688553bdb828bd211d-0-20\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">comp</textPath>\n",
462
  " </text>\n",
463
- " <path class=\"displacy-arrowhead\" d=\"M3700.0,529.0 L3708.0,517.0 3692.0,517.0\" fill=\"currentColor\"/>\n",
464
  "</g>\n",
465
  "\n",
466
  "<g class=\"displacy-arrow\">\n",
467
- " <path class=\"displacy-arc\" id=\"arrow-9f58d58c7a8540688553bdb828bd211d-0-21\" stroke-width=\"2px\" d=\"M245,527.0 C245,2.0 3900.0,2.0 3900.0,527.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
468
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
469
- " <textPath xlink:href=\"#arrow-9f58d58c7a8540688553bdb828bd211d-0-21\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">punct</textPath>\n",
470
  " </text>\n",
471
- " <path class=\"displacy-arrowhead\" d=\"M3900.0,529.0 L3908.0,517.0 3892.0,517.0\" fill=\"currentColor\"/>\n",
472
  "</g>\n",
473
  "\n",
474
  "<g class=\"displacy-arrow\">\n",
475
- " <path class=\"displacy-arc\" id=\"arrow-9f58d58c7a8540688553bdb828bd211d-0-22\" stroke-width=\"2px\" d=\"M4095,527.0 C4095,439.5 4225.0,439.5 4225.0,527.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
476
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
477
- " <textPath xlink:href=\"#arrow-9f58d58c7a8540688553bdb828bd211d-0-22\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">subj</textPath>\n",
478
  " </text>\n",
479
- " <path class=\"displacy-arrowhead\" d=\"M4095,529.0 L4087,517.0 4103,517.0\" fill=\"currentColor\"/>\n",
480
  "</g>\n",
481
  "\n",
482
  "<g class=\"displacy-arrow\">\n",
483
- " <path class=\"displacy-arc\" id=\"arrow-9f58d58c7a8540688553bdb828bd211d-0-23\" stroke-width=\"2px\" d=\"M4270,527.0 C4270,439.5 4400.0,439.5 4400.0,527.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
484
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
485
- " <textPath xlink:href=\"#arrow-9f58d58c7a8540688553bdb828bd211d-0-23\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">comp</textPath>\n",
486
  " </text>\n",
487
- " <path class=\"displacy-arrowhead\" d=\"M4400.0,529.0 L4408.0,517.0 4392.0,517.0\" fill=\"currentColor\"/>\n",
488
  "</g>\n",
489
  "</svg></span>"
490
  ],
@@ -497,7 +279,7 @@
497
  }
498
  ],
499
  "source": [
500
- "doc = ssudify(nlp(\"to assume, among the powers of the earth, the separate and equal station to which the laws of nature and of nature’s God entitle them\"))\n",
501
  "# Since this is an interactive Jupyter environment, we can use displacy.render here\n",
502
  "displacy.render(doc, style='dep')"
503
  ]
@@ -511,10 +293,11 @@
511
  {
512
  "data": {
513
  "text/plain": [
514
- "[(Males have a bony protrusion from the gill area (the opercular process) that is,\n",
515
- " 0),\n",
516
- " (elongated and club-, 4),\n",
517
- " (like, a feature that inspired the name of the species, 0)]"
 
518
  ]
519
  },
520
  "execution_count": 7,
 
23
  },
24
  {
25
  "cell_type": "code",
26
+ "execution_count": 21,
27
  "id": "c49f475d-547f-49d8-8550-2f1ad1555a14",
28
  "metadata": {},
29
  "outputs": [],
 
38
  },
39
  {
40
  "cell_type": "code",
41
+ "execution_count": 40,
42
  "id": "cefdbf22-b747-4bea-b279-c9b01e75ff2e",
43
  "metadata": {},
44
  "outputs": [],
 
62
  " to_reverse[i-1].dep_ = \"comp\"\n",
63
  " for token in doc:\n",
64
  " if token.dep_ == \"dep\": token.dep_ = \"unknown\"\n",
65
+ " if token.dep_ == \"prep\" and token.head.pos_ in [\"VERB\", \"AUX\"] and \\\n",
66
+ " token.i < token.head.i and token.head.dep_ not in relations[\"mod\"]:\n",
67
  " token.dep_ = \"mod\"\n",
68
+ " if token.dep_ == \"prep\" and token.head.pos_ in [\"VERB\", \"AUX\"] and \\\n",
69
+ " ((len(list(token.head.rights)) >= 1 and token == list(token.head.rights)[0]) or \n",
70
+ " (len(list(token.head.rights)) >= 2 and \n",
71
+ " list(token.head.rights)[0].dep_ == \"dobj\" and token == list(token.head.rights)[1])):\n",
72
+ " token.dep_ = \"comp\"\n",
73
  " if token.dep_ == \"ccomp\" and any(sibling.dep_ in relations[\"comp\"] \n",
74
  " for sibling in token.head.rights if sibling.i < token.i):\n",
75
  " token.dep_ = \"mod\"\n",
 
78
  " for i in range(1, len(dobjs)):\n",
79
  " dobjs[i].head = dobjs[i-1]\n",
80
  " dobjs[i].dep_ = \"appos\"\n",
81
+ " for token in doc:\n",
82
  " for rel in relations.keys():\n",
83
  " if token.dep_ in relations[rel]: token.dep_ = rel\n",
84
  " for token in doc:\n",
 
93
  " if child.dep_ in [\"comp\", \"udep\"] and token.dep_ != \"mod\":\n",
94
  " child.dep_ = \"mod\"\n",
95
  " for token in doc:\n",
96
+ " if any(t.text in [\";\", \":\"] for t in doc \n",
97
+ " if (token.i < t.i < token.head.i or token.head.i < t.i < token.i) and token.pos_ != \"PUNCT\"):\n",
98
  " token.head = token\n",
99
  " token.dep_ = \"root\"\n",
100
  " if token.pos_ in [\"VERB\", \"AUX\"]:\n",
 
113
  },
114
  {
115
  "cell_type": "code",
116
+ "execution_count": 22,
117
  "id": "23efda66-9d58-4169-9fa0-05de47267b5a",
118
  "metadata": {},
119
  "outputs": [],
120
  "source": [
121
  "def flyover(token):\n",
122
+ " if token.dep_ in [\"subj\", \"comp\", \"conj\"]:\n",
123
  " dep_distance = abs(token.i - token.head.i)\n",
124
  " if token.head.i < token.i:\n",
125
  " return (token.doc[token.head.i+1:token.i], dep_distance - 1)\n",
 
165
  },
166
  {
167
  "cell_type": "code",
168
+ "execution_count": 44,
169
  "id": "ba90ff19-c665-49d8-8ad4-5caee885901d",
170
  "metadata": {},
171
  "outputs": [
172
  {
173
  "data": {
174
  "text/html": [
175
+ "<span class=\"tex2jax_ignore\"><svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" xml:lang=\"en\" id=\"f58be662a3bb49738b1eec7a743254f7-0\" class=\"displacy\" width=\"1450\" height=\"399.5\" direction=\"ltr\" style=\"max-width: none; height: 399.5px; color: #000000; background: #ffffff; font-family: Arial; direction: ltr\">\n",
176
+ "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"309.5\">\n",
177
+ " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"50\">They</tspan>\n",
178
+ " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"50\">PRON</tspan>\n",
179
  "</text>\n",
180
  "\n",
181
+ "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"309.5\">\n",
182
+ " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"225\">designated</tspan>\n",
183
  " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"225\">VERB</tspan>\n",
184
  "</text>\n",
185
  "\n",
186
+ "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"309.5\">\n",
187
+ " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"400\">the</tspan>\n",
188
+ " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"400\">DET</tspan>\n",
189
  "</text>\n",
190
  "\n",
191
+ "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"309.5\">\n",
192
+ " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"575\">building,</tspan>\n",
193
+ " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"575\">NOUN</tspan>\n",
194
  "</text>\n",
195
  "\n",
196
+ "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"309.5\">\n",
197
+ " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"750\">as</tspan>\n",
198
+ " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"750\">ADP</tspan>\n",
199
  "</text>\n",
200
  "\n",
201
+ "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"309.5\">\n",
202
+ " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"925\">a</tspan>\n",
203
+ " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"925\">DET</tspan>\n",
204
  "</text>\n",
205
  "\n",
206
+ "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"309.5\">\n",
207
+ " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"1100\">national</tspan>\n",
208
+ " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"1100\">ADJ</tspan>\n",
209
  "</text>\n",
210
  "\n",
211
+ "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"309.5\">\n",
212
+ " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"1275\">landmark.</tspan>\n",
213
  " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"1275\">NOUN</tspan>\n",
214
  "</text>\n",
215
  "\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
  "<g class=\"displacy-arrow\">\n",
217
+ " <path class=\"displacy-arc\" id=\"arrow-f58be662a3bb49738b1eec7a743254f7-0-0\" stroke-width=\"2px\" d=\"M70,264.5 C70,177.0 215.0,177.0 215.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
218
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
219
+ " <textPath xlink:href=\"#arrow-f58be662a3bb49738b1eec7a743254f7-0-0\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">subj</textPath>\n",
220
  " </text>\n",
221
+ " <path class=\"displacy-arrowhead\" d=\"M70,266.5 L62,254.5 78,254.5\" fill=\"currentColor\"/>\n",
222
  "</g>\n",
223
  "\n",
224
  "<g class=\"displacy-arrow\">\n",
225
+ " <path class=\"displacy-arc\" id=\"arrow-f58be662a3bb49738b1eec7a743254f7-0-1\" stroke-width=\"2px\" d=\"M420,264.5 C420,177.0 565.0,177.0 565.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
226
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
227
+ " <textPath xlink:href=\"#arrow-f58be662a3bb49738b1eec7a743254f7-0-1\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">det</textPath>\n",
228
  " </text>\n",
229
+ " <path class=\"displacy-arrowhead\" d=\"M420,266.5 L412,254.5 428,254.5\" fill=\"currentColor\"/>\n",
230
  "</g>\n",
231
  "\n",
232
  "<g class=\"displacy-arrow\">\n",
233
+ " <path class=\"displacy-arc\" id=\"arrow-f58be662a3bb49738b1eec7a743254f7-0-2\" stroke-width=\"2px\" d=\"M245,264.5 C245,89.5 570.0,89.5 570.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
234
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
235
+ " <textPath xlink:href=\"#arrow-f58be662a3bb49738b1eec7a743254f7-0-2\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">comp</textPath>\n",
236
  " </text>\n",
237
+ " <path class=\"displacy-arrowhead\" d=\"M570.0,266.5 L578.0,254.5 562.0,254.5\" fill=\"currentColor\"/>\n",
238
  "</g>\n",
239
  "\n",
240
  "<g class=\"displacy-arrow\">\n",
241
+ " <path class=\"displacy-arc\" id=\"arrow-f58be662a3bb49738b1eec7a743254f7-0-3\" stroke-width=\"2px\" d=\"M245,264.5 C245,2.0 750.0,2.0 750.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
242
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
243
+ " <textPath xlink:href=\"#arrow-f58be662a3bb49738b1eec7a743254f7-0-3\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">udep</textPath>\n",
244
  " </text>\n",
245
+ " <path class=\"displacy-arrowhead\" d=\"M750.0,266.5 L758.0,254.5 742.0,254.5\" fill=\"currentColor\"/>\n",
246
  "</g>\n",
247
  "\n",
248
  "<g class=\"displacy-arrow\">\n",
249
+ " <path class=\"displacy-arc\" id=\"arrow-f58be662a3bb49738b1eec7a743254f7-0-4\" stroke-width=\"2px\" d=\"M945,264.5 C945,89.5 1270.0,89.5 1270.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
250
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
251
+ " <textPath xlink:href=\"#arrow-f58be662a3bb49738b1eec7a743254f7-0-4\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">det</textPath>\n",
252
  " </text>\n",
253
+ " <path class=\"displacy-arrowhead\" d=\"M945,266.5 L937,254.5 953,254.5\" fill=\"currentColor\"/>\n",
254
  "</g>\n",
255
  "\n",
256
  "<g class=\"displacy-arrow\">\n",
257
+ " <path class=\"displacy-arc\" id=\"arrow-f58be662a3bb49738b1eec7a743254f7-0-5\" stroke-width=\"2px\" d=\"M1120,264.5 C1120,177.0 1265.0,177.0 1265.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
258
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
259
+ " <textPath xlink:href=\"#arrow-f58be662a3bb49738b1eec7a743254f7-0-5\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">udep</textPath>\n",
260
  " </text>\n",
261
+ " <path class=\"displacy-arrowhead\" d=\"M1120,266.5 L1112,254.5 1128,254.5\" fill=\"currentColor\"/>\n",
262
  "</g>\n",
263
  "\n",
264
  "<g class=\"displacy-arrow\">\n",
265
+ " <path class=\"displacy-arc\" id=\"arrow-f58be662a3bb49738b1eec7a743254f7-0-6\" stroke-width=\"2px\" d=\"M770,264.5 C770,2.0 1275.0,2.0 1275.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
266
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
267
+ " <textPath xlink:href=\"#arrow-f58be662a3bb49738b1eec7a743254f7-0-6\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">comp</textPath>\n",
268
  " </text>\n",
269
+ " <path class=\"displacy-arrowhead\" d=\"M1275.0,266.5 L1283.0,254.5 1267.0,254.5\" fill=\"currentColor\"/>\n",
270
  "</g>\n",
271
  "</svg></span>"
272
  ],
 
279
  }
280
  ],
281
  "source": [
282
+ "doc = ssudify(nlp(\"They designated the building, as a national landmark.\"))\n",
283
  "# Since this is an interactive Jupyter environment, we can use displacy.render here\n",
284
  "displacy.render(doc, style='dep')"
285
  ]
 
293
  {
294
  "data": {
295
  "text/plain": [
296
+ "[(to assume, 0),\n",
297
+ " (, among the powers of the earth, the separate and equal, 12),\n",
298
+ " (station to which the laws, 0),\n",
299
+ " (of nature and, 3),\n",
300
+ " (of nature’s God entitle them, 0)]"
301
  ]
302
  },
303
  "execution_count": 7,
main.py CHANGED
@@ -32,8 +32,14 @@ def ssudify(doc):
32
  to_reverse[i-1].dep_ = "comp"
33
  for token in doc:
34
  if token.dep_ == "dep": token.dep_ = "unknown"
35
- if token.dep_ == "prep" and token.head.pos_ == "VERB" and token.i < token.head.i and token.head.dep_ not in relations["mod"]:
 
36
  token.dep_ = "mod"
 
 
 
 
 
37
  if token.dep_ == "ccomp" and any(sibling.dep_ in relations["comp"]
38
  for sibling in token.head.rights if sibling.i < token.i):
39
  token.dep_ = "mod"
@@ -42,6 +48,7 @@ def ssudify(doc):
42
  for i in range(1, len(dobjs)):
43
  dobjs[i].head = dobjs[i-1]
44
  dobjs[i].dep_ = "appos"
 
45
  for rel in relations.keys():
46
  if token.dep_ in relations[rel]: token.dep_ = rel
47
  for token in doc:
@@ -56,7 +63,8 @@ def ssudify(doc):
56
  if child.dep_ in ["comp", "udep"] and token.dep_ != "mod":
57
  child.dep_ = "mod"
58
  for token in doc:
59
- if any(t.text == ";" for t in doc if (token.i < t.i < token.head.i or token.head.i < t.i < token.i) and token.pos_ != "PUNCT"):
 
60
  token.head = token
61
  token.dep_ = "root"
62
  if token.pos_ in ["VERB", "AUX"]:
 
32
  to_reverse[i-1].dep_ = "comp"
33
  for token in doc:
34
  if token.dep_ == "dep": token.dep_ = "unknown"
35
+ if token.dep_ == "prep" and token.head.pos_ in ["VERB", "AUX"] and \
36
+ token.i < token.head.i and token.head.dep_ not in relations["mod"]:
37
  token.dep_ = "mod"
38
+ if token.dep_ == "prep" and token.head.pos_ in ["VERB", "AUX"] and \
39
+ ((len(list(token.head.rights)) >= 1 and token == list(token.head.rights)[0]) or
40
+ (len(list(token.head.rights)) >= 2 and
41
+ list(token.head.rights)[0].dep_ == "dobj" and token == list(token.head.rights)[1])):
42
+ token.dep_ = "comp"
43
  if token.dep_ == "ccomp" and any(sibling.dep_ in relations["comp"]
44
  for sibling in token.head.rights if sibling.i < token.i):
45
  token.dep_ = "mod"
 
48
  for i in range(1, len(dobjs)):
49
  dobjs[i].head = dobjs[i-1]
50
  dobjs[i].dep_ = "appos"
51
+ for token in doc:
52
  for rel in relations.keys():
53
  if token.dep_ in relations[rel]: token.dep_ = rel
54
  for token in doc:
 
63
  if child.dep_ in ["comp", "udep"] and token.dep_ != "mod":
64
  child.dep_ = "mod"
65
  for token in doc:
66
+ if any(t.text in [";", ":"] for t in doc
67
+ if (token.i < t.i < token.head.i or token.head.i < t.i < token.i) and token.pos_ != "PUNCT"):
68
  token.head = token
69
  token.dep_ = "root"
70
  if token.pos_ in ["VERB", "AUX"]: