skalyan91 committed on
Commit
a6daf52
·
verified ·
1 Parent(s): 7037bc6

deploy at 2025-12-29 10:11:47.435885

Browse files
Files changed (2) hide show
  1. Dependency length.ipynb +24 -25
  2. main.py +6 -1
Dependency length.ipynb CHANGED
@@ -23,7 +23,7 @@
23
  },
24
  {
25
  "cell_type": "code",
26
- "execution_count": 21,
27
  "id": "c49f475d-547f-49d8-8550-2f1ad1555a14",
28
  "metadata": {},
29
  "outputs": [],
@@ -38,7 +38,7 @@
38
  },
39
  {
40
  "cell_type": "code",
41
- "execution_count": 40,
42
  "id": "cefdbf22-b747-4bea-b279-c9b01e75ff2e",
43
  "metadata": {},
44
  "outputs": [],
@@ -94,7 +94,10 @@
94
  " child.dep_ = \"mod\"\n",
95
  " for token in doc:\n",
96
  " if any(t.text in [\";\", \":\"] for t in doc \n",
97
- " if (token.i < t.i < token.head.i or token.head.i < t.i < token.i) and token.pos_ != \"PUNCT\"):\n",
 
 
 
98
  " token.head = token\n",
99
  " token.dep_ = \"root\"\n",
100
  " if token.pos_ in [\"VERB\", \"AUX\"]:\n",
@@ -113,7 +116,7 @@
113
  },
114
  {
115
  "cell_type": "code",
116
- "execution_count": 22,
117
  "id": "23efda66-9d58-4169-9fa0-05de47267b5a",
118
  "metadata": {},
119
  "outputs": [],
@@ -165,14 +168,14 @@
165
  },
166
  {
167
  "cell_type": "code",
168
- "execution_count": 44,
169
  "id": "ba90ff19-c665-49d8-8ad4-5caee885901d",
170
  "metadata": {},
171
  "outputs": [
172
  {
173
  "data": {
174
  "text/html": [
175
- "<span class=\"tex2jax_ignore\"><svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" xml:lang=\"en\" id=\"f58be662a3bb49738b1eec7a743254f7-0\" class=\"displacy\" width=\"1450\" height=\"399.5\" direction=\"ltr\" style=\"max-width: none; height: 399.5px; color: #000000; background: #ffffff; font-family: Arial; direction: ltr\">\n",
176
  "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"309.5\">\n",
177
  " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"50\">They</tspan>\n",
178
  " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"50\">PRON</tspan>\n",
@@ -214,57 +217,57 @@
214
  "</text>\n",
215
  "\n",
216
  "<g class=\"displacy-arrow\">\n",
217
- " <path class=\"displacy-arc\" id=\"arrow-f58be662a3bb49738b1eec7a743254f7-0-0\" stroke-width=\"2px\" d=\"M70,264.5 C70,177.0 215.0,177.0 215.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
218
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
219
- " <textPath xlink:href=\"#arrow-f58be662a3bb49738b1eec7a743254f7-0-0\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">subj</textPath>\n",
220
  " </text>\n",
221
  " <path class=\"displacy-arrowhead\" d=\"M70,266.5 L62,254.5 78,254.5\" fill=\"currentColor\"/>\n",
222
  "</g>\n",
223
  "\n",
224
  "<g class=\"displacy-arrow\">\n",
225
- " <path class=\"displacy-arc\" id=\"arrow-f58be662a3bb49738b1eec7a743254f7-0-1\" stroke-width=\"2px\" d=\"M420,264.5 C420,177.0 565.0,177.0 565.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
226
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
227
- " <textPath xlink:href=\"#arrow-f58be662a3bb49738b1eec7a743254f7-0-1\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">det</textPath>\n",
228
  " </text>\n",
229
  " <path class=\"displacy-arrowhead\" d=\"M420,266.5 L412,254.5 428,254.5\" fill=\"currentColor\"/>\n",
230
  "</g>\n",
231
  "\n",
232
  "<g class=\"displacy-arrow\">\n",
233
- " <path class=\"displacy-arc\" id=\"arrow-f58be662a3bb49738b1eec7a743254f7-0-2\" stroke-width=\"2px\" d=\"M245,264.5 C245,89.5 570.0,89.5 570.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
234
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
235
- " <textPath xlink:href=\"#arrow-f58be662a3bb49738b1eec7a743254f7-0-2\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">comp</textPath>\n",
236
  " </text>\n",
237
  " <path class=\"displacy-arrowhead\" d=\"M570.0,266.5 L578.0,254.5 562.0,254.5\" fill=\"currentColor\"/>\n",
238
  "</g>\n",
239
  "\n",
240
  "<g class=\"displacy-arrow\">\n",
241
- " <path class=\"displacy-arc\" id=\"arrow-f58be662a3bb49738b1eec7a743254f7-0-3\" stroke-width=\"2px\" d=\"M245,264.5 C245,2.0 750.0,2.0 750.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
242
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
243
- " <textPath xlink:href=\"#arrow-f58be662a3bb49738b1eec7a743254f7-0-3\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">udep</textPath>\n",
244
  " </text>\n",
245
  " <path class=\"displacy-arrowhead\" d=\"M750.0,266.5 L758.0,254.5 742.0,254.5\" fill=\"currentColor\"/>\n",
246
  "</g>\n",
247
  "\n",
248
  "<g class=\"displacy-arrow\">\n",
249
- " <path class=\"displacy-arc\" id=\"arrow-f58be662a3bb49738b1eec7a743254f7-0-4\" stroke-width=\"2px\" d=\"M945,264.5 C945,89.5 1270.0,89.5 1270.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
250
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
251
- " <textPath xlink:href=\"#arrow-f58be662a3bb49738b1eec7a743254f7-0-4\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">det</textPath>\n",
252
  " </text>\n",
253
  " <path class=\"displacy-arrowhead\" d=\"M945,266.5 L937,254.5 953,254.5\" fill=\"currentColor\"/>\n",
254
  "</g>\n",
255
  "\n",
256
  "<g class=\"displacy-arrow\">\n",
257
- " <path class=\"displacy-arc\" id=\"arrow-f58be662a3bb49738b1eec7a743254f7-0-5\" stroke-width=\"2px\" d=\"M1120,264.5 C1120,177.0 1265.0,177.0 1265.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
258
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
259
- " <textPath xlink:href=\"#arrow-f58be662a3bb49738b1eec7a743254f7-0-5\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">udep</textPath>\n",
260
  " </text>\n",
261
  " <path class=\"displacy-arrowhead\" d=\"M1120,266.5 L1112,254.5 1128,254.5\" fill=\"currentColor\"/>\n",
262
  "</g>\n",
263
  "\n",
264
  "<g class=\"displacy-arrow\">\n",
265
- " <path class=\"displacy-arc\" id=\"arrow-f58be662a3bb49738b1eec7a743254f7-0-6\" stroke-width=\"2px\" d=\"M770,264.5 C770,2.0 1275.0,2.0 1275.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
266
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
267
- " <textPath xlink:href=\"#arrow-f58be662a3bb49738b1eec7a743254f7-0-6\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">comp</textPath>\n",
268
  " </text>\n",
269
  " <path class=\"displacy-arrowhead\" d=\"M1275.0,266.5 L1283.0,254.5 1267.0,254.5\" fill=\"currentColor\"/>\n",
270
  "</g>\n",
@@ -293,11 +296,7 @@
293
  {
294
  "data": {
295
  "text/plain": [
296
- "[(to assume, 0),\n",
297
- " (, among the powers of the earth, the separate and equal, 12),\n",
298
- " (station to which the laws, 0),\n",
299
- " (of nature and, 3),\n",
300
- " (of nature’s God entitle them, 0)]"
301
  ]
302
  },
303
  "execution_count": 7,
 
23
  },
24
  {
25
  "cell_type": "code",
26
+ "execution_count": 2,
27
  "id": "c49f475d-547f-49d8-8550-2f1ad1555a14",
28
  "metadata": {},
29
  "outputs": [],
 
38
  },
39
  {
40
  "cell_type": "code",
41
+ "execution_count": 3,
42
  "id": "cefdbf22-b747-4bea-b279-c9b01e75ff2e",
43
  "metadata": {},
44
  "outputs": [],
 
94
  " child.dep_ = \"mod\"\n",
95
  " for token in doc:\n",
96
  " if any(t.text in [\";\", \":\"] for t in doc \n",
97
+ " if ((token.i < t.i < token.head.i and not\n",
98
+ " (any(p.text == \"(\" for p in doc if token.i < p.i < t.i) and\n",
99
+ " any(p.text == \")\" for p in doc if t.i < p.i < token.head.i))) or \n",
100
+ " token.head.i < t.i < token.i) and token.pos_ != \"PUNCT\"):\n",
101
  " token.head = token\n",
102
  " token.dep_ = \"root\"\n",
103
  " if token.pos_ in [\"VERB\", \"AUX\"]:\n",
 
116
  },
117
  {
118
  "cell_type": "code",
119
+ "execution_count": 4,
120
  "id": "23efda66-9d58-4169-9fa0-05de47267b5a",
121
  "metadata": {},
122
  "outputs": [],
 
168
  },
169
  {
170
  "cell_type": "code",
171
+ "execution_count": 6,
172
  "id": "ba90ff19-c665-49d8-8ad4-5caee885901d",
173
  "metadata": {},
174
  "outputs": [
175
  {
176
  "data": {
177
  "text/html": [
178
+ "<span class=\"tex2jax_ignore\"><svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" xml:lang=\"en\" id=\"7cae4e9aeafb4cf4bd557e4780eb30b6-0\" class=\"displacy\" width=\"1450\" height=\"399.5\" direction=\"ltr\" style=\"max-width: none; height: 399.5px; color: #000000; background: #ffffff; font-family: Arial; direction: ltr\">\n",
179
  "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"309.5\">\n",
180
  " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"50\">They</tspan>\n",
181
  " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"50\">PRON</tspan>\n",
 
217
  "</text>\n",
218
  "\n",
219
  "<g class=\"displacy-arrow\">\n",
220
+ " <path class=\"displacy-arc\" id=\"arrow-7cae4e9aeafb4cf4bd557e4780eb30b6-0-0\" stroke-width=\"2px\" d=\"M70,264.5 C70,177.0 215.0,177.0 215.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
221
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
222
+ " <textPath xlink:href=\"#arrow-7cae4e9aeafb4cf4bd557e4780eb30b6-0-0\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">subj</textPath>\n",
223
  " </text>\n",
224
  " <path class=\"displacy-arrowhead\" d=\"M70,266.5 L62,254.5 78,254.5\" fill=\"currentColor\"/>\n",
225
  "</g>\n",
226
  "\n",
227
  "<g class=\"displacy-arrow\">\n",
228
+ " <path class=\"displacy-arc\" id=\"arrow-7cae4e9aeafb4cf4bd557e4780eb30b6-0-1\" stroke-width=\"2px\" d=\"M420,264.5 C420,177.0 565.0,177.0 565.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
229
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
230
+ " <textPath xlink:href=\"#arrow-7cae4e9aeafb4cf4bd557e4780eb30b6-0-1\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">det</textPath>\n",
231
  " </text>\n",
232
  " <path class=\"displacy-arrowhead\" d=\"M420,266.5 L412,254.5 428,254.5\" fill=\"currentColor\"/>\n",
233
  "</g>\n",
234
  "\n",
235
  "<g class=\"displacy-arrow\">\n",
236
+ " <path class=\"displacy-arc\" id=\"arrow-7cae4e9aeafb4cf4bd557e4780eb30b6-0-2\" stroke-width=\"2px\" d=\"M245,264.5 C245,89.5 570.0,89.5 570.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
237
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
238
+ " <textPath xlink:href=\"#arrow-7cae4e9aeafb4cf4bd557e4780eb30b6-0-2\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">comp</textPath>\n",
239
  " </text>\n",
240
  " <path class=\"displacy-arrowhead\" d=\"M570.0,266.5 L578.0,254.5 562.0,254.5\" fill=\"currentColor\"/>\n",
241
  "</g>\n",
242
  "\n",
243
  "<g class=\"displacy-arrow\">\n",
244
+ " <path class=\"displacy-arc\" id=\"arrow-7cae4e9aeafb4cf4bd557e4780eb30b6-0-3\" stroke-width=\"2px\" d=\"M245,264.5 C245,2.0 750.0,2.0 750.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
245
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
246
+ " <textPath xlink:href=\"#arrow-7cae4e9aeafb4cf4bd557e4780eb30b6-0-3\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">udep</textPath>\n",
247
  " </text>\n",
248
  " <path class=\"displacy-arrowhead\" d=\"M750.0,266.5 L758.0,254.5 742.0,254.5\" fill=\"currentColor\"/>\n",
249
  "</g>\n",
250
  "\n",
251
  "<g class=\"displacy-arrow\">\n",
252
+ " <path class=\"displacy-arc\" id=\"arrow-7cae4e9aeafb4cf4bd557e4780eb30b6-0-4\" stroke-width=\"2px\" d=\"M945,264.5 C945,89.5 1270.0,89.5 1270.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
253
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
254
+ " <textPath xlink:href=\"#arrow-7cae4e9aeafb4cf4bd557e4780eb30b6-0-4\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">det</textPath>\n",
255
  " </text>\n",
256
  " <path class=\"displacy-arrowhead\" d=\"M945,266.5 L937,254.5 953,254.5\" fill=\"currentColor\"/>\n",
257
  "</g>\n",
258
  "\n",
259
  "<g class=\"displacy-arrow\">\n",
260
+ " <path class=\"displacy-arc\" id=\"arrow-7cae4e9aeafb4cf4bd557e4780eb30b6-0-5\" stroke-width=\"2px\" d=\"M1120,264.5 C1120,177.0 1265.0,177.0 1265.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
261
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
262
+ " <textPath xlink:href=\"#arrow-7cae4e9aeafb4cf4bd557e4780eb30b6-0-5\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">udep</textPath>\n",
263
  " </text>\n",
264
  " <path class=\"displacy-arrowhead\" d=\"M1120,266.5 L1112,254.5 1128,254.5\" fill=\"currentColor\"/>\n",
265
  "</g>\n",
266
  "\n",
267
  "<g class=\"displacy-arrow\">\n",
268
+ " <path class=\"displacy-arc\" id=\"arrow-7cae4e9aeafb4cf4bd557e4780eb30b6-0-6\" stroke-width=\"2px\" d=\"M770,264.5 C770,2.0 1275.0,2.0 1275.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
269
  " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
270
+ " <textPath xlink:href=\"#arrow-7cae4e9aeafb4cf4bd557e4780eb30b6-0-6\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">comp</textPath>\n",
271
  " </text>\n",
272
  " <path class=\"displacy-arrowhead\" d=\"M1275.0,266.5 L1283.0,254.5 1267.0,254.5\" fill=\"currentColor\"/>\n",
273
  "</g>\n",
 
296
  {
297
  "data": {
298
  "text/plain": [
299
+ "[(They designated the building, as a national landmark., 0)]"
 
 
 
 
300
  ]
301
  },
302
  "execution_count": 7,
main.py CHANGED
@@ -64,7 +64,12 @@ def ssudify(doc):
64
  child.dep_ = "mod"
65
  for token in doc:
66
  if any(t.text in [";", ":"] for t in doc
67
- if (token.i < t.i < token.head.i or token.head.i < t.i < token.i) and token.pos_ != "PUNCT"):
 
 
 
 
 
68
  token.head = token
69
  token.dep_ = "root"
70
  if token.pos_ in ["VERB", "AUX"]:
 
64
  child.dep_ = "mod"
65
  for token in doc:
66
  if any(t.text in [";", ":"] for t in doc
67
+ if ((token.i < t.i < token.head.i and not
68
+ (any(p.text == "(" for p in doc if token.i < p.i < t.i) and
69
+ any(p.text == ")" for p in doc if t.i < p.i < token.head.i))) or
70
+ (token.head.i < t.i < token.i and not
71
+ (any(p.text == "(" for p in doc if token.head.i < p.i < t.i) and
72
+ any(p.text == ")" for p in doc if t.i < p.i < token.i)))) and token.pos_ != "PUNCT"):
73
  token.head = token
74
  token.dep_ = "root"
75
  if token.pos_ in ["VERB", "AUX"]: