Spaces:
Running
Running
deploy at 2025-12-29 10:11:47.435885
Browse files- Dependency length.ipynb +24 -25
- main.py +6 -1
Dependency length.ipynb
CHANGED
|
@@ -23,7 +23,7 @@
|
|
| 23 |
},
|
| 24 |
{
|
| 25 |
"cell_type": "code",
|
| 26 |
-
"execution_count":
|
| 27 |
"id": "c49f475d-547f-49d8-8550-2f1ad1555a14",
|
| 28 |
"metadata": {},
|
| 29 |
"outputs": [],
|
|
@@ -38,7 +38,7 @@
|
|
| 38 |
},
|
| 39 |
{
|
| 40 |
"cell_type": "code",
|
| 41 |
-
"execution_count":
|
| 42 |
"id": "cefdbf22-b747-4bea-b279-c9b01e75ff2e",
|
| 43 |
"metadata": {},
|
| 44 |
"outputs": [],
|
|
@@ -94,7 +94,10 @@
|
|
| 94 |
" child.dep_ = \"mod\"\n",
|
| 95 |
" for token in doc:\n",
|
| 96 |
" if any(t.text in [\";\", \":\"] for t in doc \n",
|
| 97 |
-
" if (token.i < t.i < token.head.i
|
|
|
|
|
|
|
|
|
|
| 98 |
" token.head = token\n",
|
| 99 |
" token.dep_ = \"root\"\n",
|
| 100 |
" if token.pos_ in [\"VERB\", \"AUX\"]:\n",
|
|
@@ -113,7 +116,7 @@
|
|
| 113 |
},
|
| 114 |
{
|
| 115 |
"cell_type": "code",
|
| 116 |
-
"execution_count":
|
| 117 |
"id": "23efda66-9d58-4169-9fa0-05de47267b5a",
|
| 118 |
"metadata": {},
|
| 119 |
"outputs": [],
|
|
@@ -165,14 +168,14 @@
|
|
| 165 |
},
|
| 166 |
{
|
| 167 |
"cell_type": "code",
|
| 168 |
-
"execution_count":
|
| 169 |
"id": "ba90ff19-c665-49d8-8ad4-5caee885901d",
|
| 170 |
"metadata": {},
|
| 171 |
"outputs": [
|
| 172 |
{
|
| 173 |
"data": {
|
| 174 |
"text/html": [
|
| 175 |
-
"<span class=\"tex2jax_ignore\"><svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" xml:lang=\"en\" id=\"
|
| 176 |
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"309.5\">\n",
|
| 177 |
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"50\">They</tspan>\n",
|
| 178 |
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"50\">PRON</tspan>\n",
|
|
@@ -214,57 +217,57 @@
|
|
| 214 |
"</text>\n",
|
| 215 |
"\n",
|
| 216 |
"<g class=\"displacy-arrow\">\n",
|
| 217 |
-
" <path class=\"displacy-arc\" id=\"arrow-
|
| 218 |
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
| 219 |
-
" <textPath xlink:href=\"#arrow-
|
| 220 |
" </text>\n",
|
| 221 |
" <path class=\"displacy-arrowhead\" d=\"M70,266.5 L62,254.5 78,254.5\" fill=\"currentColor\"/>\n",
|
| 222 |
"</g>\n",
|
| 223 |
"\n",
|
| 224 |
"<g class=\"displacy-arrow\">\n",
|
| 225 |
-
" <path class=\"displacy-arc\" id=\"arrow-
|
| 226 |
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
| 227 |
-
" <textPath xlink:href=\"#arrow-
|
| 228 |
" </text>\n",
|
| 229 |
" <path class=\"displacy-arrowhead\" d=\"M420,266.5 L412,254.5 428,254.5\" fill=\"currentColor\"/>\n",
|
| 230 |
"</g>\n",
|
| 231 |
"\n",
|
| 232 |
"<g class=\"displacy-arrow\">\n",
|
| 233 |
-
" <path class=\"displacy-arc\" id=\"arrow-
|
| 234 |
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
| 235 |
-
" <textPath xlink:href=\"#arrow-
|
| 236 |
" </text>\n",
|
| 237 |
" <path class=\"displacy-arrowhead\" d=\"M570.0,266.5 L578.0,254.5 562.0,254.5\" fill=\"currentColor\"/>\n",
|
| 238 |
"</g>\n",
|
| 239 |
"\n",
|
| 240 |
"<g class=\"displacy-arrow\">\n",
|
| 241 |
-
" <path class=\"displacy-arc\" id=\"arrow-
|
| 242 |
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
| 243 |
-
" <textPath xlink:href=\"#arrow-
|
| 244 |
" </text>\n",
|
| 245 |
" <path class=\"displacy-arrowhead\" d=\"M750.0,266.5 L758.0,254.5 742.0,254.5\" fill=\"currentColor\"/>\n",
|
| 246 |
"</g>\n",
|
| 247 |
"\n",
|
| 248 |
"<g class=\"displacy-arrow\">\n",
|
| 249 |
-
" <path class=\"displacy-arc\" id=\"arrow-
|
| 250 |
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
| 251 |
-
" <textPath xlink:href=\"#arrow-
|
| 252 |
" </text>\n",
|
| 253 |
" <path class=\"displacy-arrowhead\" d=\"M945,266.5 L937,254.5 953,254.5\" fill=\"currentColor\"/>\n",
|
| 254 |
"</g>\n",
|
| 255 |
"\n",
|
| 256 |
"<g class=\"displacy-arrow\">\n",
|
| 257 |
-
" <path class=\"displacy-arc\" id=\"arrow-
|
| 258 |
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
| 259 |
-
" <textPath xlink:href=\"#arrow-
|
| 260 |
" </text>\n",
|
| 261 |
" <path class=\"displacy-arrowhead\" d=\"M1120,266.5 L1112,254.5 1128,254.5\" fill=\"currentColor\"/>\n",
|
| 262 |
"</g>\n",
|
| 263 |
"\n",
|
| 264 |
"<g class=\"displacy-arrow\">\n",
|
| 265 |
-
" <path class=\"displacy-arc\" id=\"arrow-
|
| 266 |
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
| 267 |
-
" <textPath xlink:href=\"#arrow-
|
| 268 |
" </text>\n",
|
| 269 |
" <path class=\"displacy-arrowhead\" d=\"M1275.0,266.5 L1283.0,254.5 1267.0,254.5\" fill=\"currentColor\"/>\n",
|
| 270 |
"</g>\n",
|
|
@@ -293,11 +296,7 @@
|
|
| 293 |
{
|
| 294 |
"data": {
|
| 295 |
"text/plain": [
|
| 296 |
-
"[(
|
| 297 |
-
" (, among the powers of the earth, the separate and equal, 12),\n",
|
| 298 |
-
" (station to which the laws, 0),\n",
|
| 299 |
-
" (of nature and, 3),\n",
|
| 300 |
-
" (of nature’s God entitle them, 0)]"
|
| 301 |
]
|
| 302 |
},
|
| 303 |
"execution_count": 7,
|
|
|
|
| 23 |
},
|
| 24 |
{
|
| 25 |
"cell_type": "code",
|
| 26 |
+
"execution_count": 2,
|
| 27 |
"id": "c49f475d-547f-49d8-8550-2f1ad1555a14",
|
| 28 |
"metadata": {},
|
| 29 |
"outputs": [],
|
|
|
|
| 38 |
},
|
| 39 |
{
|
| 40 |
"cell_type": "code",
|
| 41 |
+
"execution_count": 3,
|
| 42 |
"id": "cefdbf22-b747-4bea-b279-c9b01e75ff2e",
|
| 43 |
"metadata": {},
|
| 44 |
"outputs": [],
|
|
|
|
| 94 |
" child.dep_ = \"mod\"\n",
|
| 95 |
" for token in doc:\n",
|
| 96 |
" if any(t.text in [\";\", \":\"] for t in doc \n",
|
| 97 |
+
" if ((token.i < t.i < token.head.i and not\n",
|
| 98 |
+
" (any(p.text == \"(\" for p in doc if token.i < p.i < t.i) and\n",
|
| 99 |
+
" any(p.text == \")\" for p in doc if t.i < p.i < token.head.i))) or \n",
|
| 100 |
+
" token.head.i < t.i < token.i) and token.pos_ != \"PUNCT\"):\n",
|
| 101 |
" token.head = token\n",
|
| 102 |
" token.dep_ = \"root\"\n",
|
| 103 |
" if token.pos_ in [\"VERB\", \"AUX\"]:\n",
|
|
|
|
| 116 |
},
|
| 117 |
{
|
| 118 |
"cell_type": "code",
|
| 119 |
+
"execution_count": 4,
|
| 120 |
"id": "23efda66-9d58-4169-9fa0-05de47267b5a",
|
| 121 |
"metadata": {},
|
| 122 |
"outputs": [],
|
|
|
|
| 168 |
},
|
| 169 |
{
|
| 170 |
"cell_type": "code",
|
| 171 |
+
"execution_count": 6,
|
| 172 |
"id": "ba90ff19-c665-49d8-8ad4-5caee885901d",
|
| 173 |
"metadata": {},
|
| 174 |
"outputs": [
|
| 175 |
{
|
| 176 |
"data": {
|
| 177 |
"text/html": [
|
| 178 |
+
"<span class=\"tex2jax_ignore\"><svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" xml:lang=\"en\" id=\"7cae4e9aeafb4cf4bd557e4780eb30b6-0\" class=\"displacy\" width=\"1450\" height=\"399.5\" direction=\"ltr\" style=\"max-width: none; height: 399.5px; color: #000000; background: #ffffff; font-family: Arial; direction: ltr\">\n",
|
| 179 |
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"309.5\">\n",
|
| 180 |
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"50\">They</tspan>\n",
|
| 181 |
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"50\">PRON</tspan>\n",
|
|
|
|
| 217 |
"</text>\n",
|
| 218 |
"\n",
|
| 219 |
"<g class=\"displacy-arrow\">\n",
|
| 220 |
+
" <path class=\"displacy-arc\" id=\"arrow-7cae4e9aeafb4cf4bd557e4780eb30b6-0-0\" stroke-width=\"2px\" d=\"M70,264.5 C70,177.0 215.0,177.0 215.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
| 221 |
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
| 222 |
+
" <textPath xlink:href=\"#arrow-7cae4e9aeafb4cf4bd557e4780eb30b6-0-0\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">subj</textPath>\n",
|
| 223 |
" </text>\n",
|
| 224 |
" <path class=\"displacy-arrowhead\" d=\"M70,266.5 L62,254.5 78,254.5\" fill=\"currentColor\"/>\n",
|
| 225 |
"</g>\n",
|
| 226 |
"\n",
|
| 227 |
"<g class=\"displacy-arrow\">\n",
|
| 228 |
+
" <path class=\"displacy-arc\" id=\"arrow-7cae4e9aeafb4cf4bd557e4780eb30b6-0-1\" stroke-width=\"2px\" d=\"M420,264.5 C420,177.0 565.0,177.0 565.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
| 229 |
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
| 230 |
+
" <textPath xlink:href=\"#arrow-7cae4e9aeafb4cf4bd557e4780eb30b6-0-1\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">det</textPath>\n",
|
| 231 |
" </text>\n",
|
| 232 |
" <path class=\"displacy-arrowhead\" d=\"M420,266.5 L412,254.5 428,254.5\" fill=\"currentColor\"/>\n",
|
| 233 |
"</g>\n",
|
| 234 |
"\n",
|
| 235 |
"<g class=\"displacy-arrow\">\n",
|
| 236 |
+
" <path class=\"displacy-arc\" id=\"arrow-7cae4e9aeafb4cf4bd557e4780eb30b6-0-2\" stroke-width=\"2px\" d=\"M245,264.5 C245,89.5 570.0,89.5 570.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
| 237 |
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
| 238 |
+
" <textPath xlink:href=\"#arrow-7cae4e9aeafb4cf4bd557e4780eb30b6-0-2\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">comp</textPath>\n",
|
| 239 |
" </text>\n",
|
| 240 |
" <path class=\"displacy-arrowhead\" d=\"M570.0,266.5 L578.0,254.5 562.0,254.5\" fill=\"currentColor\"/>\n",
|
| 241 |
"</g>\n",
|
| 242 |
"\n",
|
| 243 |
"<g class=\"displacy-arrow\">\n",
|
| 244 |
+
" <path class=\"displacy-arc\" id=\"arrow-7cae4e9aeafb4cf4bd557e4780eb30b6-0-3\" stroke-width=\"2px\" d=\"M245,264.5 C245,2.0 750.0,2.0 750.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
| 245 |
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
| 246 |
+
" <textPath xlink:href=\"#arrow-7cae4e9aeafb4cf4bd557e4780eb30b6-0-3\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">udep</textPath>\n",
|
| 247 |
" </text>\n",
|
| 248 |
" <path class=\"displacy-arrowhead\" d=\"M750.0,266.5 L758.0,254.5 742.0,254.5\" fill=\"currentColor\"/>\n",
|
| 249 |
"</g>\n",
|
| 250 |
"\n",
|
| 251 |
"<g class=\"displacy-arrow\">\n",
|
| 252 |
+
" <path class=\"displacy-arc\" id=\"arrow-7cae4e9aeafb4cf4bd557e4780eb30b6-0-4\" stroke-width=\"2px\" d=\"M945,264.5 C945,89.5 1270.0,89.5 1270.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
| 253 |
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
| 254 |
+
" <textPath xlink:href=\"#arrow-7cae4e9aeafb4cf4bd557e4780eb30b6-0-4\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">det</textPath>\n",
|
| 255 |
" </text>\n",
|
| 256 |
" <path class=\"displacy-arrowhead\" d=\"M945,266.5 L937,254.5 953,254.5\" fill=\"currentColor\"/>\n",
|
| 257 |
"</g>\n",
|
| 258 |
"\n",
|
| 259 |
"<g class=\"displacy-arrow\">\n",
|
| 260 |
+
" <path class=\"displacy-arc\" id=\"arrow-7cae4e9aeafb4cf4bd557e4780eb30b6-0-5\" stroke-width=\"2px\" d=\"M1120,264.5 C1120,177.0 1265.0,177.0 1265.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
| 261 |
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
| 262 |
+
" <textPath xlink:href=\"#arrow-7cae4e9aeafb4cf4bd557e4780eb30b6-0-5\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">udep</textPath>\n",
|
| 263 |
" </text>\n",
|
| 264 |
" <path class=\"displacy-arrowhead\" d=\"M1120,266.5 L1112,254.5 1128,254.5\" fill=\"currentColor\"/>\n",
|
| 265 |
"</g>\n",
|
| 266 |
"\n",
|
| 267 |
"<g class=\"displacy-arrow\">\n",
|
| 268 |
+
" <path class=\"displacy-arc\" id=\"arrow-7cae4e9aeafb4cf4bd557e4780eb30b6-0-6\" stroke-width=\"2px\" d=\"M770,264.5 C770,2.0 1275.0,2.0 1275.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
| 269 |
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
| 270 |
+
" <textPath xlink:href=\"#arrow-7cae4e9aeafb4cf4bd557e4780eb30b6-0-6\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">comp</textPath>\n",
|
| 271 |
" </text>\n",
|
| 272 |
" <path class=\"displacy-arrowhead\" d=\"M1275.0,266.5 L1283.0,254.5 1267.0,254.5\" fill=\"currentColor\"/>\n",
|
| 273 |
"</g>\n",
|
|
|
|
| 296 |
{
|
| 297 |
"data": {
|
| 298 |
"text/plain": [
|
| 299 |
+
"[(They designated the building, as a national landmark., 0)]"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 300 |
]
|
| 301 |
},
|
| 302 |
"execution_count": 7,
|
main.py
CHANGED
|
@@ -64,7 +64,12 @@ def ssudify(doc):
|
|
| 64 |
child.dep_ = "mod"
|
| 65 |
for token in doc:
|
| 66 |
if any(t.text in [";", ":"] for t in doc
|
| 67 |
-
if (token.i < t.i < token.head.i
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
token.head = token
|
| 69 |
token.dep_ = "root"
|
| 70 |
if token.pos_ in ["VERB", "AUX"]:
|
|
|
|
| 64 |
child.dep_ = "mod"
|
| 65 |
for token in doc:
|
| 66 |
if any(t.text in [";", ":"] for t in doc
|
| 67 |
+
if ((token.i < t.i < token.head.i and not
|
| 68 |
+
(any(p.text == "(" for p in doc if token.i < p.i < t.i) and
|
| 69 |
+
any(p.text == ")" for p in doc if t.i < p.i < token.head.i))) or
|
| 70 |
+
(token.head.i < t.i < token.i and not
|
| 71 |
+
(any(p.text == "(" for p in doc if token.head.i < p.i < t.i) and
|
| 72 |
+
any(p.text == ")" for p in doc if t.i < p.i < token.i)))) and token.pos_ != "PUNCT"):
|
| 73 |
token.head = token
|
| 74 |
token.dep_ = "root"
|
| 75 |
if token.pos_ in ["VERB", "AUX"]:
|