Spaces:
Running
Running
deploy at 2025-12-28 11:51:35.372233
Browse files- .gitignore +207 -0
- Dependency length.ipynb +801 -0
- Dockerfile +10 -0
- README.md +6 -5
- main.py +125 -0
- requirements.txt +4 -0
.gitignore
ADDED
|
@@ -0,0 +1,207 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Byte-compiled / optimized / DLL files
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[codz]
|
| 4 |
+
*$py.class
|
| 5 |
+
|
| 6 |
+
# C extensions
|
| 7 |
+
*.so
|
| 8 |
+
|
| 9 |
+
# Distribution / packaging
|
| 10 |
+
.Python
|
| 11 |
+
build/
|
| 12 |
+
develop-eggs/
|
| 13 |
+
dist/
|
| 14 |
+
downloads/
|
| 15 |
+
eggs/
|
| 16 |
+
.eggs/
|
| 17 |
+
lib/
|
| 18 |
+
lib64/
|
| 19 |
+
parts/
|
| 20 |
+
sdist/
|
| 21 |
+
var/
|
| 22 |
+
wheels/
|
| 23 |
+
share/python-wheels/
|
| 24 |
+
*.egg-info/
|
| 25 |
+
.installed.cfg
|
| 26 |
+
*.egg
|
| 27 |
+
MANIFEST
|
| 28 |
+
|
| 29 |
+
# PyInstaller
|
| 30 |
+
# Usually these files are written by a python script from a template
|
| 31 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
| 32 |
+
*.manifest
|
| 33 |
+
*.spec
|
| 34 |
+
|
| 35 |
+
# Installer logs
|
| 36 |
+
pip-log.txt
|
| 37 |
+
pip-delete-this-directory.txt
|
| 38 |
+
|
| 39 |
+
# Unit test / coverage reports
|
| 40 |
+
htmlcov/
|
| 41 |
+
.tox/
|
| 42 |
+
.nox/
|
| 43 |
+
.coverage
|
| 44 |
+
.coverage.*
|
| 45 |
+
.cache
|
| 46 |
+
nosetests.xml
|
| 47 |
+
coverage.xml
|
| 48 |
+
*.cover
|
| 49 |
+
*.py.cover
|
| 50 |
+
.hypothesis/
|
| 51 |
+
.pytest_cache/
|
| 52 |
+
cover/
|
| 53 |
+
|
| 54 |
+
# Translations
|
| 55 |
+
*.mo
|
| 56 |
+
*.pot
|
| 57 |
+
|
| 58 |
+
# Django stuff:
|
| 59 |
+
*.log
|
| 60 |
+
local_settings.py
|
| 61 |
+
db.sqlite3
|
| 62 |
+
db.sqlite3-journal
|
| 63 |
+
|
| 64 |
+
# Flask stuff:
|
| 65 |
+
instance/
|
| 66 |
+
.webassets-cache
|
| 67 |
+
|
| 68 |
+
# Scrapy stuff:
|
| 69 |
+
.scrapy
|
| 70 |
+
|
| 71 |
+
# Sphinx documentation
|
| 72 |
+
docs/_build/
|
| 73 |
+
|
| 74 |
+
# PyBuilder
|
| 75 |
+
.pybuilder/
|
| 76 |
+
target/
|
| 77 |
+
|
| 78 |
+
# Jupyter Notebook
|
| 79 |
+
.ipynb_checkpoints
|
| 80 |
+
|
| 81 |
+
# IPython
|
| 82 |
+
profile_default/
|
| 83 |
+
ipython_config.py
|
| 84 |
+
|
| 85 |
+
# pyenv
|
| 86 |
+
# For a library or package, you might want to ignore these files since the code is
|
| 87 |
+
# intended to run in multiple environments; otherwise, check them in:
|
| 88 |
+
# .python-version
|
| 89 |
+
|
| 90 |
+
# pipenv
|
| 91 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
| 92 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
| 93 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
| 94 |
+
# install all needed dependencies.
|
| 95 |
+
#Pipfile.lock
|
| 96 |
+
|
| 97 |
+
# UV
|
| 98 |
+
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
| 99 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
| 100 |
+
# commonly ignored for libraries.
|
| 101 |
+
#uv.lock
|
| 102 |
+
|
| 103 |
+
# poetry
|
| 104 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
| 105 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
| 106 |
+
# commonly ignored for libraries.
|
| 107 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
| 108 |
+
#poetry.lock
|
| 109 |
+
#poetry.toml
|
| 110 |
+
|
| 111 |
+
# pdm
|
| 112 |
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
| 113 |
+
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
|
| 114 |
+
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
|
| 115 |
+
#pdm.lock
|
| 116 |
+
#pdm.toml
|
| 117 |
+
.pdm-python
|
| 118 |
+
.pdm-build/
|
| 119 |
+
|
| 120 |
+
# pixi
|
| 121 |
+
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
|
| 122 |
+
#pixi.lock
|
| 123 |
+
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
|
| 124 |
+
# in the .venv directory. It is recommended not to include this directory in version control.
|
| 125 |
+
.pixi
|
| 126 |
+
|
| 127 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
| 128 |
+
__pypackages__/
|
| 129 |
+
|
| 130 |
+
# Celery stuff
|
| 131 |
+
celerybeat-schedule
|
| 132 |
+
celerybeat.pid
|
| 133 |
+
|
| 134 |
+
# SageMath parsed files
|
| 135 |
+
*.sage.py
|
| 136 |
+
|
| 137 |
+
# Environments
|
| 138 |
+
.env
|
| 139 |
+
.envrc
|
| 140 |
+
.venv
|
| 141 |
+
env/
|
| 142 |
+
venv/
|
| 143 |
+
ENV/
|
| 144 |
+
env.bak/
|
| 145 |
+
venv.bak/
|
| 146 |
+
|
| 147 |
+
# Spyder project settings
|
| 148 |
+
.spyderproject
|
| 149 |
+
.spyproject
|
| 150 |
+
|
| 151 |
+
# Rope project settings
|
| 152 |
+
.ropeproject
|
| 153 |
+
|
| 154 |
+
# mkdocs documentation
|
| 155 |
+
/site
|
| 156 |
+
|
| 157 |
+
# mypy
|
| 158 |
+
.mypy_cache/
|
| 159 |
+
.dmypy.json
|
| 160 |
+
dmypy.json
|
| 161 |
+
|
| 162 |
+
# Pyre type checker
|
| 163 |
+
.pyre/
|
| 164 |
+
|
| 165 |
+
# pytype static type analyzer
|
| 166 |
+
.pytype/
|
| 167 |
+
|
| 168 |
+
# Cython debug symbols
|
| 169 |
+
cython_debug/
|
| 170 |
+
|
| 171 |
+
# PyCharm
|
| 172 |
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
| 173 |
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
| 174 |
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
| 175 |
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
| 176 |
+
#.idea/
|
| 177 |
+
|
| 178 |
+
# Abstra
|
| 179 |
+
# Abstra is an AI-powered process automation framework.
|
| 180 |
+
# Ignore directories containing user credentials, local state, and settings.
|
| 181 |
+
# Learn more at https://abstra.io/docs
|
| 182 |
+
.abstra/
|
| 183 |
+
|
| 184 |
+
# Visual Studio Code
|
| 185 |
+
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
|
| 186 |
+
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
|
| 187 |
+
# and can be added to the global gitignore or merged into this file. However, if you prefer,
|
| 188 |
+
# you could uncomment the following to ignore the entire vscode folder
|
| 189 |
+
# .vscode/
|
| 190 |
+
|
| 191 |
+
# Ruff stuff:
|
| 192 |
+
.ruff_cache/
|
| 193 |
+
|
| 194 |
+
# PyPI configuration file
|
| 195 |
+
.pypirc
|
| 196 |
+
|
| 197 |
+
# Cursor
|
| 198 |
+
# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
|
| 199 |
+
# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
|
| 200 |
+
# refer to https://docs.cursor.com/context/ignore-files
|
| 201 |
+
.cursorignore
|
| 202 |
+
.cursorindexingignore
|
| 203 |
+
|
| 204 |
+
# Marimo
|
| 205 |
+
marimo/_static/
|
| 206 |
+
marimo/_lsp/
|
| 207 |
+
__marimo__/
|
Dependency length.ipynb
ADDED
|
@@ -0,0 +1,801 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"id": "2e1aaddb-8d3e-4168-b6b1-787b052c8244",
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"source": [
|
| 8 |
+
"The plan is to first write a function for converting UD to SSUD, then measure dependency lengths."
|
| 9 |
+
]
|
| 10 |
+
},
|
| 11 |
+
{
|
| 12 |
+
"cell_type": "code",
|
| 13 |
+
"execution_count": 1,
|
| 14 |
+
"id": "e50c5f7a-75c3-40b3-89be-82988d8bfb73",
|
| 15 |
+
"metadata": {},
|
| 16 |
+
"outputs": [],
|
| 17 |
+
"source": [
|
| 18 |
+
"import spacy\n",
|
| 19 |
+
"from spacy import displacy\n",
|
| 20 |
+
"\n",
|
| 21 |
+
"nlp = spacy.load(\"en_core_web_sm\")"
|
| 22 |
+
]
|
| 23 |
+
},
|
| 24 |
+
{
|
| 25 |
+
"cell_type": "code",
|
| 26 |
+
"execution_count": 2,
|
| 27 |
+
"id": "c49f475d-547f-49d8-8550-2f1ad1555a14",
|
| 28 |
+
"metadata": {},
|
| 29 |
+
"outputs": [],
|
| 30 |
+
"source": [
|
| 31 |
+
"relations = {\n",
|
| 32 |
+
" \"subj\": [\"nsubj\", \"nsubjpass\", \"csubj\", \"csubjpass\", \"expl\"],\n",
|
| 33 |
+
" \"comp\": [\"dobj\", \"dative\", \"attr\", \"oprd\", \"pobj\", \"aux\", \"auxpass\", \"mark\", \"case\", \"ccomp\", \"xcomp\", \"acomp\"],\n",
|
| 34 |
+
" \"mod\": [\"agent\", \"advmod\", \"advcl\", \"relcl\", \"npmod\", \"npadvmod\", \"prt\"],\n",
|
| 35 |
+
" \"udep\": [\"acl\", \"amod\", \"nmod\", \"poss\", \"nummod\", \"prep\"],\n",
|
| 36 |
+
"}"
|
| 37 |
+
]
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"cell_type": "code",
|
| 41 |
+
"execution_count": 48,
|
| 42 |
+
"id": "cefdbf22-b747-4bea-b279-c9b01e75ff2e",
|
| 43 |
+
"metadata": {},
|
| 44 |
+
"outputs": [],
|
| 45 |
+
"source": [
|
| 46 |
+
"def ssudify(doc):\n",
|
| 47 |
+
" for token in doc:\n",
|
| 48 |
+
" to_reverse = [token]\n",
|
| 49 |
+
" for child in token.children:\n",
|
| 50 |
+
" if (child.dep_ in [\"aux\", \"auxpass\", \"mark\", \"case\"]) or (child.dep_ == \"advmod\" and child.pos_ == \"SCONJ\"):\n",
|
| 51 |
+
" to_reverse.append(child)\n",
|
| 52 |
+
" to_reverse.sort(key = lambda x: abs(x.i - token.i))\n",
|
| 53 |
+
" if len(to_reverse) > 1:\n",
|
| 54 |
+
" for i in range(1, len(to_reverse)):\n",
|
| 55 |
+
" if to_reverse[i].dep_ in [\"aux\", \"auxpass\"]:\n",
|
| 56 |
+
" for child in to_reverse[i-1].children:\n",
|
| 57 |
+
" if child.dep_ in relations[\"subj\"] + relations[\"mod\"] + [\"conj\", \"cc\"]:\n",
|
| 58 |
+
" child.head = to_reverse[i]\n",
|
| 59 |
+
" to_reverse[i].head = to_reverse[i-1].head if to_reverse[i-1].head != to_reverse[i-1] else to_reverse[i]\n",
|
| 60 |
+
" to_reverse[i].dep_ = to_reverse[i-1].dep_\n",
|
| 61 |
+
" to_reverse[i-1].head = to_reverse[i]\n",
|
| 62 |
+
" to_reverse[i-1].dep_ = \"comp\"\n",
|
| 63 |
+
" for token in doc:\n",
|
| 64 |
+
" if token.dep_ == \"dep\": token.dep_ = \"unknown\"\n",
|
| 65 |
+
" for rel in relations.keys():\n",
|
| 66 |
+
" if token.dep_ in relations[rel]: token.dep_ = rel\n",
|
| 67 |
+
" for token in doc:\n",
|
| 68 |
+
" subjects = sorted([child for child in token.children if child.dep_ == \"subj\"], key = lambda x: abs(x.i - token.i))\n",
|
| 69 |
+
" if len(subjects) > 1:\n",
|
| 70 |
+
" for s in subjects[1:]:\n",
|
| 71 |
+
" s.dep_ = \"comp\"\n",
|
| 72 |
+
" for token in doc:\n",
|
| 73 |
+
" subject = [child for child in token.children if child.dep_ == \"subj\"]\n",
|
| 74 |
+
" if subject:\n",
|
| 75 |
+
" for child in [c for c in token.children if c.i < subject[0].i]:\n",
|
| 76 |
+
" if child.dep_ in [\"comp\", \"udep\"]:\n",
|
| 77 |
+
" child.dep_ = \"mod\"\n",
|
| 78 |
+
" return doc"
|
| 79 |
+
]
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"cell_type": "code",
|
| 83 |
+
"execution_count": 51,
|
| 84 |
+
"id": "23efda66-9d58-4169-9fa0-05de47267b5a",
|
| 85 |
+
"metadata": {},
|
| 86 |
+
"outputs": [],
|
| 87 |
+
"source": [
|
| 88 |
+
"def flyover(token):\n",
|
| 89 |
+
" if token.dep_ in [\"subj\", \"comp\", \"udep\", \"conj\"]:\n",
|
| 90 |
+
" dep_distance = abs(token.i - token.head.i)\n",
|
| 91 |
+
" if token.head.i < token.i:\n",
|
| 92 |
+
" return (token.doc[token.head.i+1:token.i], dep_distance - 1)\n",
|
| 93 |
+
" elif token.head.i > token.i:\n",
|
| 94 |
+
" return (token.doc[token.i+1:token.head.i], dep_distance - 1)\n",
|
| 95 |
+
" else:\n",
|
| 96 |
+
" return (token.doc[token.i:token.i], 0)"
|
| 97 |
+
]
|
| 98 |
+
},
|
| 99 |
+
{
|
| 100 |
+
"cell_type": "code",
|
| 101 |
+
"execution_count": 35,
|
| 102 |
+
"id": "5bb29f0a-9d55-4f2d-84c3-4c3c4bae0271",
|
| 103 |
+
"metadata": {},
|
| 104 |
+
"outputs": [],
|
| 105 |
+
"source": [
|
| 106 |
+
"def get_fluff(doc):\n",
|
| 107 |
+
" flyovers = list(map(flyover, doc))\n",
|
| 108 |
+
" flyovers = [f for f in flyovers if len(f[0]) > 0]\n",
|
| 109 |
+
" flyovers = [f1 for f1 in flyovers if len([f2 for f2 in flyovers if \n",
|
| 110 |
+
" (f2[0][-1].i > f1[0][0].i >= f2[0][0].i or f2[0][0].i < f1[0][-1].i <= f2[0][-1].i) and \n",
|
| 111 |
+
" (len(f1[0]) < len(f2[0]) or f1[1] < f2[1])]) == 0 and len(f1[0]) > 2]\n",
|
| 112 |
+
" flyovers = sorted(flyovers, key=lambda x: x[0][0].i)\n",
|
| 113 |
+
" interstices = []\n",
|
| 114 |
+
" for i in range(len(flyovers)):\n",
|
| 115 |
+
" if i == 0:\n",
|
| 116 |
+
" if flyovers[0][0][0].i > 0:\n",
|
| 117 |
+
" interstices.append((doc[0:flyovers[0][0][0].i], 0))\n",
|
| 118 |
+
" else:\n",
|
| 119 |
+
" if flyovers[i][0][0].i > flyovers[i-1][0][-1].i + 1:\n",
|
| 120 |
+
" interstices.append((doc[flyovers[i-1][0][-1].i+1:flyovers[i][0][0].i], 0))\n",
|
| 121 |
+
" # elif flyovers[i][1] == flyovers[i-1][1]:\n",
|
| 122 |
+
" # flyovers[i] = (doc[flyovers[i-1][0][0].i:flyovers[i][0][-1].i+1], flyovers[i][1])\n",
|
| 123 |
+
" # flyovers[i-1] = (doc[flyovers[i-1][0][0].i:flyovers[i-1][0][0].i], flyovers[i-1][1])\n",
|
| 124 |
+
" if len(flyovers) > 0:\n",
|
| 125 |
+
" if flyovers[-1][0][-1].i < doc[-1].i:\n",
|
| 126 |
+
" interstices.append((doc[flyovers[-1][0][-1].i+1:], 0))\n",
|
| 127 |
+
" else:\n",
|
| 128 |
+
" interstices.append((doc, 0))\n",
|
| 129 |
+
" flyovers = [f for f in flyovers if len(f[0]) > 0]\n",
|
| 130 |
+
" return sorted(flyovers + interstices, key=lambda x: x[0][0].i)"
|
| 131 |
+
]
|
| 132 |
+
},
|
| 133 |
+
{
|
| 134 |
+
"cell_type": "code",
|
| 135 |
+
"execution_count": 49,
|
| 136 |
+
"id": "ba90ff19-c665-49d8-8ad4-5caee885901d",
|
| 137 |
+
"metadata": {},
|
| 138 |
+
"outputs": [
|
| 139 |
+
{
|
| 140 |
+
"data": {
|
| 141 |
+
"text/html": [
|
| 142 |
+
"<span class=\"tex2jax_ignore\"><svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" xml:lang=\"en\" id=\"0c70d09209f64114aa5a12db7f67e2b2-0\" class=\"displacy\" width=\"6350\" height=\"574.5\" direction=\"ltr\" style=\"max-width: none; height: 574.5px; color: #000000; background: #ffffff; font-family: Arial; direction: ltr\">\n",
|
| 143 |
+
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"484.5\">\n",
|
| 144 |
+
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"50\">According</tspan>\n",
|
| 145 |
+
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"50\">VERB</tspan>\n",
|
| 146 |
+
"</text>\n",
|
| 147 |
+
"\n",
|
| 148 |
+
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"484.5\">\n",
|
| 149 |
+
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"225\">to</tspan>\n",
|
| 150 |
+
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"225\">ADP</tspan>\n",
|
| 151 |
+
"</text>\n",
|
| 152 |
+
"\n",
|
| 153 |
+
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"484.5\">\n",
|
| 154 |
+
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"400\">the</tspan>\n",
|
| 155 |
+
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"400\">DET</tspan>\n",
|
| 156 |
+
"</text>\n",
|
| 157 |
+
"\n",
|
| 158 |
+
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"484.5\">\n",
|
| 159 |
+
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"575\">studies, “</tspan>\n",
|
| 160 |
+
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"575\">NOUN</tspan>\n",
|
| 161 |
+
"</text>\n",
|
| 162 |
+
"\n",
|
| 163 |
+
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"484.5\">\n",
|
| 164 |
+
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"750\">People</tspan>\n",
|
| 165 |
+
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"750\">NOUN</tspan>\n",
|
| 166 |
+
"</text>\n",
|
| 167 |
+
"\n",
|
| 168 |
+
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"484.5\">\n",
|
| 169 |
+
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"925\">who</tspan>\n",
|
| 170 |
+
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"925\">PRON</tspan>\n",
|
| 171 |
+
"</text>\n",
|
| 172 |
+
"\n",
|
| 173 |
+
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"484.5\">\n",
|
| 174 |
+
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"1100\">danced</tspan>\n",
|
| 175 |
+
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"1100\">VERB</tspan>\n",
|
| 176 |
+
"</text>\n",
|
| 177 |
+
"\n",
|
| 178 |
+
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"484.5\">\n",
|
| 179 |
+
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"1275\">more</tspan>\n",
|
| 180 |
+
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"1275\">ADJ</tspan>\n",
|
| 181 |
+
"</text>\n",
|
| 182 |
+
"\n",
|
| 183 |
+
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"484.5\">\n",
|
| 184 |
+
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"1450\">than</tspan>\n",
|
| 185 |
+
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"1450\">ADP</tspan>\n",
|
| 186 |
+
"</text>\n",
|
| 187 |
+
"\n",
|
| 188 |
+
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"484.5\">\n",
|
| 189 |
+
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"1625\">once</tspan>\n",
|
| 190 |
+
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"1625\">ADV</tspan>\n",
|
| 191 |
+
"</text>\n",
|
| 192 |
+
"\n",
|
| 193 |
+
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"484.5\">\n",
|
| 194 |
+
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"1800\">a</tspan>\n",
|
| 195 |
+
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"1800\">DET</tspan>\n",
|
| 196 |
+
"</text>\n",
|
| 197 |
+
"\n",
|
| 198 |
+
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"484.5\">\n",
|
| 199 |
+
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"1975\">week</tspan>\n",
|
| 200 |
+
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"1975\">NOUN</tspan>\n",
|
| 201 |
+
"</text>\n",
|
| 202 |
+
"\n",
|
| 203 |
+
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"484.5\">\n",
|
| 204 |
+
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"2150\">had</tspan>\n",
|
| 205 |
+
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"2150\">VERB</tspan>\n",
|
| 206 |
+
"</text>\n",
|
| 207 |
+
"\n",
|
| 208 |
+
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"484.5\">\n",
|
| 209 |
+
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"2325\">a</tspan>\n",
|
| 210 |
+
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"2325\">DET</tspan>\n",
|
| 211 |
+
"</text>\n",
|
| 212 |
+
"\n",
|
| 213 |
+
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"484.5\">\n",
|
| 214 |
+
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"2500\">76%</tspan>\n",
|
| 215 |
+
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"2500\">NOUN</tspan>\n",
|
| 216 |
+
"</text>\n",
|
| 217 |
+
"\n",
|
| 218 |
+
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"484.5\">\n",
|
| 219 |
+
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"2675\">lower</tspan>\n",
|
| 220 |
+
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"2675\">ADJ</tspan>\n",
|
| 221 |
+
"</text>\n",
|
| 222 |
+
"\n",
|
| 223 |
+
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"484.5\">\n",
|
| 224 |
+
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"2850\">risk</tspan>\n",
|
| 225 |
+
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"2850\">NOUN</tspan>\n",
|
| 226 |
+
"</text>\n",
|
| 227 |
+
"\n",
|
| 228 |
+
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"484.5\">\n",
|
| 229 |
+
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"3025\">of</tspan>\n",
|
| 230 |
+
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"3025\">ADP</tspan>\n",
|
| 231 |
+
"</text>\n",
|
| 232 |
+
"\n",
|
| 233 |
+
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"484.5\">\n",
|
| 234 |
+
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"3200\">getting</tspan>\n",
|
| 235 |
+
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"3200\">VERB</tspan>\n",
|
| 236 |
+
"</text>\n",
|
| 237 |
+
"\n",
|
| 238 |
+
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"484.5\">\n",
|
| 239 |
+
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"3375\">dementia</tspan>\n",
|
| 240 |
+
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"3375\">NOUN</tspan>\n",
|
| 241 |
+
"</text>\n",
|
| 242 |
+
"\n",
|
| 243 |
+
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"484.5\">\n",
|
| 244 |
+
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"3550\">compared</tspan>\n",
|
| 245 |
+
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"3550\">VERB</tspan>\n",
|
| 246 |
+
"</text>\n",
|
| 247 |
+
"\n",
|
| 248 |
+
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"484.5\">\n",
|
| 249 |
+
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"3725\">to</tspan>\n",
|
| 250 |
+
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"3725\">ADP</tspan>\n",
|
| 251 |
+
"</text>\n",
|
| 252 |
+
"\n",
|
| 253 |
+
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"484.5\">\n",
|
| 254 |
+
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"3900\">people</tspan>\n",
|
| 255 |
+
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"3900\">NOUN</tspan>\n",
|
| 256 |
+
"</text>\n",
|
| 257 |
+
"\n",
|
| 258 |
+
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"484.5\">\n",
|
| 259 |
+
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"4075\">who</tspan>\n",
|
| 260 |
+
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"4075\">PRON</tspan>\n",
|
| 261 |
+
"</text>\n",
|
| 262 |
+
"\n",
|
| 263 |
+
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"484.5\">\n",
|
| 264 |
+
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"4250\">danced</tspan>\n",
|
| 265 |
+
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"4250\">VERB</tspan>\n",
|
| 266 |
+
"</text>\n",
|
| 267 |
+
"\n",
|
| 268 |
+
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"484.5\">\n",
|
| 269 |
+
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"4425\">less</tspan>\n",
|
| 270 |
+
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"4425\">ADV</tspan>\n",
|
| 271 |
+
"</text>\n",
|
| 272 |
+
"\n",
|
| 273 |
+
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"484.5\">\n",
|
| 274 |
+
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"4600\">frequently</tspan>\n",
|
| 275 |
+
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"4600\">ADV</tspan>\n",
|
| 276 |
+
"</text>\n",
|
| 277 |
+
"\n",
|
| 278 |
+
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"484.5\">\n",
|
| 279 |
+
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"4775\">or</tspan>\n",
|
| 280 |
+
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"4775\">CCONJ</tspan>\n",
|
| 281 |
+
"</text>\n",
|
| 282 |
+
"\n",
|
| 283 |
+
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"484.5\">\n",
|
| 284 |
+
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"4950\">did</tspan>\n",
|
| 285 |
+
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"4950\">AUX</tspan>\n",
|
| 286 |
+
"</text>\n",
|
| 287 |
+
"\n",
|
| 288 |
+
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"484.5\">\n",
|
| 289 |
+
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"5125\">n’t</tspan>\n",
|
| 290 |
+
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"5125\">NOUN</tspan>\n",
|
| 291 |
+
"</text>\n",
|
| 292 |
+
"\n",
|
| 293 |
+
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"484.5\">\n",
|
| 294 |
+
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"5300\">dance</tspan>\n",
|
| 295 |
+
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"5300\">NOUN</tspan>\n",
|
| 296 |
+
"</text>\n",
|
| 297 |
+
"\n",
|
| 298 |
+
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"484.5\">\n",
|
| 299 |
+
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"5475\">at</tspan>\n",
|
| 300 |
+
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"5475\">ADV</tspan>\n",
|
| 301 |
+
"</text>\n",
|
| 302 |
+
"\n",
|
| 303 |
+
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"484.5\">\n",
|
| 304 |
+
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"5650\">all,”</tspan>\n",
|
| 305 |
+
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"5650\">ADV</tspan>\n",
|
| 306 |
+
"</text>\n",
|
| 307 |
+
"\n",
|
| 308 |
+
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"484.5\">\n",
|
| 309 |
+
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"5825\">Dr</tspan>\n",
|
| 310 |
+
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"5825\">PROPN</tspan>\n",
|
| 311 |
+
"</text>\n",
|
| 312 |
+
"\n",
|
| 313 |
+
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"484.5\">\n",
|
| 314 |
+
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"6000\">Pasricha</tspan>\n",
|
| 315 |
+
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"6000\">PROPN</tspan>\n",
|
| 316 |
+
"</text>\n",
|
| 317 |
+
"\n",
|
| 318 |
+
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"484.5\">\n",
|
| 319 |
+
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"6175\">noted.</tspan>\n",
|
| 320 |
+
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"6175\">VERB</tspan>\n",
|
| 321 |
+
"</text>\n",
|
| 322 |
+
"\n",
|
| 323 |
+
"<g class=\"displacy-arrow\">\n",
|
| 324 |
+
" <path class=\"displacy-arc\" id=\"arrow-0c70d09209f64114aa5a12db7f67e2b2-0-0\" stroke-width=\"2px\" d=\"M70,439.5 C70,89.5 2145.0,89.5 2145.0,439.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
| 325 |
+
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
| 326 |
+
" <textPath xlink:href=\"#arrow-0c70d09209f64114aa5a12db7f67e2b2-0-0\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">mod</textPath>\n",
|
| 327 |
+
" </text>\n",
|
| 328 |
+
" <path class=\"displacy-arrowhead\" d=\"M70,441.5 L62,429.5 78,429.5\" fill=\"currentColor\"/>\n",
|
| 329 |
+
"</g>\n",
|
| 330 |
+
"\n",
|
| 331 |
+
"<g class=\"displacy-arrow\">\n",
|
| 332 |
+
" <path class=\"displacy-arc\" id=\"arrow-0c70d09209f64114aa5a12db7f67e2b2-0-1\" stroke-width=\"2px\" d=\"M70,439.5 C70,352.0 205.0,352.0 205.0,439.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
| 333 |
+
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
| 334 |
+
" <textPath xlink:href=\"#arrow-0c70d09209f64114aa5a12db7f67e2b2-0-1\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">udep</textPath>\n",
|
| 335 |
+
" </text>\n",
|
| 336 |
+
" <path class=\"displacy-arrowhead\" d=\"M205.0,441.5 L213.0,429.5 197.0,429.5\" fill=\"currentColor\"/>\n",
|
| 337 |
+
"</g>\n",
|
| 338 |
+
"\n",
|
| 339 |
+
"<g class=\"displacy-arrow\">\n",
|
| 340 |
+
" <path class=\"displacy-arc\" id=\"arrow-0c70d09209f64114aa5a12db7f67e2b2-0-2\" stroke-width=\"2px\" d=\"M420,439.5 C420,352.0 555.0,352.0 555.0,439.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
| 341 |
+
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
| 342 |
+
" <textPath xlink:href=\"#arrow-0c70d09209f64114aa5a12db7f67e2b2-0-2\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">det</textPath>\n",
|
| 343 |
+
" </text>\n",
|
| 344 |
+
" <path class=\"displacy-arrowhead\" d=\"M420,441.5 L412,429.5 428,429.5\" fill=\"currentColor\"/>\n",
|
| 345 |
+
"</g>\n",
|
| 346 |
+
"\n",
|
| 347 |
+
"<g class=\"displacy-arrow\">\n",
|
| 348 |
+
" <path class=\"displacy-arc\" id=\"arrow-0c70d09209f64114aa5a12db7f67e2b2-0-3\" stroke-width=\"2px\" d=\"M245,439.5 C245,264.5 560.0,264.5 560.0,439.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
| 349 |
+
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
| 350 |
+
" <textPath xlink:href=\"#arrow-0c70d09209f64114aa5a12db7f67e2b2-0-3\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">comp</textPath>\n",
|
| 351 |
+
" </text>\n",
|
| 352 |
+
" <path class=\"displacy-arrowhead\" d=\"M560.0,441.5 L568.0,429.5 552.0,429.5\" fill=\"currentColor\"/>\n",
|
| 353 |
+
"</g>\n",
|
| 354 |
+
"\n",
|
| 355 |
+
"<g class=\"displacy-arrow\">\n",
|
| 356 |
+
" <path class=\"displacy-arc\" id=\"arrow-0c70d09209f64114aa5a12db7f67e2b2-0-4\" stroke-width=\"2px\" d=\"M770,439.5 C770,177.0 2140.0,177.0 2140.0,439.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
| 357 |
+
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
| 358 |
+
" <textPath xlink:href=\"#arrow-0c70d09209f64114aa5a12db7f67e2b2-0-4\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">subj</textPath>\n",
|
| 359 |
+
" </text>\n",
|
| 360 |
+
" <path class=\"displacy-arrowhead\" d=\"M770,441.5 L762,429.5 778,429.5\" fill=\"currentColor\"/>\n",
|
| 361 |
+
"</g>\n",
|
| 362 |
+
"\n",
|
| 363 |
+
"<g class=\"displacy-arrow\">\n",
|
| 364 |
+
" <path class=\"displacy-arc\" id=\"arrow-0c70d09209f64114aa5a12db7f67e2b2-0-5\" stroke-width=\"2px\" d=\"M945,439.5 C945,352.0 1080.0,352.0 1080.0,439.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
| 365 |
+
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
| 366 |
+
" <textPath xlink:href=\"#arrow-0c70d09209f64114aa5a12db7f67e2b2-0-5\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">subj</textPath>\n",
|
| 367 |
+
" </text>\n",
|
| 368 |
+
" <path class=\"displacy-arrowhead\" d=\"M945,441.5 L937,429.5 953,429.5\" fill=\"currentColor\"/>\n",
|
| 369 |
+
"</g>\n",
|
| 370 |
+
"\n",
|
| 371 |
+
"<g class=\"displacy-arrow\">\n",
|
| 372 |
+
" <path class=\"displacy-arc\" id=\"arrow-0c70d09209f64114aa5a12db7f67e2b2-0-6\" stroke-width=\"2px\" d=\"M770,439.5 C770,264.5 1085.0,264.5 1085.0,439.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
| 373 |
+
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
| 374 |
+
" <textPath xlink:href=\"#arrow-0c70d09209f64114aa5a12db7f67e2b2-0-6\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">mod</textPath>\n",
|
| 375 |
+
" </text>\n",
|
| 376 |
+
" <path class=\"displacy-arrowhead\" d=\"M1085.0,441.5 L1093.0,429.5 1077.0,429.5\" fill=\"currentColor\"/>\n",
|
| 377 |
+
"</g>\n",
|
| 378 |
+
"\n",
|
| 379 |
+
"<g class=\"displacy-arrow\">\n",
|
| 380 |
+
" <path class=\"displacy-arc\" id=\"arrow-0c70d09209f64114aa5a12db7f67e2b2-0-7\" stroke-width=\"2px\" d=\"M1120,439.5 C1120,352.0 1255.0,352.0 1255.0,439.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
| 381 |
+
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
| 382 |
+
" <textPath xlink:href=\"#arrow-0c70d09209f64114aa5a12db7f67e2b2-0-7\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">comp</textPath>\n",
|
| 383 |
+
" </text>\n",
|
| 384 |
+
" <path class=\"displacy-arrowhead\" d=\"M1255.0,441.5 L1263.0,429.5 1247.0,429.5\" fill=\"currentColor\"/>\n",
|
| 385 |
+
"</g>\n",
|
| 386 |
+
"\n",
|
| 387 |
+
"<g class=\"displacy-arrow\">\n",
|
| 388 |
+
" <path class=\"displacy-arc\" id=\"arrow-0c70d09209f64114aa5a12db7f67e2b2-0-8\" stroke-width=\"2px\" d=\"M1295,439.5 C1295,352.0 1430.0,352.0 1430.0,439.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
| 389 |
+
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
| 390 |
+
" <textPath xlink:href=\"#arrow-0c70d09209f64114aa5a12db7f67e2b2-0-8\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">udep</textPath>\n",
|
| 391 |
+
" </text>\n",
|
| 392 |
+
" <path class=\"displacy-arrowhead\" d=\"M1430.0,441.5 L1438.0,429.5 1422.0,429.5\" fill=\"currentColor\"/>\n",
|
| 393 |
+
"</g>\n",
|
| 394 |
+
"\n",
|
| 395 |
+
"<g class=\"displacy-arrow\">\n",
|
| 396 |
+
" <path class=\"displacy-arc\" id=\"arrow-0c70d09209f64114aa5a12db7f67e2b2-0-9\" stroke-width=\"2px\" d=\"M1470,439.5 C1470,352.0 1605.0,352.0 1605.0,439.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
| 397 |
+
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
| 398 |
+
" <textPath xlink:href=\"#arrow-0c70d09209f64114aa5a12db7f67e2b2-0-9\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">pcomp</textPath>\n",
|
| 399 |
+
" </text>\n",
|
| 400 |
+
" <path class=\"displacy-arrowhead\" d=\"M1605.0,441.5 L1613.0,429.5 1597.0,429.5\" fill=\"currentColor\"/>\n",
|
| 401 |
+
"</g>\n",
|
| 402 |
+
"\n",
|
| 403 |
+
"<g class=\"displacy-arrow\">\n",
|
| 404 |
+
" <path class=\"displacy-arc\" id=\"arrow-0c70d09209f64114aa5a12db7f67e2b2-0-10\" stroke-width=\"2px\" d=\"M1820,439.5 C1820,352.0 1955.0,352.0 1955.0,439.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
| 405 |
+
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
| 406 |
+
" <textPath xlink:href=\"#arrow-0c70d09209f64114aa5a12db7f67e2b2-0-10\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">det</textPath>\n",
|
| 407 |
+
" </text>\n",
|
| 408 |
+
" <path class=\"displacy-arrowhead\" d=\"M1820,441.5 L1812,429.5 1828,429.5\" fill=\"currentColor\"/>\n",
|
| 409 |
+
"</g>\n",
|
| 410 |
+
"\n",
|
| 411 |
+
"<g class=\"displacy-arrow\">\n",
|
| 412 |
+
" <path class=\"displacy-arc\" id=\"arrow-0c70d09209f64114aa5a12db7f67e2b2-0-11\" stroke-width=\"2px\" d=\"M1295,439.5 C1295,264.5 1960.0,264.5 1960.0,439.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
| 413 |
+
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
| 414 |
+
" <textPath xlink:href=\"#arrow-0c70d09209f64114aa5a12db7f67e2b2-0-11\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">mod</textPath>\n",
|
| 415 |
+
" </text>\n",
|
| 416 |
+
" <path class=\"displacy-arrowhead\" d=\"M1960.0,441.5 L1968.0,429.5 1952.0,429.5\" fill=\"currentColor\"/>\n",
|
| 417 |
+
"</g>\n",
|
| 418 |
+
"\n",
|
| 419 |
+
"<g class=\"displacy-arrow\">\n",
|
| 420 |
+
" <path class=\"displacy-arc\" id=\"arrow-0c70d09209f64114aa5a12db7f67e2b2-0-12\" stroke-width=\"2px\" d=\"M2170,439.5 C2170,2.0 6175.0,2.0 6175.0,439.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
| 421 |
+
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
| 422 |
+
" <textPath xlink:href=\"#arrow-0c70d09209f64114aa5a12db7f67e2b2-0-12\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">mod</textPath>\n",
|
| 423 |
+
" </text>\n",
|
| 424 |
+
" <path class=\"displacy-arrowhead\" d=\"M2170,441.5 L2162,429.5 2178,429.5\" fill=\"currentColor\"/>\n",
|
| 425 |
+
"</g>\n",
|
| 426 |
+
"\n",
|
| 427 |
+
"<g class=\"displacy-arrow\">\n",
|
| 428 |
+
" <path class=\"displacy-arc\" id=\"arrow-0c70d09209f64114aa5a12db7f67e2b2-0-13\" stroke-width=\"2px\" d=\"M2345,439.5 C2345,264.5 2835.0,264.5 2835.0,439.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
| 429 |
+
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
| 430 |
+
" <textPath xlink:href=\"#arrow-0c70d09209f64114aa5a12db7f67e2b2-0-13\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">det</textPath>\n",
|
| 431 |
+
" </text>\n",
|
| 432 |
+
" <path class=\"displacy-arrowhead\" d=\"M2345,441.5 L2337,429.5 2353,429.5\" fill=\"currentColor\"/>\n",
|
| 433 |
+
"</g>\n",
|
| 434 |
+
"\n",
|
| 435 |
+
"<g class=\"displacy-arrow\">\n",
|
| 436 |
+
" <path class=\"displacy-arc\" id=\"arrow-0c70d09209f64114aa5a12db7f67e2b2-0-14\" stroke-width=\"2px\" d=\"M2520,439.5 C2520,352.0 2655.0,352.0 2655.0,439.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
| 437 |
+
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
| 438 |
+
" <textPath xlink:href=\"#arrow-0c70d09209f64114aa5a12db7f67e2b2-0-14\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">mod</textPath>\n",
|
| 439 |
+
" </text>\n",
|
| 440 |
+
" <path class=\"displacy-arrowhead\" d=\"M2520,441.5 L2512,429.5 2528,429.5\" fill=\"currentColor\"/>\n",
|
| 441 |
+
"</g>\n",
|
| 442 |
+
"\n",
|
| 443 |
+
"<g class=\"displacy-arrow\">\n",
|
| 444 |
+
" <path class=\"displacy-arc\" id=\"arrow-0c70d09209f64114aa5a12db7f67e2b2-0-15\" stroke-width=\"2px\" d=\"M2695,439.5 C2695,352.0 2830.0,352.0 2830.0,439.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
| 445 |
+
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
| 446 |
+
" <textPath xlink:href=\"#arrow-0c70d09209f64114aa5a12db7f67e2b2-0-15\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">udep</textPath>\n",
|
| 447 |
+
" </text>\n",
|
| 448 |
+
" <path class=\"displacy-arrowhead\" d=\"M2695,441.5 L2687,429.5 2703,429.5\" fill=\"currentColor\"/>\n",
|
| 449 |
+
"</g>\n",
|
| 450 |
+
"\n",
|
| 451 |
+
"<g class=\"displacy-arrow\">\n",
|
| 452 |
+
" <path class=\"displacy-arc\" id=\"arrow-0c70d09209f64114aa5a12db7f67e2b2-0-16\" stroke-width=\"2px\" d=\"M2170,439.5 C2170,177.0 2840.0,177.0 2840.0,439.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
| 453 |
+
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
| 454 |
+
" <textPath xlink:href=\"#arrow-0c70d09209f64114aa5a12db7f67e2b2-0-16\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">comp</textPath>\n",
|
| 455 |
+
" </text>\n",
|
| 456 |
+
" <path class=\"displacy-arrowhead\" d=\"M2840.0,441.5 L2848.0,429.5 2832.0,429.5\" fill=\"currentColor\"/>\n",
|
| 457 |
+
"</g>\n",
|
| 458 |
+
"\n",
|
| 459 |
+
"<g class=\"displacy-arrow\">\n",
|
| 460 |
+
" <path class=\"displacy-arc\" id=\"arrow-0c70d09209f64114aa5a12db7f67e2b2-0-17\" stroke-width=\"2px\" d=\"M2870,439.5 C2870,352.0 3005.0,352.0 3005.0,439.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
| 461 |
+
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
| 462 |
+
" <textPath xlink:href=\"#arrow-0c70d09209f64114aa5a12db7f67e2b2-0-17\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">udep</textPath>\n",
|
| 463 |
+
" </text>\n",
|
| 464 |
+
" <path class=\"displacy-arrowhead\" d=\"M3005.0,441.5 L3013.0,429.5 2997.0,429.5\" fill=\"currentColor\"/>\n",
|
| 465 |
+
"</g>\n",
|
| 466 |
+
"\n",
|
| 467 |
+
"<g class=\"displacy-arrow\">\n",
|
| 468 |
+
" <path class=\"displacy-arc\" id=\"arrow-0c70d09209f64114aa5a12db7f67e2b2-0-18\" stroke-width=\"2px\" d=\"M3045,439.5 C3045,352.0 3180.0,352.0 3180.0,439.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
| 469 |
+
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
| 470 |
+
" <textPath xlink:href=\"#arrow-0c70d09209f64114aa5a12db7f67e2b2-0-18\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">pcomp</textPath>\n",
|
| 471 |
+
" </text>\n",
|
| 472 |
+
" <path class=\"displacy-arrowhead\" d=\"M3180.0,441.5 L3188.0,429.5 3172.0,429.5\" fill=\"currentColor\"/>\n",
|
| 473 |
+
"</g>\n",
|
| 474 |
+
"\n",
|
| 475 |
+
"<g class=\"displacy-arrow\">\n",
|
| 476 |
+
" <path class=\"displacy-arc\" id=\"arrow-0c70d09209f64114aa5a12db7f67e2b2-0-19\" stroke-width=\"2px\" d=\"M3220,439.5 C3220,352.0 3355.0,352.0 3355.0,439.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
| 477 |
+
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
| 478 |
+
" <textPath xlink:href=\"#arrow-0c70d09209f64114aa5a12db7f67e2b2-0-19\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">comp</textPath>\n",
|
| 479 |
+
" </text>\n",
|
| 480 |
+
" <path class=\"displacy-arrowhead\" d=\"M3355.0,441.5 L3363.0,429.5 3347.0,429.5\" fill=\"currentColor\"/>\n",
|
| 481 |
+
"</g>\n",
|
| 482 |
+
"\n",
|
| 483 |
+
"<g class=\"displacy-arrow\">\n",
|
| 484 |
+
" <path class=\"displacy-arc\" id=\"arrow-0c70d09209f64114aa5a12db7f67e2b2-0-20\" stroke-width=\"2px\" d=\"M2170,439.5 C2170,89.5 3545.0,89.5 3545.0,439.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
| 485 |
+
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
| 486 |
+
" <textPath xlink:href=\"#arrow-0c70d09209f64114aa5a12db7f67e2b2-0-20\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">udep</textPath>\n",
|
| 487 |
+
" </text>\n",
|
| 488 |
+
" <path class=\"displacy-arrowhead\" d=\"M3545.0,441.5 L3553.0,429.5 3537.0,429.5\" fill=\"currentColor\"/>\n",
|
| 489 |
+
"</g>\n",
|
| 490 |
+
"\n",
|
| 491 |
+
"<g class=\"displacy-arrow\">\n",
|
| 492 |
+
" <path class=\"displacy-arc\" id=\"arrow-0c70d09209f64114aa5a12db7f67e2b2-0-21\" stroke-width=\"2px\" d=\"M3570,439.5 C3570,352.0 3705.0,352.0 3705.0,439.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
| 493 |
+
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
| 494 |
+
" <textPath xlink:href=\"#arrow-0c70d09209f64114aa5a12db7f67e2b2-0-21\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">udep</textPath>\n",
|
| 495 |
+
" </text>\n",
|
| 496 |
+
" <path class=\"displacy-arrowhead\" d=\"M3705.0,441.5 L3713.0,429.5 3697.0,429.5\" fill=\"currentColor\"/>\n",
|
| 497 |
+
"</g>\n",
|
| 498 |
+
"\n",
|
| 499 |
+
"<g class=\"displacy-arrow\">\n",
|
| 500 |
+
" <path class=\"displacy-arc\" id=\"arrow-0c70d09209f64114aa5a12db7f67e2b2-0-22\" stroke-width=\"2px\" d=\"M3745,439.5 C3745,352.0 3880.0,352.0 3880.0,439.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
| 501 |
+
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
| 502 |
+
" <textPath xlink:href=\"#arrow-0c70d09209f64114aa5a12db7f67e2b2-0-22\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">comp</textPath>\n",
|
| 503 |
+
" </text>\n",
|
| 504 |
+
" <path class=\"displacy-arrowhead\" d=\"M3880.0,441.5 L3888.0,429.5 3872.0,429.5\" fill=\"currentColor\"/>\n",
|
| 505 |
+
"</g>\n",
|
| 506 |
+
"\n",
|
| 507 |
+
"<g class=\"displacy-arrow\">\n",
|
| 508 |
+
" <path class=\"displacy-arc\" id=\"arrow-0c70d09209f64114aa5a12db7f67e2b2-0-23\" stroke-width=\"2px\" d=\"M4095,439.5 C4095,352.0 4230.0,352.0 4230.0,439.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
| 509 |
+
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
| 510 |
+
" <textPath xlink:href=\"#arrow-0c70d09209f64114aa5a12db7f67e2b2-0-23\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">subj</textPath>\n",
|
| 511 |
+
" </text>\n",
|
| 512 |
+
" <path class=\"displacy-arrowhead\" d=\"M4095,441.5 L4087,429.5 4103,429.5\" fill=\"currentColor\"/>\n",
|
| 513 |
+
"</g>\n",
|
| 514 |
+
"\n",
|
| 515 |
+
"<g class=\"displacy-arrow\">\n",
|
| 516 |
+
" <path class=\"displacy-arc\" id=\"arrow-0c70d09209f64114aa5a12db7f67e2b2-0-24\" stroke-width=\"2px\" d=\"M3920,439.5 C3920,264.5 4235.0,264.5 4235.0,439.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
| 517 |
+
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
| 518 |
+
" <textPath xlink:href=\"#arrow-0c70d09209f64114aa5a12db7f67e2b2-0-24\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">mod</textPath>\n",
|
| 519 |
+
" </text>\n",
|
| 520 |
+
" <path class=\"displacy-arrowhead\" d=\"M4235.0,441.5 L4243.0,429.5 4227.0,429.5\" fill=\"currentColor\"/>\n",
|
| 521 |
+
"</g>\n",
|
| 522 |
+
"\n",
|
| 523 |
+
"<g class=\"displacy-arrow\">\n",
|
| 524 |
+
" <path class=\"displacy-arc\" id=\"arrow-0c70d09209f64114aa5a12db7f67e2b2-0-25\" stroke-width=\"2px\" d=\"M4445,439.5 C4445,352.0 4580.0,352.0 4580.0,439.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
| 525 |
+
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
| 526 |
+
" <textPath xlink:href=\"#arrow-0c70d09209f64114aa5a12db7f67e2b2-0-25\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">mod</textPath>\n",
|
| 527 |
+
" </text>\n",
|
| 528 |
+
" <path class=\"displacy-arrowhead\" d=\"M4445,441.5 L4437,429.5 4453,429.5\" fill=\"currentColor\"/>\n",
|
| 529 |
+
"</g>\n",
|
| 530 |
+
"\n",
|
| 531 |
+
"<g class=\"displacy-arrow\">\n",
|
| 532 |
+
" <path class=\"displacy-arc\" id=\"arrow-0c70d09209f64114aa5a12db7f67e2b2-0-26\" stroke-width=\"2px\" d=\"M4270,439.5 C4270,264.5 4585.0,264.5 4585.0,439.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
| 533 |
+
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
| 534 |
+
" <textPath xlink:href=\"#arrow-0c70d09209f64114aa5a12db7f67e2b2-0-26\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">mod</textPath>\n",
|
| 535 |
+
" </text>\n",
|
| 536 |
+
" <path class=\"displacy-arrowhead\" d=\"M4585.0,441.5 L4593.0,429.5 4577.0,429.5\" fill=\"currentColor\"/>\n",
|
| 537 |
+
"</g>\n",
|
| 538 |
+
"\n",
|
| 539 |
+
"<g class=\"displacy-arrow\">\n",
|
| 540 |
+
" <path class=\"displacy-arc\" id=\"arrow-0c70d09209f64114aa5a12db7f67e2b2-0-27\" stroke-width=\"2px\" d=\"M4270,439.5 C4270,177.0 4765.0,177.0 4765.0,439.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
| 541 |
+
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
| 542 |
+
" <textPath xlink:href=\"#arrow-0c70d09209f64114aa5a12db7f67e2b2-0-27\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">cc</textPath>\n",
|
| 543 |
+
" </text>\n",
|
| 544 |
+
" <path class=\"displacy-arrowhead\" d=\"M4765.0,441.5 L4773.0,429.5 4757.0,429.5\" fill=\"currentColor\"/>\n",
|
| 545 |
+
"</g>\n",
|
| 546 |
+
"\n",
|
| 547 |
+
"<g class=\"displacy-arrow\">\n",
|
| 548 |
+
" <path class=\"displacy-arc\" id=\"arrow-0c70d09209f64114aa5a12db7f67e2b2-0-28\" stroke-width=\"2px\" d=\"M4270,439.5 C4270,89.5 4945.0,89.5 4945.0,439.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
| 549 |
+
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
| 550 |
+
" <textPath xlink:href=\"#arrow-0c70d09209f64114aa5a12db7f67e2b2-0-28\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">conj</textPath>\n",
|
| 551 |
+
" </text>\n",
|
| 552 |
+
" <path class=\"displacy-arrowhead\" d=\"M4945.0,441.5 L4953.0,429.5 4937.0,429.5\" fill=\"currentColor\"/>\n",
|
| 553 |
+
"</g>\n",
|
| 554 |
+
"\n",
|
| 555 |
+
"<g class=\"displacy-arrow\">\n",
|
| 556 |
+
" <path class=\"displacy-arc\" id=\"arrow-0c70d09209f64114aa5a12db7f67e2b2-0-29\" stroke-width=\"2px\" d=\"M4970,439.5 C4970,352.0 5105.0,352.0 5105.0,439.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
| 557 |
+
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
| 558 |
+
" <textPath xlink:href=\"#arrow-0c70d09209f64114aa5a12db7f67e2b2-0-29\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">punct</textPath>\n",
|
| 559 |
+
" </text>\n",
|
| 560 |
+
" <path class=\"displacy-arrowhead\" d=\"M5105.0,441.5 L5113.0,429.5 5097.0,429.5\" fill=\"currentColor\"/>\n",
|
| 561 |
+
"</g>\n",
|
| 562 |
+
"\n",
|
| 563 |
+
"<g class=\"displacy-arrow\">\n",
|
| 564 |
+
" <path class=\"displacy-arc\" id=\"arrow-0c70d09209f64114aa5a12db7f67e2b2-0-30\" stroke-width=\"2px\" d=\"M4970,439.5 C4970,264.5 5285.0,264.5 5285.0,439.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
| 565 |
+
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
| 566 |
+
" <textPath xlink:href=\"#arrow-0c70d09209f64114aa5a12db7f67e2b2-0-30\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">comp</textPath>\n",
|
| 567 |
+
" </text>\n",
|
| 568 |
+
" <path class=\"displacy-arrowhead\" d=\"M5285.0,441.5 L5293.0,429.5 5277.0,429.5\" fill=\"currentColor\"/>\n",
|
| 569 |
+
"</g>\n",
|
| 570 |
+
"\n",
|
| 571 |
+
"<g class=\"displacy-arrow\">\n",
|
| 572 |
+
" <path class=\"displacy-arc\" id=\"arrow-0c70d09209f64114aa5a12db7f67e2b2-0-31\" stroke-width=\"2px\" d=\"M5495,439.5 C5495,352.0 5630.0,352.0 5630.0,439.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
| 573 |
+
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
| 574 |
+
" <textPath xlink:href=\"#arrow-0c70d09209f64114aa5a12db7f67e2b2-0-31\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">mod</textPath>\n",
|
| 575 |
+
" </text>\n",
|
| 576 |
+
" <path class=\"displacy-arrowhead\" d=\"M5495,441.5 L5487,429.5 5503,429.5\" fill=\"currentColor\"/>\n",
|
| 577 |
+
"</g>\n",
|
| 578 |
+
"\n",
|
| 579 |
+
"<g class=\"displacy-arrow\">\n",
|
| 580 |
+
" <path class=\"displacy-arc\" id=\"arrow-0c70d09209f64114aa5a12db7f67e2b2-0-32\" stroke-width=\"2px\" d=\"M4970,439.5 C4970,177.0 5640.0,177.0 5640.0,439.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
| 581 |
+
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
| 582 |
+
" <textPath xlink:href=\"#arrow-0c70d09209f64114aa5a12db7f67e2b2-0-32\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">mod</textPath>\n",
|
| 583 |
+
" </text>\n",
|
| 584 |
+
" <path class=\"displacy-arrowhead\" d=\"M5640.0,441.5 L5648.0,429.5 5632.0,429.5\" fill=\"currentColor\"/>\n",
|
| 585 |
+
"</g>\n",
|
| 586 |
+
"\n",
|
| 587 |
+
"<g class=\"displacy-arrow\">\n",
|
| 588 |
+
" <path class=\"displacy-arc\" id=\"arrow-0c70d09209f64114aa5a12db7f67e2b2-0-33\" stroke-width=\"2px\" d=\"M5845,439.5 C5845,352.0 5980.0,352.0 5980.0,439.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
| 589 |
+
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
| 590 |
+
" <textPath xlink:href=\"#arrow-0c70d09209f64114aa5a12db7f67e2b2-0-33\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">compound</textPath>\n",
|
| 591 |
+
" </text>\n",
|
| 592 |
+
" <path class=\"displacy-arrowhead\" d=\"M5845,441.5 L5837,429.5 5853,429.5\" fill=\"currentColor\"/>\n",
|
| 593 |
+
"</g>\n",
|
| 594 |
+
"\n",
|
| 595 |
+
"<g class=\"displacy-arrow\">\n",
|
| 596 |
+
" <path class=\"displacy-arc\" id=\"arrow-0c70d09209f64114aa5a12db7f67e2b2-0-34\" stroke-width=\"2px\" d=\"M6020,439.5 C6020,352.0 6155.0,352.0 6155.0,439.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
| 597 |
+
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
| 598 |
+
" <textPath xlink:href=\"#arrow-0c70d09209f64114aa5a12db7f67e2b2-0-34\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">subj</textPath>\n",
|
| 599 |
+
" </text>\n",
|
| 600 |
+
" <path class=\"displacy-arrowhead\" d=\"M6020,441.5 L6012,429.5 6028,429.5\" fill=\"currentColor\"/>\n",
|
| 601 |
+
"</g>\n",
|
| 602 |
+
"</svg></span>"
|
| 603 |
+
],
|
| 604 |
+
"text/plain": [
|
| 605 |
+
"<IPython.core.display.HTML object>"
|
| 606 |
+
]
|
| 607 |
+
},
|
| 608 |
+
"metadata": {},
|
| 609 |
+
"output_type": "display_data"
|
| 610 |
+
}
|
| 611 |
+
],
|
| 612 |
+
"source": [
|
| 613 |
+
"doc = ssudify(nlp(\"According to the studies, “People who danced more than once a week had a 76% lower risk of getting dementia compared to people who danced less frequently or didn’t dance at all,” Dr Pasricha noted.\"))\n",
|
| 614 |
+
"# Since this is an interactive Jupyter environment, we can use displacy.render here\n",
|
| 615 |
+
"displacy.render(doc, style='dep')"
|
| 616 |
+
]
|
| 617 |
+
},
|
| 618 |
+
{
|
| 619 |
+
"cell_type": "code",
|
| 620 |
+
"execution_count": 36,
|
| 621 |
+
"id": "daa1e8f5-2afe-4153-9469-a4c2af9b46b1",
|
| 622 |
+
"metadata": {},
|
| 623 |
+
"outputs": [
|
| 624 |
+
{
|
| 625 |
+
"data": {
|
| 626 |
+
"text/plain": [
|
| 627 |
+
"[(Judgement, 0)]"
|
| 628 |
+
]
|
| 629 |
+
},
|
| 630 |
+
"execution_count": 36,
|
| 631 |
+
"metadata": {},
|
| 632 |
+
"output_type": "execute_result"
|
| 633 |
+
}
|
| 634 |
+
],
|
| 635 |
+
"source": [
|
| 636 |
+
"get_fluff(doc)"
|
| 637 |
+
]
|
| 638 |
+
},
|
| 639 |
+
{
|
| 640 |
+
"cell_type": "code",
|
| 641 |
+
"execution_count": 8,
|
| 642 |
+
"id": "b8dcf65d-89b0-49fd-9a0d-fc1454d522e0",
|
| 643 |
+
"metadata": {},
|
| 644 |
+
"outputs": [],
|
| 645 |
+
"source": [
|
| 646 |
+
"from fasthtml.common import *\n",
|
| 647 |
+
"from fasthtml.jupyter import JupyUvi, HTMX"
|
| 648 |
+
]
|
| 649 |
+
},
|
| 650 |
+
{
|
| 651 |
+
"cell_type": "code",
|
| 652 |
+
"execution_count": 9,
|
| 653 |
+
"id": "9f23a19c-b645-4b78-85fd-bc9742d00f82",
|
| 654 |
+
"metadata": {},
|
| 655 |
+
"outputs": [],
|
| 656 |
+
"source": [
|
| 657 |
+
"def display_annotations(annot):\n",
|
| 658 |
+
" return Div(*[Span(a[0], style=f\"margin-right: 0.5ex; background: rgba(219, 144, 232, {a[1]/15})\") for a in annot])"
|
| 659 |
+
]
|
| 660 |
+
},
|
| 661 |
+
{
|
| 662 |
+
"cell_type": "code",
|
| 663 |
+
"execution_count": 10,
|
| 664 |
+
"id": "8fb35fe3-68de-45b6-8053-8b390c8e8661",
|
| 665 |
+
"metadata": {},
|
| 666 |
+
"outputs": [],
|
| 667 |
+
"source": [
|
| 668 |
+
"app, rt = fast_app(pico=True)"
|
| 669 |
+
]
|
| 670 |
+
},
|
| 671 |
+
{
|
| 672 |
+
"cell_type": "code",
|
| 673 |
+
"execution_count": 41,
|
| 674 |
+
"id": "9ce9c318-733d-4ee1-b5d7-39d98aa14697",
|
| 675 |
+
"metadata": {},
|
| 676 |
+
"outputs": [],
|
| 677 |
+
"source": [
|
| 678 |
+
"@app.get\n",
|
| 679 |
+
"def index():\n",
|
| 680 |
+
" page = Div(\n",
|
| 681 |
+
" Form(hx_post=send, hx_target=\"#output\", hx_swap=\"outerHTML\")(\n",
|
| 682 |
+
" Div(Button(\"Check\", style=\"margin-bottom: 1rem\"), \n",
|
| 683 |
+
" Textarea(name=\"text\", style=\"height: calc(100vh - 11rem)\"))\n",
|
| 684 |
+
" ),\n",
|
| 685 |
+
" Div(\n",
|
| 686 |
+
" Div(Small(Em(\"Highlighted text segments can be shortened or reordered to improve readability. The stronger the highlight, the more the segment burdens the reader’s memory.\")), \n",
|
| 687 |
+
" cls=\"overflow-auto\", style=\"height: 4rem; text-wrap: balance; padding: 0rem 1rem\"),\n",
|
| 688 |
+
" Div(id=\"output\", style=\"padding: 1rem; padding-bottom: calc(1rem - 5px)\")\n",
|
| 689 |
+
" ),\n",
|
| 690 |
+
" cls=\"grid\"\n",
|
| 691 |
+
" )\n",
|
| 692 |
+
" return Titled('Readability feedback', page)\n",
|
| 693 |
+
"\n",
|
| 694 |
+
"@app.post\n",
|
| 695 |
+
"def send(text:str):\n",
|
| 696 |
+
" paragraphs = re.sub(r\"[^\\S\\r\\n]+\", \" \", text).split(\"\\r\\n\\r\\n\")\n",
|
| 697 |
+
" docs = [ssudify(nlp(para)) for para in paragraphs]\n",
|
| 698 |
+
" annot_paras = [get_fluff(doc) for doc in docs]\n",
|
| 699 |
+
" return Div(*[P(*[Span(Span(a[0], style=f\"background: light-dark(rgba(237, 201, 241, {a[1]/15}), rgba(182, 69, 205, {a[1]/15}))\"), \n",
|
| 700 |
+
" Span(\" \")) for a in annot_para],\n",
|
| 701 |
+
" style=\"margin-bottom: 1.5em\")\n",
|
| 702 |
+
" for annot_para in annot_paras[:-1]],\n",
|
| 703 |
+
" P(*[Span(Span(a[0], style=f\"background: light-dark(rgba(237, 201, 241, {a[1]/15}), rgba(182, 69, 205, {a[1]/15}))\"), \n",
|
| 704 |
+
" Span(\" \")) for a in annot_paras[-1]],\n",
|
| 705 |
+
" style=\"margin-bottom: 0em\"),\n",
|
| 706 |
+
" id=\"output\", cls=\"overflow-auto\", style=\"height: calc(100vh - 11rem); padding: 1rem; padding-bottom: calc(1rem - 5px)\")"
|
| 707 |
+
]
|
| 708 |
+
},
|
| 709 |
+
{
|
| 710 |
+
"cell_type": "code",
|
| 711 |
+
"execution_count": 12,
|
| 712 |
+
"id": "4cc68772-d26d-4cd1-be59-c23a5739c3d0",
|
| 713 |
+
"metadata": {},
|
| 714 |
+
"outputs": [
|
| 715 |
+
{
|
| 716 |
+
"data": {
|
| 717 |
+
"text/html": [
|
| 718 |
+
"\n",
|
| 719 |
+
"<script>\n",
|
| 720 |
+
"document.body.addEventListener('htmx:configRequest', (event) => {\n",
|
| 721 |
+
" if(event.detail.path.includes('://')) return;\n",
|
| 722 |
+
" htmx.config.selfRequestsOnly=false;\n",
|
| 723 |
+
" event.detail.path = `${location.protocol}//${location.hostname}:8000${event.detail.path}`;\n",
|
| 724 |
+
"});\n",
|
| 725 |
+
"</script>"
|
| 726 |
+
],
|
| 727 |
+
"text/plain": [
|
| 728 |
+
"<IPython.core.display.HTML object>"
|
| 729 |
+
]
|
| 730 |
+
},
|
| 731 |
+
"metadata": {},
|
| 732 |
+
"output_type": "display_data"
|
| 733 |
+
}
|
| 734 |
+
],
|
| 735 |
+
"source": [
|
| 736 |
+
"server = JupyUvi(app)"
|
| 737 |
+
]
|
| 738 |
+
},
|
| 739 |
+
{
|
| 740 |
+
"cell_type": "code",
|
| 741 |
+
"execution_count": 13,
|
| 742 |
+
"id": "3e8c0a41-0d79-4715-ab2b-e5a2d0455569",
|
| 743 |
+
"metadata": {},
|
| 744 |
+
"outputs": [
|
| 745 |
+
{
|
| 746 |
+
"data": {
|
| 747 |
+
"text/html": [
|
| 748 |
+
"<iframe src=\"http://localhost:8000/\" style=\"width: 100%; height: auto; border: none;\" onload=\"{\n",
|
| 749 |
+
" let frame = this;\n",
|
| 750 |
+
" window.addEventListener('message', function(e) {\n",
|
| 751 |
+
" if (e.source !== frame.contentWindow) return; // Only proceed if the message is from this iframe\n",
|
| 752 |
+
" if (e.data.height) frame.style.height = (e.data.height+1) + 'px';\n",
|
| 753 |
+
" }, false);\n",
|
| 754 |
+
" }\" allow=\"accelerometer; autoplay; camera; clipboard-read; clipboard-write; display-capture; encrypted-media; fullscreen; gamepad; geolocation; gyroscope; hid; identity-credentials-get; idle-detection; magnetometer; microphone; midi; payment; picture-in-picture; publickey-credentials-get; screen-wake-lock; serial; usb; web-share; xr-spatial-tracking\"></iframe> "
|
| 755 |
+
],
|
| 756 |
+
"text/plain": [
|
| 757 |
+
"<IPython.core.display.HTML object>"
|
| 758 |
+
]
|
| 759 |
+
},
|
| 760 |
+
"execution_count": 13,
|
| 761 |
+
"metadata": {},
|
| 762 |
+
"output_type": "execute_result"
|
| 763 |
+
}
|
| 764 |
+
],
|
| 765 |
+
"source": [
|
| 766 |
+
"HTMX()"
|
| 767 |
+
]
|
| 768 |
+
},
|
| 769 |
+
{
|
| 770 |
+
"cell_type": "code",
|
| 771 |
+
"execution_count": 35,
|
| 772 |
+
"id": "257f3c71-a034-4c9d-b35f-a9e06ddcd021",
|
| 773 |
+
"metadata": {},
|
| 774 |
+
"outputs": [],
|
| 775 |
+
"source": [
|
| 776 |
+
"server.stop()"
|
| 777 |
+
]
|
| 778 |
+
}
|
| 779 |
+
],
|
| 780 |
+
"metadata": {
|
| 781 |
+
"kernelspec": {
|
| 782 |
+
"display_name": "Python 3 (ipykernel)",
|
| 783 |
+
"language": "python",
|
| 784 |
+
"name": "python3"
|
| 785 |
+
},
|
| 786 |
+
"language_info": {
|
| 787 |
+
"codemirror_mode": {
|
| 788 |
+
"name": "ipython",
|
| 789 |
+
"version": 3
|
| 790 |
+
},
|
| 791 |
+
"file_extension": ".py",
|
| 792 |
+
"mimetype": "text/x-python",
|
| 793 |
+
"name": "python",
|
| 794 |
+
"nbconvert_exporter": "python",
|
| 795 |
+
"pygments_lexer": "ipython3",
|
| 796 |
+
"version": "3.13.9"
|
| 797 |
+
}
|
| 798 |
+
},
|
| 799 |
+
"nbformat": 4,
|
| 800 |
+
"nbformat_minor": 5
|
| 801 |
+
}
|
Dockerfile
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Image for the Hugging Face Space: installs the Python dependencies and runs main.py.
FROM python:3.10
WORKDIR /code
COPY --link --chown=1000 . .
# World-writable scratch directory for cached downloads.
RUN mkdir -p /tmp/cache/
RUN chmod a+rwx -R /tmp/cache/
# Point the Hugging Face Hub cache at the directory prepared above.
# (The previous value was the literal string "HF_HOME", i.e. a relative
# directory of that name, which left /tmp/cache/ unused.)
ENV HF_HUB_CACHE=/tmp/cache/
RUN pip install --no-cache-dir -r requirements.txt

# Unbuffered stdout/stderr so logs show up immediately; PORT is read by the server.
ENV PYTHONUNBUFFERED=1 PORT=7860
CMD ["python", "main.py"]
|
README.md
CHANGED
|
@@ -1,10 +1,11 @@
|
|
| 1 |
---
|
| 2 |
-
title: Readability
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
| 8 |
---
|
| 9 |
|
| 10 |
-
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Readability feedback
|
| 3 |
+
emoji: 📖
|
| 4 |
+
colorFrom: red
|
| 5 |
+
colorTo: purple
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
| 8 |
---
|
| 9 |
|
| 10 |
+
# readability-feedback
|
| 11 |
+
NLP-based readability feedback for writers
|
main.py
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
# coding: utf-8
|
| 3 |
+
|
| 4 |
+
import spacy
|
| 5 |
+
|
| 6 |
+
# Shared spaCy pipeline; loaded once at import time and reused for every paragraph.
nlp = spacy.load("en_core_web_sm")

# Coarse relation label -> the spaCy dependency labels that ssudify() collapses
# into that label.
relations = {
    "subj": ["nsubj", "nsubjpass", "csubj", "csubjpass", "expl"],
    "comp": ["dobj", "dative", "attr", "oprd", "pobj", "aux", "auxpass", "mark", "case", "ccomp", "xcomp", "acomp"],
    "mod": ["agent", "advmod", "advcl", "relcl", "npmod", "npadvmod", "prt"],
    "udep": ["acl", "amod", "nmod", "poss", "nummod", "prep"],
}
|
| 14 |
+
|
| 15 |
+
def ssudify(doc):
    """Rewrite the dependency tree of ``doc`` in place and return it.

    The function mutates ``head`` and ``dep_`` on the tokens of ``doc`` in
    four passes:

    1. For every token, its function-word children (``aux``, ``auxpass``,
       ``mark``, ``case``, and ``advmod`` tagged ``SCONJ``) are promoted,
       nearest first, so that each one becomes the head of the element
       below it; the demoted element is relabelled ``"comp"``.
    2. Every remaining label is collapsed to the coarse labels of the
       module-level ``relations`` table (``"dep"`` becomes ``"unknown"``).
    3. When a token has several ``"subj"`` children, only the nearest one
       keeps the label; the others become ``"comp"``.
    4. ``"comp"``/``"udep"`` children that precede the subject become ``"mod"``.
    """
    function_deps = ["aux", "auxpass", "mark", "case"]
    auxiliaries = ["aux", "auxpass"]
    # Children that follow an auxiliary when it takes over as head.
    carried_deps = relations["subj"] + relations["mod"] + ["conj", "cc"]

    # Pass 1: promote function words over the token they depend on.
    for word in doc:
        chain = [word]
        chain.extend(
            kid for kid in word.children
            if kid.dep_ in function_deps
            or (kid.dep_ == "advmod" and kid.pos_ == "SCONJ")
        )
        # Nearest function word first; the token itself (distance 0) stays in front.
        chain.sort(key=lambda t: abs(t.i - word.i))
        for lower, upper in zip(chain, chain[1:]):
            if upper.dep_ in auxiliaries:
                for dependent in lower.children:
                    if dependent.dep_ in carried_deps:
                        dependent.head = upper
            # A token that is its own head is the root; the promoted word
            # then becomes its own head instead.
            if lower.head != lower:
                upper.head = lower.head
            else:
                upper.head = upper
            upper.dep_ = lower.dep_
            lower.head = upper
            lower.dep_ = "comp"

    # Pass 2: collapse fine-grained labels into the coarse relation names.
    for word in doc:
        if word.dep_ == "dep":
            word.dep_ = "unknown"
        for coarse, fine_labels in relations.items():
            if word.dep_ in fine_labels:
                word.dep_ = coarse

    # Pass 3: keep only the nearest subject of each head.
    for word in doc:
        nearest_first = sorted(
            (kid for kid in word.children if kid.dep_ == "subj"),
            key=lambda t: abs(t.i - word.i),
        )
        for extra in nearest_first[1:]:
            extra.dep_ = "comp"

    # Pass 4: complements placed before the subject are treated as modifiers.
    for word in doc:
        subj = next((kid for kid in word.children if kid.dep_ == "subj"), None)
        if subj is None:
            continue
        fronted = [kid for kid in word.children if kid.i < subj.i]
        for kid in fronted:
            if kid.dep_ in ["comp", "udep"]:
                kid.dep_ = "mod"

    return doc
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def flyover(token):
    """Return the tokens lying strictly between ``token`` and its head.

    Only the relations ``subj``, ``comp``, ``udep`` and ``conj`` are measured.

    Args:
        token: an object exposing ``i``, ``dep_``, ``head`` (with its own
            ``i``) and ``doc`` (sliceable by token index).

    Returns:
        A ``(span, count)`` tuple where ``span`` is the slice of
        ``token.doc`` between the token and its head (exclusive on both
        sides) and ``count`` is the number of tokens in it. For any other
        relation, or when the token is its own head, the span is empty and
        the count is 0. A tuple is always returned, so callers can index
        the result unconditionally.
    """
    nothing = (token.doc[token.i:token.i], 0)
    if token.dep_ not in ("subj", "comp", "udep", "conj"):
        return nothing

    head_i = token.head.i
    if head_i == token.i:
        # Self-headed token: there is no arc to measure.
        return nothing

    left, right = sorted((head_i, token.i))
    return (token.doc[left + 1:right], right - left - 1)
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def get_fluff(doc):
    """Partition ``doc`` into highlighted and plain segments.

    Each token's flyover (the tokens between it and its head) is computed.
    A flyover is kept when it covers more than two tokens and no
    overlapping flyover is longer or heavier. The stretches of ``doc`` not
    covered by a kept flyover are returned as plain segments.

    Args:
        doc: a parsed document whose tokens have been through ``ssudify``.

    Returns:
        A list of ``(span, weight)`` tuples ordered by the index of each
        span's first token. Plain segments carry weight 0. An empty ``doc``
        yields an empty list.
    """
    # An empty document has no first token to sort on.
    if len(doc) == 0:
        return []

    candidates = [
        f for f in map(flyover, doc)
        if f is not None and len(f[0]) > 0
    ]

    def dominated(current):
        """Tell whether an overlapping candidate is longer or heavier."""
        cur_span, cur_weight = current
        for other_span, other_weight in candidates:
            overlaps = (
                other_span[-1].i > cur_span[0].i >= other_span[0].i
                or other_span[0].i < cur_span[-1].i <= other_span[-1].i
            )
            if overlaps and (len(cur_span) < len(other_span) or cur_weight < other_weight):
                return True
        return False

    flyovers = sorted(
        (f for f in candidates if len(f[0]) > 2 and not dominated(f)),
        key=lambda f: f[0][0].i,
    )
    if not flyovers:
        # Nothing to highlight: the whole document is one plain segment.
        return [(doc, 0)]

    # Fill the gaps before, between and after the kept flyovers.
    interstices = []
    next_free = 0
    for span, _ in flyovers:
        if span[0].i > next_free:
            interstices.append((doc[next_free:span[0].i], 0))
        next_free = span[-1].i + 1
    if next_free <= doc[-1].i:
        interstices.append((doc[next_free:], 0))

    return sorted(flyovers + interstices, key=lambda seg: seg[0][0].i)
|
| 86 |
+
|
| 87 |
+
from fasthtml_hf import setup_hf_backup
|
| 88 |
+
from fasthtml.common import *
|
| 89 |
+
import re
|
| 90 |
+
|
| 91 |
+
# Application object and route decorator used by the handlers below.
# NOTE(review): pico=True presumably enables FastHTML's Pico CSS styling — confirm.
app, rt = fast_app(pico=True)
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
@app.get
def index():
    """Build the landing page: input form on one side, hint and output on the other."""
    check_button = Button("Check", style="margin-bottom: 1rem")
    text_box = Textarea(name="text", style="height: calc(100vh - 11rem)")
    # Posting the form swaps the element with id "output" for the response.
    input_form = Form(hx_post=send, hx_target="#output", hx_swap="outerHTML")(
        Div(check_button, text_box)
    )

    hint = Div(
        Small(Em("Highlighted text segments can be shortened or reordered to improve readability. The stronger the highlight, the more the segment burdens the reader’s memory.")),
        cls="overflow-auto",
        style="height: 4rem; text-wrap: balance; padding: 0rem 1rem",
    )
    output_placeholder = Div(id="output", style="padding: 1rem; padding-bottom: calc(1rem - 5px)")
    feedback_column = Div(hint, output_placeholder)

    page = Div(input_form, feedback_column, cls="grid")
    return Titled('Readability feedback', page)
|
| 109 |
+
|
| 110 |
+
def _highlight_segment(segment):
    """Render one ``(tokens, weight)`` pair as a shaded span plus a trailing space."""
    tokens, weight = segment
    alpha = weight / 15
    shade = f"background: light-dark(rgba(237, 201, 241, {alpha}), rgba(182, 69, 205, {alpha}))"
    return Span(Span(tokens, style=shade), Span(" "))


def _render_paragraph(segments, bottom_margin):
    """Render the annotated segments of one paragraph as a ``P`` element."""
    return P(*[_highlight_segment(seg) for seg in segments],
             style=f"margin-bottom: {bottom_margin}")


@app.post
def send(text:str):
    """Annotate the submitted text and return the replacement ``#output`` element.

    Runs of horizontal whitespace are collapsed to one space, the text is
    split into paragraphs on blank lines, and each paragraph is parsed,
    restructured by ``ssudify`` and segmented by ``get_fluff``.

    Args:
        text: raw contents of the form's textarea.

    Returns:
        A ``Div`` with id ``output`` holding one ``P`` per non-blank
        paragraph; the ``Div`` is empty when the text has no content.
    """
    collapsed = re.sub(r"[^\S\r\n]+", " ", text)
    # Accept both CRLF and bare LF blank lines as paragraph breaks, and skip
    # blank paragraphs, which would otherwise reach the parser as empty docs.
    paragraphs = [para for para in re.split(r"(?:\r?\n){2}", collapsed) if para.strip()]
    annot_paras = [get_fluff(ssudify(nlp(para))) for para in paragraphs]

    # Every paragraph but the last is followed by a gap.
    last = len(annot_paras) - 1
    rendered = [
        _render_paragraph(annot_para, "0em" if pos == last else "1.5em")
        for pos, annot_para in enumerate(annot_paras)
    ]
    return Div(*rendered,
               id="output", cls="overflow-auto", style="height: calc(100vh - 11rem); padding: 1rem; padding-bottom: calc(1rem - 5px)")
|
| 123 |
+
|
| 124 |
+
# NOTE(review): presumably hooks the app up to fasthtml_hf's Hugging Face backup — confirm against that package.
setup_hf_backup(app)
# Runs at import time: this file is executed directly by the Dockerfile's CMD.
serve()
|
requirements.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fasthtml-hf
|
| 2 |
+
python-fasthtml
|
| 3 |
+
spacy
|
| 4 |
+
en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl
|