lynn-twinkl committed on
Commit ·
97a0795
0
Parent(s):
First commit
Browse files- .gitignore +167 -0
- functions/extract_needs.py +59 -0
- notebooks/Untitled.ipynb +225 -0
- notebooks/custom-ner-model/attribute_ruler/patterns +0 -0
- notebooks/custom-ner-model/config.cfg +269 -0
- notebooks/custom-ner-model/lemmatizer/lookups/lookups.bin +0 -0
- notebooks/custom-ner-model/meta.json +531 -0
- notebooks/custom-ner-model/ner/cfg +13 -0
- notebooks/custom-ner-model/ner/moves +1 -0
- notebooks/custom-ner-model/parser/cfg +13 -0
- notebooks/custom-ner-model/parser/moves +1 -0
- notebooks/custom-ner-model/senter/cfg +3 -0
- notebooks/custom-ner-model/tagger/cfg +57 -0
- notebooks/custom-ner-model/tagger/model +0 -0
- notebooks/custom-ner-model/tok2vec/cfg +3 -0
- notebooks/custom-ner-model/tokenizer +3 -0
- notebooks/custom-ner-model/vocab/lookups.bin +0 -0
- notebooks/custom-ner-model/vocab/strings.json +0 -0
- notebooks/custom-ner-model/vocab/vectors.cfg +3 -0
- notebooks/data-exploration.ipynb +0 -0
- notebooks/ner-training.ipynb +531 -0
- notebooks/training-code.md +32 -0
.gitignore
ADDED
|
@@ -0,0 +1,167 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Byte-compiled / optimized / DLL files
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*$py.class
|
| 5 |
+
|
| 6 |
+
# C extensions
|
| 7 |
+
*.so
|
| 8 |
+
|
| 9 |
+
# Distribution / packaging
|
| 10 |
+
.Python
|
| 11 |
+
build/
|
| 12 |
+
develop-eggs/
|
| 13 |
+
dist/
|
| 14 |
+
downloads/
|
| 15 |
+
eggs/
|
| 16 |
+
.eggs/
|
| 17 |
+
lib/
|
| 18 |
+
lib64/
|
| 19 |
+
parts/
|
| 20 |
+
sdist/
|
| 21 |
+
var/
|
| 22 |
+
wheels/
|
| 23 |
+
share/python-wheels/
|
| 24 |
+
*.egg-info/
|
| 25 |
+
.installed.cfg
|
| 26 |
+
*.egg
|
| 27 |
+
MANIFEST
|
| 28 |
+
|
| 29 |
+
# PyInstaller
|
| 30 |
+
# Usually these files are written by a python script from a template
|
| 31 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
| 32 |
+
*.manifest
|
| 33 |
+
*.spec
|
| 34 |
+
|
| 35 |
+
# Installer logs
|
| 36 |
+
pip-log.txt
|
| 37 |
+
pip-delete-this-directory.txt
|
| 38 |
+
|
| 39 |
+
# Unit test / coverage reports
|
| 40 |
+
htmlcov/
|
| 41 |
+
.tox/
|
| 42 |
+
.nox/
|
| 43 |
+
.coverage
|
| 44 |
+
.coverage.*
|
| 45 |
+
.cache
|
| 46 |
+
nosetests.xml
|
| 47 |
+
coverage.xml
|
| 48 |
+
*.cover
|
| 49 |
+
*.py,cover
|
| 50 |
+
.hypothesis/
|
| 51 |
+
.pytest_cache/
|
| 52 |
+
cover/
|
| 53 |
+
|
| 54 |
+
# Translations
|
| 55 |
+
*.mo
|
| 56 |
+
*.pot
|
| 57 |
+
|
| 58 |
+
# Django stuff:
|
| 59 |
+
*.log
|
| 60 |
+
local_settings.py
|
| 61 |
+
db.sqlite3
|
| 62 |
+
db.sqlite3-journal
|
| 63 |
+
|
| 64 |
+
# Flask stuff:
|
| 65 |
+
instance/
|
| 66 |
+
.webassets-cache
|
| 67 |
+
|
| 68 |
+
# Scrapy stuff:
|
| 69 |
+
.scrapy
|
| 70 |
+
|
| 71 |
+
# Sphinx documentation
|
| 72 |
+
docs/_build/
|
| 73 |
+
|
| 74 |
+
# PyBuilder
|
| 75 |
+
.pybuilder/
|
| 76 |
+
target/
|
| 77 |
+
|
| 78 |
+
# Jupyter Notebook
|
| 79 |
+
.ipynb_checkpoints
|
| 80 |
+
|
| 81 |
+
# IPython
|
| 82 |
+
profile_default/
|
| 83 |
+
ipython_config.py
|
| 84 |
+
|
| 85 |
+
# pyenv
|
| 86 |
+
# For a library or package, you might want to ignore these files since the code is
|
| 87 |
+
# intended to run in multiple environments; otherwise, check them in:
|
| 88 |
+
# .python-version
|
| 89 |
+
|
| 90 |
+
# pipenv
|
| 91 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
| 92 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
| 93 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
| 94 |
+
# install all needed dependencies.
|
| 95 |
+
#Pipfile.lock
|
| 96 |
+
|
| 97 |
+
# poetry
|
| 98 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
| 99 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
| 100 |
+
# commonly ignored for libraries.
|
| 101 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
| 102 |
+
#poetry.lock
|
| 103 |
+
|
| 104 |
+
# pdm
|
| 105 |
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
| 106 |
+
#pdm.lock
|
| 107 |
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
| 108 |
+
# in version control.
|
| 109 |
+
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
|
| 110 |
+
.pdm.toml
|
| 111 |
+
.pdm-python
|
| 112 |
+
.pdm-build/
|
| 113 |
+
|
| 114 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
| 115 |
+
__pypackages__/
|
| 116 |
+
|
| 117 |
+
# Celery stuff
|
| 118 |
+
celerybeat-schedule
|
| 119 |
+
celerybeat.pid
|
| 120 |
+
|
| 121 |
+
# SageMath parsed files
|
| 122 |
+
*.sage.py
|
| 123 |
+
|
| 124 |
+
# Environments
|
| 125 |
+
.env
|
| 126 |
+
.venv
|
| 127 |
+
env/
|
| 128 |
+
venv/
|
| 129 |
+
ENV/
|
| 130 |
+
env.bak/
|
| 131 |
+
venv.bak/
|
| 132 |
+
|
| 133 |
+
# Spyder project settings
|
| 134 |
+
.spyderproject
|
| 135 |
+
.spyproject
|
| 136 |
+
|
| 137 |
+
# Rope project settings
|
| 138 |
+
.ropeproject
|
| 139 |
+
|
| 140 |
+
# mkdocs documentation
|
| 141 |
+
/site
|
| 142 |
+
|
| 143 |
+
# mypy
|
| 144 |
+
.mypy_cache/
|
| 145 |
+
.dmypy.json
|
| 146 |
+
dmypy.json
|
| 147 |
+
|
| 148 |
+
# Pyre type checker
|
| 149 |
+
.pyre/
|
| 150 |
+
|
| 151 |
+
# pytype static type analyzer
|
| 152 |
+
.pytype/
|
| 153 |
+
|
| 154 |
+
# Cython debug symbols
|
| 155 |
+
cython_debug/
|
| 156 |
+
|
| 157 |
+
#Local Files
|
| 158 |
+
.DS_Store
|
| 159 |
+
notes/
|
| 160 |
+
data/
|
| 161 |
+
|
| 162 |
+
# PyCharm
|
| 163 |
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
| 164 |
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
| 165 |
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
| 166 |
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
| 167 |
+
#.idea/
|
functions/extract_needs.py
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain_openai import ChatOpenAI
|
| 2 |
+
from langchain_core.prompts import ChatPromptTemplate
|
| 3 |
+
from typing import List, Dict, Any
|
| 4 |
+
from dotenv import load_dotenv
|
| 5 |
+
load_dotenv()
|
| 6 |
+
|
| 7 |
+
# System prompt sent with every letter. It asks the model for a single
# comma-separated line of concrete items/services, which is the format
# `_parse_items` relies on.
_SYSTEM_PROMPT = """
You are a data extraction assistant helping a grant review team evaluate funding applications from schools. Your task is to extract a structured list of specific items or services that the school is requesting funding for.

## Instructions:

- Users will submit excerpts from grant application letters.
- From each letter, extract only the tangible items or clearly defined services the school wants to use the grant for.
- Output the extracted items as a **clean, comma-separated list**, with no additional explanation or formatting.
- Do not include abstract goals or general program names (e.g., "Arts Award program" or "student development").
- Focus on concrete nouns that represent resources or services the grant would directly fund (e.g., "paint", "laptops", "counseling sessions", "sports equipment").

## Example:

**User Input:**
_I work in an alternative provision supporting disadvantaged children with SEND. Many of our students face significant challenges in communication and emotional expression.
Art is a powerful tool that allows them to process feelings, build confidence, and develop essential life skills.
With your support, we would purchase paints, canvases, clay, and sketchbooks to run the Arts Award program.
This would give our students a creative voice and a sense of achievement. Additionally, we could bring in an art therapist weekly, providing vital emotional support.
Your funding would transform lives, giving these children hope and opportunity._

**Your Output:**
paints, canvases, clay, sketchbooks, art therapist
"""


def _parse_items(content: str) -> List[str]:
    """Split a comma-separated model reply into trimmed, non-empty items."""
    trimmed = (piece.strip() for piece in content.split(","))
    # Drop empty fragments so an empty reply yields [] rather than [''],
    # and trailing or doubled commas do not produce blank entries.
    return [piece for piece in trimmed if piece]


def extract_needs(docs, max_concurrency: int = 10) -> List[List[str]]:
    """Extract the requested items/services from grant application letters.

    Each letter is sent to the ``gpt-4o-mini`` chat model (temperature 0.2)
    together with the extraction system prompt, and the comma-separated
    reply is split into a list of items.

    Args:
        docs: Iterable of letter excerpts, one per request. Each element is
            substituted for ``{input}`` in the human message. Pass a
            collection of letters, not a single string.
        max_concurrency: Upper bound on parallel model calls, forwarded to
            ``chain.batch`` via its ``config``.

    Returns:
        One list of item strings per input letter, in the order the batch
        call returns its responses. A letter whose reply contains no items
        maps to an empty list.
    """
    # Materialize once so generators and other iterables are accepted, and
    # so the emptiness check below is reliable.
    docs = list(docs)
    if not docs:
        # Nothing to send: skip building the client entirely.
        return []

    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.2)
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", _SYSTEM_PROMPT),
            ("human", "{input}"),
        ]
    )
    chain = prompt | llm

    responses = chain.batch(docs, config={"max_concurrency": max_concurrency})

    # NOTE(review): assumes each response's `.content` is a plain string;
    # confirm this holds for the model in use.
    return [_parse_items(response.content) for response in responses]
|
notebooks/Untitled.ipynb
ADDED
|
@@ -0,0 +1,225 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 1,
|
| 6 |
+
"id": "acad423d-8483-4c25-9a48-23ac442a5651",
|
| 7 |
+
"metadata": {},
|
| 8 |
+
"outputs": [],
|
| 9 |
+
"source": [
|
| 10 |
+
"import pandas as pd\n",
|
| 11 |
+
"import numpy as np\n",
|
| 12 |
+
"import re"
|
| 13 |
+
]
|
| 14 |
+
},
|
| 15 |
+
{
|
| 16 |
+
"cell_type": "code",
|
| 17 |
+
"execution_count": 2,
|
| 18 |
+
"id": "ffcfa0af-8dad-48e8-ab77-6915c7eda80a",
|
| 19 |
+
"metadata": {},
|
| 20 |
+
"outputs": [],
|
| 21 |
+
"source": [
|
| 22 |
+
"df = pd.read_csv('data/april-data.csv')"
|
| 23 |
+
]
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"cell_type": "code",
|
| 27 |
+
"execution_count": 3,
|
| 28 |
+
"id": "f80e366d-9a29-4eef-b93f-69d9658658d7",
|
| 29 |
+
"metadata": {},
|
| 30 |
+
"outputs": [
|
| 31 |
+
{
|
| 32 |
+
"data": {
|
| 33 |
+
"text/html": [
|
| 34 |
+
"<div>\n",
|
| 35 |
+
"<style scoped>\n",
|
| 36 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
| 37 |
+
" vertical-align: middle;\n",
|
| 38 |
+
" }\n",
|
| 39 |
+
"\n",
|
| 40 |
+
" .dataframe tbody tr th {\n",
|
| 41 |
+
" vertical-align: top;\n",
|
| 42 |
+
" }\n",
|
| 43 |
+
"\n",
|
| 44 |
+
" .dataframe thead th {\n",
|
| 45 |
+
" text-align: right;\n",
|
| 46 |
+
" }\n",
|
| 47 |
+
"</style>\n",
|
| 48 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
| 49 |
+
" <thead>\n",
|
| 50 |
+
" <tr style=\"text-align: right;\">\n",
|
| 51 |
+
" <th></th>\n",
|
| 52 |
+
" <th>Id</th>\n",
|
| 53 |
+
" <th>Date/Time Requested</th>\n",
|
| 54 |
+
" <th>Giveaway title</th>\n",
|
| 55 |
+
" <th>Customer Name</th>\n",
|
| 56 |
+
" <th>Email_address</th>\n",
|
| 57 |
+
" <th>School_name</th>\n",
|
| 58 |
+
" <th>Postal_address</th>\n",
|
| 59 |
+
" <th>Address_line_2</th>\n",
|
| 60 |
+
" <th>Address_city</th>\n",
|
| 61 |
+
" <th>Postcode</th>\n",
|
| 62 |
+
" <th>Additional_info</th>\n",
|
| 63 |
+
" </tr>\n",
|
| 64 |
+
" </thead>\n",
|
| 65 |
+
" <tbody>\n",
|
| 66 |
+
" <tr>\n",
|
| 67 |
+
" <th>0</th>\n",
|
| 68 |
+
" <td>312781</td>\n",
|
| 69 |
+
" <td>4/1/25 8:08</td>\n",
|
| 70 |
+
" <td>April Community Collection: Gardening Bundle G...</td>\n",
|
| 71 |
+
" <td>Kate Thompson</td>\n",
|
| 72 |
+
" <td>kate.thompson@fhhschools.com</td>\n",
|
| 73 |
+
" <td>Fountain Head House School</td>\n",
|
| 74 |
+
" <td>Church Road</td>\n",
|
| 75 |
+
" <td>NaN</td>\n",
|
| 76 |
+
" <td>Saltash</td>\n",
|
| 77 |
+
" <td>PL12 4AE</td>\n",
|
| 78 |
+
" <td>In our school we have an Outdoors Instructor w...</td>\n",
|
| 79 |
+
" </tr>\n",
|
| 80 |
+
" <tr>\n",
|
| 81 |
+
" <th>1</th>\n",
|
| 82 |
+
" <td>312783</td>\n",
|
| 83 |
+
" <td>4/1/25 8:32</td>\n",
|
| 84 |
+
" <td>April Community Collection: Gardening Bundle G...</td>\n",
|
| 85 |
+
" <td>Shona Wilson</td>\n",
|
| 86 |
+
" <td>shonaedment1994@gmail.com</td>\n",
|
| 87 |
+
" <td>S4YC @ Little Owls</td>\n",
|
| 88 |
+
" <td>Gadbrook Road</td>\n",
|
| 89 |
+
" <td>NaN</td>\n",
|
| 90 |
+
" <td>Northwich</td>\n",
|
| 91 |
+
" <td>CW9 7JL</td>\n",
|
| 92 |
+
" <td>We are currently working on our outdoor provis...</td>\n",
|
| 93 |
+
" </tr>\n",
|
| 94 |
+
" <tr>\n",
|
| 95 |
+
" <th>2</th>\n",
|
| 96 |
+
" <td>312790</td>\n",
|
| 97 |
+
" <td>4/1/25 9:22</td>\n",
|
| 98 |
+
" <td>April Community Collection: Gardening Bundle G...</td>\n",
|
| 99 |
+
" <td>Charlotte Coldham-Wood</td>\n",
|
| 100 |
+
" <td>daveandcharwood@gmail.com</td>\n",
|
| 101 |
+
" <td>Archbishop Benson School</td>\n",
|
| 102 |
+
" <td>Archbishop Benson School</td>\n",
|
| 103 |
+
" <td>Bodmin Road</td>\n",
|
| 104 |
+
" <td>Truro</td>\n",
|
| 105 |
+
" <td>TR1 1BN</td>\n",
|
| 106 |
+
" <td>I would use this bundle with our SEN children,...</td>\n",
|
| 107 |
+
" </tr>\n",
|
| 108 |
+
" <tr>\n",
|
| 109 |
+
" <th>3</th>\n",
|
| 110 |
+
" <td>312801</td>\n",
|
| 111 |
+
" <td>4/1/25 10:37</td>\n",
|
| 112 |
+
" <td>April Community Collection: Gardening Bundle G...</td>\n",
|
| 113 |
+
" <td>Erica Bowen</td>\n",
|
| 114 |
+
" <td>erica.bowen@thewillowseyc.org.uk</td>\n",
|
| 115 |
+
" <td>The Willows School</td>\n",
|
| 116 |
+
" <td>Fishermead Boulevard</td>\n",
|
| 117 |
+
" <td>Fishermead</td>\n",
|
| 118 |
+
" <td>Milton Keynes</td>\n",
|
| 119 |
+
" <td>mk6 2lp</td>\n",
|
| 120 |
+
" <td>I run outdoor learning at my school, we are in...</td>\n",
|
| 121 |
+
" </tr>\n",
|
| 122 |
+
" <tr>\n",
|
| 123 |
+
" <th>4</th>\n",
|
| 124 |
+
" <td>312802</td>\n",
|
| 125 |
+
" <td>4/1/25 10:42</td>\n",
|
| 126 |
+
" <td>April Community Collection: Gardening Bundle G...</td>\n",
|
| 127 |
+
" <td>Amy Smith</td>\n",
|
| 128 |
+
" <td>amysmith@ourladyofgracercprimaryschool.co.uk</td>\n",
|
| 129 |
+
" <td>Our Lady of Grace RC Primary School</td>\n",
|
| 130 |
+
" <td>Highfield road</td>\n",
|
| 131 |
+
" <td>Prestwich</td>\n",
|
| 132 |
+
" <td>Bury</td>\n",
|
| 133 |
+
" <td>m25 0as</td>\n",
|
| 134 |
+
" <td>Our school is passionate about hands-on learni...</td>\n",
|
| 135 |
+
" </tr>\n",
|
| 136 |
+
" </tbody>\n",
|
| 137 |
+
"</table>\n",
|
| 138 |
+
"</div>"
|
| 139 |
+
],
|
| 140 |
+
"text/plain": [
|
| 141 |
+
" Id Date/Time Requested \\\n",
|
| 142 |
+
"0 312781 4/1/25 8:08 \n",
|
| 143 |
+
"1 312783 4/1/25 8:32 \n",
|
| 144 |
+
"2 312790 4/1/25 9:22 \n",
|
| 145 |
+
"3 312801 4/1/25 10:37 \n",
|
| 146 |
+
"4 312802 4/1/25 10:42 \n",
|
| 147 |
+
"\n",
|
| 148 |
+
" Giveaway title Customer Name \\\n",
|
| 149 |
+
"0 April Community Collection: Gardening Bundle G... Kate Thompson \n",
|
| 150 |
+
"1 April Community Collection: Gardening Bundle G... Shona Wilson \n",
|
| 151 |
+
"2 April Community Collection: Gardening Bundle G... Charlotte Coldham-Wood \n",
|
| 152 |
+
"3 April Community Collection: Gardening Bundle G... Erica Bowen \n",
|
| 153 |
+
"4 April Community Collection: Gardening Bundle G... Amy Smith \n",
|
| 154 |
+
"\n",
|
| 155 |
+
" Email_address \\\n",
|
| 156 |
+
"0 kate.thompson@fhhschools.com \n",
|
| 157 |
+
"1 shonaedment1994@gmail.com \n",
|
| 158 |
+
"2 daveandcharwood@gmail.com \n",
|
| 159 |
+
"3 erica.bowen@thewillowseyc.org.uk \n",
|
| 160 |
+
"4 amysmith@ourladyofgracercprimaryschool.co.uk \n",
|
| 161 |
+
"\n",
|
| 162 |
+
" School_name Postal_address \\\n",
|
| 163 |
+
"0 Fountain Head House School Church Road \n",
|
| 164 |
+
"1 S4YC @ Little Owls Gadbrook Road \n",
|
| 165 |
+
"2 Archbishop Benson School Archbishop Benson School \n",
|
| 166 |
+
"3 The Willows School Fishermead Boulevard \n",
|
| 167 |
+
"4 Our Lady of Grace RC Primary School Highfield road \n",
|
| 168 |
+
"\n",
|
| 169 |
+
" Address_line_2 Address_city Postcode \\\n",
|
| 170 |
+
"0 NaN Saltash PL12 4AE \n",
|
| 171 |
+
"1 NaN Northwich CW9 7JL \n",
|
| 172 |
+
"2 Bodmin Road Truro TR1 1BN \n",
|
| 173 |
+
"3 Fishermead Milton Keynes mk6 2lp \n",
|
| 174 |
+
"4 Prestwich Bury m25 0as \n",
|
| 175 |
+
"\n",
|
| 176 |
+
" Additional_info \n",
|
| 177 |
+
"0 In our school we have an Outdoors Instructor w... \n",
|
| 178 |
+
"1 We are currently working on our outdoor provis... \n",
|
| 179 |
+
"2 I would use this bundle with our SEN children,... \n",
|
| 180 |
+
"3 I run outdoor learning at my school, we are in... \n",
|
| 181 |
+
"4 Our school is passionate about hands-on learni... "
|
| 182 |
+
]
|
| 183 |
+
},
|
| 184 |
+
"execution_count": 3,
|
| 185 |
+
"metadata": {},
|
| 186 |
+
"output_type": "execute_result"
|
| 187 |
+
}
|
| 188 |
+
],
|
| 189 |
+
"source": [
|
| 190 |
+
"df.head()"
|
| 191 |
+
]
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"cell_type": "code",
|
| 195 |
+
"execution_count": null,
|
| 196 |
+
"id": "93946708-5ff0-4a1f-9e86-c29c702945c8",
|
| 197 |
+
"metadata": {},
|
| 198 |
+
"outputs": [],
|
| 199 |
+
"source": [
|
| 200 |
+
"for_supervised_labeling = df[["
|
| 201 |
+
]
|
| 202 |
+
}
|
| 203 |
+
],
|
| 204 |
+
"metadata": {
|
| 205 |
+
"kernelspec": {
|
| 206 |
+
"display_name": "Python 3 (ipykernel)",
|
| 207 |
+
"language": "python",
|
| 208 |
+
"name": "python3"
|
| 209 |
+
},
|
| 210 |
+
"language_info": {
|
| 211 |
+
"codemirror_mode": {
|
| 212 |
+
"name": "ipython",
|
| 213 |
+
"version": 3
|
| 214 |
+
},
|
| 215 |
+
"file_extension": ".py",
|
| 216 |
+
"mimetype": "text/x-python",
|
| 217 |
+
"name": "python",
|
| 218 |
+
"nbconvert_exporter": "python",
|
| 219 |
+
"pygments_lexer": "ipython3",
|
| 220 |
+
"version": "3.12.9"
|
| 221 |
+
}
|
| 222 |
+
},
|
| 223 |
+
"nbformat": 4,
|
| 224 |
+
"nbformat_minor": 5
|
| 225 |
+
}
|
notebooks/custom-ner-model/attribute_ruler/patterns
ADDED
|
Binary file (14.7 kB). View file
|
|
|
notebooks/custom-ner-model/config.cfg
ADDED
|
@@ -0,0 +1,269 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[paths]
|
| 2 |
+
train = null
|
| 3 |
+
dev = null
|
| 4 |
+
vectors = null
|
| 5 |
+
init_tok2vec = null
|
| 6 |
+
|
| 7 |
+
[system]
|
| 8 |
+
gpu_allocator = null
|
| 9 |
+
seed = 0
|
| 10 |
+
|
| 11 |
+
[nlp]
|
| 12 |
+
lang = "en"
|
| 13 |
+
pipeline = ["tok2vec","tagger","parser","senter","attribute_ruler","lemmatizer","ner"]
|
| 14 |
+
disabled = ["senter"]
|
| 15 |
+
before_creation = null
|
| 16 |
+
after_creation = null
|
| 17 |
+
after_pipeline_creation = null
|
| 18 |
+
batch_size = 256
|
| 19 |
+
tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
|
| 20 |
+
vectors = {"@vectors":"spacy.Vectors.v1"}
|
| 21 |
+
|
| 22 |
+
[components]
|
| 23 |
+
|
| 24 |
+
[components.attribute_ruler]
|
| 25 |
+
factory = "attribute_ruler"
|
| 26 |
+
scorer = {"@scorers":"spacy.attribute_ruler_scorer.v1"}
|
| 27 |
+
validate = false
|
| 28 |
+
|
| 29 |
+
[components.lemmatizer]
|
| 30 |
+
factory = "lemmatizer"
|
| 31 |
+
mode = "rule"
|
| 32 |
+
model = null
|
| 33 |
+
overwrite = false
|
| 34 |
+
scorer = {"@scorers":"spacy.lemmatizer_scorer.v1"}
|
| 35 |
+
|
| 36 |
+
[components.ner]
|
| 37 |
+
factory = "ner"
|
| 38 |
+
incorrect_spans_key = null
|
| 39 |
+
moves = null
|
| 40 |
+
scorer = {"@scorers":"spacy.ner_scorer.v1"}
|
| 41 |
+
update_with_oracle_cut_size = 100
|
| 42 |
+
|
| 43 |
+
[components.ner.model]
|
| 44 |
+
@architectures = "spacy.TransitionBasedParser.v2"
|
| 45 |
+
state_type = "ner"
|
| 46 |
+
extra_state_tokens = false
|
| 47 |
+
hidden_width = 64
|
| 48 |
+
maxout_pieces = 2
|
| 49 |
+
use_upper = true
|
| 50 |
+
nO = null
|
| 51 |
+
|
| 52 |
+
[components.ner.model.tok2vec]
|
| 53 |
+
@architectures = "spacy.Tok2Vec.v2"
|
| 54 |
+
|
| 55 |
+
[components.ner.model.tok2vec.embed]
|
| 56 |
+
@architectures = "spacy.MultiHashEmbed.v2"
|
| 57 |
+
width = 96
|
| 58 |
+
attrs = ["NORM","PREFIX","SUFFIX","SHAPE"]
|
| 59 |
+
rows = [5000,1000,2500,2500]
|
| 60 |
+
include_static_vectors = true
|
| 61 |
+
|
| 62 |
+
[components.ner.model.tok2vec.encode]
|
| 63 |
+
@architectures = "spacy.MaxoutWindowEncoder.v2"
|
| 64 |
+
width = 96
|
| 65 |
+
depth = 4
|
| 66 |
+
window_size = 1
|
| 67 |
+
maxout_pieces = 3
|
| 68 |
+
|
| 69 |
+
[components.parser]
|
| 70 |
+
factory = "parser"
|
| 71 |
+
learn_tokens = false
|
| 72 |
+
min_action_freq = 30
|
| 73 |
+
moves = null
|
| 74 |
+
scorer = {"@scorers":"spacy.parser_scorer.v1"}
|
| 75 |
+
update_with_oracle_cut_size = 100
|
| 76 |
+
|
| 77 |
+
[components.parser.model]
|
| 78 |
+
@architectures = "spacy.TransitionBasedParser.v2"
|
| 79 |
+
state_type = "parser"
|
| 80 |
+
extra_state_tokens = false
|
| 81 |
+
hidden_width = 64
|
| 82 |
+
maxout_pieces = 2
|
| 83 |
+
use_upper = true
|
| 84 |
+
nO = null
|
| 85 |
+
|
| 86 |
+
[components.parser.model.tok2vec]
|
| 87 |
+
@architectures = "spacy.Tok2VecListener.v1"
|
| 88 |
+
width = ${components.tok2vec.model.encode:width}
|
| 89 |
+
upstream = "tok2vec"
|
| 90 |
+
|
| 91 |
+
[components.senter]
|
| 92 |
+
factory = "senter"
|
| 93 |
+
overwrite = false
|
| 94 |
+
scorer = {"@scorers":"spacy.senter_scorer.v1"}
|
| 95 |
+
|
| 96 |
+
[components.senter.model]
|
| 97 |
+
@architectures = "spacy.Tagger.v2"
|
| 98 |
+
nO = null
|
| 99 |
+
normalize = false
|
| 100 |
+
|
| 101 |
+
[components.senter.model.tok2vec]
|
| 102 |
+
@architectures = "spacy.Tok2Vec.v2"
|
| 103 |
+
|
| 104 |
+
[components.senter.model.tok2vec.embed]
|
| 105 |
+
@architectures = "spacy.MultiHashEmbed.v2"
|
| 106 |
+
width = 16
|
| 107 |
+
attrs = ["NORM","PREFIX","SUFFIX","SHAPE","SPACY"]
|
| 108 |
+
rows = [1000,500,500,500,50]
|
| 109 |
+
include_static_vectors = true
|
| 110 |
+
|
| 111 |
+
[components.senter.model.tok2vec.encode]
|
| 112 |
+
@architectures = "spacy.MaxoutWindowEncoder.v2"
|
| 113 |
+
width = 16
|
| 114 |
+
depth = 2
|
| 115 |
+
window_size = 1
|
| 116 |
+
maxout_pieces = 2
|
| 117 |
+
|
| 118 |
+
[components.tagger]
|
| 119 |
+
factory = "tagger"
|
| 120 |
+
label_smoothing = 0.0
|
| 121 |
+
neg_prefix = "!"
|
| 122 |
+
overwrite = false
|
| 123 |
+
scorer = {"@scorers":"spacy.tagger_scorer.v1"}
|
| 124 |
+
|
| 125 |
+
[components.tagger.model]
|
| 126 |
+
@architectures = "spacy.Tagger.v2"
|
| 127 |
+
nO = null
|
| 128 |
+
normalize = false
|
| 129 |
+
|
| 130 |
+
[components.tagger.model.tok2vec]
|
| 131 |
+
@architectures = "spacy.Tok2VecListener.v1"
|
| 132 |
+
width = ${components.tok2vec.model.encode:width}
|
| 133 |
+
upstream = "tok2vec"
|
| 134 |
+
|
| 135 |
+
[components.tok2vec]
|
| 136 |
+
factory = "tok2vec"
|
| 137 |
+
|
| 138 |
+
[components.tok2vec.model]
|
| 139 |
+
@architectures = "spacy.Tok2Vec.v2"
|
| 140 |
+
|
| 141 |
+
[components.tok2vec.model.embed]
|
| 142 |
+
@architectures = "spacy.MultiHashEmbed.v2"
|
| 143 |
+
width = ${components.tok2vec.model.encode:width}
|
| 144 |
+
attrs = ["NORM","PREFIX","SUFFIX","SHAPE","SPACY","IS_SPACE"]
|
| 145 |
+
rows = [5000,1000,2500,2500,50,50]
|
| 146 |
+
include_static_vectors = true
|
| 147 |
+
|
| 148 |
+
[components.tok2vec.model.encode]
|
| 149 |
+
@architectures = "spacy.MaxoutWindowEncoder.v2"
|
| 150 |
+
width = 96
|
| 151 |
+
depth = 4
|
| 152 |
+
window_size = 1
|
| 153 |
+
maxout_pieces = 3
|
| 154 |
+
|
| 155 |
+
[corpora]
|
| 156 |
+
|
| 157 |
+
[corpora.dev]
|
| 158 |
+
@readers = "spacy.Corpus.v1"
|
| 159 |
+
path = ${paths.dev}
|
| 160 |
+
gold_preproc = false
|
| 161 |
+
max_length = 0
|
| 162 |
+
limit = 0
|
| 163 |
+
augmenter = null
|
| 164 |
+
|
| 165 |
+
[corpora.train]
|
| 166 |
+
@readers = "spacy.Corpus.v1"
|
| 167 |
+
path = ${paths.train}
|
| 168 |
+
gold_preproc = false
|
| 169 |
+
max_length = 0
|
| 170 |
+
limit = 0
|
| 171 |
+
augmenter = null
|
| 172 |
+
|
| 173 |
+
[training]
|
| 174 |
+
train_corpus = "corpora.train"
|
| 175 |
+
dev_corpus = "corpora.dev"
|
| 176 |
+
seed = ${system:seed}
|
| 177 |
+
gpu_allocator = ${system:gpu_allocator}
|
| 178 |
+
dropout = 0.1
|
| 179 |
+
accumulate_gradient = 1
|
| 180 |
+
patience = 5000
|
| 181 |
+
max_epochs = 0
|
| 182 |
+
max_steps = 100000
|
| 183 |
+
eval_frequency = 1000
|
| 184 |
+
frozen_components = []
|
| 185 |
+
before_to_disk = null
|
| 186 |
+
annotating_components = []
|
| 187 |
+
before_update = null
|
| 188 |
+
|
| 189 |
+
[training.batcher]
|
| 190 |
+
@batchers = "spacy.batch_by_words.v1"
|
| 191 |
+
discard_oversize = false
|
| 192 |
+
tolerance = 0.2
|
| 193 |
+
get_length = null
|
| 194 |
+
|
| 195 |
+
[training.batcher.size]
|
| 196 |
+
@schedules = "compounding.v1"
|
| 197 |
+
start = 100
|
| 198 |
+
stop = 1000
|
| 199 |
+
compound = 1.001
|
| 200 |
+
t = 0.0
|
| 201 |
+
|
| 202 |
+
[training.logger]
|
| 203 |
+
@loggers = "spacy.ConsoleLogger.v1"
|
| 204 |
+
progress_bar = false
|
| 205 |
+
|
| 206 |
+
[training.optimizer]
|
| 207 |
+
@optimizers = "Adam.v1"
|
| 208 |
+
beta1 = 0.9
|
| 209 |
+
beta2 = 0.999
|
| 210 |
+
L2_is_weight_decay = true
|
| 211 |
+
L2 = 0.01
|
| 212 |
+
grad_clip = 1.0
|
| 213 |
+
use_averages = true
|
| 214 |
+
eps = 0.00000001
|
| 215 |
+
learn_rate = 0.001
|
| 216 |
+
|
| 217 |
+
[training.score_weights]
|
| 218 |
+
tag_acc = 0.16
|
| 219 |
+
dep_uas = 0.0
|
| 220 |
+
dep_las = 0.16
|
| 221 |
+
dep_las_per_type = null
|
| 222 |
+
sents_p = null
|
| 223 |
+
sents_r = null
|
| 224 |
+
sents_f = 0.02
|
| 225 |
+
lemma_acc = 0.5
|
| 226 |
+
ents_f = 0.16
|
| 227 |
+
ents_p = 0.0
|
| 228 |
+
ents_r = 0.0
|
| 229 |
+
ents_per_type = null
|
| 230 |
+
speed = 0.0
|
| 231 |
+
|
| 232 |
+
[pretraining]
|
| 233 |
+
|
| 234 |
+
[initialize]
|
| 235 |
+
vocab_data = null
|
| 236 |
+
vectors = ${paths.vectors}
|
| 237 |
+
init_tok2vec = ${paths.init_tok2vec}
|
| 238 |
+
before_init = null
|
| 239 |
+
after_init = null
|
| 240 |
+
|
| 241 |
+
[initialize.components]
|
| 242 |
+
|
| 243 |
+
[initialize.components.ner]
|
| 244 |
+
|
| 245 |
+
[initialize.components.ner.labels]
|
| 246 |
+
@readers = "spacy.read_labels.v1"
|
| 247 |
+
path = "corpus/labels/ner.json"
|
| 248 |
+
require = false
|
| 249 |
+
|
| 250 |
+
[initialize.components.parser]
|
| 251 |
+
|
| 252 |
+
[initialize.components.parser.labels]
|
| 253 |
+
@readers = "spacy.read_labels.v1"
|
| 254 |
+
path = "corpus/labels/parser.json"
|
| 255 |
+
require = false
|
| 256 |
+
|
| 257 |
+
[initialize.components.tagger]
|
| 258 |
+
|
| 259 |
+
[initialize.components.tagger.labels]
|
| 260 |
+
@readers = "spacy.read_labels.v1"
|
| 261 |
+
path = "corpus/labels/tagger.json"
|
| 262 |
+
require = false
|
| 263 |
+
|
| 264 |
+
[initialize.lookups]
|
| 265 |
+
@misc = "spacy.LookupsDataLoader.v1"
|
| 266 |
+
lang = ${nlp.lang}
|
| 267 |
+
tables = ["lexeme_norm"]
|
| 268 |
+
|
| 269 |
+
[initialize.tokenizer]
|
notebooks/custom-ner-model/lemmatizer/lookups/lookups.bin
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
notebooks/custom-ner-model/meta.json
ADDED
|
@@ -0,0 +1,531 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"lang":"en",
|
| 3 |
+
"name":"core_web_md",
|
| 4 |
+
"version":"3.8.0",
|
| 5 |
+
"description":"English pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler, lemmatizer.",
|
| 6 |
+
"author":"Explosion",
|
| 7 |
+
"email":"contact@explosion.ai",
|
| 8 |
+
"url":"https://explosion.ai",
|
| 9 |
+
"license":"MIT",
|
| 10 |
+
"spacy_version":">=3.8.0,<3.9.0",
|
| 11 |
+
"spacy_git_version":"5010fcbd3",
|
| 12 |
+
"vectors":{
|
| 13 |
+
"width":300,
|
| 14 |
+
"vectors":20000,
|
| 15 |
+
"keys":684830,
|
| 16 |
+
"name":"en_vectors",
|
| 17 |
+
"mode":"default"
|
| 18 |
+
},
|
| 19 |
+
"labels":{
|
| 20 |
+
"tok2vec":[
|
| 21 |
+
|
| 22 |
+
],
|
| 23 |
+
"tagger":[
|
| 24 |
+
"$",
|
| 25 |
+
"''",
|
| 26 |
+
",",
|
| 27 |
+
"-LRB-",
|
| 28 |
+
"-RRB-",
|
| 29 |
+
".",
|
| 30 |
+
":",
|
| 31 |
+
"ADD",
|
| 32 |
+
"AFX",
|
| 33 |
+
"CC",
|
| 34 |
+
"CD",
|
| 35 |
+
"DT",
|
| 36 |
+
"EX",
|
| 37 |
+
"FW",
|
| 38 |
+
"HYPH",
|
| 39 |
+
"IN",
|
| 40 |
+
"JJ",
|
| 41 |
+
"JJR",
|
| 42 |
+
"JJS",
|
| 43 |
+
"LS",
|
| 44 |
+
"MD",
|
| 45 |
+
"NFP",
|
| 46 |
+
"NN",
|
| 47 |
+
"NNP",
|
| 48 |
+
"NNPS",
|
| 49 |
+
"NNS",
|
| 50 |
+
"PDT",
|
| 51 |
+
"POS",
|
| 52 |
+
"PRP",
|
| 53 |
+
"PRP$",
|
| 54 |
+
"RB",
|
| 55 |
+
"RBR",
|
| 56 |
+
"RBS",
|
| 57 |
+
"RP",
|
| 58 |
+
"SYM",
|
| 59 |
+
"TO",
|
| 60 |
+
"UH",
|
| 61 |
+
"VB",
|
| 62 |
+
"VBD",
|
| 63 |
+
"VBG",
|
| 64 |
+
"VBN",
|
| 65 |
+
"VBP",
|
| 66 |
+
"VBZ",
|
| 67 |
+
"WDT",
|
| 68 |
+
"WP",
|
| 69 |
+
"WP$",
|
| 70 |
+
"WRB",
|
| 71 |
+
"XX",
|
| 72 |
+
"_SP",
|
| 73 |
+
"``"
|
| 74 |
+
],
|
| 75 |
+
"parser":[
|
| 76 |
+
"ROOT",
|
| 77 |
+
"acl",
|
| 78 |
+
"acomp",
|
| 79 |
+
"advcl",
|
| 80 |
+
"advmod",
|
| 81 |
+
"agent",
|
| 82 |
+
"amod",
|
| 83 |
+
"appos",
|
| 84 |
+
"attr",
|
| 85 |
+
"aux",
|
| 86 |
+
"auxpass",
|
| 87 |
+
"case",
|
| 88 |
+
"cc",
|
| 89 |
+
"ccomp",
|
| 90 |
+
"compound",
|
| 91 |
+
"conj",
|
| 92 |
+
"csubj",
|
| 93 |
+
"csubjpass",
|
| 94 |
+
"dative",
|
| 95 |
+
"dep",
|
| 96 |
+
"det",
|
| 97 |
+
"dobj",
|
| 98 |
+
"expl",
|
| 99 |
+
"intj",
|
| 100 |
+
"mark",
|
| 101 |
+
"meta",
|
| 102 |
+
"neg",
|
| 103 |
+
"nmod",
|
| 104 |
+
"npadvmod",
|
| 105 |
+
"nsubj",
|
| 106 |
+
"nsubjpass",
|
| 107 |
+
"nummod",
|
| 108 |
+
"oprd",
|
| 109 |
+
"parataxis",
|
| 110 |
+
"pcomp",
|
| 111 |
+
"pobj",
|
| 112 |
+
"poss",
|
| 113 |
+
"preconj",
|
| 114 |
+
"predet",
|
| 115 |
+
"prep",
|
| 116 |
+
"prt",
|
| 117 |
+
"punct",
|
| 118 |
+
"quantmod",
|
| 119 |
+
"relcl",
|
| 120 |
+
"xcomp"
|
| 121 |
+
],
|
| 122 |
+
"attribute_ruler":[
|
| 123 |
+
|
| 124 |
+
],
|
| 125 |
+
"lemmatizer":[
|
| 126 |
+
|
| 127 |
+
],
|
| 128 |
+
"ner":[
|
| 129 |
+
"BENEFIT",
|
| 130 |
+
"CARDINAL",
|
| 131 |
+
"CONTEXT",
|
| 132 |
+
"DATE",
|
| 133 |
+
"EVENT",
|
| 134 |
+
"FAC",
|
| 135 |
+
"GPE",
|
| 136 |
+
"LANGUAGE",
|
| 137 |
+
"LAW",
|
| 138 |
+
"LOC",
|
| 139 |
+
"MONEY",
|
| 140 |
+
"NORP",
|
| 141 |
+
"ORDINAL",
|
| 142 |
+
"ORG",
|
| 143 |
+
"PERCENT",
|
| 144 |
+
"PERSON",
|
| 145 |
+
"PRODUCT",
|
| 146 |
+
"QUANTITY",
|
| 147 |
+
"TIME",
|
| 148 |
+
"USAGE",
|
| 149 |
+
"WORK_OF_ART"
|
| 150 |
+
]
|
| 151 |
+
},
|
| 152 |
+
"pipeline":[
|
| 153 |
+
"tok2vec",
|
| 154 |
+
"tagger",
|
| 155 |
+
"parser",
|
| 156 |
+
"attribute_ruler",
|
| 157 |
+
"lemmatizer",
|
| 158 |
+
"ner"
|
| 159 |
+
],
|
| 160 |
+
"components":[
|
| 161 |
+
"tok2vec",
|
| 162 |
+
"tagger",
|
| 163 |
+
"parser",
|
| 164 |
+
"senter",
|
| 165 |
+
"attribute_ruler",
|
| 166 |
+
"lemmatizer",
|
| 167 |
+
"ner"
|
| 168 |
+
],
|
| 169 |
+
"disabled":[
|
| 170 |
+
"senter"
|
| 171 |
+
],
|
| 172 |
+
"performance":{
|
| 173 |
+
"token_acc":0.9986194413,
|
| 174 |
+
"token_p":0.9956819193,
|
| 175 |
+
"token_r":0.9957659295,
|
| 176 |
+
"token_f":0.9957239226,
|
| 177 |
+
"tag_acc":0.9730728707,
|
| 178 |
+
"sents_p":0.9214095835,
|
| 179 |
+
"sents_r":0.8917474767,
|
| 180 |
+
"sents_f":0.9063359035,
|
| 181 |
+
"dep_uas":0.9184113666,
|
| 182 |
+
"dep_las":0.8997862569,
|
| 183 |
+
"dep_las_per_type":{
|
| 184 |
+
"prep":{
|
| 185 |
+
"p":0.856192445,
|
| 186 |
+
"r":0.8643141719,
|
| 187 |
+
"f":0.860234139
|
| 188 |
+
},
|
| 189 |
+
"det":{
|
| 190 |
+
"p":0.9772449727,
|
| 191 |
+
"r":0.9788387833,
|
| 192 |
+
"f":0.9780412287
|
| 193 |
+
},
|
| 194 |
+
"pobj":{
|
| 195 |
+
"p":0.9627618006,
|
| 196 |
+
"r":0.9675633222,
|
| 197 |
+
"f":0.9651565897
|
| 198 |
+
},
|
| 199 |
+
"nsubj":{
|
| 200 |
+
"p":0.9574741128,
|
| 201 |
+
"r":0.9479518072,
|
| 202 |
+
"f":0.9526891663
|
| 203 |
+
},
|
| 204 |
+
"aux":{
|
| 205 |
+
"p":0.9798437223,
|
| 206 |
+
"r":0.9823733642,
|
| 207 |
+
"f":0.9811069126
|
| 208 |
+
},
|
| 209 |
+
"advmod":{
|
| 210 |
+
"p":0.8539325843,
|
| 211 |
+
"r":0.8504122497,
|
| 212 |
+
"f":0.8521687814
|
| 213 |
+
},
|
| 214 |
+
"relcl":{
|
| 215 |
+
"p":0.7634485216,
|
| 216 |
+
"r":0.7775761974,
|
| 217 |
+
"f":0.7704476002
|
| 218 |
+
},
|
| 219 |
+
"root":{
|
| 220 |
+
"p":0.9189557631,
|
| 221 |
+
"r":0.8893726499,
|
| 222 |
+
"f":0.9039222259
|
| 223 |
+
},
|
| 224 |
+
"xcomp":{
|
| 225 |
+
"p":0.8849964614,
|
| 226 |
+
"r":0.8977027997,
|
| 227 |
+
"f":0.8913043478
|
| 228 |
+
},
|
| 229 |
+
"amod":{
|
| 230 |
+
"p":0.9162757363,
|
| 231 |
+
"r":0.9111111111,
|
| 232 |
+
"f":0.9136861255
|
| 233 |
+
},
|
| 234 |
+
"compound":{
|
| 235 |
+
"p":0.9158361625,
|
| 236 |
+
"r":0.9314435286,
|
| 237 |
+
"f":0.9235739135
|
| 238 |
+
},
|
| 239 |
+
"poss":{
|
| 240 |
+
"p":0.9729133226,
|
| 241 |
+
"r":0.9760466989,
|
| 242 |
+
"f":0.974477492
|
| 243 |
+
},
|
| 244 |
+
"ccomp":{
|
| 245 |
+
"p":0.7641807699,
|
| 246 |
+
"r":0.8368635438,
|
| 247 |
+
"f":0.7988723632
|
| 248 |
+
},
|
| 249 |
+
"attr":{
|
| 250 |
+
"p":0.9025182778,
|
| 251 |
+
"r":0.9343986543,
|
| 252 |
+
"f":0.9181818182
|
| 253 |
+
},
|
| 254 |
+
"case":{
|
| 255 |
+
"p":0.9763196843,
|
| 256 |
+
"r":0.9904904905,
|
| 257 |
+
"f":0.9833540373
|
| 258 |
+
},
|
| 259 |
+
"mark":{
|
| 260 |
+
"p":0.9025157233,
|
| 261 |
+
"r":0.9125596184,
|
| 262 |
+
"f":0.9075098814
|
| 263 |
+
},
|
| 264 |
+
"intj":{
|
| 265 |
+
"p":0.6685303514,
|
| 266 |
+
"r":0.6131868132,
|
| 267 |
+
"f":0.6396637371
|
| 268 |
+
},
|
| 269 |
+
"advcl":{
|
| 270 |
+
"p":0.6691251272,
|
| 271 |
+
"r":0.662553513,
|
| 272 |
+
"f":0.6658231051
|
| 273 |
+
},
|
| 274 |
+
"cc":{
|
| 275 |
+
"p":0.8341950222,
|
| 276 |
+
"r":0.8298050472,
|
| 277 |
+
"f":0.8319942439
|
| 278 |
+
},
|
| 279 |
+
"neg":{
|
| 280 |
+
"p":0.9505494505,
|
| 281 |
+
"r":0.9548419468,
|
| 282 |
+
"f":0.9526908636
|
| 283 |
+
},
|
| 284 |
+
"conj":{
|
| 285 |
+
"p":0.7675373134,
|
| 286 |
+
"r":0.7768126888,
|
| 287 |
+
"f":0.7721471471
|
| 288 |
+
},
|
| 289 |
+
"nsubjpass":{
|
| 290 |
+
"p":0.9198570699,
|
| 291 |
+
"r":0.9241025641,
|
| 292 |
+
"f":0.9219749296
|
| 293 |
+
},
|
| 294 |
+
"auxpass":{
|
| 295 |
+
"p":0.9512522361,
|
| 296 |
+
"r":0.9690205011,
|
| 297 |
+
"f":0.9600541638
|
| 298 |
+
},
|
| 299 |
+
"dobj":{
|
| 300 |
+
"p":0.9230109204,
|
| 301 |
+
"r":0.9429436608,
|
| 302 |
+
"f":0.9328708266
|
| 303 |
+
},
|
| 304 |
+
"nummod":{
|
| 305 |
+
"p":0.9362458725,
|
| 306 |
+
"r":0.9308080808,
|
| 307 |
+
"f":0.9335190579
|
| 308 |
+
},
|
| 309 |
+
"npadvmod":{
|
| 310 |
+
"p":0.7773527161,
|
| 311 |
+
"r":0.7218472469,
|
| 312 |
+
"f":0.7485724811
|
| 313 |
+
},
|
| 314 |
+
"prt":{
|
| 315 |
+
"p":0.8078817734,
|
| 316 |
+
"r":0.8817204301,
|
| 317 |
+
"f":0.8431876607
|
| 318 |
+
},
|
| 319 |
+
"pcomp":{
|
| 320 |
+
"p":0.872133426,
|
| 321 |
+
"r":0.8788515406,
|
| 322 |
+
"f":0.8754795954
|
| 323 |
+
},
|
| 324 |
+
"expl":{
|
| 325 |
+
"p":0.9829787234,
|
| 326 |
+
"r":0.9892933619,
|
| 327 |
+
"f":0.9861259338
|
| 328 |
+
},
|
| 329 |
+
"acl":{
|
| 330 |
+
"p":0.7368115942,
|
| 331 |
+
"r":0.6933987998,
|
| 332 |
+
"f":0.7144463182
|
| 333 |
+
},
|
| 334 |
+
"agent":{
|
| 335 |
+
"p":0.9036144578,
|
| 336 |
+
"r":0.9408602151,
|
| 337 |
+
"f":0.9218612818
|
| 338 |
+
},
|
| 339 |
+
"dative":{
|
| 340 |
+
"p":0.7525,
|
| 341 |
+
"r":0.6903669725,
|
| 342 |
+
"f":0.7200956938
|
| 343 |
+
},
|
| 344 |
+
"acomp":{
|
| 345 |
+
"p":0.9061784897,
|
| 346 |
+
"r":0.8979591837,
|
| 347 |
+
"f":0.9020501139
|
| 348 |
+
},
|
| 349 |
+
"dep":{
|
| 350 |
+
"p":0.3660377358,
|
| 351 |
+
"r":0.1574675325,
|
| 352 |
+
"f":0.2202043133
|
| 353 |
+
},
|
| 354 |
+
"csubj":{
|
| 355 |
+
"p":0.6830601093,
|
| 356 |
+
"r":0.7396449704,
|
| 357 |
+
"f":0.7102272727
|
| 358 |
+
},
|
| 359 |
+
"quantmod":{
|
| 360 |
+
"p":0.8675022381,
|
| 361 |
+
"r":0.7871649066,
|
| 362 |
+
"f":0.8253833049
|
| 363 |
+
},
|
| 364 |
+
"nmod":{
|
| 365 |
+
"p":0.7535211268,
|
| 366 |
+
"r":0.5868372943,
|
| 367 |
+
"f":0.6598150051
|
| 368 |
+
},
|
| 369 |
+
"appos":{
|
| 370 |
+
"p":0.7052341598,
|
| 371 |
+
"r":0.6663774403,
|
| 372 |
+
"f":0.6852554093
|
| 373 |
+
},
|
| 374 |
+
"predet":{
|
| 375 |
+
"p":0.8247011952,
|
| 376 |
+
"r":0.8884120172,
|
| 377 |
+
"f":0.8553719008
|
| 378 |
+
},
|
| 379 |
+
"preconj":{
|
| 380 |
+
"p":0.5652173913,
|
| 381 |
+
"r":0.6046511628,
|
| 382 |
+
"f":0.5842696629
|
| 383 |
+
},
|
| 384 |
+
"oprd":{
|
| 385 |
+
"p":0.8209459459,
|
| 386 |
+
"r":0.7253731343,
|
| 387 |
+
"f":0.7702060222
|
| 388 |
+
},
|
| 389 |
+
"parataxis":{
|
| 390 |
+
"p":0.5940860215,
|
| 391 |
+
"r":0.4793926247,
|
| 392 |
+
"f":0.5306122449
|
| 393 |
+
},
|
| 394 |
+
"meta":{
|
| 395 |
+
"p":0.7096774194,
|
| 396 |
+
"r":0.4230769231,
|
| 397 |
+
"f":0.5301204819
|
| 398 |
+
},
|
| 399 |
+
"csubjpass":{
|
| 400 |
+
"p":0.5,
|
| 401 |
+
"r":0.8333333333,
|
| 402 |
+
"f":0.625
|
| 403 |
+
}
|
| 404 |
+
},
|
| 405 |
+
"ents_p":0.8439829077,
|
| 406 |
+
"ents_r":0.8506610577,
|
| 407 |
+
"ents_f":0.8473088243,
|
| 408 |
+
"ents_per_type":{
|
| 409 |
+
"DATE":{
|
| 410 |
+
"p":0.8564771668,
|
| 411 |
+
"r":0.8752380952,
|
| 412 |
+
"f":0.8657560057
|
| 413 |
+
},
|
| 414 |
+
"GPE":{
|
| 415 |
+
"p":0.913153457,
|
| 416 |
+
"r":0.9062761506,
|
| 417 |
+
"f":0.909701806
|
| 418 |
+
},
|
| 419 |
+
"ORDINAL":{
|
| 420 |
+
"p":0.7737430168,
|
| 421 |
+
"r":0.8602484472,
|
| 422 |
+
"f":0.8147058824
|
| 423 |
+
},
|
| 424 |
+
"ORG":{
|
| 425 |
+
"p":0.801532567,
|
| 426 |
+
"r":0.8319194062,
|
| 427 |
+
"f":0.8164433459
|
| 428 |
+
},
|
| 429 |
+
"CARDINAL":{
|
| 430 |
+
"p":0.8072289157,
|
| 431 |
+
"r":0.8763376932,
|
| 432 |
+
"f":0.8403648803
|
| 433 |
+
},
|
| 434 |
+
"PERSON":{
|
| 435 |
+
"p":0.8630514706,
|
| 436 |
+
"r":0.919386423,
|
| 437 |
+
"f":0.8903286979
|
| 438 |
+
},
|
| 439 |
+
"NORP":{
|
| 440 |
+
"p":0.8883684621,
|
| 441 |
+
"r":0.9104,
|
| 442 |
+
"f":0.8992493086
|
| 443 |
+
},
|
| 444 |
+
"FAC":{
|
| 445 |
+
"p":0.35,
|
| 446 |
+
"r":0.3230769231,
|
| 447 |
+
"f":0.336
|
| 448 |
+
},
|
| 449 |
+
"LOC":{
|
| 450 |
+
"p":0.7026022305,
|
| 451 |
+
"r":0.601910828,
|
| 452 |
+
"f":0.6483704974
|
| 453 |
+
},
|
| 454 |
+
"TIME":{
|
| 455 |
+
"p":0.7201166181,
|
| 456 |
+
"r":0.7222222222,
|
| 457 |
+
"f":0.7211678832
|
| 458 |
+
},
|
| 459 |
+
"WORK_OF_ART":{
|
| 460 |
+
"p":0.5,
|
| 461 |
+
"r":0.3608247423,
|
| 462 |
+
"f":0.4191616766
|
| 463 |
+
},
|
| 464 |
+
"QUANTITY":{
|
| 465 |
+
"p":0.8057553957,
|
| 466 |
+
"r":0.6153846154,
|
| 467 |
+
"f":0.6978193146
|
| 468 |
+
},
|
| 469 |
+
"EVENT":{
|
| 470 |
+
"p":0.5666666667,
|
| 471 |
+
"r":0.2931034483,
|
| 472 |
+
"f":0.3863636364
|
| 473 |
+
},
|
| 474 |
+
"LAW":{
|
| 475 |
+
"p":0.5909090909,
|
| 476 |
+
"r":0.40625,
|
| 477 |
+
"f":0.4814814815
|
| 478 |
+
},
|
| 479 |
+
"MONEY":{
|
| 480 |
+
"p":0.9098557692,
|
| 481 |
+
"r":0.893742621,
|
| 482 |
+
"f":0.9017272186
|
| 483 |
+
},
|
| 484 |
+
"PERCENT":{
|
| 485 |
+
"p":0.9197431782,
|
| 486 |
+
"r":0.8774885145,
|
| 487 |
+
"f":0.8981191223
|
| 488 |
+
},
|
| 489 |
+
"LANGUAGE":{
|
| 490 |
+
"p":0.7692307692,
|
| 491 |
+
"r":0.625,
|
| 492 |
+
"f":0.6896551724
|
| 493 |
+
},
|
| 494 |
+
"PRODUCT":{
|
| 495 |
+
"p":0.6202531646,
|
| 496 |
+
"r":0.2322274882,
|
| 497 |
+
"f":0.3379310345
|
| 498 |
+
}
|
| 499 |
+
},
|
| 500 |
+
"speed":8701.0458781553
|
| 501 |
+
},
|
| 502 |
+
"sources":[
|
| 503 |
+
{
|
| 504 |
+
"name":"OntoNotes 5",
|
| 505 |
+
"url":"https://catalog.ldc.upenn.edu/LDC2013T19",
|
| 506 |
+
"license":"commercial (licensed by Explosion)",
|
| 507 |
+
"author":"Ralph Weischedel, Martha Palmer, Mitchell Marcus, Eduard Hovy, Sameer Pradhan, Lance Ramshaw, Nianwen Xue, Ann Taylor, Jeff Kaufman, Michelle Franchini, Mohammed El-Bachouti, Robert Belvin, Ann Houston"
|
| 508 |
+
},
|
| 509 |
+
{
|
| 510 |
+
"name":"ClearNLP Constituent-to-Dependency Conversion",
|
| 511 |
+
"url":"https://github.com/clir/clearnlp-guidelines/blob/master/md/components/dependency_conversion.md",
|
| 512 |
+
"license":"Citation provided for reference, no code packaged with model",
|
| 513 |
+
"author":"Emory University"
|
| 514 |
+
},
|
| 515 |
+
{
|
| 516 |
+
"name":"WordNet 3.0",
|
| 517 |
+
"url":"https://wordnet.princeton.edu/",
|
| 518 |
+
"author":"Princeton University",
|
| 519 |
+
"license":"WordNet 3.0 License"
|
| 520 |
+
},
|
| 521 |
+
{
|
| 522 |
+
"name":"Explosion Vectors (OSCAR 2109 + Wikipedia + OpenSubtitles + WMT News Crawl)",
|
| 523 |
+
"url":"https://github.com/explosion/spacy-vectors-builder",
|
| 524 |
+
"license":"CC0",
|
| 525 |
+
"author":"Explosion"
|
| 526 |
+
}
|
| 527 |
+
],
|
| 528 |
+
"requirements":[
|
| 529 |
+
|
| 530 |
+
]
|
| 531 |
+
}
|
notebooks/custom-ner-model/ner/cfg
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"moves":null,
|
| 3 |
+
"update_with_oracle_cut_size":100,
|
| 4 |
+
"multitasks":[
|
| 5 |
+
|
| 6 |
+
],
|
| 7 |
+
"min_action_freq":1,
|
| 8 |
+
"learn_tokens":false,
|
| 9 |
+
"beam_width":1,
|
| 10 |
+
"beam_density":0.0,
|
| 11 |
+
"beam_update_prob":0.0,
|
| 12 |
+
"incorrect_spans_key":null
|
| 13 |
+
}
|
notebooks/custom-ner-model/ner/moves
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
��moves��{"0":{},"1":{"ORG":56516,"DATE":40493,"PERSON":36534,"GPE":26745,"MONEY":15158,"CARDINAL":14109,"NORP":9641,"PERCENT":9199,"WORK_OF_ART":4488,"LOC":4055,"TIME":3678,"QUANTITY":3123,"FAC":3046,"EVENT":3021,"ORDINAL":2142,"PRODUCT":1787,"LAW":1624,"LANGUAGE":355,"CONTEXT":-1,"USAGE":-2,"BENEFIT":-3},"2":{"ORG":56516,"DATE":40493,"PERSON":36534,"GPE":26745,"MONEY":15158,"CARDINAL":14109,"NORP":9641,"PERCENT":9199,"WORK_OF_ART":4488,"LOC":4055,"TIME":3678,"QUANTITY":3123,"FAC":3046,"EVENT":3021,"ORDINAL":2142,"PRODUCT":1787,"LAW":1624,"LANGUAGE":355,"CONTEXT":-1,"USAGE":-2,"BENEFIT":-3},"3":{"ORG":56516,"DATE":40493,"PERSON":36534,"GPE":26745,"MONEY":15158,"CARDINAL":14109,"NORP":9641,"PERCENT":9199,"WORK_OF_ART":4488,"LOC":4055,"TIME":3678,"QUANTITY":3123,"FAC":3046,"EVENT":3021,"ORDINAL":2142,"PRODUCT":1787,"LAW":1624,"LANGUAGE":355,"CONTEXT":-1,"USAGE":-2,"BENEFIT":-3},"4":{"ORG":56516,"DATE":40493,"PERSON":36534,"GPE":26745,"MONEY":15158,"CARDINAL":14109,"NORP":9641,"PERCENT":9199,"WORK_OF_ART":4488,"LOC":4055,"TIME":3678,"QUANTITY":3123,"FAC":3046,"EVENT":3021,"ORDINAL":2142,"PRODUCT":1787,"LAW":1624,"LANGUAGE":355,"":1,"CONTEXT":-1,"USAGE":-2,"BENEFIT":-3},"5":{"":1}}�cfg��neg_key�
|
notebooks/custom-ner-model/parser/cfg
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"moves":null,
|
| 3 |
+
"update_with_oracle_cut_size":100,
|
| 4 |
+
"multitasks":[
|
| 5 |
+
|
| 6 |
+
],
|
| 7 |
+
"min_action_freq":30,
|
| 8 |
+
"learn_tokens":false,
|
| 9 |
+
"beam_width":1,
|
| 10 |
+
"beam_density":0.0,
|
| 11 |
+
"beam_update_prob":0.0,
|
| 12 |
+
"incorrect_spans_key":null
|
| 13 |
+
}
|
notebooks/custom-ner-model/parser/moves
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
��moves�{"0":{"":994332},"1":{"":999432},"2":{"det":172595,"nsubj":165748,"compound":116623,"amod":105184,"aux":86667,"punct":65478,"advmod":62763,"poss":36443,"mark":27941,"nummod":22598,"auxpass":15594,"prep":14001,"nsubjpass":13856,"neg":12357,"cc":10739,"nmod":9562,"advcl":9062,"npadvmod":8168,"quantmod":7101,"intj":6464,"ccomp":5896,"dobj":3427,"expl":3360,"dep":2871,"predet":1944,"parataxis":1837,"csubj":1428,"preconj":621,"pobj||prep":616,"attr":578,"meta":376,"advmod||conj":368,"dobj||xcomp":352,"acomp":284,"nsubj||ccomp":224,"dative":206,"advmod||xcomp":149,"dobj||ccomp":70,"csubjpass":64,"dobj||conj":62,"prep||conj":51,"acl":48,"prep||nsubj":41,"prep||dobj":36,"xcomp":34,"advmod||ccomp":32,"oprd":31},"3":{"punct":183790,"pobj":182191,"prep":174008,"dobj":89615,"conj":59687,"cc":51930,"ccomp":30385,"advmod":22861,"xcomp":21021,"relcl":20969,"advcl":19828,"attr":17741,"acomp":16922,"appos":15265,"case":13388,"acl":12085,"pcomp":10324,"dep":10116,"npadvmod":9796,"prt":8179,"agent":3903,"dative":3866,"nsubj":3470,"neg":2906,"amod":2839,"intj":2819,"nummod":2732,"oprd":2301,"parataxis":1261,"quantmod":319,"nmod":294,"acl||dobj":200,"prep||dobj":190,"prep||nsubj":162,"acl||nsubj":159,"appos||nsubj":145,"relcl||dobj":134,"relcl||nsubj":111,"aux":103,"expl":96,"meta":92,"appos||dobj":86,"preconj":71,"csubj":65,"prep||nsubjpass":55,"prep||advmod":54,"prep||acomp":53,"det":51,"nsubjpass":45,"relcl||pobj":42,"acl||nsubjpass":42,"mark":40,"auxpass":39,"prep||pobj":36,"relcl||nsubjpass":32,"appos||nsubjpass":31},"4":{"ROOT":111664}}�cfg��neg_key�
|
notebooks/custom-ner-model/senter/cfg
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"overwrite":false
|
| 3 |
+
}
|
notebooks/custom-ner-model/tagger/cfg
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"label_smoothing":0.0,
|
| 3 |
+
"labels":[
|
| 4 |
+
"$",
|
| 5 |
+
"''",
|
| 6 |
+
",",
|
| 7 |
+
"-LRB-",
|
| 8 |
+
"-RRB-",
|
| 9 |
+
".",
|
| 10 |
+
":",
|
| 11 |
+
"ADD",
|
| 12 |
+
"AFX",
|
| 13 |
+
"CC",
|
| 14 |
+
"CD",
|
| 15 |
+
"DT",
|
| 16 |
+
"EX",
|
| 17 |
+
"FW",
|
| 18 |
+
"HYPH",
|
| 19 |
+
"IN",
|
| 20 |
+
"JJ",
|
| 21 |
+
"JJR",
|
| 22 |
+
"JJS",
|
| 23 |
+
"LS",
|
| 24 |
+
"MD",
|
| 25 |
+
"NFP",
|
| 26 |
+
"NN",
|
| 27 |
+
"NNP",
|
| 28 |
+
"NNPS",
|
| 29 |
+
"NNS",
|
| 30 |
+
"PDT",
|
| 31 |
+
"POS",
|
| 32 |
+
"PRP",
|
| 33 |
+
"PRP$",
|
| 34 |
+
"RB",
|
| 35 |
+
"RBR",
|
| 36 |
+
"RBS",
|
| 37 |
+
"RP",
|
| 38 |
+
"SYM",
|
| 39 |
+
"TO",
|
| 40 |
+
"UH",
|
| 41 |
+
"VB",
|
| 42 |
+
"VBD",
|
| 43 |
+
"VBG",
|
| 44 |
+
"VBN",
|
| 45 |
+
"VBP",
|
| 46 |
+
"VBZ",
|
| 47 |
+
"WDT",
|
| 48 |
+
"WP",
|
| 49 |
+
"WP$",
|
| 50 |
+
"WRB",
|
| 51 |
+
"XX",
|
| 52 |
+
"_SP",
|
| 53 |
+
"``"
|
| 54 |
+
],
|
| 55 |
+
"neg_prefix":"!",
|
| 56 |
+
"overwrite":false
|
| 57 |
+
}
|
notebooks/custom-ner-model/tagger/model
ADDED
|
Binary file (19.8 kB). View file
|
|
|
notebooks/custom-ner-model/tok2vec/cfg
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
|
| 3 |
+
}
|
notebooks/custom-ner-model/tokenizer
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
��prefix_search��^§|^%|^=|^—|^–|^\+(?![0-9])|^…|^……|^,|^:|^;|^\!|^\?|^¿|^؟|^¡|^\(|^\)|^\[|^\]|^\{|^\}|^<|^>|^_|^#|^\*|^&|^。|^?|^!|^,|^、|^;|^:|^~|^·|^।|^،|^۔|^؛|^٪|^\.\.+|^…|^\'|^"|^”|^“|^`|^‘|^´|^’|^‚|^,|^„|^»|^«|^「|^」|^『|^』|^(|^)|^〔|^〕|^【|^】|^《|^》|^〈|^〉|^〈|^〉|^⟦|^⟧|^\$|^£|^€|^¥|^฿|^US\$|^C\$|^A\$|^₽|^﷼|^₴|^₠|^₡|^₢|^₣|^₤|^₥|^₦|^₧|^₨|^₩|^₪|^₫|^€|^₭|^₮|^₯|^₰|^₱|^₲|^₳|^₴|^₵|^₶|^₷|^₸|^₹|^₺|^₻|^₼|^₽|^₾|^₿|^[\u00A6\u00A9\u00AE\u00B0\u0482\u058D\u058E\u060E\u060F\u06DE\u06E9\u06FD\u06FE\u07F6\u09FA\u0B70\u0BF3-\u0BF8\u0BFA\u0C7F\u0D4F\u0D79\u0F01-\u0F03\u0F13\u0F15-\u0F17\u0F1A-\u0F1F\u0F34\u0F36\u0F38\u0FBE-\u0FC5\u0FC7-\u0FCC\u0FCE\u0FCF\u0FD5-\u0FD8\u109E\u109F\u1390-\u1399\u1940\u19DE-\u19FF\u1B61-\u1B6A\u1B74-\u1B7C\u2100\u2101\u2103-\u2106\u2108\u2109\u2114\u2116\u2117\u211E-\u2123\u2125\u2127\u2129\u212E\u213A\u213B\u214A\u214C\u214D\u214F\u218A\u218B\u2195-\u2199\u219C-\u219F\u21A1\u21A2\u21A4\u21A5\u21A7-\u21AD\u21AF-\u21CD\u21D0\u21D1\u21D3\u21D5-\u21F3\u2300-\u2307\u230C-\u231F\u2322-\u2328\u232B-\u237B\u237D-\u239A\u23B4-\u23DB\u23E2-\u2426\u2440-\u244A\u249C-\u24E9\u2500-\u25B6\u25B8-\u25C0\u25C2-\u25F7\u2600-\u266E\u2670-\u2767\u2794-\u27BF\u2800-\u28FF\u2B00-\u2B2F\u2B45\u2B46\u2B4D-\u2B73\u2B76-\u2B95\u2B98-\u2BC8\u2BCA-\u2BFE\u2CE5-\u2CEA\u2E80-\u2E99\u2E9B-\u2EF3\u2F00-\u2FD5\u2FF0-\u2FFB\u3004\u3012\u3013\u3020\u3036\u3037\u303E\u303F\u3190\u3191\u3196-\u319F\u31C0-\u31E3\u3200-\u321E\u322A-\u3247\u3250\u3260-\u327F\u328A-\u32B0\u32C0-\u32FE\u3300-\u33FF\u4DC0-\u4DFF\uA490-\uA4C6\uA828-\uA82B\uA836\uA837\uA839\uAA77-\uAA79\uFDFD\uFFE4\uFFE8\uFFED\uFFEE\uFFFC\uFFFD\U00010137-\U0001013F\U00010179-\U00010189\U0001018C-\U0001018E\U00010190-\U0001019B\U000101A0\U000101D0-\U000101FC\U00010877\U00010878\U00010AC8\U0001173F\U00016B3C-\U00016B3F\U00016B45\U0001BC9C\U0001D000-\U0001D0F5\U0001D100-\U0001D126\U0001D129-\U0001D164\U0001D16A-\U0001D16C\U0001D183\U0001D184\U0001D18C-\U0001D1A9\U0001D1AE-\U0001D1E8\U0001D200-\U0001D241\U0001D245\U0001D300-\U0001D356\U00
01D800-\U0001D9FF\U0001DA37-\U0001DA3A\U0001DA6D-\U0001DA74\U0001DA76-\U0001DA83\U0001DA85\U0001DA86\U0001ECAC\U0001F000-\U0001F02B\U0001F030-\U0001F093\U0001F0A0-\U0001F0AE\U0001F0B1-\U0001F0BF\U0001F0C1-\U0001F0CF\U0001F0D1-\U0001F0F5\U0001F110-\U0001F16B\U0001F170-\U0001F1AC\U0001F1E6-\U0001F202\U0001F210-\U0001F23B\U0001F240-\U0001F248\U0001F250\U0001F251\U0001F260-\U0001F265\U0001F300-\U0001F3FA\U0001F400-\U0001F6D4\U0001F6E0-\U0001F6EC\U0001F6F0-\U0001F6F9\U0001F700-\U0001F773\U0001F780-\U0001F7D8\U0001F800-\U0001F80B\U0001F810-\U0001F847\U0001F850-\U0001F859\U0001F860-\U0001F887\U0001F890-\U0001F8AD\U0001F900-\U0001F90B\U0001F910-\U0001F93E\U0001F940-\U0001F970\U0001F973-\U0001F976\U0001F97A\U0001F97C-\U0001F9A2\U0001F9B0-\U0001F9B9\U0001F9C0-\U0001F9C2\U0001F9D0-\U0001F9FF\U0001FA60-\U0001FA6D]�suffix_search�2�…$|……$|,$|:$|;$|\!$|\?$|¿$|؟$|¡$|\($|\)$|\[$|\]$|\{$|\}$|<$|>$|_$|#$|\*$|&$|。$|?$|!$|,$|、$|;$|:$|~$|·$|।$|،$|۔$|؛$|٪$|\.\.+$|…$|\'$|"$|”$|“$|`$|‘$|´$|’$|‚$|,$|„$|»$|«$|「$|」$|『$|』$|($|)$|〔$|〕$|【$|】$|《$|》$|〈$|〉$|〈$|〉$|⟦$|⟧$|[\u00A6\u00A9\u00AE\u00B0\u0482\u058D\u058E\u060E\u060F\u06DE\u06E9\u06FD\u06FE\u07F6\u09FA\u0B70\u0BF3-\u0BF8\u0BFA\u0C7F\u0D4F\u0D79\u0F01-\u0F03\u0F13\u0F15-\u0F17\u0F1A-\u0F1F\u0F34\u0F36\u0F38\u0FBE-\u0FC5\u0FC7-\u0FCC\u0FCE\u0FCF\u0FD5-\u0FD8\u109E\u109F\u1390-\u1399\u1940\u19DE-\u19FF\u1B61-\u1B6A\u1B74-\u1B7C\u2100\u2101\u2103-\u2106\u2108\u2109\u2114\u2116\u2117\u211E-\u2123\u2125\u2127\u2129\u212E\u213A\u213B\u214A\u214C\u214D\u214F\u218A\u218B\u2195-\u2199\u219C-\u219F\u21A1\u21A2\u21A4\u21A5\u21A7-\u21AD\u21AF-\u21CD\u21D0\u21D1\u21D3\u21D5-\u21F3\u2300-\u2307\u230C-\u231F\u2322-\u2328\u232B-\u237B\u237D-\u239A\u23B4-\u23DB\u23E2-\u2426\u2440-\u244A\u249C-\u24E9\u2500-\u25B6\u25B8-\u25C0\u25C2-\u25F7\u2600-\u266E\u2670-\u2767\u2794-\u27BF\u2800-\u28FF\u2B00-\u2B2F\u2B45\u2B46\u2B4D-\u2B73\u2B76-\u2B95\u2B98-\u2BC8\u2BCA-\u2BFE\u2CE5-\u2CEA\u2E80-\u2E99\u2E9B-\u2EF3\u2F00-\u2FD5\u2FF0-\u2FFB\u3004\u3012\u3013\u3020\u3036\u3
037\u303E\u303F\u3190\u3191\u3196-\u319F\u31C0-\u31E3\u3200-\u321E\u322A-\u3247\u3250\u3260-\u327F\u328A-\u32B0\u32C0-\u32FE\u3300-\u33FF\u4DC0-\u4DFF\uA490-\uA4C6\uA828-\uA82B\uA836\uA837\uA839\uAA77-\uAA79\uFDFD\uFFE4\uFFE8\uFFED\uFFEE\uFFFC\uFFFD\U00010137-\U0001013F\U00010179-\U00010189\U0001018C-\U0001018E\U00010190-\U0001019B\U000101A0\U000101D0-\U000101FC\U00010877\U00010878\U00010AC8\U0001173F\U00016B3C-\U00016B3F\U00016B45\U0001BC9C\U0001D000-\U0001D0F5\U0001D100-\U0001D126\U0001D129-\U0001D164\U0001D16A-\U0001D16C\U0001D183\U0001D184\U0001D18C-\U0001D1A9\U0001D1AE-\U0001D1E8\U0001D200-\U0001D241\U0001D245\U0001D300-\U0001D356\U0001D800-\U0001D9FF\U0001DA37-\U0001DA3A\U0001DA6D-\U0001DA74\U0001DA76-\U0001DA83\U0001DA85\U0001DA86\U0001ECAC\U0001F000-\U0001F02B\U0001F030-\U0001F093\U0001F0A0-\U0001F0AE\U0001F0B1-\U0001F0BF\U0001F0C1-\U0001F0CF\U0001F0D1-\U0001F0F5\U0001F110-\U0001F16B\U0001F170-\U0001F1AC\U0001F1E6-\U0001F202\U0001F210-\U0001F23B\U0001F240-\U0001F248\U0001F250\U0001F251\U0001F260-\U0001F265\U0001F300-\U0001F3FA\U0001F400-\U0001F6D4\U0001F6E0-\U0001F6EC\U0001F6F0-\U0001F6F9\U0001F700-\U0001F773\U0001F780-\U0001F7D8\U0001F800-\U0001F80B\U0001F810-\U0001F847\U0001F850-\U0001F859\U0001F860-\U0001F887\U0001F890-\U0001F8AD\U0001F900-\U0001F90B\U0001F910-\U0001F93E\U0001F940-\U0001F970\U0001F973-\U0001F976\U0001F97A\U0001F97C-\U0001F9A2\U0001F9B0-\U0001F9B9\U0001F9C0-\U0001F9C2\U0001F9D0-\U0001F9FF\U0001FA60-\U0001FA6D]$|'s$|'S$|’s$|’S$|—$|–$|(?<=[0-9])\+$|(?<=°[FfCcKk])\.$|(?<=[0-9])(?:\$|£|€|¥|฿|US\$|C\$|A\$|₽|﷼|₴|₠|₡|₢|₣|₤|₥|₦|₧|₨|₩|₪|₫|€|₭|₮|₯|₰|₱|₲|₳|₴|₵|₶|₷|₸|₹|₺|₻|₼|₽|₾|₿)$|(?<=[0-9])(?:km|km²|km³|m|m²|m³|dm|dm²|dm³|cm|cm²|cm³|mm|mm²|mm³|ha|µm|nm|yd|in|ft|kg|g|mg|µg|t|lb|oz|m/s|km/h|kmh|mph|hPa|Pa|mbar|mb|MB|kb|KB|gb|GB|tb|TB|T|G|M|K|%|км|км²|км³|м|м²|м³|дм|дм²|дм³|см|см²|см³|мм|мм²|мм³|нм|кг|г|мг|м/с|км/ч|кПа|Па|мбар|Кб|КБ|кб|Мб|МБ|мб|Гб|ГБ|гб|Тб|ТБ|тбكم|كم²|كم³|م|م²|م³|سم|سم²|سم³|مم|مم²|مم³|كم|غرام|جرام|جم|كغ|ملغ|كوب|اكواب)$|(?<=[0-9a-z\uFF
41-\uFF5A\u00DF-\u00F6\u00F8-\u00FF\u0101\u0103\u0105\u0107\u0109\u010B\u010D\u010F\u0111\u0113\u0115\u0117\u0119\u011B\u011D\u011F\u0121\u0123\u0125\u0127\u0129\u012B\u012D\u012F\u0131\u0133\u0135\u0137\u0138\u013A\u013C\u013E\u0140\u0142\u0144\u0146\u0148\u0149\u014B\u014D\u014F\u0151\u0153\u0155\u0157\u0159\u015B\u015D\u015F\u0161\u0163\u0165\u0167\u0169\u016B\u016D\u016F\u0171\u0173\u0175\u0177\u017A\u017C\u017E\u017F\u0180\u0183\u0185\u0188\u018C\u018D\u0192\u0195\u0199-\u019B\u019E\u01A1\u01A3\u01A5\u01A8\u01AA\u01AB\u01AD\u01B0\u01B4\u01B6\u01B9\u01BA\u01BD-\u01BF\u01C6\u01C9\u01CC\u01CE\u01D0\u01D2\u01D4\u01D6\u01D8\u01DA\u01DC\u01DD\u01DF\u01E1\u01E3\u01E5\u01E7\u01E9\u01EB\u01ED\u01EF\u01F0\u01F3\u01F5\u01F9\u01FB\u01FD\u01FF\u0201\u0203\u0205\u0207\u0209\u020B\u020D\u020F\u0211\u0213\u0215\u0217\u0219\u021B\u021D\u021F\u0221\u0223\u0225\u0227\u0229\u022B\u022D\u022F\u0231\u0233-\u0239\u023C\u023F\u0240\u0242\u0247\u0249\u024B\u024D\u024F\u2C61\u2C65\u2C66\u2C68\u2C6A\u2C6C\u2C71\u2C73\u2C74\u2C76-\u2C7B\uA723\uA725\uA727\uA729\uA72B\uA72D\uA72F-\uA731\uA733\uA735\uA737\uA739\uA73B\uA73D\uA73F\uA741\uA743\uA745\uA747\uA749\uA74B\uA74D\uA74F\uA751\uA753\uA755\uA757\uA759\uA75B\uA75D\uA75F\uA761\uA763\uA765\uA767\uA769\uA76B\uA76D\uA76F\uA771-\uA778\uA77A\uA77C\uA77F\uA781\uA783\uA785\uA787\uA78C\uA78E\uA791\uA793-\uA795\uA797\uA799\uA79B\uA79D\uA79F\uA7A1\uA7A3\uA7A5\uA7A7\uA7A9\uA7AF\uA7B5\uA7B7\uA7B9\uA7FA\uAB30-\uAB5A\uAB60-\uAB64\u0250-\u02AF\u1D00-\u1D25\u1D6B-\u1D77\u1D79-\u1D9A\u1E01\u1E03\u1E05\u1E07\u1E09\u1E0B\u1E0D\u1E0F\u1E11\u1E13\u1E15\u1E17\u1E19\u1E1B\u1E1D\u1E1F\u1E21\u1E23\u1E25\u1E27\u1E29\u1E2B\u1E2D\u1E2F\u1E31\u1E33\u1E35\u1E37\u1E39\u1E3B\u1E3D\u1E3F\u1E41\u1E43\u1E45\u1E47\u1E49\u1E4B\u1E4D\u1E4F\u1E51\u1E53\u1E55\u1E57\u1E59\u1E5B\u1E5D\u1E5F\u1E61\u1E63\u1E65\u1E67\u1E69\u1E6B\u1E6D\u1E6F\u1E71\u1E73\u1E75\u1E77\u1E79\u1E7B\u1E7D\u1E7F\u1E81\u1E83\u1E85\u1E87\u1E89\u1E8B\u1E8D\u1E8F\u1E91\u1E93\u1E95-\u1E9D\u1E9F\u1EA1\u1EA3\u1EA5\
u1EA7\u1EA9\u1EAB\u1EAD\u1EAF\u1EB1\u1EB3\u1EB5\u1EB7\u1EB9\u1EBB\u1EBD\u1EBF\u1EC1\u1EC3\u1EC5\u1EC7\u1EC9\u1ECB\u1ECD\u1ECF\u1ED1\u1ED3\u1ED5\u1ED7\u1ED9\u1EDB\u1EDD\u1EDF\u1EE1\u1EE3\u1EE5\u1EE7\u1EE9\u1EEB\u1EED\u1EEF\u1EF1\u1EF3\u1EF5\u1EF7\u1EF9\u1EFB\u1EFD\u1EFFёа-яәөүҗңһα-ωάέίόώήύа-щюяіїєґѓѕјљњќѐѝ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F%²\-\+…|……|,|:|;|\!|\?|¿|؟|¡|\(|\)|\[|\]|\{|\}|<|>|_|#|\*|&|。|?|!|,|、|;|:|~|·|।|،|۔|؛|٪(?:\'"”“`‘´’‚,„»«「」『』()〔〕【】《》〈〉〈〉⟦⟧)])\.$|(?<=[A-Z\uFF21-\uFF3A\u00C0-\u00D6\u00D8-\u00DE\u0100\u0102\u0104\u0106\u0108\u010A\u010C\u010E\u0110\u0112\u0114\u0116\u0118\u011A\u011C\u011E\u0120\u0122\u0124\u0126\u0128\u012A\u012C\u012E\u0130\u0132\u0134\u0136\u0139\u013B\u013D\u013F\u0141\u0143\u0145\u0147\u014A\u014C\u014E\u0150\u0152\u0154\u0156\u0158\u015A\u015C\u015E\u0160\u0162\u0164\u0166\u0168\u016A\u016C\u016E\u0170\u0172\u0174\u0176\u0178\u0179\u017B\u017D\u0181\u0182\u0184\u0186\u0187\u0189-\u018B\u018E-\u0191\u0193\u0194\u0196-\u0198\u019C\u019D\u019F\u01A0\u01A2\u01A4\u01A6\u01A7\u01A9\u01AC\u01AE\u01AF\u01B1-\u01B3\u01B5\u01B7\u01B8\u01BC\u01C4\u01C7\u01CA\u01CD\u01CF\u01D1\u01D3\u01D5\u01D7\u01D9\u01DB\u01DE\u01E0\u01E2\u01E4\u01E6\u01E8\u01EA\u01EC\u01EE\u01F1\u01F4\u01F6-\u01F8\u01FA\u01FC\u01FE\u0200\u020
2\u0204\u0206\u0208\u020A\u020C\u020E\u0210\u0212\u0214\u0216\u0218\u021A\u021C\u021E\u0220\u0222\u0224\u0226\u0228\u022A\u022C\u022E\u0230\u0232\u023A\u023B\u023D\u023E\u0241\u0243-\u0246\u0248\u024A\u024C\u024E\u2C60\u2C62-\u2C64\u2C67\u2C69\u2C6B\u2C6D-\u2C70\u2C72\u2C75\u2C7E\u2C7F\uA722\uA724\uA726\uA728\uA72A\uA72C\uA72E\uA732\uA734\uA736\uA738\uA73A\uA73C\uA73E\uA740\uA742\uA744\uA746\uA748\uA74A\uA74C\uA74E\uA750\uA752\uA754\uA756\uA758\uA75A\uA75C\uA75E\uA760\uA762\uA764\uA766\uA768\uA76A\uA76C\uA76E\uA779\uA77B\uA77D\uA77E\uA780\uA782\uA784\uA786\uA78B\uA78D\uA790\uA792\uA796\uA798\uA79A\uA79C\uA79E\uA7A0\uA7A2\uA7A4\uA7A6\uA7A8\uA7AA-\uA7AE\uA7B0-\uA7B4\uA7B6\uA7B8\u1E00\u1E02\u1E04\u1E06\u1E08\u1E0A\u1E0C\u1E0E\u1E10\u1E12\u1E14\u1E16\u1E18\u1E1A\u1E1C\u1E1E\u1E20\u1E22\u1E24\u1E26\u1E28\u1E2A\u1E2C\u1E2E\u1E30\u1E32\u1E34\u1E36\u1E38\u1E3A\u1E3C\u1E3E\u1E40\u1E42\u1E44\u1E46\u1E48\u1E4A\u1E4C\u1E4E\u1E50\u1E52\u1E54\u1E56\u1E58\u1E5A\u1E5C\u1E5E\u1E60\u1E62\u1E64\u1E66\u1E68\u1E6A\u1E6C\u1E6E\u1E70\u1E72\u1E74\u1E76\u1E78\u1E7A\u1E7C\u1E7E\u1E80\u1E82\u1E84\u1E86\u1E88\u1E8A\u1E8C\u1E8E\u1E90\u1E92\u1E94\u1E9E\u1EA0\u1EA2\u1EA4\u1EA6\u1EA8\u1EAA\u1EAC\u1EAE\u1EB0\u1EB2\u1EB4\u1EB6\u1EB8\u1EBA\u1EBC\u1EBE\u1EC0\u1EC2\u1EC4\u1EC6\u1EC8\u1ECA\u1ECC\u1ECE\u1ED0\u1ED2\u1ED4\u1ED6\u1ED8\u1EDA\u1EDC\u1EDE\u1EE0\u1EE2\u1EE4\u1EE6\u1EE8\u1EEA\u1EEC\u1EEE\u1EF0\u1EF2\u1EF4\u1EF6\u1EF8\u1EFA\u1EFC\u1EFEЁА-ЯӘӨҮҖҢҺΑ-ΩΆΈΊΌΏΉΎА-ЩЮЯІЇЄҐЃЅЈЉЊЌЀЍ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U
0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F][A-Z\uFF21-\uFF3A\u00C0-\u00D6\u00D8-\u00DE\u0100\u0102\u0104\u0106\u0108\u010A\u010C\u010E\u0110\u0112\u0114\u0116\u0118\u011A\u011C\u011E\u0120\u0122\u0124\u0126\u0128\u012A\u012C\u012E\u0130\u0132\u0134\u0136\u0139\u013B\u013D\u013F\u0141\u0143\u0145\u0147\u014A\u014C\u014E\u0150\u0152\u0154\u0156\u0158\u015A\u015C\u015E\u0160\u0162\u0164\u0166\u0168\u016A\u016C\u016E\u0170\u0172\u0174\u0176\u0178\u0179\u017B\u017D\u0181\u0182\u0184\u0186\u0187\u0189-\u018B\u018E-\u0191\u0193\u0194\u0196-\u0198\u019C\u019D\u019F\u01A0\u01A2\u01A4\u01A6\u01A7\u01A9\u01AC\u01AE\u01AF\u01B1-\u01B3\u01B5\u01B7\u01B8\u01BC\u01C4\u01C7\u01CA\u01CD\u01CF\u01D1\u01D3\u01D5\u01D7\u01D9\u01DB\u01DE\u01E0\u01E2\u01E4\u01E6\u01E8\u01EA\u01EC\u01EE\u01F1\u01F4\u01F6-\u01F8\u01FA\u01FC\u01FE\u0200\u0202\u0204\u0206\u0208\u020A\u020C\u020E\u0210\u0212\u0214\u0216\u0218\u021A\u021C\u021E\u0220\u0222\u0224\u0226\u0228\u022A\u022C\u022E\u0230\u0232\u023A\u023B\u023D\u023E\u0241\u0243-\u0246\u0248\u024A\u024C\u024E\u2C60\u2C62-\u2C64\u2C67\u2C69\u2C6B\u2C6D-\u2C70\u2C72\u2C75\u2C7E\u2C7F\uA722\uA724\uA726\uA728\uA72A\uA72C\uA72E\uA732\uA734\uA736\uA738\uA73A\uA73C\uA73E\uA740\uA742\uA744\uA746\uA748\uA74A\uA74C\uA74E\uA750\uA752\uA754\uA756\uA758\uA75A\uA75C\uA75E\uA760\uA762\uA764\uA766\uA768\uA76A\uA76C\uA76E\uA779\uA77B\uA77D\uA77E\uA780\uA782\uA784\uA786\uA78B\uA78D\uA790\uA792\uA796\uA798\uA79A\uA79C\uA79E\uA7A0\uA7A2\uA7A4\uA7A6\uA7A8\uA7AA-\uA7AE\uA7B0-\uA7B4\uA7B6\uA7B8\u1E00\u1E02\u1E04\u1E06\u1E08\u1E0A\u1E0C\u1E0E\u1E10\u1E12\u1E14\u1E16\u1E18\u1E1A\u1E1C\u1E1E\u1E20\u1E22\u1E24\u1E26\u1E28\u1E2A\u1E2C\u1E2E\u1E30\u1E32\u1E34\u1E36\u1E38\u1E3A\u1E3C\u1E3E\u1E40\u1E42\u1E44\u1E46\u1E48\u1E4A\u1E4C\u1E4E\u1E50\u1E52\u1E54\u1E56\u1E58\u1E5A\u1E5C\u1E5E\
u1E60\u1E62\u1E64\u1E66\u1E68\u1E6A\u1E6C\u1E6E\u1E70\u1E72\u1E74\u1E76\u1E78\u1E7A\u1E7C\u1E7E\u1E80\u1E82\u1E84\u1E86\u1E88\u1E8A\u1E8C\u1E8E\u1E90\u1E92\u1E94\u1E9E\u1EA0\u1EA2\u1EA4\u1EA6\u1EA8\u1EAA\u1EAC\u1EAE\u1EB0\u1EB2\u1EB4\u1EB6\u1EB8\u1EBA\u1EBC\u1EBE\u1EC0\u1EC2\u1EC4\u1EC6\u1EC8\u1ECA\u1ECC\u1ECE\u1ED0\u1ED2\u1ED4\u1ED6\u1ED8\u1EDA\u1EDC\u1EDE\u1EE0\u1EE2\u1EE4\u1EE6\u1EE8\u1EEA\u1EEC\u1EEE\u1EF0\u1EF2\u1EF4\u1EF6\u1EF8\u1EFA\u1EFC\u1EFEЁА-ЯӘӨҮҖҢҺΑ-ΩΆΈΊΌΏΉΎА-ЩЮЯІЇЄҐЃЅЈЉЊЌЀЍ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F])\.$�infix_finditer�>�\.\.+|…|[\u00A6\u00A9\u00AE\u00B0\u0482\u058D\u058E\u060E\u060F\u06DE\u06E9\u06FD\u06FE\u07F6\u09FA\u0B70\u0BF3-\u0BF8\u0BFA\u0C7F\u0D4F\u0D79\u0F01-\u0F03\u0F13\u0F15-\u0F17\u0F1A-\u0F1F\u0F34\u0F36\u0F38\u0FBE-\u0FC5\u0FC7-\u0FCC\u0FCE\u0FCF\u0FD5-\u0FD8\u109E\u109F\u1390-\u1399\u1940\u19DE-\u19FF\u1B61-\u1B6A\u1B74-\u1B7C\u2100\u2101\u2103-\u2106\u2108\u2109\u2114\u2116\u2117\u211E-\u2123\u2125\u2127\u2129\u212E\u213A\u213B\u214A\u214C\u214D\u214F\u218A\u218B\u2195-\u2199\u219C-\u219F\u21A1\u21A2\u21A4\u21A5\u21A7-\u21AD\u21AF-\u21CD\u21D0\u21D1\u21D3\u21D5-\u21F3\u2300-\u2307\u230C-\u231F\u2322-\u2328\u232B-\u237B\u237D-\u239A\u23B4-\u23DB\u23E2-\u2426\u2440-\u244A\u249C-\u24E9\u2500-\u25B6\u25B8
-\u25C0\u25C2-\u25F7\u2600-\u266E\u2670-\u2767\u2794-\u27BF\u2800-\u28FF\u2B00-\u2B2F\u2B45\u2B46\u2B4D-\u2B73\u2B76-\u2B95\u2B98-\u2BC8\u2BCA-\u2BFE\u2CE5-\u2CEA\u2E80-\u2E99\u2E9B-\u2EF3\u2F00-\u2FD5\u2FF0-\u2FFB\u3004\u3012\u3013\u3020\u3036\u3037\u303E\u303F\u3190\u3191\u3196-\u319F\u31C0-\u31E3\u3200-\u321E\u322A-\u3247\u3250\u3260-\u327F\u328A-\u32B0\u32C0-\u32FE\u3300-\u33FF\u4DC0-\u4DFF\uA490-\uA4C6\uA828-\uA82B\uA836\uA837\uA839\uAA77-\uAA79\uFDFD\uFFE4\uFFE8\uFFED\uFFEE\uFFFC\uFFFD\U00010137-\U0001013F\U00010179-\U00010189\U0001018C-\U0001018E\U00010190-\U0001019B\U000101A0\U000101D0-\U000101FC\U00010877\U00010878\U00010AC8\U0001173F\U00016B3C-\U00016B3F\U00016B45\U0001BC9C\U0001D000-\U0001D0F5\U0001D100-\U0001D126\U0001D129-\U0001D164\U0001D16A-\U0001D16C\U0001D183\U0001D184\U0001D18C-\U0001D1A9\U0001D1AE-\U0001D1E8\U0001D200-\U0001D241\U0001D245\U0001D300-\U0001D356\U0001D800-\U0001D9FF\U0001DA37-\U0001DA3A\U0001DA6D-\U0001DA74\U0001DA76-\U0001DA83\U0001DA85\U0001DA86\U0001ECAC\U0001F000-\U0001F02B\U0001F030-\U0001F093\U0001F0A0-\U0001F0AE\U0001F0B1-\U0001F0BF\U0001F0C1-\U0001F0CF\U0001F0D1-\U0001F0F5\U0001F110-\U0001F16B\U0001F170-\U0001F1AC\U0001F1E6-\U0001F202\U0001F210-\U0001F23B\U0001F240-\U0001F248\U0001F250\U0001F251\U0001F260-\U0001F265\U0001F300-\U0001F3FA\U0001F400-\U0001F6D4\U0001F6E0-\U0001F6EC\U0001F6F0-\U0001F6F9\U0001F700-\U0001F773\U0001F780-\U0001F7D8\U0001F800-\U0001F80B\U0001F810-\U0001F847\U0001F850-\U0001F859\U0001F860-\U0001F887\U0001F890-\U0001F8AD\U0001F900-\U0001F90B\U0001F910-\U0001F93E\U0001F940-\U0001F970\U0001F973-\U0001F976\U0001F97A\U0001F97C-\U0001F9A2\U0001F9B0-\U0001F9B9\U0001F9C0-\U0001F9C2\U0001F9D0-\U0001F9FF\U0001FA60-\U0001FA6D]|(?<=[0-9])[+\-\*^](?=[0-9-])|(?<=[a-z\uFF41-\uFF5A\u00DF-\u00F6\u00F8-\u00FF\u0101\u0103\u0105\u0107\u0109\u010B\u010D\u010F\u0111\u0113\u0115\u0117\u0119\u011B\u011D\u011F\u0121\u0123\u0125\u0127\u0129\u012B\u012D\u012F\u0131\u0133\u0135\u0137\u0138\u013A\u013C\u013E\u0140\u0142\u0144\u0146
\u0148\u0149\u014B\u014D\u014F\u0151\u0153\u0155\u0157\u0159\u015B\u015D\u015F\u0161\u0163\u0165\u0167\u0169\u016B\u016D\u016F\u0171\u0173\u0175\u0177\u017A\u017C\u017E\u017F\u0180\u0183\u0185\u0188\u018C\u018D\u0192\u0195\u0199-\u019B\u019E\u01A1\u01A3\u01A5\u01A8\u01AA\u01AB\u01AD\u01B0\u01B4\u01B6\u01B9\u01BA\u01BD-\u01BF\u01C6\u01C9\u01CC\u01CE\u01D0\u01D2\u01D4\u01D6\u01D8\u01DA\u01DC\u01DD\u01DF\u01E1\u01E3\u01E5\u01E7\u01E9\u01EB\u01ED\u01EF\u01F0\u01F3\u01F5\u01F9\u01FB\u01FD\u01FF\u0201\u0203\u0205\u0207\u0209\u020B\u020D\u020F\u0211\u0213\u0215\u0217\u0219\u021B\u021D\u021F\u0221\u0223\u0225\u0227\u0229\u022B\u022D\u022F\u0231\u0233-\u0239\u023C\u023F\u0240\u0242\u0247\u0249\u024B\u024D\u024F\u2C61\u2C65\u2C66\u2C68\u2C6A\u2C6C\u2C71\u2C73\u2C74\u2C76-\u2C7B\uA723\uA725\uA727\uA729\uA72B\uA72D\uA72F-\uA731\uA733\uA735\uA737\uA739\uA73B\uA73D\uA73F\uA741\uA743\uA745\uA747\uA749\uA74B\uA74D\uA74F\uA751\uA753\uA755\uA757\uA759\uA75B\uA75D\uA75F\uA761\uA763\uA765\uA767\uA769\uA76B\uA76D\uA76F\uA771-\uA778\uA77A\uA77C\uA77F\uA781\uA783\uA785\uA787\uA78C\uA78E\uA791\uA793-\uA795\uA797\uA799\uA79B\uA79D\uA79F\uA7A1\uA7A3\uA7A5\uA7A7\uA7A9\uA7AF\uA7B5\uA7B7\uA7B9\uA7FA\uAB30-\uAB5A\uAB60-\uAB64\u0250-\u02AF\u1D00-\u1D25\u1D6B-\u1D77\u1D79-\u1D9A\u1E01\u1E03\u1E05\u1E07\u1E09\u1E0B\u1E0D\u1E0F\u1E11\u1E13\u1E15\u1E17\u1E19\u1E1B\u1E1D\u1E1F\u1E21\u1E23\u1E25\u1E27\u1E29\u1E2B\u1E2D\u1E2F\u1E31\u1E33\u1E35\u1E37\u1E39\u1E3B\u1E3D\u1E3F\u1E41\u1E43\u1E45\u1E47\u1E49\u1E4B\u1E4D\u1E4F\u1E51\u1E53\u1E55\u1E57\u1E59\u1E5B\u1E5D\u1E5F\u1E61\u1E63\u1E65\u1E67\u1E69\u1E6B\u1E6D\u1E6F\u1E71\u1E73\u1E75\u1E77\u1E79\u1E7B\u1E7D\u1E7F\u1E81\u1E83\u1E85\u1E87\u1E89\u1E8B\u1E8D\u1E8F\u1E91\u1E93\u1E95-\u1E9D\u1E9F\u1EA1\u1EA3\u1EA5\u1EA7\u1EA9\u1EAB\u1EAD\u1EAF\u1EB1\u1EB3\u1EB5\u1EB7\u1EB9\u1EBB\u1EBD\u1EBF\u1EC1\u1EC3\u1EC5\u1EC7\u1EC9\u1ECB\u1ECD\u1ECF\u1ED1\u1ED3\u1ED5\u1ED7\u1ED9\u1EDB\u1EDD\u1EDF\u1EE1\u1EE3\u1EE5\u1EE7\u1EE9\u1EEB\u1EED\u1EEF\u1EF1\u1EF3\u1EF5\u1EF7\u1EF9
\u1EFB\u1EFD\u1EFFёа-яәөүҗңһα-ωάέίόώήύа-щюяіїєґѓѕјљњќѐѝ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F\'"”“`‘´’‚,„»«「」『』()〔〕【】《》〈〉〈〉⟦⟧])\.(?=[A-Z\uFF21-\uFF3A\u00C0-\u00D6\u00D8-\u00DE\u0100\u0102\u0104\u0106\u0108\u010A\u010C\u010E\u0110\u0112\u0114\u0116\u0118\u011A\u011C\u011E\u0120\u0122\u0124\u0126\u0128\u012A\u012C\u012E\u0130\u0132\u0134\u0136\u0139\u013B\u013D\u013F\u0141\u0143\u0145\u0147\u014A\u014C\u014E\u0150\u0152\u0154\u0156\u0158\u015A\u015C\u015E\u0160\u0162\u0164\u0166\u0168\u016A\u016C\u016E\u0170\u0172\u0174\u0176\u0178\u0179\u017B\u017D\u0181\u0182\u0184\u0186\u0187\u0189-\u018B\u018E-\u0191\u0193\u0194\u0196-\u0198\u019C\u019D\u019F\u01A0\u01A2\u01A4\u01A6\u01A7\u01A9\u01AC\u01AE\u01AF\u01B1-\u01B3\u01B5\u01B7\u01B8\u01BC\u01C4\u01C7\u01CA\u01CD\u01CF\u01D1\u01D3\u01D5\u01D7\u01D9\u01DB\u01DE\u01E0\u01E2\u01E4\u01E6\u01E8\u01EA\u01EC\u01EE\u01F1\u01F4\u01F6-\u01F8\u01FA\u01FC\u01FE\u0200\u0202\u0204\u0206\u0208\u020A\u020C\u020E\u0210\u0212\u0214\u0216\u0218\u021A\u021C\u021E\u0220\u0222\u0224\u0226\u0228\u022A\u022C\u022E\u0230\u0232\u023A\u023B\u023D\u023E\u0241\u0243-\u0246\u0248\u024A\u024C\u024E\u2C60\u2C62-\u2C64\u2C67\u2C69\u2C6B\u2C6D-\u2C70\u2C72\u2C75\u2C7E\u2C7F\uA722\uA724\uA726\uA728\uA72A\uA72C\uA72E\uA732\uA734\uA73
6\uA738\uA73A\uA73C\uA73E\uA740\uA742\uA744\uA746\uA748\uA74A\uA74C\uA74E\uA750\uA752\uA754\uA756\uA758\uA75A\uA75C\uA75E\uA760\uA762\uA764\uA766\uA768\uA76A\uA76C\uA76E\uA779\uA77B\uA77D\uA77E\uA780\uA782\uA784\uA786\uA78B\uA78D\uA790\uA792\uA796\uA798\uA79A\uA79C\uA79E\uA7A0\uA7A2\uA7A4\uA7A6\uA7A8\uA7AA-\uA7AE\uA7B0-\uA7B4\uA7B6\uA7B8\u1E00\u1E02\u1E04\u1E06\u1E08\u1E0A\u1E0C\u1E0E\u1E10\u1E12\u1E14\u1E16\u1E18\u1E1A\u1E1C\u1E1E\u1E20\u1E22\u1E24\u1E26\u1E28\u1E2A\u1E2C\u1E2E\u1E30\u1E32\u1E34\u1E36\u1E38\u1E3A\u1E3C\u1E3E\u1E40\u1E42\u1E44\u1E46\u1E48\u1E4A\u1E4C\u1E4E\u1E50\u1E52\u1E54\u1E56\u1E58\u1E5A\u1E5C\u1E5E\u1E60\u1E62\u1E64\u1E66\u1E68\u1E6A\u1E6C\u1E6E\u1E70\u1E72\u1E74\u1E76\u1E78\u1E7A\u1E7C\u1E7E\u1E80\u1E82\u1E84\u1E86\u1E88\u1E8A\u1E8C\u1E8E\u1E90\u1E92\u1E94\u1E9E\u1EA0\u1EA2\u1EA4\u1EA6\u1EA8\u1EAA\u1EAC\u1EAE\u1EB0\u1EB2\u1EB4\u1EB6\u1EB8\u1EBA\u1EBC\u1EBE\u1EC0\u1EC2\u1EC4\u1EC6\u1EC8\u1ECA\u1ECC\u1ECE\u1ED0\u1ED2\u1ED4\u1ED6\u1ED8\u1EDA\u1EDC\u1EDE\u1EE0\u1EE2\u1EE4\u1EE6\u1EE8\u1EEA\u1EEC\u1EEE\u1EF0\u1EF2\u1EF4\u1EF6\u1EF8\u1EFA\u1EFC\u1EFEЁА-ЯӘӨҮҖҢҺΑ-ΩΆΈΊΌΏΉΎА-ЩЮЯІЇЄҐЃЅЈЉЊЌЀЍ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F\'"”“`‘´’‚,„»«「」『』()〔〕【】《》〈〉〈〉⟦⟧])|(?<=[A-Za-z\uFF21-\uFF3A\uFF41-\uFF5A\u00C0-\u00D6\u00D8-\u00F6\u00F8
-\u00FF\u0100-\u017F\u0180-\u01BF\u01C4-\u024F\u2C60-\u2C7B\u2C7E\u2C7F\uA722-\uA76F\uA771-\uA787\uA78B-\uA78E\uA790-\uA7B9\uA7FA\uAB30-\uAB5A\uAB60-\uAB64\u0250-\u02AF\u1D00-\u1D25\u1D6B-\u1D77\u1D79-\u1D9A\u1E00-\u1EFFёа-яЁА-ЯәөүҗңһӘӨҮҖҢҺα-ωάέίόώήύΑ-ΩΆΈΊΌΏΉΎа-щюяіїєґА-ЩЮЯІЇЄҐѓѕјљњќѐѝЃЅЈЉЊЌЀЍ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F]),(?=[A-Za-z\uFF21-\uFF3A\uFF41-\uFF5A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u017F\u0180-\u01BF\u01C4-\u024F\u2C60-\u2C7B\u2C7E\u2C7F\uA722-\uA76F\uA771-\uA787\uA78B-\uA78E\uA790-\uA7B9\uA7FA\uAB30-\uAB5A\uAB60-\uAB64\u0250-\u02AF\u1D00-\u1D25\u1D6B-\u1D77\u1D79-\u1D9A\u1E00-\u1EFFёа-яЁА-ЯәөүҗңһӘӨҮҖҢҺα-ωάέίόώήύΑ-ΩΆΈΊΌΏΉΎа-щюяіїєґА-ЩЮЯІЇЄҐѓѕјљњќѐѝЃЅЈЉЊЌЀЍ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-
\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F])|(?<=[A-Za-z\uFF21-\uFF3A\uFF41-\uFF5A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u017F\u0180-\u01BF\u01C4-\u024F\u2C60-\u2C7B\u2C7E\u2C7F\uA722-\uA76F\uA771-\uA787\uA78B-\uA78E\uA790-\uA7B9\uA7FA\uAB30-\uAB5A\uAB60-\uAB64\u0250-\u02AF\u1D00-\u1D25\u1D6B-\u1D77\u1D79-\u1D9A\u1E00-\u1EFFёа-яЁА-ЯәөүҗңһӘӨҮҖҢҺα-ωάέίόώήύΑ-ΩΆΈΊΌΏΉΎа-щюяіїєґА-ЩЮЯІЇЄҐѓѕјљњќѐѝЃЅЈЉЊЌЀЍ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F0-9])(?:-|–|—|--|---|——|~)(?=[A-Za-z\uFF21-\uFF3A\uFF41-\uFF5A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u017F\u0180-\u01BF\u01C4-\u024F\u2C60-\u2C7B\u2C7E\u2C7F\uA722-\uA76F\uA771-\uA787\uA78B-\uA78E\uA790-\uA7B9\uA7FA\uAB30-\uAB5A\uAB60-\uAB64\u0250-\u02AF\u1D00-\u1D25\u1D6B-\u1D77\u1D79-\u1D9A\u1E00-\u1EFFёа-яЁА-ЯәөүҗңһӘӨҮҖҢҺα-ωάέίόώήύΑ-ΩΆΈΊΌΏΉΎа-щюяіїєґА-ЩЮЯІЇЄҐѓѕјљњќѐѝЃЅЈЉЊЌЀЍ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C
00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F])|(?<=[A-Za-z\uFF21-\uFF3A\uFF41-\uFF5A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u017F\u0180-\u01BF\u01C4-\u024F\u2C60-\u2C7B\u2C7E\u2C7F\uA722-\uA76F\uA771-\uA787\uA78B-\uA78E\uA790-\uA7B9\uA7FA\uAB30-\uAB5A\uAB60-\uAB64\u0250-\u02AF\u1D00-\u1D25\u1D6B-\u1D77\u1D79-\u1D9A\u1E00-\u1EFFёа-яЁА-ЯәөүҗңһӘӨҮҖҢҺα-ωάέίόώήύΑ-ΩΆΈΊΌΏΉΎа-щюяіїєґА-ЩЮЯІЇЄҐѓѕјљњќѐѝЃЅЈЉЊЌЀЍ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F0-9])[:<>=/](?=[A-Za-z\uFF21-\uFF3A\uFF41-\uFF5A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u017F\u0180-\u01BF\u01C4-\u024F\u2C60-\u2C7B\u2C7E\u2C7F\uA722-\uA76F\uA771-\uA787\uA78B-\uA78E\uA790-\uA7B9\uA7FA\uAB30-\uAB5A\uAB60-\uAB64\u0250-\u02AF\u1D00-\u1D25\u1D6B-\u1D77\u1D79-\u1D9A\u1E00-\u1EFFёа-яЁА-ЯәөүҗңһӘӨҮҖҢҺα-ωάέίόώήύΑ-ΩΆΈΊΌΏΉΎа
-щюяіїєґА-ЩЮЯІЇЄҐѓѕјљњќѐѝЃЅЈЉЊЌЀЍ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F])�token_match��url_match�
|
| 2 |
+
��A�
|
| 3 |
+
� ��A� �'��A�'�''��A�''�'Cause��A�'CauseC�because�'Cos��A�'CosC�because�'Coz��A�'CozC�because�'Cuz��A�'CuzC�because�'S��A�'SC�'s�'bout��A�'boutC�about�'cause��A�'causeC�because�'cos��A�'cosC�because�'coz��A�'cozC�because�'cuz��A�'cuzC�because�'d��A�'d�'em��A�'emC�them�'ll��A�'llC�will�'nuff��A�'nuffC�enough�'re��A�'reC�are�'s��A�'sC�'s�(*_*)��A�(*_*)�(-8��A�(-8�(-:��A�(-:�(-;��A�(-;�(-_-)��A�(-_-)�(._.)��A�(._.)�(:��A�(:�(;��A�(;�(=��A�(=�(>_<)��A�(>_<)�(^_^)��A�(^_^)�(o:��A�(o:�(¬_¬)��A�(¬_¬)�(ಠ_ಠ)��A�(ಠ_ಠ)�(╯°□°)╯︵┻━┻��A�(╯°□°)╯︵┻━┻�)-:��A�)-:�):��A�):�-_-��A�-_-�-__-��A�-__-�._.��A�._.�0.0��A�0.0�0.o��A�0.o�0_0��A�0_0�0_o��A�0_o�10a.m.��A�10�A�a.m.C�a.m.�10am��A�10�A�amC�a.m.�10p.m.��A�10�A�p.m.C�p.m.�10pm��A�10�A�pmC�p.m.�11a.m.��A�11�A�a.m.C�a.m.�11am��A�11�A�amC�a.m.�11p.m.��A�11�A�p.m.C�p.m.�11pm��A�11�A�pmC�p.m.�12a.m.��A�12�A�a.m.C�a.m.�12am��A�12�A�amC�a.m.�12p.m.��A�12�A�p.m.C�p.m.�12pm��A�12�A�pmC�p.m.�1a.m.��A�1�A�a.m.C�a.m.�1am��A�1�A�amC�a.m.�1p.m.��A�1�A�p.m.C�p.m.�1pm��A�1�A�pmC�p.m.�2a.m.��A�2�A�a.m.C�a.m.�2am��A�2�A�amC�a.m.�2p.m.��A�2�A�p.m.C�p.m.�2pm��A�2�A�pmC�p.m.�3a.m.��A�3�A�a.m.C�a.m.�3am��A�3�A�amC�a.m.�3p.m.��A�3�A�p.m.C�p.m.�3pm��A�3�A�pmC�p.m.�4a.m.��A�4�A�a.m.C�a.m.�4am��A�4�A�amC�a.m.�4p.m.��A�4�A�p.m.C�p.m.�4pm��A�4�A�pmC�p.m.�5a.m.��A�5�A�a.m.C�a.m.�5am��A�5�A�amC�a.m.�5p.m.��A�5�A�p.m.C�p.m.�5pm��A�5�A�pmC�p.m.�6a.m.��A�6�A�a.m.C�a.m.�6am��A�6�A�amC�a.m.�6p.m.��A�6�A�p.m.C�p.m.�6pm��A�6�A�pmC�p.m.�7a.m.��A�7�A�a.m.C�a.m.�7am��A�7�A�amC�a.m.�7p.m.��A�7�A�p.m.C�p.m.�7pm��A�7�A�pmC�p.m.�8)��A�8)�8-)��A�8-)�8-D��A�8-D�8D��A�8D�8a.m.��A�8�A�a.m.C�a.m.�8am��A�8�A�amC�a.m.�8p.m.��A�8�A�p.m.C�p.m.�8pm��A�8�A�pmC�p.m.�9a.m.��A�9�A�a.m.C�a.m.�9am��A�9�A�amC�a.m.�9p.m.��A�9�A�p.m.C�p.m.�9pm��A�9�A�pmC�p.m.�:'(��A�:'(�:')��A�:')�:'-(��A�:'-(�:'-)��A�:'-)�:(��A�:(�:((��A�:((�:(((��A�:(((�:()��A�:()�:)��A�:)�:))��A�:))�:)))��A�:)))�:*��A�:*�:-(��A�:-(�:-((��A�:-((�:-(((��A�:-(((�:-)��A�:-)�:-))��A�:-))�:-)))��A�:-)))�:-*��A�:-*�:-/��A�:-/�:-0��A
�:-0�:-3��A�:-3�:->��A�:->�:-D��A�:-D�:-O��A�:-O�:-P��A�:-P�:-X��A�:-X�:-]��A�:-]�:-o��A�:-o�:-p��A�:-p�:-x��A�:-x�:-|��A�:-|�:-}��A�:-}�:/��A�:/�:0��A�:0�:1��A�:1�:3��A�:3�:>��A�:>�:D��A�:D�:O��A�:O�:P��A�:P�:X��A�:X�:]��A�:]�:o��A�:o�:o)��A�:o)�:p��A�:p�:x��A�:x�:|��A�:|�:}��A�:}�:’(��A�:’(�:’)��A�:’)�:’-(��A�:’-(�:’-)��A�:’-)�;)��A�;)�;-)��A�;-)�;-D��A�;-D�;D��A�;D�;_;��A�;_;�<.<��A�<.<�</3��A�</3�<3��A�<3�<33��A�<33�<333��A�<333�<space>��A�<space>�=(��A�=(�=)��A�=)�=/��A�=/�=3��A�=3�=D��A�=D�=[��A�=[�=]��A�=]�=|��A�=|�>.<��A�>.<�>.>��A�>.>�>:(��A�>:(�>:o��A�>:o�><(((*>��A�><(((*>�@_@��A�@_@�Adm.��A�Adm.�Ain't��A�Ai�A�n'tC�not�Aint��A�Ai�A�ntC�not�Ain’t��A�Ai�A�n’tC�not�Ak.��A�Ak.C�Alaska�Ala.��A�Ala.C�Alabama�Apr.��A�Apr.C�April�Aren't��A�AreC�are�A�n'tC�not�Arent��A�AreC�are�A�ntC�not�Aren’t��A�AreC�are�A�n’tC�not�Ariz.��A�Ariz.C�Arizona�Ark.��A�Ark.C�Arkansas�Aug.��A�Aug.C�August�Bros.��A�Bros.�C'mon��A�C'mC�come�A�on�C++��A�C++�Calif.��A�Calif.C�California�Can't��A�CaC�can�A�n'tC�not�Can't've��A�CaC�can�A�n'tC�not�A�'veC�have�Cannot��A�CanC�can�A�not�Cant��A�CaC�can�A�ntC�not�Cantve��A�CaC�can�A�ntC�not�A�veC�have�Can’t��A�CaC�can�A�n’tC�not�Can’t’ve��A�CaC�can�A�n’tC�not�A�’veC�have�Co.��A�Co.�Colo.��A�Colo.C�Colorado�Conn.��A�Conn.C�Connecticut�Corp.��A�Corp.�Could've��A�CouldC�could�A�'ve�Couldn't��A�CouldC�could�A�n'tC�not�Couldn't've��A�CouldC�could�A�n'tC�not�A�'veC�have�Couldnt��A�CouldC�could�A�ntC�not�Couldntve��A�CouldC�could�A�ntC�not�A�veC�have�Couldn’t��A�CouldC�could�A�n’tC�not�Couldn’t’ve��A�CouldC�could�A�n’tC�not�A�’veC�have�Couldve��A�CouldC�could�A�ve�Could’ve��A�CouldC�could�A�’ve�C’mon��A�C’mC�come�A�on�D.C.��A�D.C.�Daren't��A�DareC�dare�A�n'tC�not�Darent��A�DareC�dare�A�ntC�not�Daren’t��A�DareC�dare�A�n’tC�not�Dec.��A�Dec.C�December�Del.��A�Del.C�Delaware�Didn't��A�DidC�do�A�n'tC�not�Didn't've��A�DidC�do�A�n'tC�not�A�'veC�have�Didnt��A�DidC�do�A�ntC�not�Didntve��A�DidC�do�A�ntC�not�A�veC�have�Didn’t��A�DidC�do�A�n’tC�not�Didn’t’ve��A�Did
C�do�A�n’tC�not�A�’veC�have�Doesn't��A�DoesC�does�A�n'tC�not�Doesn't've��A�DoesC�does�A�n'tC�not�A�'veC�have�Doesnt��A�DoesC�does�A�ntC�not�Doesntve��A�DoesC�does�A�ntC�not�A�veC�have�Doesn’t��A�DoesC�does�A�n’tC�not�Doesn’t’ve��A�DoesC�does�A�n’tC�not�A�’veC�have�Doin��A�DoinC�doing�Doin'��A�Doin'C�doing�Doin’��A�Doin’C�doing�Don't��A�DoC�do�A�n'tC�not�Don't've��A�DoC�do�A�n'tC�not�A�'veC�have�Dont��A�DoC�do�A�ntC�not�Dontve��A�DoC�do�A�ntC�not�A�veC�have�Don’t��A�DoC�do�A�n’tC�not�Don’t’ve��A�DoC�do�A�n’tC�not�A�’veC�have�Dr.��A�Dr.�E.G.��A�E.G.�E.g.��A�E.g.�Feb.��A�Feb.C�February�Fla.��A�Fla.C�Florida�Ga.��A�Ga.C�Georgia�Gen.��A�Gen.�Goin��A�GoinC�going�Goin'��A�Goin'C�going�Goin’��A�Goin’C�going�Gonna��A�GonC�going�A�naC�to�Gotta��A�GotC�got�A�taC�to�Gov.��A�Gov.�Hadn't��A�HadC�have�A�n'tC�not�Hadn't've��A�HadC�have�A�n'tC�not�A�'veC�have�Hadnt��A�HadC�have�A�ntC�not�Hadntve��A�HadC�have�A�ntC�not�A�veC�have�Hadn’t��A�HadC�have�A�n’tC�not�Hadn’t’ve��A�HadC�have�A�n’tC�not�A�’veC�have�Hasn't��A�HasC�has�A�n'tC�not�Hasnt��A�HasC�has�A�ntC�not�Hasn’t��A�HasC�has�A�n’tC�not�Haven't��A�HaveC�have�A�n'tC�not�Havent��A�HaveC�have�A�ntC�not�Haven’t��A�HaveC�have�A�n’tC�not�Havin��A�HavinC�having�Havin'��A�Havin'C�having�Havin’��A�Havin’C�having�He'd��A�HeC�he�A�'dC�'d�He'd've��A�HeC�he�A�'dC�would�A�'veC�have�He'll��A�HeC�he�A�'llC�will�He'll've��A�HeC�he�A�'llC�will�A�'veC�have�He's��A�HeC�he�A�'sC�'s�Hed��A�HeC�he�A�dC�'d�Hedve��A�HeC�he�A�dC�would�A�veC�have�Hellve��A�HeC�he�A�llC�will�A�veC�have�Hes��A�HeC�he�A�s�He’d��A�HeC�he�A�’dC�'d�He’d’ve��A�HeC�he�A�’dC�would�A�’veC�have�He’ll��A�HeC�he�A�’llC�will�He’ll’ve��A�HeC�he�A�’llC�will�A�’veC�have�He’s��A�HeC�he�A�’sC�'s�How'd��A�HowC�how�A�'dC�'d�How'd've��A�HowC�how�A�'dC�would�A�'veC�have�How'd'y��A�HowC�how�A�'d�A�'yC�you�How'll��A�HowC�how�A�'llC�will�How'll've��A�HowC�how�A�'llC�will�A�'veC�have�How're��A�HowC�how�A�'reC�are�How's��A�HowC�how�A�'sC�'s�How've��A�HowC�how�A�'ve�Howd��A�HowC�how�A�dC�'d�Howdve��A
�HowC�how�A�dC�would�A�veC�have�Howll��A�HowC�how�A�llC�will�Howllve��A�HowC�how�A�llC�will�A�veC�have�Howre��A�HowC�how�A�reC�are�Hows��A�HowC�how�A�s�Howve��A�How�A�veC�have�How’d��A�HowC�how�A�’dC�'d�How’d’ve��A�HowC�how�A�’dC�would�A�’veC�have�How’d’y��A�HowC�how�A�’d�A�’yC�you�How’ll��A�HowC�how�A�’llC�will�How’ll’ve��A�HowC�how�A�’llC�will�A�’veC�have�How’re��A�HowC�how�A�’reC�are�How’s��A�HowC�how�A�’sC�'s�How’ve��A�HowC�how�A�’ve�I'd��A�IC�i�A�'dC�'d�I'd've��A�IC�i�A�'dC�would�A�'veC�have�I'll��A�IC�i�A�'llC�will�I'll've��A�IC�i�A�'llC�will�A�'veC�have�I'm��A�IC�i�A�'mC�am�I'ma��A�IC�i�A�'mC�am�A�aC�gonna�I've��A�IC�i�A�'veC�have�I.E.��A�I.E.�I.e.��A�I.e.�Ia.��A�Ia.C�Iowa�Id��A�IC�i�A�dC�'d�Id.��A�Id.C�Idaho�Idve��A�IC�i�A�dC�would�A�veC�have�Ill.��A�Ill.C�Illinois�Illve��A�IC�i�A�llC�will�A�veC�have�Im��A�IC�i�A�m�Ima��A�IC�i�A�mC�am�A�aC�gonna�Inc.��A�Inc.�Ind.��A�Ind.C�Indiana�Isn't��A�IsC�is�A�n'tC�not�Isnt��A�IsC�is�A�ntC�not�Isn’t��A�IsC�is�A�n’tC�not�It'd��A�ItC�it�A�'dC�'d�It'd've��A�ItC�it�A�'dC�would�A�'veC�have�It'll��A�ItC�it�A�'llC�will�It'll've��A�ItC�it�A�'llC�will�A�'veC�have�It's��A�ItC�it�A�'sC�'s�Itd��A�ItC�it�A�dC�'d�Itdve��A�ItC�it�A�dC�would�A�veC�have�Itll��A�ItC�it�A�llC�will�Itllve��A�ItC�it�A�llC�will�A�veC�have�It’d��A�ItC�it�A�’dC�'d�It’d’ve��A�ItC�it�A�’dC�would�A�’veC�have�It’ll��A�ItC�it�A�’llC�will�It’ll’ve��A�ItC�it�A�’llC�will�A�’veC�have�It’s��A�ItC�it�A�’sC�'s�Ive��A�IC�i�A�veC�have�I’d��A�IC�i�A�’dC�'d�I’d’ve��A�IC�i�A�’dC�would�A�’veC�have�I’ll��A�IC�i�A�’llC�will�I’ll’ve��A�IC�i�A�’llC�will�A�’veC�have�I’m��A�IC�i�A�’mC�am�I’ma��A�IC�i�A�’mC�am�A�aC�gonna�I’ve��A�IC�i�A�’veC�have�Jan.��A�Jan.C�January�Jr.��A�Jr.�Jul.��A�Jul.C�July�Jun.��A�Jun.C�June�Kan.��A�Kan.C�Kansas�Kans.��A�Kans.C�Kansas�Ky.��A�Ky.C�Kentucky�La.��A�La.C�Louisiana�Let's��A�LetC�let�A�'sC�us�Let’s��A�LetC�let�A�’sC�us�Lovin��A�LovinC�loving�Lovin'��A�Lovin'C�loving�Lovin’��A�Lovin’C�loving�Ltd.��A�Ltd.�Ma'am��A�Ma'amC�madam�Mar.��A�Mar.C�March�Mass.�
�A�Mass.C�Massachusetts�Mayn't��A�MayC�may�A�n'tC�not�Mayn't've��A�MayC�may�A�n'tC�not�A�'veC�have�Maynt��A�MayC�may�A�ntC�not�Mayntve��A�MayC�may�A�ntC�not�A�veC�have�Mayn’t��A�MayC�may�A�n’tC�not�Mayn’t’ve��A�MayC�may�A�n’tC�not�A�’veC�have�Ma’am��A�Ma’amC�madam�Md.��A�Md.�Messrs.��A�Messrs.�Mich.��A�Mich.C�Michigan�Might've��A�MightC�might�A�'ve�Mightn't��A�MightC�might�A�n'tC�not�Mightn't've��A�MightC�might�A�n'tC�not�A�'veC�have�Mightnt��A�MightC�might�A�ntC�not�Mightntve��A�MightC�might�A�ntC�not�A�veC�have�Mightn’t��A�MightC�might�A�n’tC�not�Mightn’t’ve��A�MightC�might�A�n’tC�not�A�’veC�have�Mightve��A�MightC�might�A�ve�Might’ve��A�MightC�might�A�’ve�Minn.��A�Minn.C�Minnesota�Miss.��A�Miss.C�Mississippi�Mo.��A�Mo.�Mont.��A�Mont.�Mr.��A�Mr.�Mrs.��A�Mrs.�Ms.��A�Ms.�Mt.��A�Mt.C�Mount�Must've��A�MustC�must�A�'ve�Mustn't��A�MustC�must�A�n'tC�not�Mustn't've��A�MustC�must�A�n'tC�not�A�'veC�have�Mustnt��A�MustC�must�A�ntC�not�Mustntve��A�MustC�must�A�ntC�not�A�veC�have�Mustn’t��A�MustC�must�A�n’tC�not�Mustn’t’ve��A�MustC�must�A�n’tC�not�A�’veC�have�Mustve��A�MustC�must�A�ve�Must’ve��A�MustC�must�A�’ve�N.C.��A�N.C.C�North Carolina�N.D.��A�N.D.C�North Dakota�N.H.��A�N.H.C�New Hampshire�N.J.��A�N.J.C�New Jersey�N.M.��A�N.M.C�New Mexico�N.Y.��A�N.Y.C�New 
York�Neb.��A�Neb.C�Nebraska�Nebr.��A�Nebr.C�Nebraska�Needn't��A�NeedC�need�A�n'tC�not�Needn't've��A�NeedC�need�A�n'tC�not�A�'veC�have�Neednt��A�NeedC�need�A�ntC�not�Needntve��A�NeedC�need�A�ntC�not�A�veC�have�Needn’t��A�NeedC�need�A�n’tC�not�Needn’t’ve��A�NeedC�need�A�n’tC�not�A�’veC�have�Nev.��A�Nev.C�Nevada�Not've��A�NotC�not�A�'veC�have�Nothin��A�NothinC�nothing�Nothin'��A�Nothin'C�nothing�Nothin’��A�Nothin’C�nothing�Notve��A�NotC�not�A�veC�have�Not’ve��A�NotC�not�A�’veC�have�Nov.��A�Nov.C�November�Nuthin��A�NuthinC�nothing�Nuthin'��A�Nuthin'C�nothing�Nuthin’��A�Nuthin’C�nothing�O'clock��A�O'clockC�o'clock�O.O��A�O.O�O.o��A�O.o�O_O��A�O_O�O_o��A�O_o�Oct.��A�Oct.C�October�Okla.��A�Okla.C�Oklahoma�Ol��A�OlC�old�Ol'��A�Ol'C�old�Ol’��A�Ol’C�old�Ore.��A�Ore.C�Oregon�Oughtn't��A�OughtC�ought�A�n'tC�not�Oughtn't've��A�OughtC�ought�A�n'tC�not�A�'veC�have�Oughtnt��A�OughtC�ought�A�ntC�not�Oughtntve��A�OughtC�ought�A�ntC�not�A�veC�have�Oughtn’t��A�OughtC�ought�A�n’tC�not�Oughtn’t’ve��A�OughtC�ought�A�n’tC�not�A�’veC�have�O’clock��A�O’clockC�o'clock�Pa.��A�Pa.C�Pennsylvania�Ph.D.��A�Ph.D.�Prof.��A�Prof.�Rep.��A�Rep.�Rev.��A�Rev.�S.C.��A�S.C.C�South 
Carolina�Sen.��A�Sen.�Sep.��A�Sep.C�September�Sept.��A�Sept.C�September�Shan't��A�ShaC�shall�A�n'tC�not�Shan't've��A�ShaC�shall�A�n'tC�not�A�'veC�have�Shant��A�ShaC�shall�A�ntC�not�Shantve��A�ShaC�shall�A�ntC�not�A�veC�have�Shan’t��A�ShaC�shall�A�n’tC�not�Shan’t’ve��A�ShaC�shall�A�n’tC�not�A�’veC�have�She'd��A�SheC�she�A�'dC�'d�She'd've��A�SheC�she�A�'dC�would�A�'veC�have�She'll��A�SheC�she�A�'llC�will�She'll've��A�SheC�she�A�'llC�will�A�'veC�have�She's��A�SheC�she�A�'sC�'s�Shedve��A�SheC�she�A�dC�would�A�veC�have�Shellve��A�SheC�she�A�llC�will�A�veC�have�Shes��A�SheC�she�A�s�She’d��A�SheC�she�A�’dC�'d�She’d’ve��A�SheC�she�A�’dC�would�A�’veC�have�She’ll��A�SheC�she�A�’llC�will�She’ll’ve��A�SheC�she�A�’llC�will�A�’veC�have�She’s��A�SheC�she�A�’sC�'s�Should've��A�ShouldC�should�A�'ve�Shouldn't��A�ShouldC�should�A�n'tC�not�Shouldn't've��A�ShouldC�should�A�n'tC�not�A�'veC�have�Shouldnt��A�ShouldC�should�A�ntC�not�Shouldntve��A�ShouldC�should�A�ntC�not�A�veC�have�Shouldn’t��A�ShouldC�should�A�n’tC�not�Shouldn’t’ve��A�ShouldC�should�A�n’tC�not�A�’veC�have�Shouldve��A�ShouldC�should�A�ve�Should’ve��A�ShouldC�should�A�’ve�Somethin��A�SomethinC�something�Somethin'��A�Somethin'C�something�Somethin’��A�Somethin’C�something�St.��A�St.�Tenn.��A�Tenn.C�Tennessee�That'd��A�ThatC�that�A�'dC�'d�That'd've��A�ThatC�that�A�'dC�would�A�'veC�have�That'll��A�ThatC�that�A�'llC�will�That'll've��A�ThatC�that�A�'llC�will�A�'veC�have�That's��A�ThatC�that�A�'sC�'s�Thatd��A�ThatC�that�A�dC�'d�Thatdve��A�ThatC�that�A�dC�would�A�veC�have�Thatll��A�ThatC�that�A�llC�will�Thatllve��A�ThatC�that�A�llC�will�A�veC�have�Thats��A�ThatC�that�A�s�That’d��A�ThatC�that�A�’dC�'d�That’d’ve��A�ThatC�that�A�’dC�would�A�’veC�have�That’ll��A�ThatC�that�A�’llC�will�That’ll’ve��A�ThatC�that�A�’llC�will�A�’veC�have�That’s��A�ThatC�that�A�’sC�'s�There'd��A�ThereC�there�A�'dC�'d�There'd've��A�ThereC�there�A�'dC�would�A�'veC�have�There'll��A�ThereC�there�A�'llC�will�There'll've��A�ThereC�there�A�'llC�will�A�'veC�have�The
re're��A�ThereC�there�A�'reC�are�There's��A�ThereC�there�A�'sC�'s�There've��A�ThereC�there�A�'ve�Thered��A�ThereC�there�A�dC�'d�Theredve��A�ThereC�there�A�dC�would�A�veC�have�Therell��A�ThereC�there�A�llC�will�Therellve��A�ThereC�there�A�llC�will�A�veC�have�Therere��A�ThereC�there�A�reC�are�Theres��A�ThereC�there�A�s�Thereve��A�There�A�veC�have�There’d��A�ThereC�there�A�’dC�'d�There’d’ve��A�ThereC�there�A�’dC�would�A�’veC�have�There’ll��A�ThereC�there�A�’llC�will�There’ll’ve��A�ThereC�there�A�’llC�will�A�’veC�have�There’re��A�ThereC�there�A�’reC�are�There’s��A�ThereC�there�A�’sC�'s�There’ve��A�ThereC�there�A�’ve�These'd��A�TheseC�these�A�'dC�'d�These'd've��A�TheseC�these�A�'dC�would�A�'veC�have�These'll��A�TheseC�these�A�'llC�will�These'll've��A�TheseC�these�A�'llC�will�A�'veC�have�These're��A�TheseC�these�A�'reC�are�These've��A�TheseC�these�A�'ve�Thesed��A�TheseC�these�A�dC�'d�Thesedve��A�TheseC�these�A�dC�would�A�veC�have�Thesell��A�TheseC�these�A�llC�will�Thesellve��A�TheseC�these�A�llC�will�A�veC�have�Thesere��A�TheseC�these�A�reC�are�Theseve��A�These�A�veC�have�These’d��A�TheseC�these�A�’dC�'d�These’d’ve��A�TheseC�these�A�’dC�would�A�’veC�have�These’ll��A�TheseC�these�A�’llC�will�These’ll’ve��A�TheseC�these�A�’llC�will�A�’veC�have�These’re��A�TheseC�these�A�’reC�are�These’ve��A�TheseC�these�A�’ve�They'd��A�TheyC�they�A�'dC�'d�They'd've��A�TheyC�they�A�'dC�would�A�'veC�have�They'll��A�TheyC�they�A�'llC�will�They'll've��A�TheyC�they�A�'llC�will�A�'veC�have�They're��A�TheyC�they�A�'reC�are�They've��A�TheyC�they�A�'veC�have�Theyd��A�TheyC�they�A�dC�'d�Theydve��A�TheyC�they�A�dC�would�A�veC�have�Theyll��A�TheyC�they�A�llC�will�Theyllve��A�TheyC�they�A�llC�will�A�veC�have�Theyre��A�TheyC�they�A�reC�are�Theyve��A�TheyC�they�A�veC�have�They’d��A�TheyC�they�A�’dC�'d�They’d’ve��A�TheyC�they�A�’dC�would�A�’veC�have�They’ll��A�TheyC�they�A�’llC�will�They’ll’ve��A�TheyC�they�A�’llC�will�A�’veC�have�They’re��A�TheyC�they�A�’reC�are�They’ve��A�TheyC�they�A�’veC�have�This'd��A
�ThisC�this�A�'dC�'d�This'd've��A�ThisC�this�A�'dC�would�A�'veC�have�This'll��A�ThisC�this�A�'llC�will�This'll've��A�ThisC�this�A�'llC�will�A�'veC�have�This's��A�ThisC�this�A�'sC�'s�Thisd��A�ThisC�this�A�dC�'d�Thisdve��A�ThisC�this�A�dC�would�A�veC�have�Thisll��A�ThisC�this�A�llC�will�Thisllve��A�ThisC�this�A�llC�will�A�veC�have�Thiss��A�ThisC�this�A�s�This’d��A�ThisC�this�A�’dC�'d�This’d’ve��A�ThisC�this�A�’dC�would�A�’veC�have�This’ll��A�ThisC�this�A�’llC�will�This’ll’ve��A�ThisC�this�A�’llC�will�A�’veC�have�This’s��A�ThisC�this�A�’sC�'s�Those'd��A�ThoseC�those�A�'dC�'d�Those'd've��A�ThoseC�those�A�'dC�would�A�'veC�have�Those'll��A�ThoseC�those�A�'llC�will�Those'll've��A�ThoseC�those�A�'llC�will�A�'veC�have�Those're��A�ThoseC�those�A�'reC�are�Those've��A�ThoseC�those�A�'ve�Thosed��A�ThoseC�those�A�dC�'d�Thosedve��A�ThoseC�those�A�dC�would�A�veC�have�Thosell��A�ThoseC�those�A�llC�will�Thosellve��A�ThoseC�those�A�llC�will�A�veC�have�Thosere��A�ThoseC�those�A�reC�are�Thoseve��A�Those�A�veC�have�Those’d��A�ThoseC�those�A�’dC�'d�Those’d’ve��A�ThoseC�those�A�’dC�would�A�’veC�have�Those’ll��A�ThoseC�those�A�’llC�will�Those’ll’ve��A�ThoseC�those�A�’llC�will�A�’veC�have�Those’re��A�ThoseC�those�A�’reC�are�Those’ve��A�ThoseC�those�A�’ve�V.V��A�V.V�V_V��A�V_V�Va.��A�Va.C�Virginia�Wash.��A�Wash.C�Washington�Wasn't��A�WasC�was�A�n'tC�not�Wasnt��A�WasC�was�A�ntC�not�Wasn’t��A�WasC�was�A�n’tC�not�We'd��A�WeC�we�A�'dC�'d�We'd've��A�WeC�we�A�'dC�would�A�'veC�have�We'll��A�WeC�we�A�'llC�will�We'll've��A�WeC�we�A�'llC�will�A�'veC�have�We're��A�WeC�we�A�'reC�are�We've��A�WeC�we�A�'veC�have�Wed��A�WeC�we�A�dC�'d�Wedve��A�WeC�we�A�dC�would�A�veC�have�Wellve��A�WeC�we�A�llC�will�A�veC�have�Weren't��A�WereC�were�A�n'tC�not�Werent��A�WereC�were�A�ntC�not�Weren’t��A�WereC�were�A�n’tC�not�Weve��A�WeC�we�A�veC�have�We’d��A�WeC�we�A�’dC�'d�We’d’ve��A�WeC�we�A�’dC�would�A�’veC�have�We’ll��A�WeC�we�A�’llC�will�We’ll’ve��A�WeC�we�A�’llC�will�A�’veC�have�We’re��A�WeC�we�A�’reC�are�We’ve��A�WeC�we
�A�’veC�have�What'd��A�WhatC�what�A�'dC�'d�What'd've��A�WhatC�what�A�'dC�would�A�'veC�have�What'll��A�WhatC�what�A�'llC�will�What'll've��A�WhatC�what�A�'llC�will�A�'veC�have�What're��A�WhatC�what�A�'reC�are�What's��A�WhatC�what�A�'sC�'s�What've��A�WhatC�what�A�'ve�Whatd��A�WhatC�what�A�dC�'d�Whatdve��A�WhatC�what�A�dC�would�A�veC�have�Whatll��A�WhatC�what�A�llC�will�Whatllve��A�WhatC�what�A�llC�will�A�veC�have�Whatre��A�WhatC�what�A�reC�are�Whats��A�WhatC�what�A�s�Whatve��A�What�A�veC�have�What’d��A�WhatC�what�A�’dC�'d�What’d’ve��A�WhatC�what�A�’dC�would�A�’veC�have�What’ll��A�WhatC�what�A�’llC�will�What’ll’ve��A�WhatC�what�A�’llC�will�A�’veC�have�What’re��A�WhatC�what�A�’reC�are�What’s��A�WhatC�what�A�’sC�'s�What’ve��A�WhatC�what�A�’ve�When'd��A�WhenC�when�A�'dC�'d�When'd've��A�WhenC�when�A�'dC�would�A�'veC�have�When'll��A�WhenC�when�A�'llC�will�When'll've��A�WhenC�when�A�'llC�will�A�'veC�have�When're��A�WhenC�when�A�'reC�are�When's��A�WhenC�when�A�'sC�'s�When've��A�WhenC�when�A�'ve�Whend��A�WhenC�when�A�dC�'d�Whendve��A�WhenC�when�A�dC�would�A�veC�have�Whenll��A�WhenC�when�A�llC�will�Whenllve��A�WhenC�when�A�llC�will�A�veC�have�Whenre��A�WhenC�when�A�reC�are�Whens��A�WhenC�when�A�s�Whenve��A�When�A�veC�have�When’d��A�WhenC�when�A�’dC�'d�When’d’ve��A�WhenC�when�A�’dC�would�A�’veC�have�When’ll��A�WhenC�when�A�’llC�will�When’ll’ve��A�WhenC�when�A�’llC�will�A�’veC�have�When’re��A�WhenC�when�A�’reC�are�When’s��A�WhenC�when�A�’sC�'s�When’ve��A�WhenC�when�A�’ve�Where'd��A�WhereC�where�A�'dC�'d�Where'd've��A�WhereC�where�A�'dC�would�A�'veC�have�Where'll��A�WhereC�where�A�'llC�will�Where'll've��A�WhereC�where�A�'llC�will�A�'veC�have�Where're��A�WhereC�where�A�'reC�are�Where's��A�WhereC�where�A�'sC�'s�Where've��A�WhereC�where�A�'ve�Whered��A�WhereC�where�A�dC�'d�Wheredve��A�WhereC�where�A�dC�would�A�veC�have�Wherell��A�WhereC�where�A�llC�will�Wherellve��A�WhereC�where�A�llC�will�A�veC�have�Wherere��A�WhereC�where�A�reC�are�Wheres��A�WhereC�where�A�s�Whereve��A�Where�A�veC�h
ave�Where’d��A�WhereC�where�A�’dC�'d�Where’d’ve��A�WhereC�where�A�’dC�would�A�’veC�have�Where’ll��A�WhereC�where�A�’llC�will�Where’ll’ve��A�WhereC�where�A�’llC�will�A�’veC�have�Where’re��A�WhereC�where�A�’reC�are�Where’s��A�WhereC�where�A�’sC�'s�Where’ve��A�WhereC�where�A�’ve�Who'd��A�WhoC�who�A�'dC�'d�Who'd've��A�WhoC�who�A�'dC�would�A�'veC�have�Who'll��A�WhoC�who�A�'llC�will�Who'll've��A�WhoC�who�A�'llC�will�A�'veC�have�Who're��A�WhoC�who�A�'reC�are�Who's��A�WhoC�who�A�'sC�'s�Who've��A�WhoC�who�A�'ve�Whod��A�WhoC�who�A�dC�'d�Whodve��A�WhoC�who�A�dC�would�A�veC�have�Wholl��A�WhoC�who�A�llC�will�Whollve��A�WhoC�who�A�llC�will�A�veC�have�Whos��A�WhoC�who�A�s�Whove��A�Who�A�veC�have�Who’d��A�WhoC�who�A�’dC�'d�Who’d’ve��A�WhoC�who�A�’dC�would�A�’veC�have�Who’ll��A�WhoC�who�A�’llC�will�Who’ll’ve��A�WhoC�who�A�’llC�will�A�’veC�have�Who’re��A�WhoC�who�A�’reC�are�Who’s��A�WhoC�who�A�’sC�'s�Who’ve��A�WhoC�who�A�’ve�Why'd��A�WhyC�why�A�'dC�'d�Why'd've��A�WhyC�why�A�'dC�would�A�'veC�have�Why'll��A�WhyC�why�A�'llC�will�Why'll've��A�WhyC�why�A�'llC�will�A�'veC�have�Why're��A�WhyC�why�A�'reC�are�Why's��A�WhyC�why�A�'sC�'s�Why've��A�WhyC�why�A�'ve�Whyd��A�WhyC�why�A�dC�'d�Whydve��A�WhyC�why�A�dC�would�A�veC�have�Whyll��A�WhyC�why�A�llC�will�Whyllve��A�WhyC�why�A�llC�will�A�veC�have�Whyre��A�WhyC�why�A�reC�are�Whys��A�WhyC�why�A�s�Whyve��A�Why�A�veC�have�Why’d��A�WhyC�why�A�’dC�'d�Why’d’ve��A�WhyC�why�A�’dC�would�A�’veC�have�Why’ll��A�WhyC�why�A�’llC�will�Why’ll’ve��A�WhyC�why�A�’llC�will�A�’veC�have�Why’re��A�WhyC�why�A�’reC�are�Why’s��A�WhyC�why�A�’sC�'s�Why’ve��A�WhyC�why�A�’ve�Wis.��A�Wis.C�Wisconsin�Won't��A�WoC�will�A�n'tC�not�Won't've��A�WoC�will�A�n'tC�not�A�'veC�have�Wont��A�WoC�will�A�ntC�not�Wontve��A�WoC�will�A�ntC�not�A�veC�have�Won’t��A�WoC�will�A�n’tC�not�Won’t’ve��A�WoC�will�A�n’tC�not�A�’veC�have�Would've��A�WouldC�would�A�'ve�Wouldn't��A�WouldC�would�A�n'tC�not�Wouldn't've��A�WouldC�would�A�n'tC�not�A�'veC�have�Wouldnt��A�WouldC�would�A�ntC�not�Wouldntve��A�Would
C�would�A�ntC�not�A�veC�have�Wouldn’t��A�WouldC�would�A�n’tC�not�Wouldn’t’ve��A�WouldC�would�A�n’tC�not�A�’veC�have�Wouldve��A�WouldC�would�A�ve�Would’ve��A�WouldC�would�A�’ve�XD��A�XD�XDD��A�XDD�You'd��A�YouC�you�A�'dC�'d�You'd've��A�YouC�you�A�'dC�would�A�'veC�have�You'll��A�YouC�you�A�'llC�will�You'll've��A�YouC�you�A�'llC�will�A�'veC�have�You're��A�YouC�you�A�'reC�are�You've��A�YouC�you�A�'veC�have�Youd��A�YouC�you�A�dC�'d�Youdve��A�YouC�you�A�dC�would�A�veC�have�Youll��A�YouC�you�A�llC�will�Youllve��A�YouC�you�A�llC�will�A�veC�have�Youre��A�YouC�you�A�reC�are�Youve��A�YouC�you�A�veC�have�You’d��A�YouC�you�A�’dC�'d�You’d’ve��A�YouC�you�A�’dC�would�A�’veC�have�You’ll��A�YouC�you�A�’llC�will�You’ll’ve��A�YouC�you�A�’llC�will�A�’veC�have�You’re��A�YouC�you�A�’reC�are�You’ve��A�YouC�you�A�’veC�have�[-:��A�[-:�[:��A�[:�[=��A�[=�\")��A�\")�\n��A�\n�\t��A�\t�]=��A�]=�^_^��A�^_^�^__^��A�^__^�^___^��A�^___^�a.��A�a.�a.m.��A�a.m.�ain't��A�ai�A�n'tC�not�aint��A�ai�A�ntC�not�ain’t��A�ai�A�n’tC�not�and/or��A�and/orC�and/or�aren't��A�areC�are�A�n'tC�not�arent��A�areC�are�A�ntC�not�aren’t��A�areC�are�A�n’tC�not�b.��A�b.�c'mon��A�c'mC�come�A�on�c.��A�c.�can't��A�caC�can�A�n'tC�not�can't've��A�caC�can�A�n'tC�not�A�'veC�have�cannot��A�can�A�not�cant��A�caC�can�A�ntC�not�cantve��A�caC�can�A�ntC�not�A�veC�have�can’t��A�caC�can�A�n’tC�not�can’t’ve��A�caC�can�A�n’tC�not�A�’veC�have�co.��A�co.�could've��A�couldC�could�A�'ve�couldn't��A�couldC�could�A�n'tC�not�couldn't've��A�couldC�could�A�n'tC�not�A�'veC�have�couldnt��A�couldC�could�A�ntC�not�couldntve��A�couldC�could�A�ntC�not�A�veC�have�couldn’t��A�couldC�could�A�n’tC�not�couldn’t’ve��A�couldC�could�A�n’tC�not�A�’veC�have�couldve��A�couldC�could�A�ve�could’ve��A�couldC�could�A�’ve�c’mon��A�c’mC�come�A�on�d.��A�d.�daren't��A�dareC�dare�A�n'tC�not�darent��A�dareC�dare�A�ntC�not�daren’t��A�dareC�dare�A�n’tC�not�didn't��A�didC�do�A�n'tC�not�didn't've��A�didC�do�A�n'tC�not�A�'veC�have�didnt��A�didC�do�A�ntC�not�didntve��A�didC�do�A�ntC�n
ot�A�veC�have�didn’t��A�didC�do�A�n’tC�not�didn’t’ve��A�didC�do�A�n’tC�not�A�’veC�have�doesn't��A�doesC�does�A�n'tC�not�doesn't've��A�doesC�does�A�n'tC�not�A�'veC�have�doesnt��A�doesC�does�A�ntC�not�doesntve��A�doesC�does�A�ntC�not�A�veC�have�doesn’t��A�doesC�does�A�n’tC�not�doesn’t’ve��A�doesC�does�A�n’tC�not�A�’veC�have�doin��A�doinC�doing�doin'��A�doin'C�doing�doin’��A�doin’C�doing�don't��A�doC�do�A�n'tC�not�don't've��A�doC�do�A�n'tC�not�A�'veC�have�dont��A�doC�do�A�ntC�not�dontve��A�doC�do�A�ntC�not�A�veC�have�don’t��A�doC�do�A�n’tC�not�don’t’ve��A�doC�do�A�n’tC�not�A�’veC�have�e.��A�e.�e.g.��A�e.g.�em��A�emC�them�f.��A�f.�g.��A�g.�goin��A�goinC�going�goin'��A�goin'C�going�goin’��A�goin’C�going�gonna��A�gonC�going�A�naC�to�gotta��A�got�A�taC�to�h.��A�h.�hadn't��A�hadC�have�A�n'tC�not�hadn't've��A�hadC�have�A�n'tC�not�A�'veC�have�hadnt��A�hadC�have�A�ntC�not�hadntve��A�hadC�have�A�ntC�not�A�veC�have�hadn’t��A�hadC�have�A�n’tC�not�hadn’t’ve��A�hadC�have�A�n’tC�not�A�’veC�have�hasn't��A�hasC�has�A�n'tC�not�hasnt��A�hasC�has�A�ntC�not�hasn’t��A�hasC�has�A�n’tC�not�haven't��A�haveC�have�A�n'tC�not�havent��A�haveC�have�A�ntC�not�haven’t��A�haveC�have�A�n’tC�not�havin��A�havinC�having�havin'��A�havin'C�having�havin’��A�havin’C�having�he'd��A�heC�he�A�'dC�'d�he'd've��A�heC�he�A�'dC�would�A�'veC�have�he'll��A�heC�he�A�'llC�will�he'll've��A�heC�he�A�'llC�will�A�'veC�have�he's��A�heC�he�A�'sC�'s�hed��A�heC�he�A�dC�'d�hedve��A�heC�he�A�dC�would�A�veC�have�hellve��A�heC�he�A�llC�will�A�veC�have�hes��A�heC�he�A�s�he’d��A�heC�he�A�’dC�'d�he’d’ve��A�heC�he�A�’dC�would�A�’veC�have�he’ll��A�heC�he�A�’llC�will�he’ll’ve��A�heC�he�A�’llC�will�A�’veC�have�he’s��A�heC�he�A�’sC�'s�how'd��A�howC�how�A�'dC�'d�how'd've��A�howC�how�A�'dC�would�A�'veC�have�how'd'y��A�how�A�'d�A�'yC�you�how'll��A�howC�how�A�'llC�will�how'll've��A�howC�how�A�'llC�will�A�'veC�have�how're��A�howC�how�A�'reC�are�how's��A�howC�how�A�'sC�'s�how've��A�howC�how�A�'ve�howd��A�howC�how�A�dC�'d�howdve��A�howC�how�A�dC�
would�A�veC�have�howll��A�howC�how�A�llC�will�howllve��A�howC�how�A�llC�will�A�veC�have�howre��A�howC�how�A�reC�are�hows��A�howC�how�A�s�howve��A�how�A�veC�have�how’d��A�howC�how�A�’dC�'d�how’d’ve��A�howC�how�A���dC�would�A�’veC�have�how’d’y��A�how�A�’d�A�’yC�you�how’ll��A�howC�how�A�’llC�will�how’ll’ve��A�howC�how�A�’llC�will�A�’veC�have�how’re��A�howC�how�A�’reC�are�how’s��A�howC�how�A�’sC�'s�how’ve��A�howC�how�A�’ve�i'd��A�iC�i�A�'dC�'d�i'd've��A�iC�i�A�'dC�would�A�'veC�have�i'll��A�iC�i�A�'llC�will�i'll've��A�iC�i�A�'llC�will�A�'veC�have�i'm��A�iC�i�A�'mC�am�i'ma��A�iC�i�A�'mC�am�A�aC�gonna�i've��A�iC�i�A�'veC�have�i.��A�i.�i.e.��A�i.e.�id��A�iC�i�A�dC�'d�idve��A�iC�i�A�dC�would�A�veC�have�illve��A�iC�i�A�llC�will�A�veC�have�im��A�iC�i�A�m�ima��A�iC�i�A�mC�am�A�aC�gonna�isn't��A�isC�is�A�n'tC�not�isnt��A�isC�is�A�ntC�not�isn’t��A�isC�is�A�n’tC�not�it'd��A�itC�it�A�'dC�'d�it'd've��A�itC�it�A�'dC�would�A�'veC�have�it'll��A�itC�it�A�'llC�will�it'll've��A�itC�it�A�'llC�will�A�'veC�have�it's��A�itC�it�A�'sC�'s�itd��A�itC�it�A�dC�'d�itdve��A�itC�it�A�dC�would�A�veC�have�itll��A�itC�it�A�llC�will�itllve��A�itC�it�A�llC�will�A�veC�have�it’d��A�itC�it�A�’dC�'d�it’d’ve��A�itC�it�A�’dC�would�A�’veC�have�it’ll��A�itC�it�A�’llC�will�it’ll’ve��A�itC�it�A�’llC�will�A�’veC�have�it’s��A�itC�it�A�’sC�'s�ive��A�iC�i�A�veC�have�i’d��A�iC�i�A�’dC�'d�i’d’ve��A�iC�i�A�’dC�would�A�’veC�have�i’ll��A�iC�i�A�’llC�will�i’ll’ve��A�iC�i�A�’llC�will�A�’veC�have�i’m��A�iC�i�A�’mC�am�i’ma��A�iC�i�A�’mC�am�A�aC�gonna�i’ve��A�iC�i�A�’veC�have�j.��A�j.�k.��A�k.�l.��A�l.�let's��A�let�A�'sC�us�let’s��A�let�A�’sC�us�ll��A�llC�will�lovin��A�lovinC�loving�lovin'��A�lovin'C�loving�lovin’��A�lovin’C�loving�m.��A�m.�ma'am��A�ma'amC�madam�mayn't��A�mayC�may�A�n'tC�not�mayn't've��A�mayC�may�A�n'tC�not�A�'veC�have�maynt��A�mayC�may�A�ntC�not�mayntve��A�mayC�may�A�ntC�not�A�veC�have�mayn’t��A�mayC�may�A�n’tC�not�mayn’t’ve��A�mayC�may�A�n’tC�not�A�’veC�have�ma’am��A�ma’amC�madam�might've��A�mightC�might�A�'ve�
mightn't��A�mightC�might�A�n'tC�not�mightn't've��A�mightC�might�A�n'tC�not�A�'veC�have�mightnt��A�mightC�might�A�ntC�not�mightntve��A�mightC�might�A�ntC�not�A�veC�have�mightn’t��A�mightC�might�A�n’tC�not�mightn’t’ve��A�mightC�might�A�n’tC�not�A�’veC�have�mightve��A�mightC�might�A�ve�might’ve��A�mightC�might�A�’ve�must've��A�mustC�must�A�'ve�mustn't��A�mustC�must�A�n'tC�not�mustn't've��A�mustC�must�A�n'tC�not�A�'veC�have�mustnt��A�mustC�must�A�ntC�not�mustntve��A�mustC�must�A�ntC�not�A�veC�have�mustn’t��A�mustC�must�A�n’tC�not�mustn’t’ve��A�mustC�must�A�n’tC�not�A�’veC�have�mustve��A�mustC�must�A�ve�must’ve��A�mustC�must�A�’ve�n.��A�n.�needn't��A�needC�need�A�n'tC�not�needn't've��A�needC�need�A�n'tC�not�A�'veC�have�neednt��A�needC�need�A�ntC�not�needntve��A�needC�need�A�ntC�not�A�veC�have�needn’t��A�needC�need�A�n’tC�not�needn’t’ve��A�needC�need�A�n’tC�not�A�’veC�have�not've��A�not�A�'veC�have�nothin��A�nothinC�nothing�nothin'��A�nothin'C�nothing�nothin’��A�nothin’C�nothing�notve��A�not�A�veC�have�not’ve��A�not�A�’veC�have�nuff��A�nuffC�enough�nuthin��A�nuthinC�nothing�nuthin'��A�nuthin'C�nothing�nuthin’��A�nuthin’C�nothing�o'clock��A�o'clockC�o'clock�o.��A�o.�o.0��A�o.0�o.O��A�o.O�o.o��A�o.o�o_0��A�o_0�o_O��A�o_O�o_o��A�o_o�ol��A�olC�old�ol'��A�ol'C�old�ol’��A�ol’C�old�oughtn't��A�oughtC�ought�A�n'tC�not�oughtn't've��A�oughtC�ought�A�n'tC�not�A�'veC�have�oughtnt��A�oughtC�ought�A�ntC�not�oughtntve��A�oughtC�ought�A�ntC�not�A�veC�have�oughtn’t��A�oughtC�ought�A�n’tC�not�oughtn’t’ve��A�oughtC�ought�A�n’tC�not�A�’veC�have�o’clock��A�o’clockC�o'clock�p.��A�p.�p.m.��A�p.m.�q.��A�q.�r.��A�r.�s.��A�s.�shan't��A�shaC�shall�A�n'tC�not�shan't've��A�shaC�shall�A�n'tC�not�A�'veC�have�shant��A�shaC�shall�A�ntC�not�shantve��A�shaC�shall�A�ntC�not�A�veC�have�shan’t��A�shaC�shall�A�n’tC�not�shan’t’ve��A�shaC�shall�A�n’tC�not�A�’veC�have�she'd��A�sheC�she�A�'dC�'d�she'd've��A�sheC�she�A�'dC�would�A�'veC�have�she'll��A�sheC�she�A�'llC�will�she'll've��A�sheC�she�A�'llC�will�A�'veC�hav
e�she's��A�sheC�she�A�'sC�'s�shedve��A�sheC�she�A�dC�would�A�veC�have�shellve��A�sheC�she�A�llC�will�A�veC�have�shes��A�sheC�she�A�s�she’d��A�sheC�she�A�’dC�'d�she’d’ve��A�sheC�she�A�’dC�would�A�’veC�have�she’ll��A�sheC�she�A�’llC�will�she’ll’ve��A�sheC�she�A�’llC�will�A�’veC�have�she’s��A�sheC�she�A�’sC�'s�should've��A�shouldC�should�A�'ve�shouldn't��A�shouldC�should�A�n'tC�not�shouldn't've��A�shouldC�should�A�n'tC�not�A�'veC�have�shouldnt��A�shouldC�should�A�ntC�not�shouldntve��A�shouldC�should�A�ntC�not�A�veC�have�shouldn’t��A�shouldC�should�A�n’tC�not�shouldn’t’ve��A�shouldC�should�A�n’tC�not�A�’veC�have�shouldve��A�shouldC�should�A�ve�should’ve��A�shouldC�should�A�’ve�somethin��A�somethinC�something�somethin'��A�somethin'C�something�somethin’��A�somethin’C�something�t.��A�t.�that'd��A�thatC�that�A�'dC�'d�that'd've��A�thatC�that�A�'dC�would�A�'veC�have�that'll��A�thatC�that�A�'llC�will�that'll've��A�thatC�that�A�'llC�will�A�'veC�have�that's��A�thatC�that�A�'sC�'s�thatd��A�thatC�that�A�dC�'d�thatdve��A�thatC�that�A�dC�would�A�veC�have�thatll��A�thatC�that�A�llC�will�thatllve��A�thatC�that�A�llC�will�A�veC�have�thats��A�thatC�that�A�s�that’d��A�thatC�that�A�’dC�'d�that’d’ve��A�thatC�that�A�’dC�would�A�’veC�have�that’ll��A�thatC�that�A�’llC�will�that’ll’ve��A�thatC�that�A�’llC�will�A�’veC�have�that’s��A�thatC�that�A�’sC�'s�there'd��A�thereC�there�A�'dC�'d�there'd've��A�thereC�there�A�'dC�would�A�'veC�have�there'll��A�thereC�there�A�'llC�will�there'll've��A�thereC�there�A�'llC�will�A�'veC�have�there're��A�thereC�there�A�'reC�are�there's��A�thereC�there�A�'sC�'s�there've��A�thereC�there�A�'ve�thered��A�thereC�there�A�dC�'d�theredve��A�thereC�there�A�dC�would�A�veC�have�therell��A�thereC�there�A�llC�will�therellve��A�thereC�there�A�llC�will�A�veC�have�therere��A�thereC�there�A�reC�are�theres��A�thereC�there�A�s�thereve��A�there�A�veC�have�there’d��A�thereC�there�A�’dC�'d�there’d’ve��A�thereC�there�A�’dC�would�A�’veC�have�there’ll��A�thereC�there�A�’llC�will�there’ll’v
e��A�thereC�there�A�’llC�will�A�’veC�have�there’re��A�thereC�there�A�’reC�are�there’s��A�thereC�there�A�’sC�'s�there’ve��A�thereC�there�A�’ve�these'd��A�theseC�these�A�'dC�'d�these'd've��A�theseC�these�A�'dC�would�A�'veC�have�these'll��A�theseC�these�A�'llC�will�these'll've��A�theseC�these�A�'llC�will�A�'veC�have�these're��A�theseC�these�A�'reC�are�these've��A�theseC�these�A�'ve�thesed��A�theseC�these�A�dC�'d�thesedve��A�theseC�these�A�dC�would�A�veC�have�thesell��A�theseC�these�A�llC�will�thesellve��A�theseC�these�A�llC�will�A�veC�have�thesere��A�theseC�these�A�reC�are�theseve��A�these�A�veC�have�these’d��A�theseC�these�A�’dC�'d�these’d’ve��A�theseC�these�A�’dC�would�A�’veC�have�these’ll��A�theseC�these�A�’llC�will�these’ll’ve��A�theseC�these�A�’llC�will�A�’veC�have�these’re��A�theseC�these�A�’reC�are�these’ve��A�theseC�these�A�’ve�they'd��A�theyC�they�A�'dC�'d�they'd've��A�theyC�they�A�'dC�would�A�'veC�have�they'll��A�theyC�they�A�'llC�will�they'll've��A�theyC�they�A�'llC�will�A�'veC�have�they're��A�theyC�they�A�'reC�are�they've��A�theyC�they�A�'veC�have�theyd��A�theyC�they�A�dC�'d�theydve��A�theyC�they�A�dC�would�A�veC�have�theyll��A�theyC�they�A�llC�will�theyllve��A�theyC�they�A�llC�will�A�veC�have�theyre��A�theyC�they�A�reC�are�theyve��A�theyC�they�A�veC�have�they’d��A�theyC�they�A�’dC�'d�they’d’ve��A�theyC�they�A�’dC�would�A�’veC�have�they’ll��A�theyC�they�A�’llC�will�they’ll’ve��A�theyC�they�A�’llC�will�A�’veC�have�they’re��A�theyC�they�A�’reC�are�they’ve��A�theyC�they�A�’veC�have�this'd��A�thisC�this�A�'dC�'d�this'd've��A�thisC�this�A�'dC�would�A�'veC�have�this'll��A�thisC�this�A�'llC�will�this'll've��A�thisC�this�A�'llC�will�A�'veC�have�this's��A�thisC�this�A�'sC�'s�thisd��A�thisC�this�A�dC�'d�thisdve��A�thisC�this�A�dC�would�A�veC�have�thisll��A�thisC�this�A�llC�will�thisllve��A�thisC�this�A�llC�will�A�veC�have�thiss��A�thisC�this�A�s�this’d��A�thisC�this�A�’dC�'d�this’d’ve��A�thisC�this�A�’dC�would�A�’veC�have�this’ll��A�thisC�this�A�’llC�will�this’ll’ve�
�A�thisC�this�A�’llC�will�A�’veC�have�this’s��A�thisC�this�A�’sC�'s�those'd��A�thoseC�those�A�'dC�'d�those'd've��A�thoseC�those�A�'dC�would�A�'veC�have�those'll��A�thoseC�those�A�'llC�will�those'll've��A�thoseC�those�A�'llC�will�A�'veC�have�those're��A�thoseC�those�A�'reC�are�those've��A�thoseC�those�A�'ve�thosed��A�thoseC�those�A�dC�'d�thosedve��A�thoseC�those�A�dC�would�A�veC�have�thosell��A�thoseC�those�A�llC�will�thosellve��A�thoseC�those�A�llC�will�A�veC�have�thosere��A�thoseC�those�A�reC�are�thoseve��A�those�A�veC�have�those’d��A�thoseC�those�A�’dC�'d�those’d’ve��A�thoseC�those�A�’dC�would�A�’veC�have�those’ll��A�thoseC�those�A�’llC�will�those’ll’ve��A�thoseC�those�A�’llC�will�A�’veC�have�those’re��A�thoseC�those�A�’reC�are�those’ve��A�thoseC�those�A�’ve�u.��A�u.�v.��A�v.�v.s.��A�v.s.�v.v��A�v.v�v_v��A�v_v�vs.��A�vs.�w.��A�w.�w/o��A�w/oC�without�wasn't��A�wasC�was�A�n'tC�not�wasnt��A�wasC�was�A�ntC�not�wasn’t��A�wasC�was�A�n’tC�not�we'd��A�weC�we�A�'dC�'d�we'd've��A�weC�we�A�'dC�would�A�'veC�have�we'll��A�weC�we�A�'llC�will�we'll've��A�weC�we�A�'llC�will�A�'veC�have�we're��A�weC�we�A�'reC�are�we've��A�weC�we�A�'veC�have�wed��A�weC�we�A�dC�'d�wedve��A�weC�we�A�dC�would�A�veC�have�wellve��A�weC�we�A�llC�will�A�veC�have�weren't��A�wereC�were�A�n'tC�not�werent��A�wereC�were�A�ntC�not�weren’t��A�wereC�were�A�n’tC�not�weve��A�weC�we�A�veC�have�we’d��A�weC�we�A�’dC�'d�we’d’ve��A�weC�we�A�’dC�would�A�’veC�have�we’ll��A�weC�we�A�’llC�will�we’ll’ve��A�weC�we�A�’llC�will�A�’veC�have�we’re��A�weC�we�A�’reC�are�we’ve��A�weC�we�A�’veC�have�what'd��A�whatC�what�A�'dC�'d�what'd've��A�whatC�what�A�'dC�would�A�'veC�have�what'll��A�whatC�what�A�'llC�will�what'll've��A�whatC�what�A�'llC�will�A�'veC�have�what're��A�whatC�what�A�'reC�are�what's��A�whatC�what�A�'sC�'s�what've��A�whatC�what�A�'ve�whatd��A�whatC�what�A�dC�'d�whatdve��A�whatC�what�A�dC�would�A�veC�have�whatll��A�whatC�what�A�llC�will�whatllve��A�whatC�what�A�llC�will�A�veC�have�whatre��A�whatC�what�A�reC�are�whats��A�w
hatC�what�A�s�whatve��A�what�A�veC�have�what’d��A�whatC�what�A�’dC�'d�what’d’ve��A�whatC�what�A�’dC�would�A�’veC�have�what’ll��A�whatC�what�A�’llC�will�what’ll’ve��A�whatC�what�A�’llC�will�A�’veC�have�what’re��A�whatC�what�A�’reC�are�what’s��A�whatC�what�A�’sC�'s�what’ve��A�whatC�what�A�’ve�when'd��A�whenC�when�A�'dC�'d�when'd've��A�whenC�when�A�'dC�would�A�'veC�have�when'll��A�whenC�when�A�'llC�will�when'll've��A�whenC�when�A�'llC�will�A�'veC�have�when're��A�whenC�when�A�'reC�are�when's��A�whenC�when�A�'sC�'s�when've��A�whenC�when�A�'ve�whend��A�whenC�when�A�dC�'d�whendve��A�whenC�when�A�dC�would�A�veC�have�whenll��A�whenC�when�A�llC�will�whenllve��A�whenC�when�A�llC�will�A�veC�have�whenre��A�whenC�when�A�reC�are�whens��A�whenC�when�A�s�whenve��A�when�A�veC�have�when’d��A�whenC�when�A�’dC�'d�when’d’ve��A�whenC�when�A�’dC�would�A�’veC�have�when’ll��A�whenC�when�A�’llC�will�when’ll’ve��A�whenC�when�A�’llC�will�A�’veC�have�when’re��A�whenC�when�A�’reC�are�when’s��A�whenC�when�A�’sC�'s�when’ve��A�whenC�when�A�’ve�where'd��A�whereC�where�A�'dC�'d�where'd've��A�whereC�where�A�'dC�would�A�'veC�have�where'll��A�whereC�where�A�'llC�will�where'll've��A�whereC�where�A�'llC�will�A�'veC�have�where're��A�whereC�where�A�'reC�are�where's��A�whereC�where�A�'sC�'s�where've��A�whereC�where�A�'ve�whered��A�whereC�where�A�dC�'d�wheredve��A�whereC�where�A�dC�would�A�veC�have�wherell��A�whereC�where�A�llC�will�wherellve��A�whereC�where�A�llC�will�A�veC�have�wherere��A�whereC�where�A�reC�are�wheres��A�whereC�where�A�s�whereve��A�where�A�veC�have�where’d��A�whereC�where�A�’dC�'d�where’d’ve��A�whereC�where�A�’dC�would�A�’veC�have�where’ll��A�whereC�where�A�’llC�will�where’ll’ve��A�whereC�where�A�’llC�will�A�’veC�have�where’re��A�whereC�where�A�’reC�are�where’s��A�whereC�where�A�’sC�'s�where’ve��A�whereC�where�A�’ve�who'd��A�whoC�who�A�'dC�'d�who'd've��A�whoC�who�A�'dC�would�A�'veC�have�who'll��A�whoC�who�A�'llC�will�who'll've��A�whoC�who�A�'llC�will�A�'veC�have�who're��A�whoC�who�A�'reC�are
�who's��A�whoC�who�A�'sC�'s�who've��A�whoC�who�A�'ve�whod��A�whoC�who�A�dC�'d�whodve��A�whoC�who�A�dC�would�A�veC�have�wholl��A�whoC�who�A�llC�will�whollve��A�whoC�who�A�llC�will�A�veC�have�whos��A�whoC�who�A�s�whove��A�who�A�veC�have�who’d��A�whoC�who�A�’dC�'d�who’d’ve��A�whoC�who�A�’dC�would�A�’veC�have�who’ll��A�whoC�who�A�’llC�will�who’ll’ve��A�whoC�who�A�’llC�will�A�’veC�have�who’re��A�whoC�who�A�’reC�are�who’s��A�whoC�who�A�’sC�'s�who’ve��A�whoC�who�A�’ve�why'd��A�whyC�why�A�'dC�'d�why'd've��A�whyC�why�A�'dC�would�A�'veC�have�why'll��A�whyC�why�A�'llC�will�why'll've��A�whyC�why�A�'llC�will�A�'veC�have�why're��A�whyC�why�A�'reC�are�why's��A�whyC�why�A�'sC�'s�why've��A�whyC�why�A�'ve�whyd��A�whyC�why�A�dC�'d�whydve��A�whyC�why�A�dC�would�A�veC�have�whyll��A�whyC�why�A�llC�will�whyllve��A�whyC�why�A�llC�will�A�veC�have�whyre��A�whyC�why�A�reC�are�whys��A�whyC�why�A�s�whyve��A�why�A�veC�have�why’d��A�whyC�why�A�’dC�'d�why’d’ve��A�whyC�why�A�’dC�would�A�’veC�have�why’ll��A�whyC�why�A�’llC�will�why’ll’ve��A�whyC�why�A�’llC�will�A�’veC�have�why’re��A�whyC�why�A�’reC�are�why’s��A�whyC�why�A�’sC�'s�why’ve��A�whyC�why�A�’ve�won't��A�woC�will�A�n'tC�not�won't've��A�woC�will�A�n'tC�not�A�'veC�have�wont��A�woC�will�A�ntC�not�wontve��A�woC�will�A�ntC�not�A�veC�have�won’t��A�woC�will�A�n’tC�not�won’t’ve��A�woC�will�A�n’tC�not�A�’veC�have�would've��A�wouldC�would�A�'ve�wouldn't��A�wouldC�would�A�n'tC�not�wouldn't've��A�wouldC�would�A�n'tC�not�A�'veC�have�wouldnt��A�wouldC�would�A�ntC�not�wouldntve��A�wouldC�would�A�ntC�not�A�veC�have�wouldn’t��A�wouldC�would�A�n’tC�not�wouldn’t’ve��A�wouldC�would�A�n’tC�not�A�’veC�have�wouldve��A�wouldC�would�A�ve�would’ve��A�wouldC�would�A�’ve�x.��A�x.�xD��A�xD�xDD��A�xDD�y'all��A�y'C�you�A�all�y.��A�y.�yall��A�yC�you�A�all�you'd��A�youC�you�A�'dC�'d�you'd've��A�youC�you�A�'dC�would�A�'veC�have�you'll��A�youC�you�A�'llC�will�you'll've��A�youC�you�A�'llC�will�A�'veC�have�you're��A�youC�you�A�'reC�are�you've��A�youC�you�A�'veC�have�youd��A�you
C�you�A�dC�'d�youdve��A�youC�you�A�dC�would�A�veC�have�youll��A�youC�you�A�llC�will�youllve��A�youC�you�A�llC�will�A�veC�have�youre��A�youC�you�A�reC�are�youve��A�youC�you�A�veC�have�you’d��A�youC�you�A�’dC�'d�you’d’ve��A�youC�you�A�’dC�would�A�’veC�have�you’ll��A�youC�you�A�’llC�will�you’ll’ve��A�youC�you�A�’llC�will�A�’veC�have�you’re��A�youC�you�A�’reC�are�you’ve��A�youC�you�A�’veC�have�y’all��A�y’C�you�A�all�z.��A�z.� ��A� C� �¯\(ツ)/¯��A�¯\(ツ)/¯�°C.��A�°�A�C�A�.�°F.��A�°�A�F�A�.�°K.��A�°�A�K�A�.�°c.��A�°�A�c�A�.�°f.��A�°�A�f�A�.�°k.��A�°�A�k�A�.�ä.��A�ä.�ö.��A�ö.�ü.��A�ü.�ಠ_ಠ��A�ಠ_ಠ�ಠ︵ಠ��A�ಠ︵ಠ�—��A�—�‘S��A�‘SC�'s�‘s��A�‘sC�'s�’��A�’�’Cause��A�’CauseC�because�’Cos��A�’CosC�because�’Coz��A�’CozC�because�’Cuz��A�’CuzC�because�’S��A�’SC�'s�’bout��A�’boutC�about�’cause��A�’causeC�because�’cos��A�’cosC�because�’coz��A�’cozC�because�’cuz��A�’cuzC�because�’d��A�’d�’em��A�’emC�them�’ll��A�’llC�will�’nuff��A�’nuffC�enough�’re��A�’reC�are�’s��A�’sC�'s�’’��A�’’�faster_heuristics�
|
notebooks/custom-ner-model/vocab/lookups.bin
ADDED
|
Binary file (70 kB). View file
|
|
|
notebooks/custom-ner-model/vocab/strings.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
notebooks/custom-ner-model/vocab/vectors.cfg
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"mode":"default"
|
| 3 |
+
}
|
notebooks/data-exploration.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
notebooks/ner-training.ipynb
ADDED
|
@@ -0,0 +1,531 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 51,
|
| 6 |
+
"id": "4b1da6b8-17d5-4271-a5d7-5d130260a5dc",
|
| 7 |
+
"metadata": {},
|
| 8 |
+
"outputs": [],
|
| 9 |
+
"source": [
|
| 10 |
+
"import json\n",
|
| 11 |
+
"import random\n",
|
| 12 |
+
"import spacy\n",
|
| 13 |
+
"from spacy.util import minibatch\n",
|
| 14 |
+
"from spacy.training.example import Example"
|
| 15 |
+
]
|
| 16 |
+
},
|
| 17 |
+
{
|
| 18 |
+
"cell_type": "code",
|
| 19 |
+
"execution_count": 52,
|
| 20 |
+
"id": "135d1538-db90-4e1a-a371-31ce97a53c0f",
|
| 21 |
+
"metadata": {},
|
| 22 |
+
"outputs": [],
|
| 23 |
+
"source": [
|
| 24 |
+
"with open('data/labeled-data-hybrid.json', 'r') as file:\n",
|
| 25 |
+
" raw_data = json.load(file)"
|
| 26 |
+
]
|
| 27 |
+
},
|
| 28 |
+
{
|
| 29 |
+
"cell_type": "code",
|
| 30 |
+
"execution_count": 53,
|
| 31 |
+
"id": "42da15a7-ffeb-479b-94c9-faa73edfb5f6",
|
| 32 |
+
"metadata": {},
|
| 33 |
+
"outputs": [
|
| 34 |
+
{
|
| 35 |
+
"data": {
|
| 36 |
+
"text/plain": [
|
| 37 |
+
"{'Unnamed: 0': 0,\n",
|
| 38 |
+
" 'additional_info': 'In our school we have an Outdoors Instructor who would love the chance to develop a veg patch in her outdoor space to support our SEND students in growing and cooking their own food. The equipment and seeds provided in the giveaway would develop our students understanding of how to grow your own food but also the process of growing your own food.',\n",
|
| 39 |
+
" 'id': 2015,\n",
|
| 40 |
+
" 'label': [{'start': 74,\n",
|
| 41 |
+
" 'end': 93,\n",
|
| 42 |
+
" 'text': 'develop a veg patch',\n",
|
| 43 |
+
" 'labels': ['Usage']},\n",
|
| 44 |
+
" {'start': 115,\n",
|
| 45 |
+
" 'end': 182,\n",
|
| 46 |
+
" 'text': 'to support our SEND students in growing and cooking their own food.',\n",
|
| 47 |
+
" 'labels': ['Benefit']},\n",
|
| 48 |
+
" {'start': 259,\n",
|
| 49 |
+
" 'end': 348,\n",
|
| 50 |
+
" 'text': 'understanding of how to grow your own food but also the process of growing your own food.',\n",
|
| 51 |
+
" 'labels': ['Benefit']}],\n",
|
| 52 |
+
" 'annotator': 1,\n",
|
| 53 |
+
" 'annotation_id': 7089,\n",
|
| 54 |
+
" 'created_at': '2025-04-12T21:16:24.879659Z',\n",
|
| 55 |
+
" 'updated_at': '2025-04-12T21:16:24.879683Z',\n",
|
| 56 |
+
" 'lead_time': 42.768}"
|
| 57 |
+
]
|
| 58 |
+
},
|
| 59 |
+
"execution_count": 53,
|
| 60 |
+
"metadata": {},
|
| 61 |
+
"output_type": "execute_result"
|
| 62 |
+
}
|
| 63 |
+
],
|
| 64 |
+
"source": [
|
| 65 |
+
"raw_data[0]"
|
| 66 |
+
]
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"cell_type": "code",
|
| 70 |
+
"execution_count": 54,
|
| 71 |
+
"id": "653a0b7f-5e0f-4ac9-8b42-3892ec084e5c",
|
| 72 |
+
"metadata": {},
|
| 73 |
+
"outputs": [],
|
| 74 |
+
"source": [
|
| 75 |
+
"TRAIN_DATA = []\n",
|
| 76 |
+
"\n",
|
| 77 |
+
"for record in raw_data:\n",
|
| 78 |
+
" text = record[\"additional_info\"]\n",
|
| 79 |
+
" entities = []\n",
|
| 80 |
+
" for ann in record[\"label\"]:\n",
|
| 81 |
+
" entity_label = ann[\"labels\"][0].upper()\n",
|
| 82 |
+
" start = ann[\"start\"]\n",
|
| 83 |
+
" end = ann[\"end\"]\n",
|
| 84 |
+
" entities.append((start, end, entity_label))\n",
|
| 85 |
+
" TRAIN_DATA.append((text, {\"entities\": entities}))"
|
| 86 |
+
]
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"cell_type": "code",
|
| 90 |
+
"execution_count": 55,
|
| 91 |
+
"id": "15303845-7976-48f3-b51e-3680e9597df4",
|
| 92 |
+
"metadata": {},
|
| 93 |
+
"outputs": [
|
| 94 |
+
{
|
| 95 |
+
"data": {
|
| 96 |
+
"text/plain": [
|
| 97 |
+
"[('In our school we have an Outdoors Instructor who would love the chance to develop a veg patch in her outdoor space to support our SEND students in growing and cooking their own food. The equipment and seeds provided in the giveaway would develop our students understanding of how to grow your own food but also the process of growing your own food.',\n",
|
| 98 |
+
" {'entities': [(74, 93, 'USAGE'),\n",
|
| 99 |
+
" (115, 182, 'BENEFIT'),\n",
|
| 100 |
+
" (259, 348, 'BENEFIT')]}),\n",
|
| 101 |
+
" ('We are currently working on our outdoor provision. We have a large area and would love to be able to give it a full makeover! As a setting our ethos is to be nature inspired whilst using the curiosity approach with the added extras of colour etc. The children love spending time in the garden and enjoy planting and tending for the plants whilst also looking at the nature around. We have a wildlife garden which is in great need for a massive do over due to it being overgrown also - in here we have a pond, which was once home to many frogs!',\n",
|
| 102 |
+
" {'entities': [(391, 451, 'USAGE'), (101, 124, 'USAGE')]}),\n",
|
| 103 |
+
" ('I would use this bundle with our SEN children, in particular the children I work with who have trauma; gardening has huge therapeutic benefits and this bundle will enable me to take the children out to the garden and enjoy the natural dopamine rush!',\n",
|
| 104 |
+
" {'entities': [(29, 101, 'CONTEXT'), (103, 249, 'BENEFIT')]})]"
|
| 105 |
+
]
|
| 106 |
+
},
|
| 107 |
+
"execution_count": 55,
|
| 108 |
+
"metadata": {},
|
| 109 |
+
"output_type": "execute_result"
|
| 110 |
+
}
|
| 111 |
+
],
|
| 112 |
+
"source": [
|
| 113 |
+
"TRAIN_DATA[:3]"
|
| 114 |
+
]
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"cell_type": "code",
|
| 118 |
+
"execution_count": 56,
|
| 119 |
+
"id": "52874a6a-852a-457c-9fec-c2d267fd7067",
|
| 120 |
+
"metadata": {},
|
| 121 |
+
"outputs": [],
|
| 122 |
+
"source": [
|
| 123 |
+
"labels = [\"CONTEXT\", \"USAGE\", \"BENEFIT\"]"
|
| 124 |
+
]
|
| 125 |
+
},
|
| 126 |
+
{
|
| 127 |
+
"cell_type": "code",
|
| 128 |
+
"execution_count": 57,
|
| 129 |
+
"id": "07cc9611-1461-48c1-892e-3ca49a9bcd01",
|
| 130 |
+
"metadata": {},
|
| 131 |
+
"outputs": [],
|
| 132 |
+
"source": [
|
| 133 |
+
"nlp = spacy.load('en_core_web_md')"
|
| 134 |
+
]
|
| 135 |
+
},
|
| 136 |
+
{
|
| 137 |
+
"cell_type": "code",
|
| 138 |
+
"execution_count": 58,
|
| 139 |
+
"id": "dea12e7f-a9fd-4b01-b5e6-7e63511bc77f",
|
| 140 |
+
"metadata": {},
|
| 141 |
+
"outputs": [],
|
| 142 |
+
"source": [
|
| 143 |
+
"if 'ner' not in nlp.pipe_names:\n",
|
| 144 |
+
" ner = nlp.create_pipe('ner')\n",
|
| 145 |
+
" nlp.add_pipe(ner)\n",
|
| 146 |
+
"else:\n",
|
| 147 |
+
" ner = nlp.get_pipe('ner')"
|
| 148 |
+
]
|
| 149 |
+
},
|
| 150 |
+
{
|
| 151 |
+
"cell_type": "code",
|
| 152 |
+
"execution_count": 59,
|
| 153 |
+
"id": "6e7cb732-16a4-460f-b68a-f2208cae17ca",
|
| 154 |
+
"metadata": {},
|
| 155 |
+
"outputs": [],
|
| 156 |
+
"source": [
|
| 157 |
+
"for label in labels: ner.add_label(label)"
|
| 158 |
+
]
|
| 159 |
+
},
|
| 160 |
+
{
|
| 161 |
+
"cell_type": "code",
|
| 162 |
+
"execution_count": 72,
|
| 163 |
+
"id": "43133106-98af-4dca-8d37-837d81cde57d",
|
| 164 |
+
"metadata": {},
|
| 165 |
+
"outputs": [
|
| 166 |
+
{
|
| 167 |
+
"name": "stderr",
|
| 168 |
+
"output_type": "stream",
|
| 169 |
+
"text": [
|
| 170 |
+
"/Users/lynn/venv/lib/python3.12/site-packages/spacy/training/iob_utils.py:149: UserWarning: [W030] Some entities could not be aligned in the text \"Our children enjoy seeing flowers growing and the ...\" with entities \"[(0, 80, 'CONTEXT'), (82, 143, 'USAGE'), (194, 244...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n",
|
| 171 |
+
" warnings.warn(\n",
|
| 172 |
+
"/Users/lynn/venv/lib/python3.12/site-packages/spacy/training/iob_utils.py:149: UserWarning: [W030] Some entities could not be aligned in the text \"The Eco-Council have been working hard to prepare ...\" with entities \"[(0, 105, 'CONTEXT'), (265, 395, 'USAGE'), (399, 4...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n",
|
| 173 |
+
" warnings.warn(\n",
|
| 174 |
+
"/Users/lynn/venv/lib/python3.12/site-packages/spacy/training/iob_utils.py:149: UserWarning: [W030] Some entities could not be aligned in the text \"Budgets are tight, needs are great. We're a small ...\" with entities \"[(36, 176, 'CONTEXT'), (178, 293, 'CONTEXT'), (293...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n",
|
| 175 |
+
" warnings.warn(\n",
|
| 176 |
+
"/Users/lynn/venv/lib/python3.12/site-packages/spacy/training/iob_utils.py:149: UserWarning: [W030] Some entities could not be aligned in the text \"Hi we have a newly formed eco council who are keen...\" with entities \"[(0, 93, 'CONTEXT'), (95, 159, 'USAGE'), (160, 241...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n",
|
| 177 |
+
" warnings.warn(\n",
|
| 178 |
+
"/Users/lynn/venv/lib/python3.12/site-packages/spacy/training/iob_utils.py:149: UserWarning: [W030] Some entities could not be aligned in the text \"We are an urban school without a green space (e.g....\" with entities \"[(0, 64, 'CONTEXT'), (66, 153, 'USAGE'), (153, 300...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n",
|
| 179 |
+
" warnings.warn(\n",
|
| 180 |
+
"/Users/lynn/venv/lib/python3.12/site-packages/spacy/training/iob_utils.py:149: UserWarning: [W030] Some entities could not be aligned in the text \"We are a small rural school and as such have very ...\" with entities \"[(0, 96, 'CONTEXT'), (171, 230, 'USAGE'), (274, 45...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n",
|
| 181 |
+
" warnings.warn(\n",
|
| 182 |
+
"/Users/lynn/venv/lib/python3.12/site-packages/spacy/training/iob_utils.py:149: UserWarning: [W030] Some entities could not be aligned in the text \"If our preschool won £500, we would invest in enri...\" with entities \"[(0, 65, 'USAGE'), (66, 108, 'BENEFIT'), (110, 189...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n",
|
| 183 |
+
" warnings.warn(\n",
|
| 184 |
+
"/Users/lynn/venv/lib/python3.12/site-packages/spacy/training/iob_utils.py:149: UserWarning: [W030] Some entities could not be aligned in the text \"We are just about to start 'Eco Warriors' in our s...\" with entities \"[(0, 55, 'CONTEXT'), (57, 127, 'CONTEXT'), (153, 1...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n",
|
| 185 |
+
" warnings.warn(\n",
|
| 186 |
+
"/Users/lynn/venv/lib/python3.12/site-packages/spacy/training/iob_utils.py:149: UserWarning: [W030] Some entities could not be aligned in the text \"To win the money or the gardening bundle would be ...\" with entities \"[(289, 380, 'USAGE'), (62, 171, 'BENEFIT'), (175, ...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n",
|
| 187 |
+
" warnings.warn(\n",
|
| 188 |
+
"/Users/lynn/venv/lib/python3.12/site-packages/spacy/training/iob_utils.py:149: UserWarning: [W030] Some entities could not be aligned in the text \"Greetings from our SEN school Harford Manor in Nor...\" with entities \"[(15, 54, 'CONTEXT'), (56, 189, 'CONTEXT'), (516, ...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n",
|
| 189 |
+
" warnings.warn(\n",
|
| 190 |
+
"/Users/lynn/venv/lib/python3.12/site-packages/spacy/training/iob_utils.py:149: UserWarning: [W030] Some entities could not be aligned in the text \"Our school is in a deprivation 4 area, whereby mos...\" with entities \"[(0, 109, 'CONTEXT'), (110, 170, 'USAGE'), (171, 2...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n",
|
| 191 |
+
" warnings.warn(\n",
|
| 192 |
+
"/Users/lynn/venv/lib/python3.12/site-packages/spacy/training/iob_utils.py:149: UserWarning: [W030] Some entities could not be aligned in the text \"Within our urban school, many families face econom...\" with entities \"[(0, 131, 'CONTEXT'), (234, 282, 'BENEFIT'), (287,...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n",
|
| 193 |
+
" warnings.warn(\n",
|
| 194 |
+
"/Users/lynn/venv/lib/python3.12/site-packages/spacy/training/iob_utils.py:149: UserWarning: [W030] Some entities could not be aligned in the text \"We are still hoping to create a special outdoor sp...\" with entities \"[(0, 71, 'USAGE'), (143, 260, 'BENEFIT'), (262, 35...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n",
|
| 195 |
+
" warnings.warn(\n",
|
| 196 |
+
"/Users/lynn/venv/lib/python3.12/site-packages/spacy/training/iob_utils.py:149: UserWarning: [W030] Some entities could not be aligned in the text \"I would love to be able to use these resources to ...\" with entities \"[(47, 258, 'BENEFIT'), (259, 364, 'CONTEXT'), (451...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n",
|
| 197 |
+
" warnings.warn(\n",
|
| 198 |
+
"/Users/lynn/venv/lib/python3.12/site-packages/spacy/training/iob_utils.py:149: UserWarning: [W030] Some entities could not be aligned in the text \"We could use this to develop our outdoor area and ...\" with entities \"[(0, 45, 'USAGE'), (69, 200, 'CONTEXT'), (205, 307...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n",
|
| 199 |
+
" warnings.warn(\n",
|
| 200 |
+
"/Users/lynn/venv/lib/python3.12/site-packages/spacy/training/iob_utils.py:149: UserWarning: [W030] Some entities could not be aligned in the text \"Working in the Early Years classes at Rushmere Hal...\" with entities \"[(101, 162, 'CONTEXT'), (230, 316, 'CONTEXT'), (31...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n",
|
| 201 |
+
" warnings.warn(\n",
|
| 202 |
+
"/Users/lynn/venv/lib/python3.12/site-packages/spacy/training/iob_utils.py:149: UserWarning: [W030] Some entities could not be aligned in the text \"I work in an alternative provision supporting disa...\" with entities \"[(0, 78, 'CONTEXT'), (80, 171, 'CONTEXT'), (172, 2...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n",
|
| 203 |
+
" warnings.warn(\n",
|
| 204 |
+
"/Users/lynn/venv/lib/python3.12/site-packages/spacy/training/iob_utils.py:149: UserWarning: [W030] Some entities could not be aligned in the text \"We are about to build an outdoor learning/ sensory...\" with entities \"[(0, 58, 'CONTEXT'), (196, 247, 'BENEFIT')]\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n",
|
| 205 |
+
" warnings.warn(\n",
|
| 206 |
+
"/Users/lynn/venv/lib/python3.12/site-packages/spacy/training/iob_utils.py:149: UserWarning: [W030] Some entities could not be aligned in the text \"Our small village school has a lovely vegetable pa...\" with entities \"[(0, 74, 'CONTEXT'), (76, 144, 'USAGE'), (145, 176...\". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.\n",
|
| 207 |
+
" warnings.warn(\n"
|
| 208 |
+
]
|
| 209 |
+
},
|
| 210 |
+
{
|
| 211 |
+
"name": "stdout",
|
| 212 |
+
"output_type": "stream",
|
| 213 |
+
"text": [
|
| 214 |
+
"Epoch: 1, Losses: {'ner': 9533.999769367278}\n",
|
| 215 |
+
"Epoch: 2, Losses: {'ner': 3373.046681917019}\n",
|
| 216 |
+
"Epoch: 3, Losses: {'ner': 2111.8298322203573}\n",
|
| 217 |
+
"Epoch: 4, Losses: {'ner': 1268.7511834601187}\n",
|
| 218 |
+
"Epoch: 5, Losses: {'ner': 1260.472505828361}\n",
|
| 219 |
+
"Epoch: 6, Losses: {'ner': 2081.233363646942}\n",
|
| 220 |
+
"Epoch: 7, Losses: {'ner': 1870.1290942860255}\n",
|
| 221 |
+
"Epoch: 8, Losses: {'ner': 2760.632185367636}\n",
|
| 222 |
+
"Epoch: 9, Losses: {'ner': 4679.581114066543}\n",
|
| 223 |
+
"Epoch: 10, Losses: {'ner': 1795.3569531670457}\n",
|
| 224 |
+
"Epoch: 11, Losses: {'ner': 1373.5194162921005}\n",
|
| 225 |
+
"Epoch: 12, Losses: {'ner': 1627.4724870795094}\n",
|
| 226 |
+
"Epoch: 13, Losses: {'ner': 2155.7555929912505}\n",
|
| 227 |
+
"Epoch: 14, Losses: {'ner': 2706.850242773626}\n",
|
| 228 |
+
"Epoch: 15, Losses: {'ner': 1775.3004598389962}\n",
|
| 229 |
+
"Epoch: 16, Losses: {'ner': 2494.793117767037}\n",
|
| 230 |
+
"Epoch: 17, Losses: {'ner': 2254.270585262983}\n",
|
| 231 |
+
"Epoch: 18, Losses: {'ner': 2032.8792585826413}\n",
|
| 232 |
+
"Epoch: 19, Losses: {'ner': 1623.8611707914843}\n",
|
| 233 |
+
"Epoch: 20, Losses: {'ner': 2971.092258894847}\n",
|
| 234 |
+
"Epoch: 21, Losses: {'ner': 1791.2796182300083}\n",
|
| 235 |
+
"Epoch: 22, Losses: {'ner': 1042.6703570287234}\n",
|
| 236 |
+
"Epoch: 23, Losses: {'ner': 1433.6355348417426}\n",
|
| 237 |
+
"Epoch: 24, Losses: {'ner': 1408.8894868987602}\n",
|
| 238 |
+
"Epoch: 25, Losses: {'ner': 1056.7561133178813}\n",
|
| 239 |
+
"Epoch: 26, Losses: {'ner': 1944.829844542442}\n",
|
| 240 |
+
"Epoch: 27, Losses: {'ner': 2795.8901439635674}\n",
|
| 241 |
+
"Epoch: 28, Losses: {'ner': 2298.3327245539053}\n",
|
| 242 |
+
"Epoch: 29, Losses: {'ner': 2307.1431226889918}\n",
|
| 243 |
+
"Epoch: 30, Losses: {'ner': 2528.3436281068466}\n",
|
| 244 |
+
"Epoch: 31, Losses: {'ner': 1870.7222537528485}\n",
|
| 245 |
+
"Epoch: 32, Losses: {'ner': 1094.7755118804075}\n",
|
| 246 |
+
"Epoch: 33, Losses: {'ner': 2263.5593045036608}\n",
|
| 247 |
+
"Epoch: 34, Losses: {'ner': 1468.8629297240825}\n",
|
| 248 |
+
"Epoch: 35, Losses: {'ner': 1500.340793062488}\n",
|
| 249 |
+
"Epoch: 36, Losses: {'ner': 1514.0060728432647}\n",
|
| 250 |
+
"Epoch: 37, Losses: {'ner': 1252.899499569187}\n",
|
| 251 |
+
"Epoch: 38, Losses: {'ner': 1178.042552752851}\n",
|
| 252 |
+
"Epoch: 39, Losses: {'ner': 1733.7365258247341}\n",
|
| 253 |
+
"Epoch: 40, Losses: {'ner': 3265.8411689659533}\n",
|
| 254 |
+
"Epoch: 41, Losses: {'ner': 1594.4703964198884}\n",
|
| 255 |
+
"Epoch: 42, Losses: {'ner': 1528.291078742218}\n",
|
| 256 |
+
"Epoch: 43, Losses: {'ner': 1121.1585976279564}\n",
|
| 257 |
+
"Epoch: 44, Losses: {'ner': 1977.9884357938543}\n",
|
| 258 |
+
"Epoch: 45, Losses: {'ner': 1668.6792443859056}\n",
|
| 259 |
+
"Epoch: 46, Losses: {'ner': 1767.2459401943004}\n",
|
| 260 |
+
"Epoch: 47, Losses: {'ner': 1790.4347547065895}\n",
|
| 261 |
+
"Epoch: 48, Losses: {'ner': 1172.3169484436285}\n",
|
| 262 |
+
"Epoch: 49, Losses: {'ner': 1229.973782697482}\n",
|
| 263 |
+
"Epoch: 50, Losses: {'ner': 1485.830037922377}\n",
|
| 264 |
+
"Epoch: 51, Losses: {'ner': 1417.2411754741781}\n",
|
| 265 |
+
"Epoch: 52, Losses: {'ner': 1256.0793795796576}\n",
|
| 266 |
+
"Epoch: 53, Losses: {'ner': 1265.9343875693016}\n",
|
| 267 |
+
"Epoch: 54, Losses: {'ner': 1613.1182694931767}\n",
|
| 268 |
+
"Epoch: 55, Losses: {'ner': 1374.4901916941078}\n",
|
| 269 |
+
"Epoch: 56, Losses: {'ner': 1289.6665635846941}\n",
|
| 270 |
+
"Epoch: 57, Losses: {'ner': 2030.2654670370566}\n",
|
| 271 |
+
"Epoch: 58, Losses: {'ner': 1178.3719373901415}\n",
|
| 272 |
+
"Epoch: 59, Losses: {'ner': 1222.974736904168}\n",
|
| 273 |
+
"Epoch: 60, Losses: {'ner': 1110.8926425971572}\n",
|
| 274 |
+
"Epoch: 61, Losses: {'ner': 2141.7158235774314}\n",
|
| 275 |
+
"Epoch: 62, Losses: {'ner': 1469.6043911663928}\n",
|
| 276 |
+
"Epoch: 63, Losses: {'ner': 1534.282817813435}\n",
|
| 277 |
+
"Epoch: 64, Losses: {'ner': 1330.7558638433745}\n",
|
| 278 |
+
"Epoch: 65, Losses: {'ner': 1328.6353675642722}\n",
|
| 279 |
+
"Epoch: 66, Losses: {'ner': 1226.3045698316218}\n",
|
| 280 |
+
"Epoch: 67, Losses: {'ner': 1000.5061301572475}\n",
|
| 281 |
+
"Epoch: 68, Losses: {'ner': 1040.673097800813}\n",
|
| 282 |
+
"Epoch: 69, Losses: {'ner': 1429.5936861294554}\n",
|
| 283 |
+
"Epoch: 70, Losses: {'ner': 955.2435547965925}\n",
|
| 284 |
+
"Epoch: 71, Losses: {'ner': 1139.967262046805}\n",
|
| 285 |
+
"Epoch: 72, Losses: {'ner': 971.6258769863182}\n",
|
| 286 |
+
"Epoch: 73, Losses: {'ner': 1223.0781285260389}\n",
|
| 287 |
+
"Epoch: 74, Losses: {'ner': 1028.4424976595403}\n",
|
| 288 |
+
"Epoch: 75, Losses: {'ner': 1043.8942978935258}\n",
|
| 289 |
+
"Epoch: 76, Losses: {'ner': 1181.2761545421245}\n",
|
| 290 |
+
"Epoch: 77, Losses: {'ner': 1016.0896479519944}\n",
|
| 291 |
+
"Epoch: 78, Losses: {'ner': 1110.6242503181215}\n",
|
| 292 |
+
"Epoch: 79, Losses: {'ner': 1083.1151262103085}\n",
|
| 293 |
+
"Epoch: 80, Losses: {'ner': 865.0088993680911}\n",
|
| 294 |
+
"Epoch: 81, Losses: {'ner': 999.7223470452517}\n",
|
| 295 |
+
"Epoch: 82, Losses: {'ner': 1024.4064070045667}\n",
|
| 296 |
+
"Epoch: 83, Losses: {'ner': 1008.208133637956}\n",
|
| 297 |
+
"Epoch: 84, Losses: {'ner': 967.2180909306633}\n",
|
| 298 |
+
"Epoch: 85, Losses: {'ner': 1038.060514933633}\n",
|
| 299 |
+
"Epoch: 86, Losses: {'ner': 1160.641935240572}\n",
|
| 300 |
+
"Epoch: 87, Losses: {'ner': 848.0598354220901}\n",
|
| 301 |
+
"Epoch: 88, Losses: {'ner': 1593.9618448381439}\n",
|
| 302 |
+
"Epoch: 89, Losses: {'ner': 2043.2513065760422}\n",
|
| 303 |
+
"Epoch: 90, Losses: {'ner': 949.2219864119644}\n",
|
| 304 |
+
"Epoch: 91, Losses: {'ner': 1308.128982071408}\n",
|
| 305 |
+
"Epoch: 92, Losses: {'ner': 1419.6585662560803}\n",
|
| 306 |
+
"Epoch: 93, Losses: {'ner': 1231.0372582253806}\n",
|
| 307 |
+
"Epoch: 94, Losses: {'ner': 961.5172148445994}\n",
|
| 308 |
+
"Epoch: 95, Losses: {'ner': 1032.0622994361909}\n",
|
| 309 |
+
"Epoch: 96, Losses: {'ner': 760.3541173792706}\n",
|
| 310 |
+
"Epoch: 97, Losses: {'ner': 858.9487417112138}\n",
|
| 311 |
+
"Epoch: 98, Losses: {'ner': 899.7491508334869}\n",
|
| 312 |
+
"Epoch: 99, Losses: {'ner': 997.3596832092959}\n",
|
| 313 |
+
"Epoch: 100, Losses: {'ner': 850.4430336169822}\n"
|
| 314 |
+
]
|
| 315 |
+
}
|
| 316 |
+
],
|
| 317 |
+
"source": [
|
| 318 |
+
"for _, annotations in TRAIN_DATA:\n",
|
| 319 |
+
" for ent in annotations['entities']:\n",
|
| 320 |
+
" if ent[2] not in ner.labels:\n",
|
| 321 |
+
" ner.add_label(ent[2])\n",
|
| 322 |
+
"\n",
|
| 323 |
+
"other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'ner']\n",
|
| 324 |
+
"\n",
|
| 325 |
+
"with nlp.disable_pipes(*other_pipes):\n",
|
| 326 |
+
" optimizer = nlp.initialize()\n",
|
| 327 |
+
"\n",
|
| 328 |
+
" epochs = 100\n",
|
| 329 |
+
"\n",
|
| 330 |
+
" for epoch in range(epochs):\n",
|
| 331 |
+
" random.shuffle(TRAIN_DATA)\n",
|
| 332 |
+
" losses = {}\n",
|
| 333 |
+
" batches = minibatch(TRAIN_DATA, size=15)\n",
|
| 334 |
+
" for batch in batches:\n",
|
| 335 |
+
" examples = []\n",
|
| 336 |
+
" for text, annotations in batch:\n",
|
| 337 |
+
" doc = nlp.make_doc(text)\n",
|
| 338 |
+
" example=Example.from_dict(doc, annotations)\n",
|
| 339 |
+
" examples.append(example)\n",
|
| 340 |
+
"\n",
|
| 341 |
+
" nlp.update(examples, drop=0.5, losses=losses)\n",
|
| 342 |
+
"\n",
|
| 343 |
+
" print(f\"Epoch: {epoch +1}, Losses: {losses}\")"
|
| 344 |
+
]
|
| 345 |
+
},
|
| 346 |
+
{
|
| 347 |
+
"cell_type": "code",
|
| 348 |
+
"execution_count": 73,
|
| 349 |
+
"id": "c9d6db57-c2e0-47a7-8bf6-9f1bd62ce638",
|
| 350 |
+
"metadata": {},
|
| 351 |
+
"outputs": [],
|
| 352 |
+
"source": [
|
| 353 |
+
"nlp.to_disk('custom-ner-model')"
|
| 354 |
+
]
|
| 355 |
+
},
|
| 356 |
+
{
|
| 357 |
+
"cell_type": "code",
|
| 358 |
+
"execution_count": 74,
|
| 359 |
+
"id": "3ebccd20-399c-4a2e-8b12-49f8126162c2",
|
| 360 |
+
"metadata": {},
|
| 361 |
+
"outputs": [],
|
| 362 |
+
"source": [
|
| 363 |
+
"trained_nlp = spacy.load('custom-ner-model')"
|
| 364 |
+
]
|
| 365 |
+
},
|
| 366 |
+
{
|
| 367 |
+
"cell_type": "code",
|
| 368 |
+
"execution_count": 75,
|
| 369 |
+
"id": "5e0c6dcd-dfa6-4a53-aab1-f37456445426",
|
| 370 |
+
"metadata": {},
|
| 371 |
+
"outputs": [],
|
| 372 |
+
"source": [
|
| 373 |
+
"import pandas as pd\n",
|
| 374 |
+
"\n",
|
| 375 |
+
"df = pd.read_csv('data/april-data.csv')"
|
| 376 |
+
]
|
| 377 |
+
},
|
| 378 |
+
{
|
| 379 |
+
"cell_type": "code",
|
| 380 |
+
"execution_count": 76,
|
| 381 |
+
"id": "4765cd4d-2ef5-480d-8af9-432d787528b6",
|
| 382 |
+
"metadata": {},
|
| 383 |
+
"outputs": [],
|
| 384 |
+
"source": [
|
| 385 |
+
"test_texts = df['Additional_info'].sample(n=20).to_list()"
|
| 386 |
+
]
|
| 387 |
+
},
|
| 388 |
+
{
|
| 389 |
+
"cell_type": "code",
|
| 390 |
+
"execution_count": 77,
|
| 391 |
+
"id": "060c15e0-69c4-4bac-a257-0a8333fba845",
|
| 392 |
+
"metadata": {},
|
| 393 |
+
"outputs": [
|
| 394 |
+
{
|
| 395 |
+
"name": "stdout",
|
| 396 |
+
"output_type": "stream",
|
| 397 |
+
"text": [
|
| 398 |
+
"TEXT: we have 5 different support programmes for those with additional needs form ages 14 upwards. We have a garden centre and this bundle would be great to be able to set up classes to get involved in nature, horticulture and offer this type of learning and connecting with nature that may not be available at home or school. We rely on funding streams which are being cut or stopped which means the variety of what we can offer becomes very limited and we appreciate all the donations, grants etc we can avail of to keep providing a meaningful experience\n",
|
| 399 |
+
"\n",
|
| 400 |
+
"ENTITIES: [('we have 5 different support programmes for those with additional needs form ages 14 upwards', 'CONTEXT')]\n",
|
| 401 |
+
"------------------------------------------------------------\n",
|
| 402 |
+
"TEXT: We have a wonderful outdoor area, with trees and planters and beautiful spaces for the children to explore in early years. However, due to budget constraints a lot of our resources for outside have become quite aged and worn out. It would be lovely to inject some newness into the area in order for the children to get the most of their outdoor learning experiences. As a school we have a Green Team who look after the outdoor areas, planting up, litter picking and we take a lot of pride in our school and want it to look impressive to passers by. It would be wonderful for us to win something like this.\n",
|
| 403 |
+
"\n",
|
| 404 |
+
"ENTITIES: [('We have a wonderful outdoor area', 'CONTEXT'), ('It would be lovely to inject some newness into the area in order for the children to get the most of their outdoor learning experiences', 'BENEFIT'), ('As a school we have a Green Team who look after the outdoor areas', 'CONTEXT'), ('It would be wonderful for us to win something like this.', 'BENEFIT')]\n",
|
| 405 |
+
"------------------------------------------------------------\n",
|
| 406 |
+
"TEXT: This year, our P3 class are through to the final of the Pocket Garden competition, with their Fairytale Alloa design. This design incorporates Alloa’s cultural, natural and industrial heritage. We would love this garden bundle to help our design come to life and to learn about how plants grow.\n",
|
| 407 |
+
"\n",
|
| 408 |
+
"ENTITIES: [('This design incorporates Alloa’s cultural, natural and industrial heritage', 'BENEFIT'), ('We would love this garden bundle to help our design come to life and to learn about how plants grow', 'USAGE')]\n",
|
| 409 |
+
"------------------------------------------------------------\n",
|
| 410 |
+
"TEXT: I work in an all age SEND school and for children with PMLD and complex medical needs which has only been open for two years \n",
|
| 411 |
+
"We have an empty garden which the children in the school would love to be able to plant with lots of sensory plants so that the PMLD children can enjoy alongside their peers \n",
|
| 412 |
+
"We would grow vegetables as well so that children can learn and see where their food comes from and use the produce in Food Technology with the secondary students\n",
|
| 413 |
+
"\n",
|
| 414 |
+
"ENTITIES: [('I work in an all age SEND school and for children with PMLD and complex medical needs which has only been open for two years \\r\\n', 'CONTEXT'), ('We have an empty garden which the children in the school would love to be able to plant with lots of sensory plants', 'CONTEXT'), ('We would grow vegetables as well so that children can learn and see where their food comes from and use the produce in Food Technology with the secondary students', 'USAGE')]\n",
|
| 415 |
+
"------------------------------------------------------------\n",
|
| 416 |
+
"TEXT: If we won the prize it would give our children to opportunity to take part in planting and growing flowers and some vegetables in our nursery garden. Most of our children live in tenement flats and don't have access to a garden. We would be able to purchase planters, small pots for the children to take home once their plant/flower has stated to grow. It would allow the children to observe nature in real time, which would expand on their own natural curiosity and wonder of their local environment, it would also give the children a sense of responsibility by reminding them to look after their flowers at home.\n",
|
| 417 |
+
"\n",
|
| 418 |
+
"ENTITIES: [('If we won the prize it would give our children to opportunity to take part in planting and growing flowers and some vegetables in our nursery garden', 'USAGE'), (\"Most of our children live in tenement flats and don't have access to a garden\", 'CONTEXT'), ('We would be able to purchase planters, small pots for the children to take home once their plant/flower has stated to grow', 'USAGE'), ('It would allow the children to observe nature in real time, which would expand on their own natural curiosity and wonder of their local environment', 'BENEFIT')]\n",
|
| 419 |
+
"------------------------------------------------------------\n",
|
| 420 |
+
"TEXT: My sons school is incredibly inclusive to those with SEN. They are opening up afterschool clubs for the students, gardening being one of them. These clubs are so important to these children and help with social interactions and friendships. \n",
|
| 421 |
+
"A gardening bundle and £500 would be absolutely amazing and will really help towards these clubs and interventions.\n",
|
| 422 |
+
"\n",
|
| 423 |
+
"ENTITIES: [('My sons school is incredibly inclusive to those with SEN', 'CONTEXT'), ('gardening being one of them.', 'BENEFIT'), ('A gardening bundle and', 'CONTEXT'), ('£500 would be absolutely amazing and will really help towards these clubs and interventions', 'USAGE')]\n",
|
| 424 |
+
"------------------------------------------------------------\n",
|
| 425 |
+
"TEXT: It would lovely to use the prize to enable the children to be able to grow their own fruit and vegetables to be able to use in learning how to cook healthy meals or sell fruit at breaktime to be able to reinvest to be able to carry on replenishing plants, bulbs and compost etc.\n",
|
| 426 |
+
"\n",
|
| 427 |
+
"ENTITIES: []\n",
|
| 428 |
+
"------------------------------------------------------------\n",
|
| 429 |
+
"TEXT: We already have a weekly gardening club where staff bring in their own seeds or off-cut from plants.\n",
|
| 430 |
+
"This bundle would allow us to take it further.\n",
|
| 431 |
+
"\n",
|
| 432 |
+
"ENTITIES: [('We already have a weekly gardening club where staff bring in their own seeds or off-cut from plants', 'CONTEXT'), ('This bundle would allow us to take it further.', 'BENEFIT')]\n",
|
| 433 |
+
"------------------------------------------------------------\n",
|
| 434 |
+
"TEXT: If I won the gardening bundle I would use it in my role of Emotional Literacy Support Assistant (ELSA) to support pupils wellbeing as well as in my SEND role. The vegetables we grow would then be used for cooking. Both gardening and cooking are great for improving wellbeing, following instructions, resilience and both fine and gross motor skills.\n",
|
| 435 |
+
"\n",
|
| 436 |
+
"ENTITIES: [('If I won the gardening bundle I', 'USAGE')]\n",
|
| 437 |
+
"------------------------------------------------------------\n",
|
| 438 |
+
"TEXT: Our upcoming theme is ‘Gardner’s World’ this would be perfect to bring the learning to life for our learners who all have severe learning difficulties.\n",
|
| 439 |
+
"\n",
|
| 440 |
+
"ENTITIES: [('Our upcoming theme is ‘Gardner’s World’ this would be perfect', 'CONTEXT'), ('to bring the learning to life for our learners who all have severe learning difficulties', 'BENEFIT')]\n",
|
| 441 |
+
"------------------------------------------------------------\n",
|
| 442 |
+
"TEXT: A local men's mental health group have made us some lovely wooden planters but we now need something to fill with with. Compost and plants are so expensive.\n",
|
| 443 |
+
"\n",
|
| 444 |
+
"ENTITIES: [(\"A local men's mental health group have made us some lovely wooden planters\", 'CONTEXT')]\n",
|
| 445 |
+
"------------------------------------------------------------\n",
|
| 446 |
+
"TEXT: We are a charity run pre school and rely on fundraising to purchase new equipment and furniture for the continued growth of the setting and to enhance the learning opportunities for the children. If we were to win these incredible prizes they would be used to update, revamp and revitalise our aging outdoor play area to enhance the children’s learning provision and experiences whilst exploring the great outdoors.\n",
|
| 447 |
+
"\n",
|
| 448 |
+
"ENTITIES: [('We are a charity run pre school and rely on fundraising to purchase new equipment and furniture for the continued growth of the setting and', 'CONTEXT'), ('to enhance the learning opportunities for the children', 'BENEFIT'), ('If we were to win these incredible prizes they would be used to update, revamp and revitalise our aging outdoor play area', 'USAGE')]\n",
|
| 449 |
+
"------------------------------------------------------------\n",
|
| 450 |
+
"TEXT: We have recently set up a Gardening Club for our students with SEN to help improve engagement and provide responsibility in a mainstream school. We are currently reliant on parent donations and missing key items such as spades to run this effectively.\n",
|
| 451 |
+
"\n",
|
| 452 |
+
"ENTITIES: [('We have recently set up a Gardening Club for our students with SEN to help improve engagement and provide responsibility in a mainstream school', 'CONTEXT'), ('We are currently reliant on parent donations and missing key items such as spades to run this effectively', 'CONTEXT')]\n",
|
| 453 |
+
"------------------------------------------------------------\n",
|
| 454 |
+
"TEXT: We are in the process of setting up a garden area on our field. We are recycling what we can to use in our garden - building bedding areas out of old decking and using other old bits of wood that have been donated to us. We are starting from scratch so we would use the money to purchase tools, seeds and flowers. We want to encourage our children to grow flowers and grow their own vegetables that we would then use in the meals supplied to our children at lunchtime.\n",
|
| 455 |
+
"\n",
|
| 456 |
+
"ENTITIES: [('We are in the process of setting up a garden area on our field', 'CONTEXT'), ('We are recycling what we can', 'CONTEXT'), ('We are starting from scratch', 'CONTEXT'), ('we would use the money to purchase tools, seeds and flowers', 'USAGE'), ('We want to encourage our children to grow flowers and grow their own vegetables that', 'USAGE'), ('we would then use in the meals supplied to our children at lunchtime', 'USAGE')]\n",
|
| 457 |
+
"------------------------------------------------------------\n",
|
| 458 |
+
"TEXT: We would plant edible fruit/veg/flowers to use in our cookery coursework as well as creating natural self-care products. We would explore the sensory aspects of plants as well as their historical uses. We are a group of 30+ young people with autism/ADHD and other additional support needs based in a area of low employment and are looking to improve our self esteem and confidence.\n",
|
| 459 |
+
"\n",
|
| 460 |
+
"ENTITIES: [('We would plant edible fruit/veg/flowers to use in our cookery coursework as well as creating natural self-care products', 'USAGE'), ('We would explore the sensory aspects of plants as well as their historical uses', 'USAGE'), ('We are a group of 30+ young people with autism/ADHD and other additional support needs based in a area of low employment and are looking to improve our self esteem and confidence', 'CONTEXT')]\n",
|
| 461 |
+
"------------------------------------------------------------\n",
|
| 462 |
+
"TEXT: This prize would mean the world for our school to win. Our school was built in 1836 and is a grade II listed building, it is so small we currently on have 94 students. Due to the size of our school our school playing field is a across the road and a short walk away from our building, which can make us all feel a little sad when we would like to be outdoors and enjoying gardening. However we do have an piece of small land at the front of school that has been used to create a sort of sensory area with scented plants however we do not have the resources to let the children have a go at planting and nuturing things. The children aswell as the staff would love to win a gardening bundle to plant the seeds and enjoy the food they grow..\n",
|
| 463 |
+
"\n",
|
| 464 |
+
"ENTITIES: [('Our school was built in 1836 and is a grade II listed building, it is so small we currently on have 94 students', 'CONTEXT'), ('The children aswell as the staff', 'CONTEXT')]\n",
|
| 465 |
+
"------------------------------------------------------------\n",
|
| 466 |
+
"TEXT: As a SEND school we value teaching life skills by teaching our pupils about the world all around them, this includes gardening and looking after their immediate environment, at home or at school. Unfortunately, we don't have a dedicated gardening teacher or many resources to support our teachers in providing the lessons. Our pupils love getting outdoors and watching their efforts grow before their eyes and more resources to enable better participation would really help support them in this endeavour, as well as support career choices as they grow through the school and into our 6th form college and beyond.\n",
|
| 467 |
+
"\n",
|
| 468 |
+
"ENTITIES: [('As a SEND school we value teaching life skills by teaching our pupils about the world all around them, this includes gardening and looking after their immediate environment', 'CONTEXT'), ('Our pupils love getting outdoors and', 'CONTEXT'), ('watching their efforts grow before their eyes and more resources to enable better participation would really help support them in this endeavour, as well as support career choices as they grow through the school and into our 6th form college and beyond', 'BENEFIT')]\n",
|
| 469 |
+
"------------------------------------------------------------\n",
|
| 470 |
+
"TEXT: The rural school setting, near Wem, is uniquely therapeutic and calming. Pupils who have struggled in traditional mainstream settings are helped to stabilise and thrive in a therapeutic learning environment.\n",
|
| 471 |
+
"\n",
|
| 472 |
+
"With a qualified therapist attached to the school and a talented team of teachers dedicated to improving children’s lives, Access School is well equipped to support young people with special educational needs.\n",
|
| 473 |
+
"\n",
|
| 474 |
+
"The school offers bespoke education for children aged 6-16 with social, emotional and mental health (SEMH), learning difficulties (LD) and mild learning difficulties (MLD). Young people with attachment difficulties, autism and ADHD have thrived in this learning environment, which includes a therapeutic village, sensory rooms and story massage among many other features.\n",
|
| 475 |
+
"\n",
|
| 476 |
+
"We are always looking for new experiences for our young people.\n",
|
| 477 |
+
"\n",
|
| 478 |
+
"ENTITIES: [('The rural school setting', 'CONTEXT'), ('With a qualified therapist attached to the school and a talented team of teachers dedicated to improving children’s lives, Access School is well equipped to support young people with special educational needs.', 'CONTEXT'), ('The school offers bespoke education for children aged 6-16 with social, emotional and mental health (SEMH), learning difficulties (LD) and mild learning difficulties (MLD). ', 'CONTEXT'), ('We are always looking for new experiences for our young people', 'CONTEXT')]\n",
|
| 479 |
+
"------------------------------------------------------------\n",
|
| 480 |
+
"TEXT: We are a stand alone local authority nursery in a deprived and diverse area of newcastle. Many of the children do not have access to outdoors as they live in flats with yards etc. The outdoor area in our setting is used all year round and in all weather's to give children the benefit of being outdoors. We are currently trying to improve our environment and allow children to have hands on gardening experiences, allowing them that early hands on involvement in caring for the environment. It would be amazing if we were successful not only for our present cohort but also for the future of our outdoors for all children . ?\n",
|
| 481 |
+
"\n",
|
| 482 |
+
"ENTITIES: [('We are a stand alone local authority nursery in a deprived and diverse area of newcastle. ', 'CONTEXT')]\n",
|
| 483 |
+
"------------------------------------------------------------\n",
|
| 484 |
+
"TEXT: We are trying to develop our outside area. It would be fantastic to have a gardening area where children can develop their knowledge of plants and grow their own food.\n",
|
| 485 |
+
"\n",
|
| 486 |
+
"ENTITIES: [('We are trying to', 'CONTEXT'), ('develop our outside area', 'BENEFIT'), ('develop their knowledge of plants and grow their own food.', 'BENEFIT')]\n",
|
| 487 |
+
"------------------------------------------------------------\n"
|
| 488 |
+
]
|
| 489 |
+
}
|
| 490 |
+
],
|
| 491 |
+
"source": [
|
| 492 |
+
"for text in test_texts:\n",
|
| 493 |
+
" doc = trained_nlp(text)\n",
|
| 494 |
+
" print(f\"TEXT: {text}\")\n",
|
| 495 |
+
" print()\n",
|
| 496 |
+
" print(\"ENTITIES:\", [(ent.text, ent.label_) for ent in doc.ents])\n",
|
| 497 |
+
" print('-'*60)\n",
|
| 498 |
+
" "
|
| 499 |
+
]
|
| 500 |
+
},
|
| 501 |
+
{
|
| 502 |
+
"cell_type": "code",
|
| 503 |
+
"execution_count": null,
|
| 504 |
+
"id": "c16ce1f4-a92c-4c68-b7c1-6c3082f278a3",
|
| 505 |
+
"metadata": {},
|
| 506 |
+
"outputs": [],
|
| 507 |
+
"source": []
|
| 508 |
+
}
|
| 509 |
+
],
|
| 510 |
+
"metadata": {
|
| 511 |
+
"kernelspec": {
|
| 512 |
+
"display_name": "Python 3 (ipykernel)",
|
| 513 |
+
"language": "python",
|
| 514 |
+
"name": "python3"
|
| 515 |
+
},
|
| 516 |
+
"language_info": {
|
| 517 |
+
"codemirror_mode": {
|
| 518 |
+
"name": "ipython",
|
| 519 |
+
"version": 3
|
| 520 |
+
},
|
| 521 |
+
"file_extension": ".py",
|
| 522 |
+
"mimetype": "text/x-python",
|
| 523 |
+
"name": "python",
|
| 524 |
+
"nbconvert_exporter": "python",
|
| 525 |
+
"pygments_lexer": "ipython3",
|
| 526 |
+
"version": "3.12.9"
|
| 527 |
+
}
|
| 528 |
+
},
|
| 529 |
+
"nbformat": 4,
|
| 530 |
+
"nbformat_minor": 5
|
| 531 |
+
}
|
notebooks/training-code.md
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Below are some observations and suggestions that may help you improve or validate your current training approach. Overall, the code “works” in the sense that it will run and produce a model—but there are a few important details that can make training more stable and give you better results, especially if you are using spaCy 3.x:
|
| 2 |
+
|
| 3 |
+
1) Mixing a pre‐trained pipeline with “begin_training()”
|
| 4 |
+
– In your code, you load the pre‐trained model “en_core_web_md” and then call nlp.begin_training(). In spaCy 3.x, begin_training() is typically used to initialize weights from scratch (as if you had a blank model). By loading “en_core_web_md,” you actually start off with weights that are already pre‐trained.
|
| 5 |
+
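– If your goal is to leverage the pre‐trained weights and fine‐tune them with your custom entities, it is safer to call nlp.resume_training(), which returns an optimizer while keeping the existing weights. Note that in spaCy 3.x nlp.initialize() is simply the new name for begin_training() and re‐initializes the components it runs on in the same way, so it is appropriate when training from scratch (for example, within a config-based training workflow) rather than when fine‐tuning.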
|
| 6 |
+
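– Using begin_training() on a pipeline that already has weights can re‐initialize those weights and discard or degrade what the pipeline had already learned, which leads to unexpected results (a loss of prior knowledge often described as “catastrophic forgetting”).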
|
| 7 |
+
|
| 8 |
+
2) Using a config-based workflow (spaCy 3.x)
|
| 9 |
+
– spaCy 3 introduced a config system that separates out hyperparameters and makes it easier to reproduce training. While the scripted approach you’re using (calling nlp.update in a loop) is still acceptable in simpler workflows, it does not take advantage of spaCy 3’s main training features (e.g., handling splits, logging, saving intermediate checkpoints, etc.).
|
| 10 |
+
– If you plan to maintain and iterate on this model, consider switching to a spaCy project with a config file. This ensures you can easily re-run training and track changes.
|
| 11 |
+
|
| 12 |
+
3) Using disable_pipes() vs. continuing to train the tagger/parser
|
| 13 |
+
– In your code, you disable all other pipes except NER. This means you’re not updating the parser, tagger, etc. If your goal truly is to train only the NER component on your new labels, that’s perfect. But if you ever want to keep the tagger or parser up-to-date, or avoid catastrophic forgetting, you’ll need to be more deliberate about which components are updated.
|
| 14 |
+
– Because you are loading “en_core_web_md,” it already includes a tagger, parser, etc. If you truly do not need them, you could remove them from the pipeline altogether.
|
| 15 |
+
|
| 16 |
+
4) Monitoring performance and stopping criteria
|
| 17 |
+
– You have a fixed 100-epoch training loop. You’ll see a final loss, but you have no early stopping or validation set, and the training loss alone does not tell you how well the model generalizes to unseen text. Adding at least a small dev set to measure precision, recall, and F-score each epoch can help you avoid overfitting and let you stop training once the model peaks.
|
| 18 |
+
– Because NER can easily overfit, you may not actually need 100 epochs. Sometimes 10–20 epochs is enough, especially for smaller datasets.
|
| 19 |
+
|
| 20 |
+
5) Dropout rate
|
| 21 |
+
– You’re using drop=0.5 in nlp.update(...). That can be okay for small data, but you might experiment with a lower dropout (or one that decays over the course of training). You may get better entity performance with something like 0.1–0.3. There’s no universal rule—experiment and see.
|
| 22 |
+
|
| 23 |
+
6) Overall structure looks good
|
| 24 |
+
– The logic for transforming your JSON labels into spaCy’s entity format, adding them to TRAIN_DATA, and iterating over minibatches is solid.
|
| 25 |
+
– You’re using the Example class from spacy.training.example, which is correct for spaCy 3.x code.
|
| 26 |
+
|
| 27 |
+
To summarize:
|
| 28 |
+
• The script will run and train a model, but if you are aiming to fine‐tune a pre‐trained pipeline (en_core_web_md), consider calling nlp.resume_training() instead of begin_training()/nlp.initialize(), or moving to a more modern approach (e.g., a config file trained through the spaCy CLI).
|
| 29 |
+
• Double‐check that you actually want to overwrite all of the existing pipeline’s layers with begin_training()—it can cause forgetting of the original pipeline’s knowledge.
|
| 30 |
+
• Add a small validation set (or cross‐validation approach) so that you can track performance beyond the raw loss, and consider early stopping or fewer epochs if the model is converging early.
|
| 31 |
+
|
| 32 |
+
If the model’s predictions are working for you in practice, that’s great! But if you want to push performance further, those are the key areas to refine.
|