wang commited on
Upload 404 files
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- .gitattributes +3 -0
- chinese_medical_ner/ccksyidu4k-ner-roformer/._README.md +0 -0
- chinese_medical_ner/ccksyidu4k-ner-roformer/.gitignore +158 -0
- chinese_medical_ner/ccksyidu4k-ner-roformer/.idea/NamedEntityRecognization.iml +12 -0
- chinese_medical_ner/ccksyidu4k-ner-roformer/.idea/csv-plugin.xml +16 -0
- chinese_medical_ner/ccksyidu4k-ner-roformer/.idea/deployment.xml +14 -0
- chinese_medical_ner/ccksyidu4k-ner-roformer/.idea/inspectionProfiles/Project_Default.xml +178 -0
- chinese_medical_ner/ccksyidu4k-ner-roformer/.idea/inspectionProfiles/profiles_settings.xml +6 -0
- chinese_medical_ner/ccksyidu4k-ner-roformer/.idea/misc.xml +4 -0
- chinese_medical_ner/ccksyidu4k-ner-roformer/.idea/modules.xml +8 -0
- chinese_medical_ner/ccksyidu4k-ner-roformer/.idea/remote-mappings.xml +16 -0
- chinese_medical_ner/ccksyidu4k-ner-roformer/.idea/vcs.xml +6 -0
- chinese_medical_ner/ccksyidu4k-ner-roformer/.idea/webServers.xml +14 -0
- chinese_medical_ner/ccksyidu4k-ner-roformer/.idea/workspace.xml +256 -0
- chinese_medical_ner/ccksyidu4k-ner-roformer/README.md +340 -0
- chinese_medical_ner/ccksyidu4k-ner-roformer/__pycache__/config.cpython-310.pyc +0 -0
- chinese_medical_ner/ccksyidu4k-ner-roformer/__pycache__/config.cpython-37.pyc +0 -0
- chinese_medical_ner/ccksyidu4k-ner-roformer/__pycache__/config.cpython-38.pyc +0 -0
- chinese_medical_ner/ccksyidu4k-ner-roformer/__pycache__/evaluate.cpython-310.pyc +0 -0
- chinese_medical_ner/ccksyidu4k-ner-roformer/__pycache__/evaluate.cpython-37.pyc +0 -0
- chinese_medical_ner/ccksyidu4k-ner-roformer/__pycache__/evaluate.cpython-38.pyc +0 -0
- chinese_medical_ner/ccksyidu4k-ner-roformer/__pycache__/model.cpython-310.pyc +0 -0
- chinese_medical_ner/ccksyidu4k-ner-roformer/__pycache__/model.cpython-37.pyc +0 -0
- chinese_medical_ner/ccksyidu4k-ner-roformer/__pycache__/model.cpython-38.pyc +0 -0
- chinese_medical_ner/ccksyidu4k-ner-roformer/__pycache__/path.cpython-310.pyc +0 -0
- chinese_medical_ner/ccksyidu4k-ner-roformer/__pycache__/path.cpython-37.pyc +0 -0
- chinese_medical_ner/ccksyidu4k-ner-roformer/__pycache__/path.cpython-38.pyc +0 -0
- chinese_medical_ner/ccksyidu4k-ner-roformer/__pycache__/plot.cpython-310.pyc +0 -0
- chinese_medical_ner/ccksyidu4k-ner-roformer/__pycache__/plot.cpython-37.pyc +0 -0
- chinese_medical_ner/ccksyidu4k-ner-roformer/__pycache__/plot.cpython-38.pyc +0 -0
- chinese_medical_ner/ccksyidu4k-ner-roformer/__pycache__/predict.cpython-37.pyc +0 -0
- chinese_medical_ner/ccksyidu4k-ner-roformer/__pycache__/preprocess.cpython-310.pyc +0 -0
- chinese_medical_ner/ccksyidu4k-ner-roformer/__pycache__/preprocess.cpython-37.pyc +0 -0
- chinese_medical_ner/ccksyidu4k-ner-roformer/__pycache__/preprocess.cpython-38.pyc +0 -0
- chinese_medical_ner/ccksyidu4k-ner-roformer/calc_bert_matrix.ipynb +534 -0
- chinese_medical_ner/ccksyidu4k-ner-roformer/config.py +9 -0
- chinese_medical_ner/ccksyidu4k-ner-roformer/cudnn-7.6.5-cuda10.0_0.conda +3 -0
- chinese_medical_ner/ccksyidu4k-ner-roformer/data/chip.train +0 -0
- chinese_medical_ner/ccksyidu4k-ner-roformer/data/chip.validate +0 -0
- chinese_medical_ner/ccksyidu4k-ner-roformer/data/yidu.test +0 -0
- chinese_medical_ner/ccksyidu4k-ner-roformer/data/yidu.train +0 -0
- chinese_medical_ner/ccksyidu4k-ner-roformer/data/yidu.validate +0 -0
- chinese_medical_ner/ccksyidu4k-ner-roformer/evaluate.py +154 -0
- chinese_medical_ner/ccksyidu4k-ner-roformer/evaluate_ner.py +359 -0
- chinese_medical_ner/ccksyidu4k-ner-roformer/images/chip_train_acc.png +0 -0
- chinese_medical_ner/ccksyidu4k-ner-roformer/images/chip_train_loss.png +0 -0
- chinese_medical_ner/ccksyidu4k-ner-roformer/images/chip_val_f1.png +0 -0
- chinese_medical_ner/ccksyidu4k-ner-roformer/images/downstream.png +3 -0
- chinese_medical_ner/ccksyidu4k-ner-roformer/images/model.jpg +3 -0
- chinese_medical_ner/ccksyidu4k-ner-roformer/images/yidu_train_acc.png +0 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
chinese_medical_ner/ccksyidu4k-ner-roformer/cudnn-7.6.5-cuda10.0_0.conda filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
chinese_medical_ner/ccksyidu4k-ner-roformer/images/downstream.png filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
chinese_medical_ner/ccksyidu4k-ner-roformer/images/model.jpg filter=lfs diff=lfs merge=lfs -text
|
chinese_medical_ner/ccksyidu4k-ner-roformer/._README.md
ADDED
|
Binary file (4.1 kB). View file
|
|
|
chinese_medical_ner/ccksyidu4k-ner-roformer/.gitignore
ADDED
|
@@ -0,0 +1,158 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
### JupyterNotebooks template
|
| 2 |
+
# gitignore template for Jupyter Notebooks
|
| 3 |
+
# website: http://jupyter.org/
|
| 4 |
+
|
| 5 |
+
.ipynb_checkpoints
|
| 6 |
+
*/.ipynb_checkpoints/*
|
| 7 |
+
|
| 8 |
+
# IPython
|
| 9 |
+
profile_default/
|
| 10 |
+
ipython_config.py
|
| 11 |
+
|
| 12 |
+
# Remove previous ipynb_checkpoints
|
| 13 |
+
# git rm -r .ipynb_checkpoints/
|
| 14 |
+
|
| 15 |
+
### Python template
|
| 16 |
+
# Byte-compiled / optimized / DLL files
|
| 17 |
+
__pycache__/
|
| 18 |
+
*.py[cod]
|
| 19 |
+
*$py.class
|
| 20 |
+
|
| 21 |
+
# C extensions
|
| 22 |
+
*.so
|
| 23 |
+
|
| 24 |
+
# Distribution / packaging
|
| 25 |
+
.Python
|
| 26 |
+
build/
|
| 27 |
+
develop-eggs/
|
| 28 |
+
dist/
|
| 29 |
+
downloads/
|
| 30 |
+
eggs/
|
| 31 |
+
.eggs/
|
| 32 |
+
lib/
|
| 33 |
+
lib64/
|
| 34 |
+
parts/
|
| 35 |
+
sdist/
|
| 36 |
+
var/
|
| 37 |
+
wheels/
|
| 38 |
+
share/python-wheels/
|
| 39 |
+
*.egg-info/
|
| 40 |
+
.installed.cfg
|
| 41 |
+
*.egg
|
| 42 |
+
MANIFEST
|
| 43 |
+
|
| 44 |
+
# PyInstaller
|
| 45 |
+
# Usually these files are written by a python script from a template
|
| 46 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
| 47 |
+
*.manifest
|
| 48 |
+
*.spec
|
| 49 |
+
|
| 50 |
+
# Installer logs
|
| 51 |
+
pip-log.txt
|
| 52 |
+
pip-delete-this-directory.txt
|
| 53 |
+
|
| 54 |
+
# Unit test / coverage reports
|
| 55 |
+
htmlcov/
|
| 56 |
+
.tox/
|
| 57 |
+
.nox/
|
| 58 |
+
.coverage
|
| 59 |
+
.coverage.*
|
| 60 |
+
.cache
|
| 61 |
+
nosetests.xml
|
| 62 |
+
coverage.xml
|
| 63 |
+
*.cover
|
| 64 |
+
*.py,cover
|
| 65 |
+
.hypothesis/
|
| 66 |
+
.pytest_cache/
|
| 67 |
+
cover/
|
| 68 |
+
|
| 69 |
+
# Translations
|
| 70 |
+
*.mo
|
| 71 |
+
*.pot
|
| 72 |
+
|
| 73 |
+
# Django stuff:
|
| 74 |
+
*.log
|
| 75 |
+
local_settings.py
|
| 76 |
+
db.sqlite3
|
| 77 |
+
db.sqlite3-journal
|
| 78 |
+
|
| 79 |
+
# Flask stuff:
|
| 80 |
+
instance/
|
| 81 |
+
.webassets-cache
|
| 82 |
+
|
| 83 |
+
# Scrapy stuff:
|
| 84 |
+
.scrapy
|
| 85 |
+
|
| 86 |
+
# Sphinx documentation
|
| 87 |
+
docs/_build/
|
| 88 |
+
|
| 89 |
+
# PyBuilder
|
| 90 |
+
.pybuilder/
|
| 91 |
+
target/
|
| 92 |
+
|
| 93 |
+
# Jupyter Notebook
|
| 94 |
+
.ipynb_checkpoints
|
| 95 |
+
|
| 96 |
+
# IPython
|
| 97 |
+
profile_default/
|
| 98 |
+
ipython_config.py
|
| 99 |
+
|
| 100 |
+
# pyenv
|
| 101 |
+
# For a library or package, you might want to ignore these files since the code is
|
| 102 |
+
# intended to run in multiple environments; otherwise, check them in:
|
| 103 |
+
# .python-version
|
| 104 |
+
|
| 105 |
+
# pipenv
|
| 106 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
| 107 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
| 108 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
| 109 |
+
# install all needed dependencies.
|
| 110 |
+
#Pipfile.lock
|
| 111 |
+
|
| 112 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
|
| 113 |
+
__pypackages__/
|
| 114 |
+
|
| 115 |
+
# Celery stuff
|
| 116 |
+
celerybeat-schedule
|
| 117 |
+
celerybeat.pid
|
| 118 |
+
|
| 119 |
+
# SageMath parsed files
|
| 120 |
+
*.sage.py
|
| 121 |
+
|
| 122 |
+
# Environments
|
| 123 |
+
.env
|
| 124 |
+
.venv
|
| 125 |
+
env/
|
| 126 |
+
venv/
|
| 127 |
+
ENV/
|
| 128 |
+
env.bak/
|
| 129 |
+
venv.bak/
|
| 130 |
+
|
| 131 |
+
# Spyder project settings
|
| 132 |
+
.spyderproject
|
| 133 |
+
.spyproject
|
| 134 |
+
|
| 135 |
+
# Rope project settings
|
| 136 |
+
.ropeproject
|
| 137 |
+
|
| 138 |
+
# mkdocs documentation
|
| 139 |
+
/site
|
| 140 |
+
|
| 141 |
+
# mypy
|
| 142 |
+
.mypy_cache/
|
| 143 |
+
.dmypy.json
|
| 144 |
+
dmypy.json
|
| 145 |
+
|
| 146 |
+
# Pyre type checker
|
| 147 |
+
.pyre/
|
| 148 |
+
|
| 149 |
+
# pytype static type analyzer
|
| 150 |
+
.pytype/
|
| 151 |
+
|
| 152 |
+
# Cython debug symbols
|
| 153 |
+
cython_debug/
|
| 154 |
+
|
| 155 |
+
chinese_roformer-v2-char_L-6_H-384_A-6/*.ckpt*
|
| 156 |
+
chinese_roformer-v2-char_L-12_H-768_A-12/*.ckpt*
|
| 157 |
+
weights/*.h5
|
| 158 |
+
data/lung.*
|
chinese_medical_ner/ccksyidu4k-ner-roformer/.idea/NamedEntityRecognization.iml
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<module type="PYTHON_MODULE" version="4">
|
| 3 |
+
<component name="NewModuleRootManager">
|
| 4 |
+
<content url="file://$MODULE_DIR$" />
|
| 5 |
+
<orderEntry type="jdk" jdkName="Python 3.7 (tf_v1)" jdkType="Python SDK" />
|
| 6 |
+
<orderEntry type="sourceFolder" forTests="false" />
|
| 7 |
+
</component>
|
| 8 |
+
<component name="PyDocumentationSettings">
|
| 9 |
+
<option name="format" value="PLAIN" />
|
| 10 |
+
<option name="myDocStringFormat" value="Plain" />
|
| 11 |
+
</component>
|
| 12 |
+
</module>
|
chinese_medical_ner/ccksyidu4k-ner-roformer/.idea/csv-plugin.xml
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<project version="4">
|
| 3 |
+
<component name="CsvFileAttributes">
|
| 4 |
+
<option name="attributeMap">
|
| 5 |
+
<map>
|
| 6 |
+
<entry key="/report/yidu_bert_base.csv">
|
| 7 |
+
<value>
|
| 8 |
+
<Attribute>
|
| 9 |
+
<option name="separator" value="," />
|
| 10 |
+
</Attribute>
|
| 11 |
+
</value>
|
| 12 |
+
</entry>
|
| 13 |
+
</map>
|
| 14 |
+
</option>
|
| 15 |
+
</component>
|
| 16 |
+
</project>
|
chinese_medical_ner/ccksyidu4k-ner-roformer/.idea/deployment.xml
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<project version="4">
|
| 3 |
+
<component name="PublishConfigData" serverName="NamedEntityRecognization" createEmptyFolders="true" remoteFilesAllowedToDisappearOnAutoupload="false">
|
| 4 |
+
<serverData>
|
| 5 |
+
<paths name="NamedEntityRecognization">
|
| 6 |
+
<serverdata>
|
| 7 |
+
<mappings>
|
| 8 |
+
<mapping deploy="/home/bureaux/Projects/NamedEntityRecognization" local="$PROJECT_DIR$" web="/" />
|
| 9 |
+
</mappings>
|
| 10 |
+
</serverdata>
|
| 11 |
+
</paths>
|
| 12 |
+
</serverData>
|
| 13 |
+
</component>
|
| 14 |
+
</project>
|
chinese_medical_ner/ccksyidu4k-ner-roformer/.idea/inspectionProfiles/Project_Default.xml
ADDED
|
@@ -0,0 +1,178 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<component name="InspectionProjectProfileManager">
|
| 2 |
+
<profile version="1.0">
|
| 3 |
+
<option name="myName" value="Project Default" />
|
| 4 |
+
<inspection_tool class="DuplicatedCode" enabled="true" level="WEAK WARNING" enabled_by_default="true">
|
| 5 |
+
<Languages>
|
| 6 |
+
<language minSize="147" name="Python" />
|
| 7 |
+
</Languages>
|
| 8 |
+
</inspection_tool>
|
| 9 |
+
<inspection_tool class="JupyterPackageInspection" enabled="false" level="WARNING" enabled_by_default="false" />
|
| 10 |
+
<inspection_tool class="PyPackageRequirementsInspection" enabled="false" level="WARNING" enabled_by_default="false">
|
| 11 |
+
<option name="ignoredPackages">
|
| 12 |
+
<value>
|
| 13 |
+
<list size="127">
|
| 14 |
+
<item index="0" class="java.lang.String" itemvalue="h5py" />
|
| 15 |
+
<item index="1" class="java.lang.String" itemvalue="six" />
|
| 16 |
+
<item index="2" class="java.lang.String" itemvalue="keras-bert" />
|
| 17 |
+
<item index="3" class="java.lang.String" itemvalue="keras-transformer" />
|
| 18 |
+
<item index="4" class="java.lang.String" itemvalue="absl-py" />
|
| 19 |
+
<item index="5" class="java.lang.String" itemvalue="google-pasta" />
|
| 20 |
+
<item index="6" class="java.lang.String" itemvalue="protobuf" />
|
| 21 |
+
<item index="7" class="java.lang.String" itemvalue="decorator" />
|
| 22 |
+
<item index="8" class="java.lang.String" itemvalue="tensorflow-estimator" />
|
| 23 |
+
<item index="9" class="java.lang.String" itemvalue="joblib" />
|
| 24 |
+
<item index="10" class="java.lang.String" itemvalue="threadpoolctl" />
|
| 25 |
+
<item index="11" class="java.lang.String" itemvalue="opt-einsum" />
|
| 26 |
+
<item index="12" class="java.lang.String" itemvalue="scikit-learn" />
|
| 27 |
+
<item index="13" class="java.lang.String" itemvalue="PyYAML" />
|
| 28 |
+
<item index="14" class="java.lang.String" itemvalue="cycler" />
|
| 29 |
+
<item index="15" class="java.lang.String" itemvalue="gast" />
|
| 30 |
+
<item index="16" class="java.lang.String" itemvalue="numpy" />
|
| 31 |
+
<item index="17" class="java.lang.String" itemvalue="importlib-metadata" />
|
| 32 |
+
<item index="18" class="java.lang.String" itemvalue="Keras-Preprocessing" />
|
| 33 |
+
<item index="19" class="java.lang.String" itemvalue="tensorflow" />
|
| 34 |
+
<item index="20" class="java.lang.String" itemvalue="Pygments" />
|
| 35 |
+
<item index="21" class="java.lang.String" itemvalue="pyzmq" />
|
| 36 |
+
<item index="22" class="java.lang.String" itemvalue="certifi" />
|
| 37 |
+
<item index="23" class="java.lang.String" itemvalue="prompt-toolkit" />
|
| 38 |
+
<item index="24" class="java.lang.String" itemvalue="cached-property" />
|
| 39 |
+
<item index="25" class="java.lang.String" itemvalue="Markdown" />
|
| 40 |
+
<item index="26" class="java.lang.String" itemvalue="scipy" />
|
| 41 |
+
<item index="27" class="java.lang.String" itemvalue="Werkzeug" />
|
| 42 |
+
<item index="28" class="java.lang.String" itemvalue="opencv-python" />
|
| 43 |
+
<item index="29" class="java.lang.String" itemvalue="parso" />
|
| 44 |
+
<item index="30" class="java.lang.String" itemvalue="wrapt" />
|
| 45 |
+
<item index="31" class="java.lang.String" itemvalue="astor" />
|
| 46 |
+
<item index="32" class="java.lang.String" itemvalue="ipython" />
|
| 47 |
+
<item index="33" class="java.lang.String" itemvalue="kiwisolver" />
|
| 48 |
+
<item index="34" class="java.lang.String" itemvalue="typing-extensions" />
|
| 49 |
+
<item index="35" class="java.lang.String" itemvalue="jupyter-client" />
|
| 50 |
+
<item index="36" class="java.lang.String" itemvalue="ipykernel" />
|
| 51 |
+
<item index="37" class="java.lang.String" itemvalue="Keras-Applications" />
|
| 52 |
+
<item index="38" class="java.lang.String" itemvalue="appnope" />
|
| 53 |
+
<item index="39" class="java.lang.String" itemvalue="pandas" />
|
| 54 |
+
<item index="40" class="java.lang.String" itemvalue="termcolor" />
|
| 55 |
+
<item index="41" class="java.lang.String" itemvalue="tensorboard" />
|
| 56 |
+
<item index="42" class="java.lang.String" itemvalue="matplotlib" />
|
| 57 |
+
<item index="43" class="java.lang.String" itemvalue="grpcio" />
|
| 58 |
+
<item index="44" class="java.lang.String" itemvalue="Keras" />
|
| 59 |
+
<item index="45" class="java.lang.String" itemvalue="pytz" />
|
| 60 |
+
<item index="46" class="java.lang.String" itemvalue="Pillow" />
|
| 61 |
+
<item index="47" class="java.lang.String" itemvalue="seqeval" />
|
| 62 |
+
<item index="48" class="java.lang.String" itemvalue="keras-embed-sim" />
|
| 63 |
+
<item index="49" class="java.lang.String" itemvalue="sklearn" />
|
| 64 |
+
<item index="50" class="java.lang.String" itemvalue="keras-position-wise-feed-forward" />
|
| 65 |
+
<item index="51" class="java.lang.String" itemvalue="keras-pos-embd" />
|
| 66 |
+
<item index="52" class="java.lang.String" itemvalue="keras-self-attention" />
|
| 67 |
+
<item index="53" class="java.lang.String" itemvalue="keras-layer-normalization" />
|
| 68 |
+
<item index="54" class="java.lang.String" itemvalue="keras-multi-head" />
|
| 69 |
+
<item index="55" class="java.lang.String" itemvalue="jedi" />
|
| 70 |
+
<item index="56" class="java.lang.String" itemvalue="pyDeprecate" />
|
| 71 |
+
<item index="57" class="java.lang.String" itemvalue="pytorch-lightning" />
|
| 72 |
+
<item index="58" class="java.lang.String" itemvalue="aiohttp" />
|
| 73 |
+
<item index="59" class="java.lang.String" itemvalue="packaging" />
|
| 74 |
+
<item index="60" class="java.lang.String" itemvalue="torch" />
|
| 75 |
+
<item index="61" class="java.lang.String" itemvalue="pyparsing" />
|
| 76 |
+
<item index="62" class="java.lang.String" itemvalue="torchvision" />
|
| 77 |
+
<item index="63" class="java.lang.String" itemvalue="traitlets" />
|
| 78 |
+
<item index="64" class="java.lang.String" itemvalue="testpath" />
|
| 79 |
+
<item index="65" class="java.lang.String" itemvalue="pickleshare" />
|
| 80 |
+
<item index="66" class="java.lang.String" itemvalue="python-dateutil" />
|
| 81 |
+
<item index="67" class="java.lang.String" itemvalue="defusedxml" />
|
| 82 |
+
<item index="68" class="java.lang.String" itemvalue="nbclient" />
|
| 83 |
+
<item index="69" class="java.lang.String" itemvalue="QtPy" />
|
| 84 |
+
<item index="70" class="java.lang.String" itemvalue="MarkupSafe" />
|
| 85 |
+
<item index="71" class="java.lang.String" itemvalue="pycparser" />
|
| 86 |
+
<item index="72" class="java.lang.String" itemvalue="pyasn1-modules" />
|
| 87 |
+
<item index="73" class="java.lang.String" itemvalue="ipython-genutils" />
|
| 88 |
+
<item index="74" class="java.lang.String" itemvalue="jupyterlab-widgets" />
|
| 89 |
+
<item index="75" class="java.lang.String" itemvalue="bleach" />
|
| 90 |
+
<item index="76" class="java.lang.String" itemvalue="oauthlib" />
|
| 91 |
+
<item index="77" class="java.lang.String" itemvalue="astunparse" />
|
| 92 |
+
<item index="78" class="java.lang.String" itemvalue="entrypoints" />
|
| 93 |
+
<item index="79" class="java.lang.String" itemvalue="jsonschema" />
|
| 94 |
+
<item index="80" class="java.lang.String" itemvalue="notebook" />
|
| 95 |
+
<item index="81" class="java.lang.String" itemvalue="qtconsole" />
|
| 96 |
+
<item index="82" class="java.lang.String" itemvalue="terminado" />
|
| 97 |
+
<item index="83" class="java.lang.String" itemvalue="argcomplete" />
|
| 98 |
+
<item index="84" class="java.lang.String" itemvalue="tensorboard-data-server" />
|
| 99 |
+
<item index="85" class="java.lang.String" itemvalue="pexpect" />
|
| 100 |
+
<item index="86" class="java.lang.String" itemvalue="jupyterlab-pygments" />
|
| 101 |
+
<item index="87" class="java.lang.String" itemvalue="nbconvert" />
|
| 102 |
+
<item index="88" class="java.lang.String" itemvalue="attrs" />
|
| 103 |
+
<item index="89" class="java.lang.String" itemvalue="cn2an" />
|
| 104 |
+
<item index="90" class="java.lang.String" itemvalue="flatbuffers" />
|
| 105 |
+
<item index="91" class="java.lang.String" itemvalue="backcall" />
|
| 106 |
+
<item index="92" class="java.lang.String" itemvalue="widgetsnbextension" />
|
| 107 |
+
<item index="93" class="java.lang.String" itemvalue="charset-normalizer" />
|
| 108 |
+
<item index="94" class="java.lang.String" itemvalue="idna" />
|
| 109 |
+
<item index="95" class="java.lang.String" itemvalue="rsa" />
|
| 110 |
+
<item index="96" class="java.lang.String" itemvalue="jupyter-core" />
|
| 111 |
+
<item index="97" class="java.lang.String" itemvalue="tensorflow-addons" />
|
| 112 |
+
<item index="98" class="java.lang.String" itemvalue="matplotlib-inline" />
|
| 113 |
+
<item index="99" class="java.lang.String" itemvalue="ptyprocess" />
|
| 114 |
+
<item index="100" class="java.lang.String" itemvalue="cffi" />
|
| 115 |
+
<item index="101" class="java.lang.String" itemvalue="pandocfilters" />
|
| 116 |
+
<item index="102" class="java.lang.String" itemvalue="wcwidth" />
|
| 117 |
+
<item index="103" class="java.lang.String" itemvalue="pyasn1" />
|
| 118 |
+
<item index="104" class="java.lang.String" itemvalue="requests" />
|
| 119 |
+
<item index="105" class="java.lang.String" itemvalue="Jinja2" />
|
| 120 |
+
<item index="106" class="java.lang.String" itemvalue="typeguard" />
|
| 121 |
+
<item index="107" class="java.lang.String" itemvalue="pyrsistent" />
|
| 122 |
+
<item index="108" class="java.lang.String" itemvalue="requests-oauthlib" />
|
| 123 |
+
<item index="109" class="java.lang.String" itemvalue="jupyter" />
|
| 124 |
+
<item index="110" class="java.lang.String" itemvalue="tensorboard-plugin-wit" />
|
| 125 |
+
<item index="111" class="java.lang.String" itemvalue="zipp" />
|
| 126 |
+
<item index="112" class="java.lang.String" itemvalue="nest-asyncio" />
|
| 127 |
+
<item index="113" class="java.lang.String" itemvalue="urllib3" />
|
| 128 |
+
<item index="114" class="java.lang.String" itemvalue="ipywidgets" />
|
| 129 |
+
<item index="115" class="java.lang.String" itemvalue="tornado" />
|
| 130 |
+
<item index="116" class="java.lang.String" itemvalue="google-auth-oauthlib" />
|
| 131 |
+
<item index="117" class="java.lang.String" itemvalue="nbformat" />
|
| 132 |
+
<item index="118" class="java.lang.String" itemvalue="Send2Trash" />
|
| 133 |
+
<item index="119" class="java.lang.String" itemvalue="prometheus-client" />
|
| 134 |
+
<item index="120" class="java.lang.String" itemvalue="mistune" />
|
| 135 |
+
<item index="121" class="java.lang.String" itemvalue="jupyter-console" />
|
| 136 |
+
<item index="122" class="java.lang.String" itemvalue="cachetools" />
|
| 137 |
+
<item index="123" class="java.lang.String" itemvalue="debugpy" />
|
| 138 |
+
<item index="124" class="java.lang.String" itemvalue="argon2-cffi" />
|
| 139 |
+
<item index="125" class="java.lang.String" itemvalue="webencodings" />
|
| 140 |
+
<item index="126" class="java.lang.String" itemvalue="google-auth" />
|
| 141 |
+
</list>
|
| 142 |
+
</value>
|
| 143 |
+
</option>
|
| 144 |
+
</inspection_tool>
|
| 145 |
+
<inspection_tool class="PyPep8Inspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
|
| 146 |
+
<option name="ignoredErrors">
|
| 147 |
+
<list>
|
| 148 |
+
<option value="E501" />
|
| 149 |
+
<option value="E122" />
|
| 150 |
+
<option value="W292" />
|
| 151 |
+
</list>
|
| 152 |
+
</option>
|
| 153 |
+
</inspection_tool>
|
| 154 |
+
<inspection_tool class="PyPep8NamingInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
|
| 155 |
+
<option name="ignoredErrors">
|
| 156 |
+
<list>
|
| 157 |
+
<option value="N803" />
|
| 158 |
+
<option value="N802" />
|
| 159 |
+
<option value="N806" />
|
| 160 |
+
</list>
|
| 161 |
+
</option>
|
| 162 |
+
</inspection_tool>
|
| 163 |
+
<inspection_tool class="PyUnresolvedReferencesInspection" enabled="true" level="WARNING" enabled_by_default="true">
|
| 164 |
+
<option name="ignoredIdentifiers">
|
| 165 |
+
<list>
|
| 166 |
+
<option value="utils.backend.keras" />
|
| 167 |
+
<option value="utils.backend.K" />
|
| 168 |
+
<option value="utils.backend.sparse_multilabel_categorical_crossentropy" />
|
| 169 |
+
</list>
|
| 170 |
+
</option>
|
| 171 |
+
</inspection_tool>
|
| 172 |
+
<inspection_tool class="SpellCheckingInspection" enabled="false" level="TYPO" enabled_by_default="false">
|
| 173 |
+
<option name="processCode" value="true" />
|
| 174 |
+
<option name="processLiterals" value="true" />
|
| 175 |
+
<option name="processComments" value="true" />
|
| 176 |
+
</inspection_tool>
|
| 177 |
+
</profile>
|
| 178 |
+
</component>
|
chinese_medical_ner/ccksyidu4k-ner-roformer/.idea/inspectionProfiles/profiles_settings.xml
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<component name="InspectionProjectProfileManager">
|
| 2 |
+
<settings>
|
| 3 |
+
<option name="USE_PROJECT_PROFILE" value="false" />
|
| 4 |
+
<version value="1.0" />
|
| 5 |
+
</settings>
|
| 6 |
+
</component>
|
chinese_medical_ner/ccksyidu4k-ner-roformer/.idea/misc.xml
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<project version="4">
|
| 3 |
+
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7 (tf_v1)" project-jdk-type="Python SDK" />
|
| 4 |
+
</project>
|
chinese_medical_ner/ccksyidu4k-ner-roformer/.idea/modules.xml
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<project version="4">
|
| 3 |
+
<component name="ProjectModuleManager">
|
| 4 |
+
<modules>
|
| 5 |
+
<module fileurl="file://$PROJECT_DIR$/.idea/NamedEntityRecognization.iml" filepath="$PROJECT_DIR$/.idea/NamedEntityRecognization.iml" />
|
| 6 |
+
</modules>
|
| 7 |
+
</component>
|
| 8 |
+
</project>
|
chinese_medical_ner/ccksyidu4k-ner-roformer/.idea/remote-mappings.xml
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<project version="4">
|
| 3 |
+
<component name="RemoteMappingsManager">
|
| 4 |
+
<list>
|
| 5 |
+
<list>
|
| 6 |
+
<remote-mappings server-id="python@sftp://bureaux@180.169.131.147:22/home/bureaux/miniconda3/envs/Keras-base/bin/python">
|
| 7 |
+
<settings>
|
| 8 |
+
<list>
|
| 9 |
+
<mapping local-root="$PROJECT_DIR$" remote-root="/home/bureaux/Projects/NamedEntityRecognization" />
|
| 10 |
+
</list>
|
| 11 |
+
</settings>
|
| 12 |
+
</remote-mappings>
|
| 13 |
+
</list>
|
| 14 |
+
</list>
|
| 15 |
+
</component>
|
| 16 |
+
</project>
|
chinese_medical_ner/ccksyidu4k-ner-roformer/.idea/vcs.xml
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<project version="4">
|
| 3 |
+
<component name="VcsDirectoryMappings">
|
| 4 |
+
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
| 5 |
+
</component>
|
| 6 |
+
</project>
|
chinese_medical_ner/ccksyidu4k-ner-roformer/.idea/webServers.xml
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<project version="4">
|
| 3 |
+
<component name="WebServers">
|
| 4 |
+
<option name="servers">
|
| 5 |
+
<webServer id="fb160272-0942-419e-87dd-a353536a93b5" name="NamedEntityRecognization">
|
| 6 |
+
<fileTransfer accessType="SFTP" host="180.169.131.147" port="22" sshConfigId="03272ad8-3c65-4cd1-95f0-0886d605abb3" sshConfig="bureaux@180.169.131.147:22 password">
|
| 7 |
+
<advancedOptions>
|
| 8 |
+
<advancedOptions dataProtectionLevel="Private" passiveMode="true" shareSSLContext="true" />
|
| 9 |
+
</advancedOptions>
|
| 10 |
+
</fileTransfer>
|
| 11 |
+
</webServer>
|
| 12 |
+
</option>
|
| 13 |
+
</component>
|
| 14 |
+
</project>
|
chinese_medical_ner/ccksyidu4k-ner-roformer/.idea/workspace.xml
ADDED
|
@@ -0,0 +1,256 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<project version="4">
|
| 3 |
+
<component name="AutoImportSettings">
|
| 4 |
+
<option name="autoReloadType" value="SELECTIVE" />
|
| 5 |
+
</component>
|
| 6 |
+
<component name="ChangeListManager">
|
| 7 |
+
<list default="true" id="626a281e-0f78-4eb9-9469-6e0d7f35140d" name="变更" comment="">
|
| 8 |
+
<change afterPath="$PROJECT_DIR$/report/crf_trans_yidu_visual.xlsx" afterDir="false" />
|
| 9 |
+
<change beforePath="$PROJECT_DIR$/.idea/NamedEntityRecognization.iml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/NamedEntityRecognization.iml" afterDir="false" />
|
| 10 |
+
<change beforePath="$PROJECT_DIR$/.idea/deployment.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/deployment.xml" afterDir="false" />
|
| 11 |
+
<change beforePath="$PROJECT_DIR$/.idea/misc.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/misc.xml" afterDir="false" />
|
| 12 |
+
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
|
| 13 |
+
<change beforePath="$PROJECT_DIR$/config.py" beforeDir="false" afterPath="$PROJECT_DIR$/config.py" afterDir="false" />
|
| 14 |
+
<change beforePath="$PROJECT_DIR$/path.py" beforeDir="false" afterPath="$PROJECT_DIR$/path.py" afterDir="false" />
|
| 15 |
+
<change beforePath="$PROJECT_DIR$/preprocess.py" beforeDir="false" afterPath="$PROJECT_DIR$/preprocess.py" afterDir="false" />
|
| 16 |
+
<change beforePath="$PROJECT_DIR$/train.py" beforeDir="false" afterPath="$PROJECT_DIR$/train.py" afterDir="false" />
|
| 17 |
+
<change beforePath="$PROJECT_DIR$/utils/snippets.py" beforeDir="false" afterPath="$PROJECT_DIR$/utils/snippets.py" afterDir="false" />
|
| 18 |
+
</list>
|
| 19 |
+
<option name="SHOW_DIALOG" value="false" />
|
| 20 |
+
<option name="HIGHLIGHT_CONFLICTS" value="true" />
|
| 21 |
+
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
|
| 22 |
+
<option name="LAST_RESOLUTION" value="IGNORE" />
|
| 23 |
+
</component>
|
| 24 |
+
<component name="Git.Settings">
|
| 25 |
+
<option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
|
| 26 |
+
</component>
|
| 27 |
+
<component name="MarkdownSettingsMigration">
|
| 28 |
+
<option name="stateVersion" value="1" />
|
| 29 |
+
</component>
|
| 30 |
+
<component name="ProjectId" id="27LFq9lTgR3bspJNCu2Zpj2aqJy" />
|
| 31 |
+
<component name="ProjectLevelVcsManager" settingsEditedManually="true" />
|
| 32 |
+
<component name="ProjectViewState">
|
| 33 |
+
<option name="hideEmptyMiddlePackages" value="true" />
|
| 34 |
+
<option name="showLibraryContents" value="true" />
|
| 35 |
+
</component>
|
| 36 |
+
<component name="PropertiesComponent">{
|
| 37 |
+
"keyToString": {
|
| 38 |
+
"WebServerToolWindowFactoryState": "true",
|
| 39 |
+
"last_opened_file_path": "/Volumes/Riesling/TRAIN/AI-base/src/NamedEntityRecognization/report",
|
| 40 |
+
"node.js.detected.package.eslint": "true",
|
| 41 |
+
"node.js.detected.package.tslint": "true",
|
| 42 |
+
"node.js.selected.package.eslint": "(autodetect)",
|
| 43 |
+
"node.js.selected.package.tslint": "(autodetect)",
|
| 44 |
+
"nodejs_package_manager_path": "npm",
|
| 45 |
+
"settings.editor.selected.configurable": "com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable"
|
| 46 |
+
}
|
| 47 |
+
}</component>
|
| 48 |
+
<component name="RecentsManager">
|
| 49 |
+
<key name="CopyFile.RECENT_KEYS">
|
| 50 |
+
<recent name="$PROJECT_DIR$/report" />
|
| 51 |
+
<recent name="$PROJECT_DIR$/data" />
|
| 52 |
+
<recent name="$PROJECT_DIR$" />
|
| 53 |
+
</key>
|
| 54 |
+
</component>
|
| 55 |
+
<component name="RunManager" selected="Python.predict">
|
| 56 |
+
<configuration name="evaluate" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
|
| 57 |
+
<module name="NamedEntityRecognization" />
|
| 58 |
+
<option name="INTERPRETER_OPTIONS" value="" />
|
| 59 |
+
<option name="PARENT_ENVS" value="true" />
|
| 60 |
+
<envs>
|
| 61 |
+
<env name="PYTHONUNBUFFERED" value="1" />
|
| 62 |
+
</envs>
|
| 63 |
+
<option name="SDK_HOME" value="" />
|
| 64 |
+
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
|
| 65 |
+
<option name="IS_MODULE_SDK" value="true" />
|
| 66 |
+
<option name="ADD_CONTENT_ROOTS" value="true" />
|
| 67 |
+
<option name="ADD_SOURCE_ROOTS" value="true" />
|
| 68 |
+
<EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
|
| 69 |
+
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/evaluate.py" />
|
| 70 |
+
<option name="PARAMETERS" value="" />
|
| 71 |
+
<option name="SHOW_COMMAND_LINE" value="false" />
|
| 72 |
+
<option name="EMULATE_TERMINAL" value="false" />
|
| 73 |
+
<option name="MODULE_MODE" value="false" />
|
| 74 |
+
<option name="REDIRECT_INPUT" value="false" />
|
| 75 |
+
<option name="INPUT_FILE" value="" />
|
| 76 |
+
<method v="2" />
|
| 77 |
+
</configuration>
|
| 78 |
+
<configuration name="model" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
|
| 79 |
+
<module name="NamedEntityRecognization" />
|
| 80 |
+
<option name="INTERPRETER_OPTIONS" value="" />
|
| 81 |
+
<option name="PARENT_ENVS" value="true" />
|
| 82 |
+
<envs>
|
| 83 |
+
<env name="PYTHONUNBUFFERED" value="1" />
|
| 84 |
+
</envs>
|
| 85 |
+
<option name="SDK_HOME" value="" />
|
| 86 |
+
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
|
| 87 |
+
<option name="IS_MODULE_SDK" value="true" />
|
| 88 |
+
<option name="ADD_CONTENT_ROOTS" value="true" />
|
| 89 |
+
<option name="ADD_SOURCE_ROOTS" value="true" />
|
| 90 |
+
<EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
|
| 91 |
+
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/model.py" />
|
| 92 |
+
<option name="PARAMETERS" value="" />
|
| 93 |
+
<option name="SHOW_COMMAND_LINE" value="false" />
|
| 94 |
+
<option name="EMULATE_TERMINAL" value="false" />
|
| 95 |
+
<option name="MODULE_MODE" value="false" />
|
| 96 |
+
<option name="REDIRECT_INPUT" value="false" />
|
| 97 |
+
<option name="INPUT_FILE" value="" />
|
| 98 |
+
<method v="2" />
|
| 99 |
+
</configuration>
|
| 100 |
+
<configuration name="predict" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
|
| 101 |
+
<module name="NamedEntityRecognization" />
|
| 102 |
+
<option name="INTERPRETER_OPTIONS" value="" />
|
| 103 |
+
<option name="PARENT_ENVS" value="true" />
|
| 104 |
+
<envs>
|
| 105 |
+
<env name="PYTHONUNBUFFERED" value="1" />
|
| 106 |
+
</envs>
|
| 107 |
+
<option name="SDK_HOME" value="" />
|
| 108 |
+
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
|
| 109 |
+
<option name="IS_MODULE_SDK" value="true" />
|
| 110 |
+
<option name="ADD_CONTENT_ROOTS" value="true" />
|
| 111 |
+
<option name="ADD_SOURCE_ROOTS" value="true" />
|
| 112 |
+
<EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
|
| 113 |
+
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/predict.py" />
|
| 114 |
+
<option name="PARAMETERS" value="" />
|
| 115 |
+
<option name="SHOW_COMMAND_LINE" value="false" />
|
| 116 |
+
<option name="EMULATE_TERMINAL" value="false" />
|
| 117 |
+
<option name="MODULE_MODE" value="false" />
|
| 118 |
+
<option name="REDIRECT_INPUT" value="false" />
|
| 119 |
+
<option name="INPUT_FILE" value="" />
|
| 120 |
+
<method v="2" />
|
| 121 |
+
</configuration>
|
| 122 |
+
<configuration name="statistic" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
|
| 123 |
+
<module name="NamedEntityRecognization" />
|
| 124 |
+
<option name="INTERPRETER_OPTIONS" value="" />
|
| 125 |
+
<option name="PARENT_ENVS" value="true" />
|
| 126 |
+
<envs>
|
| 127 |
+
<env name="PYTHONUNBUFFERED" value="1" />
|
| 128 |
+
</envs>
|
| 129 |
+
<option name="SDK_HOME" value="" />
|
| 130 |
+
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
|
| 131 |
+
<option name="IS_MODULE_SDK" value="true" />
|
| 132 |
+
<option name="ADD_CONTENT_ROOTS" value="true" />
|
| 133 |
+
<option name="ADD_SOURCE_ROOTS" value="true" />
|
| 134 |
+
<EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
|
| 135 |
+
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/statistic.py" />
|
| 136 |
+
<option name="PARAMETERS" value="" />
|
| 137 |
+
<option name="SHOW_COMMAND_LINE" value="false" />
|
| 138 |
+
<option name="EMULATE_TERMINAL" value="false" />
|
| 139 |
+
<option name="MODULE_MODE" value="false" />
|
| 140 |
+
<option name="REDIRECT_INPUT" value="false" />
|
| 141 |
+
<option name="INPUT_FILE" value="" />
|
| 142 |
+
<method v="2" />
|
| 143 |
+
</configuration>
|
| 144 |
+
<configuration name="train" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
|
| 145 |
+
<module name="NamedEntityRecognization" />
|
| 146 |
+
<option name="INTERPRETER_OPTIONS" value="" />
|
| 147 |
+
<option name="PARENT_ENVS" value="true" />
|
| 148 |
+
<envs>
|
| 149 |
+
<env name="PYTHONUNBUFFERED" value="1" />
|
| 150 |
+
</envs>
|
| 151 |
+
<option name="SDK_HOME" value="" />
|
| 152 |
+
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
|
| 153 |
+
<option name="IS_MODULE_SDK" value="true" />
|
| 154 |
+
<option name="ADD_CONTENT_ROOTS" value="true" />
|
| 155 |
+
<option name="ADD_SOURCE_ROOTS" value="true" />
|
| 156 |
+
<EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
|
| 157 |
+
<EXTENSION ID="net.ashald.envfile">
|
| 158 |
+
<option name="IS_ENABLED" value="false" />
|
| 159 |
+
<option name="IS_SUBST" value="false" />
|
| 160 |
+
<option name="IS_PATH_MACRO_SUPPORTED" value="false" />
|
| 161 |
+
<option name="IS_IGNORE_MISSING_FILES" value="false" />
|
| 162 |
+
<option name="IS_ENABLE_EXPERIMENTAL_INTEGRATIONS" value="false" />
|
| 163 |
+
<ENTRIES>
|
| 164 |
+
<ENTRY IS_ENABLED="true" PARSER="runconfig" />
|
| 165 |
+
</ENTRIES>
|
| 166 |
+
</EXTENSION>
|
| 167 |
+
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/train.py" />
|
| 168 |
+
<option name="PARAMETERS" value="" />
|
| 169 |
+
<option name="SHOW_COMMAND_LINE" value="true" />
|
| 170 |
+
<option name="EMULATE_TERMINAL" value="false" />
|
| 171 |
+
<option name="MODULE_MODE" value="false" />
|
| 172 |
+
<option name="REDIRECT_INPUT" value="false" />
|
| 173 |
+
<option name="INPUT_FILE" value="" />
|
| 174 |
+
<method v="2" />
|
| 175 |
+
</configuration>
|
| 176 |
+
<list>
|
| 177 |
+
<item itemvalue="Python.predict" />
|
| 178 |
+
<item itemvalue="Python.statistic" />
|
| 179 |
+
<item itemvalue="Python.train" />
|
| 180 |
+
<item itemvalue="Python.model" />
|
| 181 |
+
<item itemvalue="Python.evaluate" />
|
| 182 |
+
</list>
|
| 183 |
+
<recent_temporary>
|
| 184 |
+
<list>
|
| 185 |
+
<item itemvalue="Python.predict" />
|
| 186 |
+
<item itemvalue="Python.train" />
|
| 187 |
+
<item itemvalue="Python.evaluate" />
|
| 188 |
+
<item itemvalue="Python.model" />
|
| 189 |
+
<item itemvalue="Python.statistic" />
|
| 190 |
+
</list>
|
| 191 |
+
</recent_temporary>
|
| 192 |
+
</component>
|
| 193 |
+
<component name="SpellCheckerSettings" RuntimeDictionaries="0" Folders="0" CustomDictionaries="0" DefaultDictionary="应用程序级" UseSingleDictionary="true" transferred="true" />
|
| 194 |
+
<component name="TaskManager">
|
| 195 |
+
<task active="true" id="Default" summary="默认任务">
|
| 196 |
+
<changelist id="626a281e-0f78-4eb9-9469-6e0d7f35140d" name="变更" comment="" />
|
| 197 |
+
<created>1649091649915</created>
|
| 198 |
+
<option name="number" value="Default" />
|
| 199 |
+
<option name="presentableId" value="Default" />
|
| 200 |
+
<updated>1649091649915</updated>
|
| 201 |
+
<workItem from="1649091655927" duration="9332000" />
|
| 202 |
+
<workItem from="1649773940694" duration="6925000" />
|
| 203 |
+
<workItem from="1651504862776" duration="153000" />
|
| 204 |
+
<workItem from="1651924741385" duration="694000" />
|
| 205 |
+
<workItem from="1658891597769" duration="13145000" />
|
| 206 |
+
<workItem from="1661422884262" duration="704000" />
|
| 207 |
+
</task>
|
| 208 |
+
<servers />
|
| 209 |
+
</component>
|
| 210 |
+
<component name="TypeScriptGeneratedFilesManager">
|
| 211 |
+
<option name="version" value="3" />
|
| 212 |
+
</component>
|
| 213 |
+
<component name="XDebuggerManager">
|
| 214 |
+
<breakpoint-manager>
|
| 215 |
+
<breakpoints>
|
| 216 |
+
<line-breakpoint enabled="true" suspend="THREAD" type="python-line">
|
| 217 |
+
<url>file://$PROJECT_DIR$/predict.py</url>
|
| 218 |
+
<line>36</line>
|
| 219 |
+
<option name="timeStamp" value="5" />
|
| 220 |
+
</line-breakpoint>
|
| 221 |
+
<line-breakpoint enabled="true" suspend="THREAD" type="python-line">
|
| 222 |
+
<url>file://$PROJECT_DIR$/train.py</url>
|
| 223 |
+
<line>110</line>
|
| 224 |
+
<option name="timeStamp" value="20" />
|
| 225 |
+
</line-breakpoint>
|
| 226 |
+
<line-breakpoint enabled="true" suspend="THREAD" type="python-line">
|
| 227 |
+
<url>file://$PROJECT_DIR$/preprocess.py</url>
|
| 228 |
+
<line>81</line>
|
| 229 |
+
<option name="timeStamp" value="21" />
|
| 230 |
+
</line-breakpoint>
|
| 231 |
+
<line-breakpoint enabled="true" suspend="THREAD" type="python-line">
|
| 232 |
+
<url>file://$PROJECT_DIR$/preprocess.py</url>
|
| 233 |
+
<line>107</line>
|
| 234 |
+
<option name="timeStamp" value="24" />
|
| 235 |
+
</line-breakpoint>
|
| 236 |
+
<line-breakpoint enabled="true" suspend="THREAD" type="python-line">
|
| 237 |
+
<url>file://$PROJECT_DIR$/utils/snippets.py</url>
|
| 238 |
+
<line>509</line>
|
| 239 |
+
<option name="timeStamp" value="28" />
|
| 240 |
+
</line-breakpoint>
|
| 241 |
+
<line-breakpoint enabled="true" suspend="THREAD" type="python-line">
|
| 242 |
+
<url>file://$PROJECT_DIR$/utils/snippets.py</url>
|
| 243 |
+
<line>506</line>
|
| 244 |
+
<option name="timeStamp" value="30" />
|
| 245 |
+
</line-breakpoint>
|
| 246 |
+
</breakpoints>
|
| 247 |
+
</breakpoint-manager>
|
| 248 |
+
</component>
|
| 249 |
+
<component name="com.intellij.coverage.CoverageDataManagerImpl">
|
| 250 |
+
<SUITE FILE_PATH="coverage/NamedEntityRecognization$train.coverage" NAME="train 覆盖结果" MODIFIED="1658905350571" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$" />
|
| 251 |
+
<SUITE FILE_PATH="coverage/NamedEntityRecognization$model.coverage" NAME="model 覆盖结果" MODIFIED="1649776894188" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$" />
|
| 252 |
+
<SUITE FILE_PATH="coverage/NamedEntityRecognization$evaluate.coverage" NAME="evaluate 覆盖结果" MODIFIED="1649825507637" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$" />
|
| 253 |
+
<SUITE FILE_PATH="coverage/NamedEntityRecognization$statistic.coverage" NAME="statistic 覆盖结果" MODIFIED="1649172187190" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$" />
|
| 254 |
+
<SUITE FILE_PATH="coverage/NamedEntityRecognization$predict.coverage" NAME="predict 覆盖结果" MODIFIED="1658911968974" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$" />
|
| 255 |
+
</component>
|
| 256 |
+
</project>
|
chinese_medical_ner/ccksyidu4k-ner-roformer/README.md
ADDED
|
@@ -0,0 +1,340 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# CCKS2019医渡云4k电子病历数据集命名实体识别
|
| 2 |
+
|
| 3 |
+
## Dataset
|
| 4 |
+
|
| 5 |
+
Yidu-S4K数据集,对于给定的一组电子病历纯文本文档,任务的目标是识别并抽取出与医学临床相关的实体提及(entity mention),并将它们归类到预定义类别(pre-defined
|
| 6 |
+
categories),比如疾病、治疗、检查检验等。
|
| 7 |
+
|
| 8 |
+
1. 疾病和诊断:医学上定义的疾病和医生在临床工作中对病因、病生理、分型分期等所作的判断。
|
| 9 |
+
2. 检查: 影像检查(X线、CT、MR、PETCT等)+造影+超声+心电图,为避免检查操作与手术操作过多冲突,不包含此外其它的诊断性操作,如胃镜、肠镜等。
|
| 10 |
+
3. 检验: 在实验室进行的物理或化学检查,本期特指临床工作中检验科进行的化验,不含免疫组化等广义实验室检查
|
| 11 |
+
4. 手术: 医生在患者身体局部进行的切除、缝合等治疗,是外科的主要治疗方法。
|
| 12 |
+
5. 药物: 用于疾病治疗的具体化学物质。
|
| 13 |
+
6. 解剖部位: 指疾病、症状和体征发生的人体解剖学部位。
|
| 14 |
+
|
| 15 |
+
任务一数据结构: 任务一数据每一行为一个json json key 为`['originalText','entities']` 即原文和实体列表 `json["entities"]`
|
| 16 |
+
为列表,每个元素代表一个实体entity,其中有该实体在原文中的起始位置`start_pos`,结束位置`end_pos`,以及实体类型
|
| 17 |
+
|
| 18 |
+
训练样本1000条,提交的测试样本379条,经过处理后转成BIO格式,形如:
|
| 19 |
+
|
| 20 |
+
```
|
| 21 |
+
心 B-TESTIMAGE
|
| 22 |
+
脏 I-TESTIMAGE
|
| 23 |
+
彩 I-TESTIMAGE
|
| 24 |
+
超 I-TESTIMAGE
|
| 25 |
+
: O
|
| 26 |
+
右 B-ANATOMY
|
| 27 |
+
房 I-ANATOMY
|
| 28 |
+
、 O
|
| 29 |
+
右 B-ANATOMY
|
| 30 |
+
室 I-ANATOMY
|
| 31 |
+
稍 O
|
| 32 |
+
增 O
|
| 33 |
+
大 O
|
| 34 |
+
, O
|
| 35 |
+
E B-TESTLAB
|
| 36 |
+
F I-TESTLAB
|
| 37 |
+
正 O
|
| 38 |
+
常 O
|
| 39 |
+
。 O
|
| 40 |
+
```
|
| 41 |
+
|
| 42 |
+
ATTENTION:
|
| 43 |
+
|
| 44 |
+
- 字与标签之间用tab("\t")隔开
|
| 45 |
+
- 其中句子与句子之间使用空行隔开
|
| 46 |
+
- 文件最后以两个换行结束
|
| 47 |
+
|
| 48 |
+
句长与数量信息可以运行`statistic.py`以查看
|
| 49 |
+
|
| 50 |
+
## Project Structure
|
| 51 |
+
|
| 52 |
+
```
|
| 53 |
+
./
|
| 54 |
+
├── README.md
|
| 55 |
+
├── __pycache__
|
| 56 |
+
├── chinese_roformer-v2-char_L-12_H-768_A-12 roformer_v2 base权重文件
|
| 57 |
+
│ ├── bert_config.json
|
| 58 |
+
│ ├── bert_model.ckpt.data-00000-of-00001
|
| 59 |
+
│ ├── bert_model.ckpt.index
|
| 60 |
+
│ ├── bert_model.ckpt.meta
|
| 61 |
+
│ ├── checkpoint
|
| 62 |
+
│ └── vocab.txt
|
| 63 |
+
├── chinese_roformer-v2-char_L-6_H-384_A-6 roformer_v2 small 权重文件
|
| 64 |
+
│ ├── bert_config.json
|
| 65 |
+
│ ├── bert_model.ckpt.data-00000-of-00001
|
| 66 |
+
│ ├── bert_model.ckpt.index
|
| 67 |
+
│ ├── bert_model.ckpt.meta
|
| 68 |
+
│ ├── checkpoint
|
| 69 |
+
│ └── vocab.txt
|
| 70 |
+
├── config.py 模型可能需要调整的超参数
|
| 71 |
+
├── data 数据集文件夹
|
| 72 |
+
│ ├── yidu.test 官方提供的379个测试样本
|
| 73 |
+
│   ├── yidu.train 从官方1000个训练样本中划分的训练集
|
| 74 |
+
│   ├── yidu.validate 从官方1000个训练样本中划分的验证集
|
| 75 |
+
│ └── yidu_catagory.pkl 类别set,由train.py生成,predict.py中用到
|
| 76 |
+
├── evaluate.py
|
| 77 |
+
├── images 训练、评估数据生成的图片
|
| 78 |
+
│ ├── train_acc.png
|
| 79 |
+
│ ├── train_loss.png
|
| 80 |
+
│ └── val_f1.png
|
| 81 |
+
├── log 训练日志,由train.py生成
|
| 82 |
+
│ ├── train_loss.csv
|
| 83 |
+
│ ├── val_f1.csv
|
| 84 |
+
│ ├── yidu.out
|
| 85 |
+
│ └── yidu_f1.out
|
| 86 |
+
├── model.py 构建模型
|
| 87 |
+
├── path.py 所有路径
|
| 88 |
+
├── predict.py 模型预测输出
|
| 89 |
+
├── preprocess.py 数据预处理
|
| 90 |
+
├── statistic.py 统计句长与数量信息,以便调整和设置maxlen
|
| 91 |
+
├── report 评估报告,由evaluate.py生成
|
| 92 |
+
│ └── yidu_bert_base.csv 每个类别的精准、召回、F1
|
| 93 |
+
├── train.py 训练文件
|
| 94 |
+
├── requirements.txt pip环境
|
| 95 |
+
├── plot.py 画图工具
|
| 96 |
+
├── utils bert4keras工具包,也可pip下载
|
| 97 |
+
│ ├── __init__.py
|
| 98 |
+
│ ├── __pycache__
|
| 99 |
+
│ ├── backend.py
|
| 100 |
+
│ ├── layers.py
|
| 101 |
+
│ ├── models.py
|
| 102 |
+
│ ├── optimizers.py
|
| 103 |
+
│ ├── snippets.py
|
| 104 |
+
│ └── tokenizers.py
|
| 105 |
+
└── weights 保存的权重
|
| 106 |
+
├── yidu_catagory.pkl 实体类别
|
| 107 |
+
├── yidu_roformer_v2_base.h5 模型权重
|
| 108 |
+
└── yidu_roformer_v2_crf_trans.pkl 最佳模型的权重
|
| 109 |
+
```
|
| 110 |
+
|
| 111 |
+
## Requirements
|
| 112 |
+
|
| 113 |
+
```
|
| 114 |
+
Keras==2.2.4
|
| 115 |
+
matplotlib==3.4.0
|
| 116 |
+
pandas==1.2.3
|
| 117 |
+
tensorflow==1.14.0
|
| 118 |
+
tqdm==4.61.2
|
| 119 |
+
```
|
| 120 |
+
|
| 121 |
+
## Steps
|
| 122 |
+
|
| 123 |
+
1. 替换数据集
|
| 124 |
+
2. 修改path.py中的地址
|
| 125 |
+
3. 删掉旧的weights/{}_catagory.pkl类别set文件
|
| 126 |
+
4. 根据需要修改model.py模型结构
|
| 127 |
+
5. 修改config.py的参数
|
| 128 |
+
6. Debug
|
| 129 |
+
7. 训练
|
| 130 |
+
|
| 131 |
+
## Model
|
| 132 |
+
|
| 133 |
+
### 上游
|
| 134 |
+
|
| 135 |
+
[GitHub - ZhuiyiTechnology/roformer-v2: RoFormer升级版](https://github.com/ZhuiyiTechnology/roformer-v2)
|
| 136 |
+
是RoFormer升级版,主要通过结构的简化来提升速度,并通过无监督预训练和有监督预训练的结合来提升效果,从而达到了速度与效果的“双赢”。相比RoFormer,RoFormerV2的主要改动是简化模型结构、增加训练数据以及加入有监督训练,这些改动能让RoFormerV2最终取得了速度和效果的“双赢”。
|
| 137 |
+
|
| 138 |
+
- **Small版**
|
| 139 |
+
: [chinese_roformer-v2-char_L-6_H-384_A-6.zip](https://open.zhuiyi.ai/releases/nlp/models/zhuiyi/chinese_roformer-v2-char_L-6_H-384_A-6.zip)
|
| 140 |
+
- **Base版**
|
| 141 |
+
: [chinese_roformer-v2-char_L-12_H-768_A-12.zip](https://open.zhuiyi.ai/releases/nlp/models/zhuiyi/chinese_roformer-v2-char_L-12_H-768_A-12.zip)
|
| 142 |
+
- **Large版**
|
| 143 |
+
: [chinese_roformer-v2-char_L-24_H-1024_A-16.zip](https://open.zhuiyi.ai/releases/nlp/models/zhuiyi/chinese_roformer-v2-char_L-24_H-1024_A-16.zip)
|
| 144 |
+
|
| 145 |
+
### 下游
|
| 146 |
+
|
| 147 |
+

|
| 148 |
+
|
| 149 |
+
模型大小
|
| 150 |
+
|
| 151 |
+
> * **Small版**:两张3090(24G),先用无监督MLM训练了100万步(maxlen为512),然后有监督多任务训练了75万步(maxlen从64到512不等,取决于任务),batch_size为512,优化器为LAMB;
|
| 152 |
+
> * **Base版**:四张3090(24G),先用无监督MLM训练了100万步(maxlen为512),然后有监督多任务训练了75万步(maxlen从64到512不等,取决于任务),batch_size为512,优化器为LAMB;
|
| 153 |
+
> * **Large版**:两张A100(80G),先用无监督MLM训练了100万步(maxlen为512),然后有监督多任务训练了50万步(maxlen从64到512不等,取决于任务),batch_size为512,优化器为LAMB。
|
| 154 |
+
|
| 155 |
+
## Config
|
| 156 |
+
|
| 157 |
+
- `maxlen` 训练中每个batch的最大单句长度,少于填充,多于截断
|
| 158 |
+
- `epochs` 最大训练轮次
|
| 159 |
+
- `batch_size` batch size
|
| 160 |
+
- `bert_layers` bert层数,small ≤ 4,base ≤ 12
|
| 161 |
+
- `crf_lr_multiplier` CRF层放大的学习率,必要时扩大它
|
| 162 |
+
- `model_type` 模型, 'roformer_v2'
|
| 163 |
+
- `dropout_rate` dropout比率
|
| 164 |
+
- `max_lr` 最大学习率,bert_layers越大应该越小,small建议5e-5~1e-4,base建议1e-5~5e-5
|
| 165 |
+
- `lstm_hidden_units` lstm隐藏层数量
|
| 166 |
+
|
| 167 |
+
ATTENTION: 并非所有句子都要填充到同一个长度,要求每个batch内的每个样本长度一致即可。所以若batch中最大长度 ≤ maxlen,则该batch将填充or截断到最长句子长度,若batch中最大长度 ≥
|
| 168 |
+
maxlen,则该batch将填充or截断到config.py中的maxlen
|
| 169 |
+
|
| 170 |
+
## Train
|
| 171 |
+
|
| 172 |
+
### 策略
|
| 173 |
+
|
| 174 |
+
#### 划分策略
|
| 175 |
+
|
| 176 |
+
将1000条训练样本按8:2划分成训练集、验证集,并shuffle。
|
| 177 |
+
|
| 178 |
+
#### 优化策略
|
| 179 |
+
|
| 180 |
+
- 使用EMA(exponential moving average)滑动平均配合Adam作为优化策略。滑动平均可以用来估计变量的局部值,使得变量的更新与一段时间内的历史值有关。它的意义在于利用滑动平均的参数来提高模型在测试数据上的健壮性。
|
| 181 |
+
EMA 对每一个待更新训练学习的变量 (variable) 都会维护一个影子变量 (shadow variable)。影子变量的初始值就是这个变量的初始值。
|
| 182 |
+
- BERT模型由于已经有了预训练权重,所以微调权重只需要很小的学习率,而LSTM和Dense使用的`he_normal`
|
| 183 |
+
初始化学习率,需要使用较大学习率,所以本模型使用[分层学习率](https://kexue.fm/archives/6418)
|
| 184 |
+
- 在Embedding层注入扰动,[对抗训练](https://kexue.fm/archives/7234) ,使模型更具鲁棒性。
|
| 185 |
+
|
| 186 |
+
#### 停止策略
|
| 187 |
+
|
| 188 |
+
在callback中计算验证集实体F1值,监控它。5轮不升即停。
|
| 189 |
+
|
| 190 |
+
### 日志
|
| 191 |
+
|
| 192 |
+
```
|
| 193 |
+
Epoch 1/999
|
| 194 |
+
78/78 [==============================] - 342s 4s/step - loss: 44.7248 - sparse_accuracy: 0.8038
|
| 195 |
+
valid: f1: 0.05063, precision: 0.06611, recall: 0.04103, best f1: 0.05063
|
| 196 |
+
Epoch 2/999
|
| 197 |
+
78/78 [==============================] - 313s 4s/step - loss: 13.2246 - sparse_accuracy: 0.9135
|
| 198 |
+
valid: f1: 0.67956, precision: 0.70216, recall: 0.65837, best f1: 0.67956
|
| 199 |
+
Epoch 3/999
|
| 200 |
+
78/78 [==============================] - 319s 4s/step - loss: 5.9724 - sparse_accuracy: 0.9418
|
| 201 |
+
valid: f1: 0.81794, precision: 0.83338, recall: 0.80306, best f1: 0.81794
|
| 202 |
+
|
| 203 |
+
...
|
| 204 |
+
|
| 205 |
+
Epoch 16/999
|
| 206 |
+
78/78 [==============================] - 308s 4s/step - loss: 1.6843 - sparse_accuracy: 0.9109
|
| 207 |
+
Early stop count 3/5
|
| 208 |
+
valid: f1: 0.87578, precision: 0.86848, recall: 0.88321, best f1: 0.87753
|
| 209 |
+
Epoch 17/999
|
| 210 |
+
78/78 [==============================] - 323s 4s/step - loss: 1.5966 - sparse_accuracy: 0.9090
|
| 211 |
+
Early stop count 4/5
|
| 212 |
+
valid: f1: 0.87717, precision: 0.86962, recall: 0.88485, best f1: 0.87753
|
| 213 |
+
Epoch 18/999
|
| 214 |
+
78/78 [==============================] - 324s 4s/step - loss: 1.4774 - sparse_accuracy: 0.9092
|
| 215 |
+
Early stop count 5/5
|
| 216 |
+
Epoch 00018: early stopping THR
|
| 217 |
+
valid: f1: 0.87693, precision: 0.86916, recall: 0.88485, best f1: 0.87753
|
| 218 |
+
```
|
| 219 |
+
|
| 220 |
+
训练集crf loss
|
| 221 |
+
|
| 222 |
+

|
| 223 |
+
|
| 224 |
+
训练集crf acc:
|
| 225 |
+
|
| 226 |
+

|
| 227 |
+
|
| 228 |
+
### Evaluate
|
| 229 |
+
|
| 230 |
+
### 策略
|
| 231 |
+
|
| 232 |
+
评估策略为实体级别的F1,抽取到的每个实体的label、在每句中的起始坐标、终止坐标都正确才算对
|
| 233 |
+
|
| 234 |
+
可以评估:
|
| 235 |
+
|
| 236 |
+
- 总的F1:所有类别一起统计,TP为所有label、起始坐标、终止坐标都正确的个数,TP+FP为预测实体总数,TP+FN为真实实体总数
|
| 237 |
+
- 每类的F1:分类统计,TP为每个列别的起始坐标、终止坐标都正确的个数,TP+FP为每个类别的预测实体总数,TP+FN为每个类别的真实实体总数
|
| 238 |
+
|
| 239 |
+
### 评估单个模型
|
| 240 |
+
|
| 241 |
+
```python
|
| 242 |
+
evaluate_one(save_file_path = weights_path + '/yidu_roformer_v2_base.h5',
|
| 243 |
+
dataset_path = "./data/yidu.test",
|
| 244 |
+
csv_path = './report/yidu_bert_base.csv',
|
| 245 |
+
evaluate_categories_f1 = True)
|
| 246 |
+
```
|
| 247 |
+
|
| 248 |
+
`save_file_path`,`dataset_path`是评估数据集路径,`evaluate_categories_f1`为是否评估每个类别的F1(时间会比评估总的F1长很多),`csv_path`
|
| 249 |
+
是每类F1数据生成的csv文件存放路径。
|
| 250 |
+
|
| 251 |
+
ATTENTION: 1个batch只进1条句子,所以可以无视train的maxlen,但是tokenize后长于512的部分将无法被预测,也不会被算进P里
|
| 252 |
+
|
| 253 |
+
## Performance
|
| 254 |
+
|
| 255 |
+
### 测试集表现
|
| 256 |
+
|
| 257 |
+

|
| 258 |
+
|
| 259 |
+
### 验证集最佳F1
|
| 260 |
+
|
| 261 |
+
```
|
| 262 |
+
Epoch 13/999
|
| 263 |
+
78/78 [==============================] - 314s 4s/step - loss: 1.9135 - sparse_accuracy: 0.9114
|
| 264 |
+
valid: f1: 0.87753, precision: 0.87033, recall: 0.88485, best f1: 0.87753
|
| 265 |
+
```
|
| 266 |
+
|
| 267 |
+
### 官方提供的379条测试样本表现
|
| 268 |
+
|
| 269 |
+
```
|
| 270 |
+
weight path:/home/bureaux/Projects/NamedEntityRecognization/weights/yidu_roformer_v2_base.h5
|
| 271 |
+
evaluate dataset path:./data/yidu.test
|
| 272 |
+
Evaluating General F1: 100%|████████████████████████████████████| 2035/2035 [03:11<00:00, 10.60it/s]
|
| 273 |
+
General: f1: 0.87700, precision: 0.86014, recall: 0.89454
|
| 274 |
+
```
|
| 275 |
+
|
| 276 |
+
### 官方提供的379条测试样本每个类别的F1评测结果
|
| 277 |
+
|
| 278 |
+
```
|
| 279 |
+
Evaluating F1 of each Categories: 100%|█████████████████████████| 2035/2035 [19:37<00:00, 1.73it/s]
|
| 280 |
+
TP TP+FP TP+FN precision recall f1
|
| 281 |
+
ANATOMY 2788 3286 3094 0.8484 0.9011 0.8740
|
| 282 |
+
DISEASE 1176 1332 1323 0.8829 0.8889 0.8859
|
| 283 |
+
DRUG 470 497 485 0.9457 0.9691 0.9572
|
| 284 |
+
OPERATION 143 158 162 0.9051 0.8827 0.8938
|
| 285 |
+
TESTIMAGE 326 366 348 0.8907 0.9368 0.9132
|
| 286 |
+
TESTLAB 466 603 590 0.7728 0.7898 0.7812
|
| 287 |
+
```
|
| 288 |
+
|
| 289 |
+
## Predict
|
| 290 |
+
|
| 291 |
+
```python
|
| 292 |
+
txt = '1997-8-6行胃癌根治术,2010.11发现CA724 升高最高1295 ,复查PET-CT检查未见复发转移,之后多次复查CA724 波动在500-800之间,多次查胃镜提示吻合口炎,给予对症治疗,患者感左下腹隐痛下腹隐痛不适,2013.10.15复查血CA724 147 CA199 13.62 ,2013.10.23复查腹部CT检查提示胰腺占位,考虑恶性,胰头周围,肝门,腹膜后多发多发淋巴结转移。PET-CT提示:胰头区高代谢,考虑恶性病变。患者近10天出现午饭后左下腹部胀痛,持续2-3小时候可自行缓解。体重近1月上降2KG.患者胰腺穿刺取病理示低分化腺癌,免疫组化示CEA+,CGA+/-,CD56+/-,SYN+/-,对手术有顾虑,且手术风险较大,2013-11-26行放疗30次,2014-1-7放疗结束。2013-11-28始行单药吉西他滨化疗4周期。末次2014-1-7.放化疗中出现黄疸,对症治疗后好转。化疗后患者出现II度白细胞降低、II度血小板降低。2014-1-24复查胰头区病灶及腹腔淋巴结均较强缩小,胰腺穿刺病理中低分化腺癌,免疫组化CA19+,CK7+,CGA-,SYN-,CD56-,CA199+,符合胆、胰导管来源浸润性腺癌。CA72.4 明显上降。2014-1-27病理比对原胃切除标本报告与胰腺肿瘤存在较大形态差异。考虑患者明确胰腺癌,于2014-2-7行第5周期GEM化疗,2014-2复查后病灶缩小SD,于2014-2-21开始第六周期化疗,因第八天白细胞减少推迟到2014-3-3。2014-4-7第8周期化疗。末次给药2014-4-14.2014-4-21复查评效SD,略有缩小,CA72.4降低至11.12.2014-4-28继续单药GEM化疗,末次给药时间2014-9-1.GEM双周一次,2014-7-24复查胰腺病灶继续缩小,评效PR。现患者无明显不适,饮食、睡眠可,体重较前上降约4KG。'
|
| 293 |
+
for i in predict(txt = txt,
|
| 294 |
+
weights_path = weights_path + '/yidu_roformer_v2_base.h5',
|
| 295 |
+
label_dict_path = label_dict_path,
|
| 296 |
+
trans_path = "./weights/yidu_roformer_v2_crf_trans.pkl"):
|
| 297 |
+
print(i)
|
| 298 |
+
```
|
| 299 |
+
|
| 300 |
+
txt为输入文本,weights_path为使用权重的路径,label_dict_path为实体类别字典的pkl文件,trans_path为模型转移矩阵文件。缺一不可。
|
| 301 |
+
|
| 302 |
+
输出结果
|
| 303 |
+
|
| 304 |
+
```
|
| 305 |
+
[
|
| 306 |
+
('胃癌根治术', 'OPERATION', 9, 13)
|
| 307 |
+
('CA724', 'TESTLAB', 24, 28)
|
| 308 |
+
('PET-CT', 'TESTIMAGE', 42, 47)
|
| 309 |
+
('CA724', 'TESTLAB', 63, 67)
|
| 310 |
+
('吻合口炎', 'DISEASE', 89, 92)
|
| 311 |
+
('左下腹', 'ANATOMY', 104, 106)
|
| 312 |
+
('下腹', 'ANATOMY', 109, 110)
|
| 313 |
+
('CA724', 'TESTLAB', 129, 133)
|
| 314 |
+
('腹部CT', 'TESTIMAGE', 164, 167)
|
| 315 |
+
('胰腺', 'ANATOMY', 172, 173)
|
| 316 |
+
('胰头', 'ANATOMY', 182, 183)
|
| 317 |
+
('肝门', 'ANATOMY', 187, 188)
|
| 318 |
+
('腹膜', 'ANATOMY', 190, 191)
|
| 319 |
+
('PET-CT', 'TESTIMAGE', 203, 208)
|
| 320 |
+
('胰头区', 'ANATOMY', 212, 214)
|
| 321 |
+
('左下腹部', 'ANATOMY', 237, 240)
|
| 322 |
+
('胰腺', 'ANATOMY', 271, 272)
|
| 323 |
+
('低分化腺癌', 'DISEASE', 279, 283)
|
| 324 |
+
('吉西他滨', 'DRUG', 376, 379)
|
| 325 |
+
('白细胞', 'TESTLAB', 424, 426)
|
| 326 |
+
('血小板', 'TESTLAB', 433, 435)
|
| 327 |
+
('胰头区', 'ANATOMY', 450, 452)
|
| 328 |
+
('腹腔淋巴结', 'ANATOMY', 456, 460)
|
| 329 |
+
('胰腺', 'ANATOMY', 467, 468)
|
| 330 |
+
('中低分化腺癌', 'DISEASE', 473, 478)
|
| 331 |
+
('胆', 'ANATOMY', 520, 520)
|
| 332 |
+
('胰', 'ANATOMY', 522, 522)
|
| 333 |
+
('CA72.4', 'TESTLAB', 533, 538)
|
| 334 |
+
('胃', 'ANATOMY', 559, 559)
|
| 335 |
+
('胰腺肿瘤', 'DISEASE', 567, 570)
|
| 336 |
+
('胰腺癌', 'DISEASE', 586, 588)
|
| 337 |
+
]
|
| 338 |
+
```
|
| 339 |
+
|
| 340 |
+
输出格式为`(实体, 类别, 起始坐标, 终止坐标)`
|
chinese_medical_ner/ccksyidu4k-ner-roformer/__pycache__/config.cpython-310.pyc
ADDED
|
Binary file (419 Bytes). View file
|
|
|
chinese_medical_ner/ccksyidu4k-ner-roformer/__pycache__/config.cpython-37.pyc
ADDED
|
Binary file (413 Bytes). View file
|
|
|
chinese_medical_ner/ccksyidu4k-ner-roformer/__pycache__/config.cpython-38.pyc
ADDED
|
Binary file (407 Bytes). View file
|
|
|
chinese_medical_ner/ccksyidu4k-ner-roformer/__pycache__/evaluate.cpython-310.pyc
ADDED
|
Binary file (4.57 kB). View file
|
|
|
chinese_medical_ner/ccksyidu4k-ner-roformer/__pycache__/evaluate.cpython-37.pyc
ADDED
|
Binary file (4.65 kB). View file
|
|
|
chinese_medical_ner/ccksyidu4k-ner-roformer/__pycache__/evaluate.cpython-38.pyc
ADDED
|
Binary file (4.59 kB). View file
|
|
|
chinese_medical_ner/ccksyidu4k-ner-roformer/__pycache__/model.cpython-310.pyc
ADDED
|
Binary file (4.28 kB). View file
|
|
|
chinese_medical_ner/ccksyidu4k-ner-roformer/__pycache__/model.cpython-37.pyc
ADDED
|
Binary file (4.18 kB). View file
|
|
|
chinese_medical_ner/ccksyidu4k-ner-roformer/__pycache__/model.cpython-38.pyc
ADDED
|
Binary file (4.23 kB). View file
|
|
|
chinese_medical_ner/ccksyidu4k-ner-roformer/__pycache__/path.cpython-310.pyc
ADDED
|
Binary file (976 Bytes). View file
|
|
|
chinese_medical_ner/ccksyidu4k-ner-roformer/__pycache__/path.cpython-37.pyc
ADDED
|
Binary file (970 Bytes). View file
|
|
|
chinese_medical_ner/ccksyidu4k-ner-roformer/__pycache__/path.cpython-38.pyc
ADDED
|
Binary file (964 Bytes). View file
|
|
|
chinese_medical_ner/ccksyidu4k-ner-roformer/__pycache__/plot.cpython-310.pyc
ADDED
|
Binary file (1.62 kB). View file
|
|
|
chinese_medical_ner/ccksyidu4k-ner-roformer/__pycache__/plot.cpython-37.pyc
ADDED
|
Binary file (1.68 kB). View file
|
|
|
chinese_medical_ner/ccksyidu4k-ner-roformer/__pycache__/plot.cpython-38.pyc
ADDED
|
Binary file (1.61 kB). View file
|
|
|
chinese_medical_ner/ccksyidu4k-ner-roformer/__pycache__/predict.cpython-37.pyc
ADDED
|
Binary file (2.58 kB). View file
|
|
|
chinese_medical_ner/ccksyidu4k-ner-roformer/__pycache__/preprocess.cpython-310.pyc
ADDED
|
Binary file (4.22 kB). View file
|
|
|
chinese_medical_ner/ccksyidu4k-ner-roformer/__pycache__/preprocess.cpython-37.pyc
ADDED
|
Binary file (4.18 kB). View file
|
|
|
chinese_medical_ner/ccksyidu4k-ner-roformer/__pycache__/preprocess.cpython-38.pyc
ADDED
|
Binary file (4.18 kB). View file
|
|
|
chinese_medical_ner/ccksyidu4k-ner-roformer/calc_bert_matrix.ipynb
ADDED
|
@@ -0,0 +1,534 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 11,
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"outputs": [
|
| 8 |
+
{
|
| 9 |
+
"name": "stderr",
|
| 10 |
+
"output_type": "stream",
|
| 11 |
+
"text": [
|
| 12 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 13 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
|
| 14 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 15 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
|
| 16 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 17 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
|
| 18 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 19 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
|
| 20 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 21 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
|
| 22 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 23 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
|
| 24 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 25 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
|
| 26 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 27 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
|
| 28 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 29 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
|
| 30 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 31 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
|
| 32 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 33 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
|
| 34 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 35 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
|
| 36 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 37 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
|
| 38 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 39 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
|
| 40 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 41 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
|
| 42 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 43 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
|
| 44 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 45 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
|
| 46 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 47 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
|
| 48 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 49 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
|
| 50 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 51 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
|
| 52 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 53 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
|
| 54 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 55 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
|
| 56 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 57 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
|
| 58 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 59 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
|
| 60 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 61 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
|
| 62 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 63 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n"
|
| 64 |
+
]
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"name": "stdout",
|
| 68 |
+
"output_type": "stream",
|
| 69 |
+
"text": [
|
| 70 |
+
"12\n",
|
| 71 |
+
"28\n",
|
| 72 |
+
"28.835511207580566\n",
|
| 73 |
+
"[[0.8042815 0.83821416 0.7857758 0.8936385 0.7276579 0.83560634\n",
|
| 74 |
+
" 0.83956754 0.7425114 0.82972634 0.840919 0.8455287 0.8671753\n",
|
| 75 |
+
" 0.7728379 0.43285608 0.82833314 0.8327997 0.8392484 0.8234416\n",
|
| 76 |
+
" 0.8522128 0.51038194 0.82206476 0.7454972 0.8382132 0.49966788\n",
|
| 77 |
+
" 0.86459064 0.7834512 0.8475671 0.85460234]\n",
|
| 78 |
+
" [0.8011063 0.828354 0.82743424 0.850899 0.73062104 0.85595804\n",
|
| 79 |
+
" 0.8435649 0.7553144 0.8425723 0.82148576 0.80054176 0.89214945\n",
|
| 80 |
+
" 0.79418015 0.47419527 0.81845486 0.8461245 0.8021023 0.7938319\n",
|
| 81 |
+
" 0.79460996 0.5338131 0.87848425 0.7716693 0.8170972 0.52933466\n",
|
| 82 |
+
" 0.8097694 0.83968496 0.839522 0.8165166 ]\n",
|
| 83 |
+
" [0.4245787 0.4378011 0.42134485 0.46912104 0.38410604 0.44984287\n",
|
| 84 |
+
" 0.41388378 0.44095236 0.43873137 0.44801378 0.43414456 0.4509009\n",
|
| 85 |
+
" 0.41366065 0.7984361 0.4289 0.43039462 0.42809123 0.4324836\n",
|
| 86 |
+
" 0.46127015 0.86037296 0.41747275 0.38431278 0.48179275 0.84873366\n",
|
| 87 |
+
" 0.4345677 0.41943404 0.46897653 0.45358443]\n",
|
| 88 |
+
" [0.7554055 0.77900416 0.7624301 0.8424594 0.68307996 0.8088174\n",
|
| 89 |
+
" 0.80660224 0.69681954 0.7785535 0.8220203 0.79812443 0.8501669\n",
|
| 90 |
+
" 0.7326208 0.41718763 0.7723533 0.8132994 0.8087872 0.77721477\n",
|
| 91 |
+
" 0.7891983 0.46248394 0.7991282 0.73404676 0.81659716 0.46346214\n",
|
| 92 |
+
" 0.79148304 0.7274809 0.9603679 0.77111566]\n",
|
| 93 |
+
" [0.8101381 0.8370694 0.8437565 0.87504846 0.73189175 0.86311483\n",
|
| 94 |
+
" 0.8619976 0.79309046 0.8413706 0.8296794 0.8228364 0.99999994\n",
|
| 95 |
+
" 0.8147346 0.4384241 0.81975913 0.8577111 0.8390564 0.8067612\n",
|
| 96 |
+
" 0.8274136 0.51204675 0.88608086 0.7762571 0.8515235 0.50785893\n",
|
| 97 |
+
" 0.80906993 0.8036982 0.87490416 0.8234324 ]\n",
|
| 98 |
+
" [0.4217796 0.48954895 0.42723012 0.4532832 0.3561649 0.4448802\n",
|
| 99 |
+
" 0.4336366 0.45388544 0.4319604 0.46770507 0.41890997 0.44228512\n",
|
| 100 |
+
" 0.43652567 0.93544704 0.446108 0.46484137 0.39359793 0.39574915\n",
|
| 101 |
+
" 0.45182198 0.8406079 0.425097 0.39100745 0.47122467 0.8352574\n",
|
| 102 |
+
" 0.42255652 0.4323899 0.4527101 0.43198568]\n",
|
| 103 |
+
" [0.8338187 0.8482033 0.7583538 0.9017825 0.7151871 0.84789246\n",
|
| 104 |
+
" 0.8150497 0.7093185 0.8569419 0.8142565 0.899078 0.84663707\n",
|
| 105 |
+
" 0.7619577 0.44392824 0.79649574 0.80953574 0.8414211 0.8342018\n",
|
| 106 |
+
" 0.80380815 0.4652272 0.83020467 0.75900805 0.81513274 0.4604773\n",
|
| 107 |
+
" 0.8724065 0.79225063 0.8495691 0.8571184 ]\n",
|
| 108 |
+
" [0.4388544 0.48099732 0.44652414 0.491911 0.39358306 0.4963931\n",
|
| 109 |
+
" 0.46961203 0.4602445 0.45970094 0.49297816 0.44363937 0.50785893\n",
|
| 110 |
+
" 0.42448643 0.8044198 0.4709897 0.47543868 0.4438693 0.4341317\n",
|
| 111 |
+
" 0.47560525 0.94062746 0.46269763 0.41282403 0.49911708 0.9999999\n",
|
| 112 |
+
" 0.45720756 0.43912742 0.51201 0.48214957]\n",
|
| 113 |
+
" [0.852879 0.9037154 0.7821789 0.9420587 0.7552315 0.8836415\n",
|
| 114 |
+
" 0.87547123 0.7251524 0.8828964 0.8373711 0.92202234 0.89002985\n",
|
| 115 |
+
" 0.7854445 0.474783 0.8376787 0.85402507 0.8500402 0.8268823\n",
|
| 116 |
+
" 0.82753396 0.48748526 0.87112916 0.78053 0.82918906 0.48343372\n",
|
| 117 |
+
" 0.841926 0.8271333 0.8875084 0.87484753]\n",
|
| 118 |
+
" [0.7869381 0.85783684 0.7684859 0.8841141 0.66779125 0.7765528\n",
|
| 119 |
+
" 0.75417054 0.73889744 0.85345876 0.863776 0.86478865 0.82011044\n",
|
| 120 |
+
" 0.77472615 0.43941003 0.7532432 0.7775699 0.7492738 0.7495041\n",
|
| 121 |
+
" 0.8271534 0.46770984 0.77475417 0.72683036 0.80998313 0.4553116\n",
|
| 122 |
+
" 0.8437807 0.75127625 0.8239754 0.8759123 ]\n",
|
| 123 |
+
" [0.8414567 0.86158824 0.7993734 0.9158263 0.75114155 0.8754386\n",
|
| 124 |
+
" 0.8565251 0.75108814 0.8627944 0.8455615 0.8663789 0.8859818\n",
|
| 125 |
+
" 0.7832396 0.4651299 0.8199284 0.8319515 0.8332075 0.81501603\n",
|
| 126 |
+
" 0.8339864 0.5201047 0.85637003 0.763462 0.82180524 0.5130591\n",
|
| 127 |
+
" 0.8290285 0.82059264 0.84924185 0.8875982 ]\n",
|
| 128 |
+
" [0.75577044 0.74085385 0.737481 0.7770459 0.708807 0.79907984\n",
|
| 129 |
+
" 0.80543596 0.6826918 0.718661 0.7301651 0.7104209 0.8091022\n",
|
| 130 |
+
" 0.710036 0.4174271 0.80161786 0.8145112 0.7708455 0.76511174\n",
|
| 131 |
+
" 0.74256396 0.4779269 0.79805374 0.7345556 0.75847065 0.48508406\n",
|
| 132 |
+
" 0.7402287 0.755322 0.8002572 0.72530735]]\n",
|
| 133 |
+
"0.916689\n"
|
| 134 |
+
]
|
| 135 |
+
}
|
| 136 |
+
],
|
| 137 |
+
"source": [
|
| 138 |
+
"## calc with cpu\n",
|
| 139 |
+
"import time\n",
|
| 140 |
+
"from transformers import BertTokenizer, BertModel\n",
|
| 141 |
+
"import torch\n",
|
| 142 |
+
"import numpy as np\n",
|
| 143 |
+
"from sklearn.metrics.pairwise import cosine_similarity\n",
|
| 144 |
+
"\n",
|
| 145 |
+
"# 初始化模型和分词器\n",
|
| 146 |
+
"tokenizer = BertTokenizer.from_pretrained(\"G:/model_zoo/LM/bert-base-chinese/\")\n",
|
| 147 |
+
"bert_model = BertModel.from_pretrained(\"G:/model_zoo/LM/bert-base-chinese/\")\n",
|
| 148 |
+
"\n",
|
| 149 |
+
"# tgt和out列表\n",
|
| 150 |
+
"# tgt_list = ['症状', '器官', '检查']\n",
|
| 151 |
+
"# out_list = ['病状', '身体部位', '诊断','胃']\n",
|
| 152 |
+
"\n",
|
| 153 |
+
"tgt_list = ['乏力感', '厌世', '躯体不适', '社会功能严重受损', '兴趣减退', '言行紊乱', '脑器质性疾病', '情绪低落', '精神障碍', '情绪差伴躯体不适', '焦虑', '自责']\n",
|
| 154 |
+
"out_list = ['认知行为治疗', '与家人交流障碍', '偶有轻生想法', '长期适应性障碍', '利培酮', '心理治疗', '发呆', '独处时感到被支配', '沉迷学佛后出现精神异常', '与家人交流困难', '急性而短暂的精神病性障碍', '兴趣减退', '有被害妄想和攻击行为', '言语紊乱', '自知力可', '攻击行为', '心脏', '肝脏', '反应慢', '持续的情绪低落', '沉迷学佛', '氢溴酸西酞普兰', '无法胜任家务', '情绪低落', '急性起病', '被害妄想', '社会功能受损', '重度抑郁发作']\n",
|
| 155 |
+
"print(len(tgt_list))\n",
|
| 156 |
+
"print(len(out_list))\n",
|
| 157 |
+
"\n",
|
| 158 |
+
"# 获取词向量\n",
|
| 159 |
+
"def get_word_embedding(word):\n",
|
| 160 |
+
" # 对单词进行编码\n",
|
| 161 |
+
" input_ids = tokenizer.encode(word, add_special_tokens=True, return_tensors='pt')\n",
|
| 162 |
+
" # 获取词向量\n",
|
| 163 |
+
" with torch.no_grad():\n",
|
| 164 |
+
" output = bert_model(input_ids)\n",
|
| 165 |
+
" # 使用[CLS]标记的向量作为句子向量\n",
|
| 166 |
+
" return output.last_hidden_state[:, 0, :].numpy()\n",
|
| 167 |
+
"\n",
|
| 168 |
+
"# 计算相似性矩阵\n",
|
| 169 |
+
"def calculate_similarity_matrix(words_list1, words_list2):\n",
|
| 170 |
+
" if len(words_list1) > 0 and len(words_list2) > 0:\n",
|
| 171 |
+
" embeddings1 = np.array([get_word_embedding(word) for word in words_list1])\n",
|
| 172 |
+
" embeddings2 = np.array([get_word_embedding(word) for word in words_list2])\n",
|
| 173 |
+
" \n",
|
| 174 |
+
" # 计算余弦相似性矩阵\n",
|
| 175 |
+
" similarity_matrix = cosine_similarity(embeddings1.reshape(embeddings1.shape[0],-1), embeddings2.reshape(embeddings2.shape[0],-1))\n",
|
| 176 |
+
" else:\n",
|
| 177 |
+
" similarity_matrix = np.zeros((2,2))\n",
|
| 178 |
+
" return similarity_matrix\n",
|
| 179 |
+
"\n",
|
| 180 |
+
"time1 = time.time()\n",
|
| 181 |
+
"# 计算tgt和out列表的相似性矩阵\n",
|
| 182 |
+
"for i in range(50):\n",
|
| 183 |
+
" similarity_matrix = calculate_similarity_matrix(tgt_list, out_list)\n",
|
| 184 |
+
"time2 = time.time()\n",
|
| 185 |
+
"time_cost = time2-time1\n",
|
| 186 |
+
"print(time_cost)\n",
|
| 187 |
+
"\n",
|
| 188 |
+
"# 打印相似性矩阵\n",
|
| 189 |
+
"print(similarity_matrix)\n",
|
| 190 |
+
"print(np.max(np.array(similarity_matrix),axis=1).mean())\n"
|
| 191 |
+
]
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"cell_type": "code",
|
| 195 |
+
"execution_count": null,
|
| 196 |
+
"metadata": {},
|
| 197 |
+
"outputs": [
|
| 198 |
+
{
|
| 199 |
+
"name": "stderr",
|
| 200 |
+
"output_type": "stream",
|
| 201 |
+
"text": [
|
| 202 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 203 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
|
| 204 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 205 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
|
| 206 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 207 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
|
| 208 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 209 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
|
| 210 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 211 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
|
| 212 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 213 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
|
| 214 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 215 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
|
| 216 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 217 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
|
| 218 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 219 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
|
| 220 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 221 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
|
| 222 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 223 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
|
| 224 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 225 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
|
| 226 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 227 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
|
| 228 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 229 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
|
| 230 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 231 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
|
| 232 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 233 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
|
| 234 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 235 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
|
| 236 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 237 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
|
| 238 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 239 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
|
| 240 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 241 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
|
| 242 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 243 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
|
| 244 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 245 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
|
| 246 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 247 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
|
| 248 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 249 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
|
| 250 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 251 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
|
| 252 |
+
"A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
|
| 253 |
+
"A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n"
|
| 254 |
+
]
|
| 255 |
+
},
|
| 256 |
+
{
|
| 257 |
+
"name": "stdout",
|
| 258 |
+
"output_type": "stream",
|
| 259 |
+
"text": [
|
| 260 |
+
"12\n",
|
| 261 |
+
"28\n",
|
| 262 |
+
"29.067501306533813\n",
|
| 263 |
+
"[[[-1. 1. 1. ... 1. 1. 1.]\n",
|
| 264 |
+
" [-1. 1. 1. ... 1. 1. 1.]\n",
|
| 265 |
+
" [-1. 1. -1. ... 1. -1. -1.]\n",
|
| 266 |
+
" ...\n",
|
| 267 |
+
" [ 1. 1. 1. ... 1. 1. 1.]\n",
|
| 268 |
+
" [-1. 1. 1. ... 1. -1. 1.]\n",
|
| 269 |
+
" [ 1. 1. 1. ... 1. 1. 1.]]\n",
|
| 270 |
+
"\n",
|
| 271 |
+
" [[ 1. 1. 1. ... 1. 1. 1.]\n",
|
| 272 |
+
" [ 1. 1. 1. ... 1. 1. 1.]\n",
|
| 273 |
+
" [ 1. 1. -1. ... 1. -1. -1.]\n",
|
| 274 |
+
" ...\n",
|
| 275 |
+
" [-1. 1. 1. ... 1. 1. 1.]\n",
|
| 276 |
+
" [ 1. 1. 1. ... 1. -1. 1.]\n",
|
| 277 |
+
" [-1. 1. 1. ... 1. 1. 1.]]\n",
|
| 278 |
+
"\n",
|
| 279 |
+
" [[ 1. 1. -1. ... 1. 1. 1.]\n",
|
| 280 |
+
" [ 1. 1. -1. ... 1. 1. 1.]\n",
|
| 281 |
+
" [ 1. 1. 1. ... 1. -1. -1.]\n",
|
| 282 |
+
" ...\n",
|
| 283 |
+
" [-1. 1. -1. ... 1. 1. 1.]\n",
|
| 284 |
+
" [ 1. 1. -1. ... 1. -1. 1.]\n",
|
| 285 |
+
" [-1. 1. -1. ... 1. 1. 1.]]\n",
|
| 286 |
+
"\n",
|
| 287 |
+
" ...\n",
|
| 288 |
+
"\n",
|
| 289 |
+
" [[ 1. 1. 1. ... 1. 1. -1.]\n",
|
| 290 |
+
" [ 1. 1. 1. ... 1. 1. -1.]\n",
|
| 291 |
+
" [ 1. 1. -1. ... 1. -1. 1.]\n",
|
| 292 |
+
" ...\n",
|
| 293 |
+
" [-1. 1. 1. ... 1. 1. -1.]\n",
|
| 294 |
+
" [ 1. 1. 1. ... 1. -1. -1.]\n",
|
| 295 |
+
" [-1. 1. 1. ... 1. 1. -1.]]\n",
|
| 296 |
+
"\n",
|
| 297 |
+
" [[-1. 1. 1. ... 1. 1. 1.]\n",
|
| 298 |
+
" [-1. 1. 1. ... 1. 1. 1.]\n",
|
| 299 |
+
" [-1. 1. -1. ... 1. -1. -1.]\n",
|
| 300 |
+
" ...\n",
|
| 301 |
+
" [ 1. 1. 1. ... 1. 1. 1.]\n",
|
| 302 |
+
" [-1. 1. 1. ... 1. -1. 1.]\n",
|
| 303 |
+
" [ 1. 1. 1. ... 1. 1. 1.]]\n",
|
| 304 |
+
"\n",
|
| 305 |
+
" [[-1. 1. 1. ... 1. 1. 1.]\n",
|
| 306 |
+
" [-1. 1. 1. ... 1. 1. 1.]\n",
|
| 307 |
+
" [-1. 1. -1. ... 1. -1. -1.]\n",
|
| 308 |
+
" ...\n",
|
| 309 |
+
" [ 1. 1. 1. ... 1. 1. 1.]\n",
|
| 310 |
+
" [-1. 1. 1. ... 1. -1. 1.]\n",
|
| 311 |
+
" [ 1. 1. 1. ... 1. 1. 1.]]]\n",
|
| 312 |
+
"0.9941406\n"
|
| 313 |
+
]
|
| 314 |
+
}
|
| 315 |
+
],
|
| 316 |
+
"source": [
|
| 317 |
+
"## calc with gpu 反而更慢了。。。\n",
|
| 318 |
+
"import time\n",
|
| 319 |
+
"\n",
|
| 320 |
+
"from transformers import BertTokenizer, BertModel\n",
|
| 321 |
+
"import torch\n",
|
| 322 |
+
"import numpy as np\n",
|
| 323 |
+
"# from sklearn.metrics.pairwise import cosine_similarity\n",
|
| 324 |
+
"from torch.nn.functional import cosine_similarity\n",
|
| 325 |
+
"\n",
|
| 326 |
+
"# 初始化模型和分词器\n",
|
| 327 |
+
"tokenizer = BertTokenizer.from_pretrained(\"G:/model_zoo/LM/bert-base-chinese/\")\n",
|
| 328 |
+
"bert_model = BertModel.from_pretrained(\"G:/model_zoo/LM/bert-base-chinese/\")\n",
|
| 329 |
+
"\n",
|
| 330 |
+
"# tgt和out列表\n",
|
| 331 |
+
"# tgt_list = ['症状', '器官', '检查']\n",
|
| 332 |
+
"# out_list = ['病状', '身体部位', '诊断','胃']\n",
|
| 333 |
+
"\n",
|
| 334 |
+
"tgt_list = ['乏力感', '厌世', '躯体不适', '社会功能严重受损', '兴趣减退', '言行紊乱', '脑器质性疾病', '情绪低落', '精神障碍', '情绪差伴躯体不适', '焦虑', '自责']\n",
|
| 335 |
+
"out_list = ['认知行为治疗', '与家人交流障碍', '偶有轻生想法', '长期适应性障碍', '利培酮', '心理治疗', '发呆', '独处时感到被支配', '沉迷学佛后出现精神异常', '与家人交流困难', '急性而短暂的精神病性障碍', '兴趣减退', '有被害妄想和攻击行为', '言语紊乱', '自知力可', '攻击行为', '心脏', '肝脏', '反应慢', '持续的情绪低落', '沉迷学佛', '氢溴酸西酞普兰', '无法胜任家务', '情绪低落', '急性起病', '被害妄想', '社会功能受损', '重度抑郁发作']\n",
|
| 336 |
+
"print(len(tgt_list))\n",
|
| 337 |
+
"print(len(out_list))\n",
|
| 338 |
+
"\n",
|
| 339 |
+
"# 确保CUDA可用\n",
|
| 340 |
+
"device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
|
| 341 |
+
"\n",
|
| 342 |
+
"# 将模型移动到GPU\n",
|
| 343 |
+
"bert_model.to(device)\n",
|
| 344 |
+
"\n",
|
| 345 |
+
"# 获取词向量\n",
|
| 346 |
+
"def get_word_embedding(word):\n",
|
| 347 |
+
" # 对单词进行编码\n",
|
| 348 |
+
" input_ids = tokenizer.encode(word, add_special_tokens=True, return_tensors='pt').to(device)\n",
|
| 349 |
+
" # 获取词向量\n",
|
| 350 |
+
" with torch.no_grad():\n",
|
| 351 |
+
" output = bert_model(input_ids)\n",
|
| 352 |
+
" # 使用[CLS]标记的向量作为句子向量\n",
|
| 353 |
+
" # 将结果移回CPU,因为cosine_similarity需要numpy数组\n",
|
| 354 |
+
" return output.last_hidden_state[:, 0, :]\n",
|
| 355 |
+
"\n",
|
| 356 |
+
"# 计算相似性矩阵\n",
|
| 357 |
+
"def calculate_similarity_matrix(words_list1, words_list2):\n",
|
| 358 |
+
" if len(words_list1) > 0 and len(words_list2) > 0:\n",
|
| 359 |
+
" embeddings1 = torch.stack([get_word_embedding(word) for word in words_list1])\n",
|
| 360 |
+
" embeddings2 = torch.stack([get_word_embedding(word) for word in words_list2])\n",
|
| 361 |
+
" \n",
|
| 362 |
+
" # 计算余弦相似性矩阵\n",
|
| 363 |
+
" # similarity_matrix = cosine_similarity(embeddings1.reshape(embeddings1.shape[0],-1), embeddings2.reshape(embeddings2.shape[0],-1))\n",
|
| 364 |
+
" similarity_matrix = cosine_similarity(embeddings1.unsqueeze(1), embeddings2.unsqueeze(0), dim=2).cpu().numpy()\n",
|
| 365 |
+
" \n",
|
| 366 |
+
" else:\n",
|
| 367 |
+
" similarity_matrix = np.zeros((2,2))\n",
|
| 368 |
+
" return similarity_matrix\n",
|
| 369 |
+
"\n",
|
| 370 |
+
"\n",
|
| 371 |
+
"time1 = time.time()\n",
|
| 372 |
+
"# 计算tgt和out列表的相似性矩阵\n",
|
| 373 |
+
"for i in range(50):\n",
|
| 374 |
+
" similarity_matrix = calculate_similarity_matrix(tgt_list, out_list)\n",
|
| 375 |
+
"time2 = time.time()\n",
|
| 376 |
+
"time_cost = time2-time1\n",
|
| 377 |
+
"print(time_cost)\n",
|
| 378 |
+
"\n",
|
| 379 |
+
"# 打印相似性矩阵\n",
|
| 380 |
+
"print(similarity_matrix)\n",
|
| 381 |
+
"print(np.max(np.array(similarity_matrix),axis=1).mean())\n"
|
| 382 |
+
]
|
| 383 |
+
},
|
| 384 |
+
{
|
| 385 |
+
"cell_type": "code",
|
| 386 |
+
"execution_count": 2,
|
| 387 |
+
"metadata": {},
|
| 388 |
+
"outputs": [
|
| 389 |
+
{
|
| 390 |
+
"name": "stdout",
|
| 391 |
+
"output_type": "stream",
|
| 392 |
+
"text": [
|
| 393 |
+
"100\n"
|
| 394 |
+
]
|
| 395 |
+
}
|
| 396 |
+
],
|
| 397 |
+
"source": [
|
| 398 |
+
"import numpy as np\n",
|
| 399 |
+
"ner_result = np.load(r\"G:\\code\\R0\\chinese_medical_ner-main\\ccksyidu4k-ner-roformer\\ccksyidu4k-ner-roformer\\ner_result\\PsychClinical\\1shot\\gpt-3.5-turbo_api\\task3ner_result.npy\",allow_pickle=True)\n",
|
| 400 |
+
"print(len(ner_result))"
|
| 401 |
+
]
|
| 402 |
+
},
|
| 403 |
+
{
|
| 404 |
+
"cell_type": "code",
|
| 405 |
+
"execution_count": 5,
|
| 406 |
+
"metadata": {},
|
| 407 |
+
"outputs": [
|
| 408 |
+
{
|
| 409 |
+
"name": "stdout",
|
| 410 |
+
"output_type": "stream",
|
| 411 |
+
"text": [
|
| 412 |
+
"-------------------------------------------------- task5 --------------------------------------------------\n",
|
| 413 |
+
"gpt-3.5-turbo\n",
|
| 414 |
+
"Error: G:/code/CMB_wuhu/src/ner_result/PsychClinical/0shot/gpt-3.5-turbo_api/task5/ner_result.npy not exist!!!\n",
|
| 415 |
+
"gpt-4o-mini\n",
|
| 416 |
+
"Error: G:/code/CMB_wuhu/src/ner_result/PsychClinical/0shot/gpt-4o-mini_api/task5/ner_result.npy not exist!!!\n",
|
| 417 |
+
"gpt-4\n",
|
| 418 |
+
"Error: G:/code/CMB_wuhu/src/ner_result/PsychClinical/0shot/gpt-4_api/task5/ner_result.npy not exist!!!\n",
|
| 419 |
+
"gemini-1.5-pro\n",
|
| 420 |
+
"Error: G:/code/CMB_wuhu/src/ner_result/PsychClinical/0shot/gemini-1.5-pro_api/task5/ner_result.npy not exist!!!\n",
|
| 421 |
+
"glm4\n",
|
| 422 |
+
"Error: G:/code/CMB_wuhu/src/ner_result/PsychClinical/0shot/glm4_api/task5/ner_result.npy not exist!!!\n",
|
| 423 |
+
"hunyuan-lite\n",
|
| 424 |
+
"Error: G:/code/CMB_wuhu/src/ner_result/PsychClinical/0shot/hunyuan-lite_api/task5/ner_result.npy not exist!!!\n",
|
| 425 |
+
"hunyuan-pro\n",
|
| 426 |
+
"Error: G:/code/CMB_wuhu/src/ner_result/PsychClinical/0shot/hunyuan-pro_api/task5/ner_result.npy not exist!!!\n",
|
| 427 |
+
"minimax\n",
|
| 428 |
+
"Error: G:/code/CMB_wuhu/src/ner_result/PsychClinical/0shot/minimax_api/task5/ner_result.npy not exist!!!\n",
|
| 429 |
+
"spark-4ultra\n",
|
| 430 |
+
"Error: G:/code/CMB_wuhu/src/ner_result/PsychClinical/0shot/spark-4ultra_api/task5/ner_result.npy not exist!!!\n",
|
| 431 |
+
"baichuan4\n",
|
| 432 |
+
"Error: G:/code/CMB_wuhu/src/ner_result/PsychClinical/0shot/baichuan4_api/task5/ner_result.npy not exist!!!\n",
|
| 433 |
+
"deepseek\n",
|
| 434 |
+
"Error: G:/code/CMB_wuhu/src/ner_result/PsychClinical/0shot/deepseek_api/task5/ner_result.npy not exist!!!\n",
|
| 435 |
+
"doubao-pro-32k\n",
|
| 436 |
+
"Error: G:/code/CMB_wuhu/src/ner_result/PsychClinical/0shot/doubao-pro-32k_api/task5/ner_result.npy not exist!!!\n",
|
| 437 |
+
"ernie-4-8k\n",
|
| 438 |
+
"Error: G:/code/CMB_wuhu/src/ner_result/PsychClinical/0shot/ernie-4-8k_api/task5/ner_result.npy not exist!!!\n",
|
| 439 |
+
"moonshot-v1-32k\n",
|
| 440 |
+
"Error: G:/code/CMB_wuhu/src/ner_result/PsychClinical/0shot/moonshot-v1-32k_api/task5/ner_result.npy not exist!!!\n",
|
| 441 |
+
"yi-large\n",
|
| 442 |
+
"Error: G:/code/CMB_wuhu/src/ner_result/PsychClinical/0shot/yi-large_api/task5/ner_result.npy not exist!!!\n",
|
| 443 |
+
"qwen-max\n",
|
| 444 |
+
"Error: G:/code/CMB_wuhu/src/ner_result/PsychClinical/0shot/qwen-max_api/task5/ner_result.npy not exist!!!\n"
|
| 445 |
+
]
|
| 446 |
+
}
|
| 447 |
+
],
|
| 448 |
+
"source": [
|
| 449 |
+
"### 计算性能指标\n",
|
| 450 |
+
"\n",
|
| 451 |
+
"import os\n",
|
| 452 |
+
"# model_id指定数据整理的格式 psychAiD与ChatGLM3的格式相同\n",
|
| 453 |
+
"\n",
|
| 454 |
+
"\n",
|
| 455 |
+
"nshot = 0\n",
|
| 456 |
+
"# for task in [1,3,5]:\n",
|
| 457 |
+
"for task in [5]:\n",
|
| 458 |
+
"\n",
|
| 459 |
+
" print('-'*50,'task{}'.format(task),'-'*50)\n",
|
| 460 |
+
"\n",
|
| 461 |
+
" for model in ['gpt-3.5-turbo','gpt-4o-mini','gpt-4','gemini-1.5-pro','glm4','hunyuan-lite','hunyuan-pro','minimax','spark-4ultra','baichuan4','deepseek','doubao-pro-32k','ernie-4-8k','moonshot-v1-32k','yi-large','qwen-max']: \n",
|
| 462 |
+
" # for model in ['baichuan4']: \n",
|
| 463 |
+
" \n",
|
| 464 |
+
" print(model)\n",
|
| 465 |
+
" if task == 5:\n",
|
| 466 |
+
" nshot=0\n",
|
| 467 |
+
" # ans_path = 'G:/code/CMB_0726/result-refined/API/{}shot/task{}_{}.json'.format(nshot,task,model)\n",
|
| 468 |
+
" # dir_out = './ner_result/PsychClinical/{}shot/{}_api/task{}'.format(nshot,model,task)\n",
|
| 469 |
+
"\n",
|
| 470 |
+
" ans_path = 'G:/code/CMB_wuhu/result-refined/API/{}shot/task{}_{}.json'.format(nshot,task,model)\n",
|
| 471 |
+
" dir_out = 'G:/code/CMB_wuhu/src/ner_result/PsychClinical/{}shot/{}_api/task{}'.format(nshot,model,task)\n",
|
| 472 |
+
"\n",
|
| 473 |
+
" # ans_path = 'G:/code/CMB_dali/result-refined/API/{}shot/task{}_{}.json'.format(nshot,task,model)\n",
|
| 474 |
+
" # dir_out = 'G:/code/CMB_dali/src/ner_result/PsychClinical/{}shot/{}_api/task{}'.format(nshot,model,task)\n",
|
| 475 |
+
"\n",
|
| 476 |
+
" \n",
|
| 477 |
+
"\n",
|
| 478 |
+
" \n",
|
| 479 |
+
" ner_path = dir_out +'/ner_result.npy'\n",
|
| 480 |
+
"\n",
|
| 481 |
+
" if not os.path.exists(ner_path):\n",
|
| 482 |
+
" print('Error:',ner_path,'not exist!!!')\n",
|
| 483 |
+
" continue\n",
|
| 484 |
+
" ner_result = np.load(ner_path,allow_pickle=True)\n",
|
| 485 |
+
" ner_scores = []\n",
|
| 486 |
+
" for tgt,out in ner_result:\n",
|
| 487 |
+
" similarity_matrix = calculate_similarity_matrix(tgt, out)\n",
|
| 488 |
+
"\n",
|
| 489 |
+
" # 打印相似性矩阵\n",
|
| 490 |
+
" # print(similarity_matrix)\n",
|
| 491 |
+
" ner_score = np.max(np.array(similarity_matrix),axis=1).mean()\n",
|
| 492 |
+
" ner_scores.append(ner_score)\n",
|
| 493 |
+
" ner_scores_mean = np.mean(ner_scores)\n",
|
| 494 |
+
" ner_scores_std = np.std(ner_scores)\n",
|
| 495 |
+
" print('ner score:{}±{}'.format(ner_scores_mean,ner_scores_std))\n",
|
| 496 |
+
" import json\n",
|
| 497 |
+
"\n",
|
| 498 |
+
" # Load the uploaded JSON file\n",
|
| 499 |
+
" file_path = dir_out +'/metrics.json'\n",
|
| 500 |
+
"\n",
|
| 501 |
+
" # Read the content of the file\n",
|
| 502 |
+
" with open(file_path, 'r') as file:\n",
|
| 503 |
+
" metrics = json.load(file)\n",
|
| 504 |
+
"\n",
|
| 505 |
+
" # Display the content of the JSON file to understand its structure\n",
|
| 506 |
+
" metrics['NER-score'] = {'avg':float(ner_scores_mean),'std':float(ner_scores_std)}\n",
|
| 507 |
+
" with open(file_path, 'w') as file:\n",
|
| 508 |
+
" file.write(json.dumps(metrics))\n",
|
| 509 |
+
" "
|
| 510 |
+
]
|
| 511 |
+
}
|
| 512 |
+
],
|
| 513 |
+
"metadata": {
|
| 514 |
+
"kernelspec": {
|
| 515 |
+
"display_name": "py310",
|
| 516 |
+
"language": "python",
|
| 517 |
+
"name": "python3"
|
| 518 |
+
},
|
| 519 |
+
"language_info": {
|
| 520 |
+
"codemirror_mode": {
|
| 521 |
+
"name": "ipython",
|
| 522 |
+
"version": 3
|
| 523 |
+
},
|
| 524 |
+
"file_extension": ".py",
|
| 525 |
+
"mimetype": "text/x-python",
|
| 526 |
+
"name": "python",
|
| 527 |
+
"nbconvert_exporter": "python",
|
| 528 |
+
"pygments_lexer": "ipython3",
|
| 529 |
+
"version": "3.10.13"
|
| 530 |
+
}
|
| 531 |
+
},
|
| 532 |
+
"nbformat": 4,
|
| 533 |
+
"nbformat_minor": 2
|
| 534 |
+
}
|
chinese_medical_ner/ccksyidu4k-ner-roformer/config.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
maxlen = 300
|
| 2 |
+
epochs = 999
|
| 3 |
+
batch_size = 16
|
| 4 |
+
bert_layers = 12
|
| 5 |
+
crf_lr_multiplier = 1000 # 必要时扩大CRF层的学习率
|
| 6 |
+
model_type = 'roformer_v2'
|
| 7 |
+
dropout_rate = 0.1
|
| 8 |
+
max_lr = 1e-5
|
| 9 |
+
lstm_hidden_units = 128
|
chinese_medical_ner/ccksyidu4k-ner-roformer/cudnn-7.6.5-cuda10.0_0.conda
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:291587fe1bbbff0dc3154f3f5cf9e011b8264d124dedad5f257efa39726a4557
|
| 3 |
+
size 172137578
|
chinese_medical_ner/ccksyidu4k-ner-roformer/data/chip.train
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
chinese_medical_ner/ccksyidu4k-ner-roformer/data/chip.validate
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
chinese_medical_ner/ccksyidu4k-ner-roformer/data/yidu.test
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
chinese_medical_ner/ccksyidu4k-ner-roformer/data/yidu.train
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
chinese_medical_ner/ccksyidu4k-ner-roformer/data/yidu.validate
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
chinese_medical_ner/ccksyidu4k-ner-roformer/evaluate.py
ADDED
|
@@ -0,0 +1,154 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#! -*- coding: utf-8 -*-
|
| 2 |
+
import os
|
| 3 |
+
|
| 4 |
+
# bert tiny
|
| 5 |
+
import pickle
|
| 6 |
+
|
| 7 |
+
import pandas as pd
|
| 8 |
+
from matplotlib import pyplot as plt
|
| 9 |
+
|
| 10 |
+
from model import BERT
|
| 11 |
+
from path import BASE_CONFIG_NAME, BASE_CKPT_NAME, BASE_MODEL_DIR, train_file_path, test_file_path, val_file_path, \
|
| 12 |
+
weights_path, label_dict_path, categories_f1_path
|
| 13 |
+
from preprocess import load_data, NamedEntityRecognizer
|
| 14 |
+
from plot import f1_plot
|
| 15 |
+
|
| 16 |
+
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
|
| 17 |
+
|
| 18 |
+
from utils.backend import keras, K
|
| 19 |
+
from utils.tokenizers import Tokenizer
|
| 20 |
+
from tqdm import tqdm
|
| 21 |
+
|
| 22 |
+
# save_file_path = "./weights/yidu_bert_tiny_lstm_crf.h5"
|
| 23 |
+
|
| 24 |
+
# bert配置
|
| 25 |
+
config_path = BASE_CONFIG_NAME
|
| 26 |
+
checkpoint_path = BASE_CKPT_NAME
|
| 27 |
+
dict_path = '{}/vocab.txt'.format(BASE_MODEL_DIR)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def get_score(data, NER, tqdm_verbose = False):
|
| 31 |
+
"""评测函数
|
| 32 |
+
"""
|
| 33 |
+
X, Y, Z = 1e-10, 1e-10, 1e-10
|
| 34 |
+
if tqdm_verbose:
|
| 35 |
+
loop = tqdm(data, ncols = 100)
|
| 36 |
+
for d in loop:
|
| 37 |
+
loop.set_description("Evaluating General F1")
|
| 38 |
+
R = set(NER.recognize(d[0]))
|
| 39 |
+
T = set([tuple(i) for i in d[1:]])
|
| 40 |
+
X += len(R & T)
|
| 41 |
+
Y += len(R)
|
| 42 |
+
Z += len(T)
|
| 43 |
+
|
| 44 |
+
else:
|
| 45 |
+
for d in data:
|
| 46 |
+
R = set(NER.recognize(d[0]))
|
| 47 |
+
T = set([tuple(i) for i in d[1:]])
|
| 48 |
+
X += len(R & T)
|
| 49 |
+
Y += len(R)
|
| 50 |
+
Z += len(T)
|
| 51 |
+
f1, precision, recall = 2 * X / (Y + Z), X / Y, X / Z
|
| 52 |
+
return f1, precision, recall
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def get_catetories_score(data, NER, categories, tqdm_verbose = False):
|
| 56 |
+
"""评测函数
|
| 57 |
+
"""
|
| 58 |
+
labeded_set = {}
|
| 59 |
+
for i in categories:
|
| 60 |
+
labeded_set[i] = {'TP': 1e-10, 'TP+FP': 1e-10, 'TP+FN': 1e-10}
|
| 61 |
+
if tqdm_verbose:
|
| 62 |
+
loop = tqdm(data, ncols = 100)
|
| 63 |
+
for d in loop:
|
| 64 |
+
loop.set_description("Evaluating F1 of each Categories")
|
| 65 |
+
for i in categories:
|
| 66 |
+
R = set(NER.recognize(d[0]))
|
| 67 |
+
R_labeled = set()
|
| 68 |
+
for s, r, label in R:
|
| 69 |
+
if label == i:
|
| 70 |
+
R_labeled.add((s, r, label))
|
| 71 |
+
T = set([tuple(i) for i in d[1:]])
|
| 72 |
+
T_labeled = set()
|
| 73 |
+
for s, r, label in T:
|
| 74 |
+
if label == i:
|
| 75 |
+
T_labeled.add((s, r, label))
|
| 76 |
+
|
| 77 |
+
labeded_set[i]["TP"] += len(R_labeled & T_labeled)
|
| 78 |
+
labeded_set[i]["TP+FP"] += len(R_labeled)
|
| 79 |
+
labeded_set[i]["TP+FN"] += len(T_labeled)
|
| 80 |
+
# print(labeded_set)
|
| 81 |
+
for i in labeded_set:
|
| 82 |
+
labeded_set[i]["precision"] = round(labeded_set[i]["TP"] / labeded_set[i]["TP+FP"], 4)
|
| 83 |
+
labeded_set[i]["recall"] = round(labeded_set[i]["TP"] / labeded_set[i]["TP+FN"], 4)
|
| 84 |
+
labeded_set[i]["f1"] = round(2 * labeded_set[i]["TP"] / (labeded_set[i]["TP+FP"] + labeded_set[i]["TP+FN"]), 4)
|
| 85 |
+
labeded_set[i]["TP"] = int(labeded_set[i]["TP"])
|
| 86 |
+
labeded_set[i]["TP+FP"] = int(labeded_set[i]["TP+FP"])
|
| 87 |
+
labeded_set[i]["TP+FN"] = int(labeded_set[i]["TP+FN"])
|
| 88 |
+
# f1, precision, recall = 2 * X / (Y + Z), X / Y, X / Z
|
| 89 |
+
return labeded_set
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
def evaluate(title, data, CRF, NER):
|
| 93 |
+
trans = K.eval(CRF.trans)
|
| 94 |
+
NER.trans = trans
|
| 95 |
+
f1, precision, recall = get_score(data, NER, tqdm_verbose = True)
|
| 96 |
+
print(title + ': f1: %.5f, precision: %.5f, recall: %.5f' % (f1, precision, recall))
|
| 97 |
+
return f1, precision, recall
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
def evaluate_categories(title, data, categories, CRF, NER):
|
| 101 |
+
trans = K.eval(CRF.trans)
|
| 102 |
+
NER.trans = trans
|
| 103 |
+
result = get_catetories_score(data, NER, categories, tqdm_verbose = True)
|
| 104 |
+
# for i in result:
|
| 105 |
+
# print(i, result[i])
|
| 106 |
+
df = pd.DataFrame(result)
|
| 107 |
+
df = df.T
|
| 108 |
+
df[["TP", "TP+FP", "TP+FN"]] = df[["TP", "TP+FP", "TP+FN"]].astype(int)
|
| 109 |
+
# 设置value的显示长度为200,默认为50
|
| 110 |
+
pd.set_option('max_colwidth', 200)
|
| 111 |
+
# 显示所有列,把行显示设置成最大
|
| 112 |
+
pd.set_option('display.max_columns', None)
|
| 113 |
+
# 显示所有行,把列显示设置成最大
|
| 114 |
+
pd.set_option('display.max_rows', None)
|
| 115 |
+
print(df)
|
| 116 |
+
return df
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
def evaluate_one(save_file_path, dataset_path, csv_path = categories_f1_path, evaluate_categories_f1 = False):
|
| 120 |
+
with open(label_dict_path, 'rb') as f: # 打开文件
|
| 121 |
+
categories = set(pickle.load(f))
|
| 122 |
+
|
| 123 |
+
bert = BERT(config_path,
|
| 124 |
+
checkpoint_path,
|
| 125 |
+
categories,
|
| 126 |
+
summary = False)
|
| 127 |
+
model = bert.get_model()
|
| 128 |
+
|
| 129 |
+
# 标注数据
|
| 130 |
+
test_data = load_data(dataset_path, categories)
|
| 131 |
+
categories = list(sorted(categories))
|
| 132 |
+
|
| 133 |
+
# 建立分词器
|
| 134 |
+
tokenizer = Tokenizer(dict_path, do_lower_case = True)
|
| 135 |
+
|
| 136 |
+
model.load_weights(save_file_path)
|
| 137 |
+
CRF = bert.get_CRF()
|
| 138 |
+
NER = NamedEntityRecognizer(tokenizer, model, categories, trans = K.eval(CRF.trans), starts = [0], ends = [0])
|
| 139 |
+
|
| 140 |
+
print("\nweight path:" + save_file_path)
|
| 141 |
+
print("evaluate dataset path:" + dataset_path)
|
| 142 |
+
f1, precision, recall = evaluate("General", test_data, CRF, NER)
|
| 143 |
+
if evaluate_categories_f1:
|
| 144 |
+
df = evaluate_categories("Each Categories:", test_data, categories, CRF, NER)
|
| 145 |
+
df.to_csv(csv_path, encoding = 'utf-8-sig')
|
| 146 |
+
return f1, precision, recall
|
| 147 |
+
|
| 148 |
+
|
| 149 |
+
if __name__ == '__main__':
|
| 150 |
+
|
| 151 |
+
evaluate_one(save_file_path = weights_path + '/chip_roformer_v2_base.h5',
|
| 152 |
+
dataset_path = "./data/chip.validate",
|
| 153 |
+
csv_path = './report/chip_bert_base.csv',
|
| 154 |
+
evaluate_categories_f1 = True)
|
chinese_medical_ner/ccksyidu4k-ner-roformer/evaluate_ner.py
ADDED
|
@@ -0,0 +1,359 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# import evaluate
|
| 2 |
+
import evaluate
|
| 3 |
+
import io
|
| 4 |
+
import json
|
| 5 |
+
import numpy as np
|
| 6 |
+
import os
|
| 7 |
+
os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'
|
| 8 |
+
import sys
|
| 9 |
+
import tqdm
|
| 10 |
+
import csv
|
| 11 |
+
# from predict import predict
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
## chip ner model
|
| 15 |
+
import pickle
|
| 16 |
+
from model import BERT
|
| 17 |
+
from path import BASE_CONFIG_NAME, BASE_CKPT_NAME, BASE_MODEL_DIR, label_dict_path, weights_path,proj_path
|
| 18 |
+
from preprocess import NamedEntityRecognizer
|
| 19 |
+
from utils.tokenizers import Tokenizer
|
| 20 |
+
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
|
| 21 |
+
# bert配置
|
| 22 |
+
config_path = BASE_CONFIG_NAME
|
| 23 |
+
checkpoint_path = BASE_CKPT_NAME
|
| 24 |
+
dict_path = '{}/vocab.txt'.format(BASE_MODEL_DIR)
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
import argparse
|
| 29 |
+
|
| 30 |
+
CALC_REDUNDANT = False # re-calculate, even if scores already exist
|
| 31 |
+
|
| 32 |
+
def main(label_dict_path, weights_path):
|
| 33 |
+
parser = argparse.ArgumentParser()
|
| 34 |
+
parser.add_argument("--dataset", type=str, help="psych exam", default=5)
|
| 35 |
+
parser.add_argument(
|
| 36 |
+
"--ans_path",
|
| 37 |
+
type=str,
|
| 38 |
+
help="path to the model generated ans file",
|
| 39 |
+
# default='/root/CMB/result/PsychExam/psychAiD/modelans.json'
|
| 40 |
+
# default='/root/CMB/result/PsychExam/chatglm3_6b/modelans_glm3.json'
|
| 41 |
+
# default='/root/CMB/result/PsychExam/chatglm3_6b_32k/modelans_glm3_32k.json'
|
| 42 |
+
# default='/root/CMB/result/PsychExam/psychAiD/modelans_psychAiD_no_sample.json'
|
| 43 |
+
# default='/data/cj_group/shuyu/CMB_0426/result/PsychClinical/llama2/modelans_psychAiD_no_sample.json'
|
| 44 |
+
default='G:/code/CMB_wuhu/result-refined/API/1shot/task1_glm4.json'
|
| 45 |
+
)
|
| 46 |
+
|
| 47 |
+
parser.add_argument(
|
| 48 |
+
"--dir_out",
|
| 49 |
+
type=str,
|
| 50 |
+
help="path to the eval matrics",
|
| 51 |
+
# default='/root/CMB/result/PsychExam/psychAiD/'
|
| 52 |
+
# default='/root/CMB/result/PsychExam/chatglm3_6b/'
|
| 53 |
+
# default='/root/CMB/result/PsychExam/chatglm3_6b_32k/'
|
| 54 |
+
# default='/root/CMB/result/PsychExam/psychAiD/no_sample'
|
| 55 |
+
default='G:/code/CMB_wuhu/src/ner_result/PsychClinical/1shot/glm4_api/task1'
|
| 56 |
+
|
| 57 |
+
)
|
| 58 |
+
|
| 59 |
+
# parse arguments, set data paths
|
| 60 |
+
# args = parser.get_parser()
|
| 61 |
+
args = parser.parse_args()
|
| 62 |
+
is_cxr = True if args.dataset in ['cxr', 'opi'] else False
|
| 63 |
+
|
| 64 |
+
os.makedirs(args.dir_out,exist_ok=True)
|
| 65 |
+
|
| 66 |
+
# load data
|
| 67 |
+
lst_tgt = []
|
| 68 |
+
lst_out = []
|
| 69 |
+
lst_idx = []
|
| 70 |
+
option_qa = [[],[]]
|
| 71 |
+
with open(args.ans_path, "r", encoding="utf-8") as f:
|
| 72 |
+
answers = json.load(f)
|
| 73 |
+
idx = 0
|
| 74 |
+
for ans in answers:
|
| 75 |
+
if 'question_type' not in ans.keys():
|
| 76 |
+
ans['question_type'] = 'clinical'
|
| 77 |
+
if '选择题' in ans['question_type']:
|
| 78 |
+
option_qa[0].append(ans['answer'])
|
| 79 |
+
option_qa[1].append(ans['model_answer'])
|
| 80 |
+
idx += 1
|
| 81 |
+
else:
|
| 82 |
+
if ans['question_type'] == 'clinical':
|
| 83 |
+
if ans['conversations'][1]['from']=='gpt':
|
| 84 |
+
lst_tgt.append(ans['conversations'][1]['value'])
|
| 85 |
+
else:
|
| 86 |
+
lst_tgt.append(ans['conversations'][2]['value'])
|
| 87 |
+
else:
|
| 88 |
+
lst_tgt.append(ans['answer'])
|
| 89 |
+
# lst_out.append(ans['model_answer'])
|
| 90 |
+
if ans['answer_0'] == 'API调用失败':
|
| 91 |
+
continue
|
| 92 |
+
lst_out.append(ans['answer_0'])
|
| 93 |
+
|
| 94 |
+
lst_idx.append(idx)
|
| 95 |
+
idx += 1
|
| 96 |
+
|
| 97 |
+
print('data num:',idx)
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
# compute scores of each sample across entire dataset
|
| 102 |
+
scores_all = {}
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
# 建立分词器
|
| 107 |
+
weights_path = weights_path + '/chip_roformer_v2_base.h5'
|
| 108 |
+
label_dict_path = label_dict_path
|
| 109 |
+
trans_path = proj_path + "/weights/chip_roformer_v2_crf_trans.pkl"
|
| 110 |
+
print(label_dict_path)
|
| 111 |
+
with open(label_dict_path, 'rb') as f: # 打开文件
|
| 112 |
+
categories = pickle.load(f)
|
| 113 |
+
tokenizer = Tokenizer(dict_path, do_lower_case = True)
|
| 114 |
+
|
| 115 |
+
bert = BERT(config_path,
|
| 116 |
+
checkpoint_path,
|
| 117 |
+
categories,
|
| 118 |
+
summary = False)
|
| 119 |
+
model = bert.get_model()
|
| 120 |
+
print('loading model weights from ',weights_path)
|
| 121 |
+
model.load_weights(weights_path)
|
| 122 |
+
NER = NamedEntityRecognizer(tokenizer, model, categories, trans = pickle.load(open(trans_path, 'rb')), starts = [0],
|
| 123 |
+
ends = [0])
|
| 124 |
+
NER.trans = pickle.load(open(trans_path, 'rb'))
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
NER_results = []
|
| 128 |
+
NER_cls_results = []
|
| 129 |
+
|
| 130 |
+
for tgt, out, idx in tqdm.tqdm(zip(lst_tgt, lst_out, lst_idx)):
|
| 131 |
+
|
| 132 |
+
# get sub-dict containing scores for each metric
|
| 133 |
+
# scores = compute_scores(tgt, out)
|
| 134 |
+
|
| 135 |
+
entities = []
|
| 136 |
+
for start, end, tag in set(NER.recognize(tgt)):
|
| 137 |
+
entities.append((tgt[start:end + 1], tag, start, end))
|
| 138 |
+
entities = sorted(entities, key = lambda d: d[2])
|
| 139 |
+
tgt_chip_ner_list = entities
|
| 140 |
+
|
| 141 |
+
entities = []
|
| 142 |
+
for start, end, tag in set(NER.recognize(out)):
|
| 143 |
+
entities.append((out[start:end + 1], tag, start, end))
|
| 144 |
+
entities = sorted(entities, key = lambda d: d[2])
|
| 145 |
+
out_chip_ner_list = entities
|
| 146 |
+
|
| 147 |
+
tgt_list = tgt_chip_ner_list
|
| 148 |
+
out_list = out_chip_ner_list
|
| 149 |
+
|
| 150 |
+
# tgt_list = list(set([element[0] for element in tgt_chip_ner_list]))
|
| 151 |
+
# out_list = list(set([element[0] for element in out_chip_ner_list]))
|
| 152 |
+
# tgt_cls_list = list(set([element[1] for element in tgt_chip_ner_list]))
|
| 153 |
+
# out_cls_list = list(set([element[1] for element in out_chip_ner_list]))
|
| 154 |
+
|
| 155 |
+
tgt_list = list([element[0] for element in tgt_chip_ner_list])
|
| 156 |
+
out_list = list([element[0] for element in out_chip_ner_list])
|
| 157 |
+
tgt_cls_list = list([element[1] for element in tgt_chip_ner_list])
|
| 158 |
+
out_cls_list = list([element[1] for element in out_chip_ner_list])
|
| 159 |
+
|
| 160 |
+
NER_results.append([tgt_list,out_list])
|
| 161 |
+
NER_cls_results.append([tgt_cls_list,out_cls_list])
|
| 162 |
+
|
| 163 |
+
print('-'*50)
|
| 164 |
+
print('tgt:',tgt_list)
|
| 165 |
+
print('out:',out_list)
|
| 166 |
+
|
| 167 |
+
# 计算交集,即正确预测的词汇
|
| 168 |
+
intersection = set(tgt_list).intersection(set(out_list))
|
| 169 |
+
|
| 170 |
+
# 计算precision
|
| 171 |
+
precision = len(intersection) / len(out_list) if out_list else 0
|
| 172 |
+
|
| 173 |
+
# 计算recall
|
| 174 |
+
recall = len(intersection) / len(tgt_list) if tgt_list else 0
|
| 175 |
+
|
| 176 |
+
# 计算F1 score
|
| 177 |
+
f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
|
| 178 |
+
|
| 179 |
+
|
| 180 |
+
scores = {
|
| 181 |
+
'NER-Precision': precision,
|
| 182 |
+
'NER-Recall': recall,
|
| 183 |
+
'NER-F1': f1_score,
|
| 184 |
+
}
|
| 185 |
+
|
| 186 |
+
# scale scores to be on [0,100] instead of [0,1]
|
| 187 |
+
for key in scores:
|
| 188 |
+
scores[key] *= 100.
|
| 189 |
+
scores[key] = round(scores[key], 2)
|
| 190 |
+
|
| 191 |
+
# append to master dict, dataset object
|
| 192 |
+
scores_all[idx] = scores
|
| 193 |
+
|
| 194 |
+
# save averaged scores across entire dataset
|
| 195 |
+
write_all_scores(args, scores_all)
|
| 196 |
+
print(scores_all)
|
| 197 |
+
|
| 198 |
+
score_list = []
|
| 199 |
+
for i in range(len(option_qa[0])):
|
| 200 |
+
gt = option_qa[0][i]
|
| 201 |
+
pred = option_qa[1][i]
|
| 202 |
+
if isinstance(gt,list):
|
| 203 |
+
hit = 0
|
| 204 |
+
for apred in pred:
|
| 205 |
+
if apred in gt:
|
| 206 |
+
hit += 1
|
| 207 |
+
score = hit/len(gt)
|
| 208 |
+
else:
|
| 209 |
+
if gt == pred:
|
| 210 |
+
score = 1
|
| 211 |
+
else:
|
| 212 |
+
score = 0
|
| 213 |
+
score_list.append(score)
|
| 214 |
+
acc = np.mean(score_list)
|
| 215 |
+
print('accuracy:',acc)
|
| 216 |
+
os.makedirs(args.dir_out,exist_ok=True)
|
| 217 |
+
np.save(args.dir_out+'/ner_result.npy',NER_results)
|
| 218 |
+
np.save(args.dir_out+'/ner_cls_result.npy',NER_cls_results)
|
| 219 |
+
|
| 220 |
+
|
| 221 |
+
|
| 222 |
+
def compute_scores(tgt, out, metrics='NER', is_cxr=False):
|
| 223 |
+
''' given output(s), target(s), and a tuple of metrics
|
| 224 |
+
return a scores dict '''
|
| 225 |
+
from path import BASE_CONFIG_NAME, BASE_CKPT_NAME, BASE_MODEL_DIR, label_dict_path, weights_path
|
| 226 |
+
|
| 227 |
+
tgt_chip_ner_list = predict(txt = tgt,
|
| 228 |
+
weights_path = weights_path + '/chip_roformer_v2_base.h5',
|
| 229 |
+
label_dict_path = label_dict_path,
|
| 230 |
+
trans_path = "./weights/chip_roformer_v2_crf_trans.pkl")
|
| 231 |
+
|
| 232 |
+
out_chip_ner_list = predict(txt = tgt,
|
| 233 |
+
weights_path = weights_path + '/chip_roformer_v2_base.h5',
|
| 234 |
+
label_dict_path = label_dict_path,
|
| 235 |
+
trans_path = "./weights/chip_roformer_v2_crf_trans.pkl")
|
| 236 |
+
|
| 237 |
+
tgt_list = tgt_chip_ner_list
|
| 238 |
+
out_list = out_chip_ner_list
|
| 239 |
+
print('-'*50)
|
| 240 |
+
print('tgt:',tgt_list)
|
| 241 |
+
print('out:',out_list)
|
| 242 |
+
|
| 243 |
+
# 计算交集,即正确预测的词汇
|
| 244 |
+
intersection = set(tgt_list).intersection(set(out_list))
|
| 245 |
+
|
| 246 |
+
# 计算precision
|
| 247 |
+
precision = len(intersection) / len(out_list) if out_list else 0
|
| 248 |
+
|
| 249 |
+
# 计算recall
|
| 250 |
+
recall = len(intersection) / len(tgt_list) if tgt_list else 0
|
| 251 |
+
|
| 252 |
+
# 计算F1 score
|
| 253 |
+
f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
|
| 254 |
+
|
| 255 |
+
|
| 256 |
+
scores = {
|
| 257 |
+
'NER-Precision': precision,
|
| 258 |
+
'NER-Recall': recall,
|
| 259 |
+
'NER-F1': f1_score,
|
| 260 |
+
}
|
| 261 |
+
|
| 262 |
+
# scale scores to be on [0,100] instead of [0,1]
|
| 263 |
+
for key in scores:
|
| 264 |
+
scores[key] *= 100.
|
| 265 |
+
scores[key] = round(scores[key], 2)
|
| 266 |
+
|
| 267 |
+
return scores
|
| 268 |
+
|
| 269 |
+
|
| 270 |
+
def write_all_scores(args, scores_all):
|
| 271 |
+
''' write all scores across dataset to json file
|
| 272 |
+
redundantly write to txt for copy-paste into overleaf '''
|
| 273 |
+
|
| 274 |
+
validate_keys(scores_all) # sanity check
|
| 275 |
+
|
| 276 |
+
# compute avg, std across all samples. write to json
|
| 277 |
+
scores_avg_std = avg_across_samples(scores_all)
|
| 278 |
+
fn_scores_json = os.path.join(args.dir_out, 'metrics.json')
|
| 279 |
+
with open(fn_scores_json, 'w') as f:
|
| 280 |
+
f.write(json.dumps(scores_avg_std))
|
| 281 |
+
|
| 282 |
+
# extract avg, write to txt file
|
| 283 |
+
scores_avg = extract_avg_only(scores_avg_std)
|
| 284 |
+
ss = scores_avg
|
| 285 |
+
txt_out = []
|
| 286 |
+
for key, val in scores_avg.items():
|
| 287 |
+
ss[key] = round(ss[key], 1)
|
| 288 |
+
header = 'NER-Precision & NER-Recall & NER-F1'
|
| 289 |
+
txt_out.append(header)
|
| 290 |
+
str_txt = f'{ss["NER-Precision"]} & {ss["NER-Recall"]} & {ss["NER-F1"]}'
|
| 291 |
+
txt_out.append(str_txt)
|
| 292 |
+
fn_scores_txt = os.path.join(args.dir_out, 'metrics.txt')
|
| 293 |
+
write_list_to_csv(fn_scores_txt, txt_out)
|
| 294 |
+
|
| 295 |
+
return
|
| 296 |
+
|
| 297 |
+
|
| 298 |
+
def avg_across_samples(scores_all):
|
| 299 |
+
''' average across individual sample scores (sub-dicts) '''
|
| 300 |
+
|
| 301 |
+
scores_avg_std = {}
|
| 302 |
+
keys_to_avg = ["NER-Precision", "NER-Recall", "NER-F1"]
|
| 303 |
+
|
| 304 |
+
for key in keys_to_avg:
|
| 305 |
+
values = [sub_dict[key] for sub_dict in scores_all.values()]
|
| 306 |
+
avg_std = {'avg': round(np.mean(values), 2),
|
| 307 |
+
'std': round(np.std(values), 2)}
|
| 308 |
+
scores_avg_std[key] = avg_std
|
| 309 |
+
|
| 310 |
+
return scores_avg_std
|
| 311 |
+
|
| 312 |
+
|
| 313 |
+
def extract_avg_only(scores_avg_std):
|
| 314 |
+
''' extract only values from sub-dict key avg '''
|
| 315 |
+
scores_avg = {}
|
| 316 |
+
for idx in scores_avg_std:
|
| 317 |
+
scores_avg[idx] = scores_avg_std[idx]['avg']
|
| 318 |
+
return scores_avg
|
| 319 |
+
|
| 320 |
+
|
| 321 |
+
def validate_keys(my_dict):
|
| 322 |
+
''' given dict w sub-dict, validate all sub-dicts have same keys '''
|
| 323 |
+
|
| 324 |
+
sub_dict_keys = None
|
| 325 |
+
for sub_dict in my_dict.values():
|
| 326 |
+
if sub_dict_keys is None:
|
| 327 |
+
sub_dict_keys = set(sub_dict.keys())
|
| 328 |
+
else:
|
| 329 |
+
msg = 'sub-dicts do not contain same keys'
|
| 330 |
+
assert set(sub_dict.keys()) == sub_dict_keys, msg
|
| 331 |
+
|
| 332 |
+
return
|
| 333 |
+
|
| 334 |
+
|
| 335 |
+
# def wrap_str_in_lst(var):
|
| 336 |
+
# if isinstance(var, str):
|
| 337 |
+
# return [var]
|
| 338 |
+
# return var
|
| 339 |
+
|
| 340 |
+
def wrap_str_in_lst(text):
|
| 341 |
+
# 使用 jieba 进行精确分词
|
| 342 |
+
segmented_text = jieba.cut(text, cut_all=False)
|
| 343 |
+
# 直接返回分词结果的列表
|
| 344 |
+
return list(segmented_text)
|
| 345 |
+
|
| 346 |
+
|
| 347 |
+
def write_list_to_csv(fn_csv, list_, csv_action='w'):
|
| 348 |
+
''' write each element of 1d list to csv
|
| 349 |
+
can also append to existing file w csv_action="a" '''
|
| 350 |
+
|
| 351 |
+
with open(fn_csv, csv_action) as f:
|
| 352 |
+
writer = csv.writer(f, delimiter='\n')
|
| 353 |
+
writer.writerow(list_)
|
| 354 |
+
|
| 355 |
+
return
|
| 356 |
+
|
| 357 |
+
|
| 358 |
+
if __name__ == '__main__':
|
| 359 |
+
main(label_dict_path, weights_path)
|
chinese_medical_ner/ccksyidu4k-ner-roformer/images/chip_train_acc.png
ADDED
|
chinese_medical_ner/ccksyidu4k-ner-roformer/images/chip_train_loss.png
ADDED
|
chinese_medical_ner/ccksyidu4k-ner-roformer/images/chip_val_f1.png
ADDED
|
chinese_medical_ner/ccksyidu4k-ner-roformer/images/downstream.png
ADDED
|
Git LFS Details
|
chinese_medical_ner/ccksyidu4k-ner-roformer/images/model.jpg
ADDED
|
Git LFS Details
|
chinese_medical_ner/ccksyidu4k-ner-roformer/images/yidu_train_acc.png
ADDED
|