Finish Training
Browse files- bert-model.pth +3 -0
- labels.txt +232 -0
- main.ipynb +1554 -0
- use.ipynb +999 -0
bert-model.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2e100688c6978b589701729d4f5d1125f63c0eed0e249c26799e869dfa674ca4
|
| 3 |
+
size 407505477
|
labels.txt
ADDED
|
@@ -0,0 +1,232 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
B-BANK
|
| 2 |
+
E-BANK
|
| 3 |
+
O
|
| 4 |
+
B-COMMENTS_N
|
| 5 |
+
E-COMMENTS_N
|
| 6 |
+
B-COMMENTS_ADJ
|
| 7 |
+
E-COMMENTS_ADJ
|
| 8 |
+
B-PRODUCT
|
| 9 |
+
E-PRODUCT
|
| 10 |
+
I-PRODUCT
|
| 11 |
+
I-COMMENTS_N
|
| 12 |
+
I-BANK
|
| 13 |
+
I-COMMENTS_ADJ
|
| 14 |
+
B-product_name
|
| 15 |
+
I-product_name
|
| 16 |
+
B-time
|
| 17 |
+
I-time
|
| 18 |
+
E-time
|
| 19 |
+
B-person_name
|
| 20 |
+
I-person_name
|
| 21 |
+
E-person_name
|
| 22 |
+
E-product_name
|
| 23 |
+
B-org_name
|
| 24 |
+
I-org_name
|
| 25 |
+
E-org_name
|
| 26 |
+
B-location
|
| 27 |
+
I-location
|
| 28 |
+
E-location
|
| 29 |
+
B-company_name
|
| 30 |
+
I-company_name
|
| 31 |
+
E-company_name
|
| 32 |
+
B-GPE
|
| 33 |
+
I-GPE
|
| 34 |
+
E-GPE
|
| 35 |
+
B-PER
|
| 36 |
+
I-PER
|
| 37 |
+
E-PER
|
| 38 |
+
B-LOC
|
| 39 |
+
I-LOC
|
| 40 |
+
E-LOC
|
| 41 |
+
B-ORG
|
| 42 |
+
I-ORG
|
| 43 |
+
E-ORG
|
| 44 |
+
B-body
|
| 45 |
+
E-body
|
| 46 |
+
I-body
|
| 47 |
+
B-symp
|
| 48 |
+
E-symp
|
| 49 |
+
I-symp
|
| 50 |
+
B-chec
|
| 51 |
+
E-chec
|
| 52 |
+
I-chec
|
| 53 |
+
B-dise
|
| 54 |
+
I-dise
|
| 55 |
+
E-dise
|
| 56 |
+
B-cure
|
| 57 |
+
I-cure
|
| 58 |
+
E-cure
|
| 59 |
+
B-���岿λ
|
| 60 |
+
I-���岿λ
|
| 61 |
+
B-���ͼ���
|
| 62 |
+
E-���ͼ���
|
| 63 |
+
I-���ͼ���
|
| 64 |
+
E-���岿λ
|
| 65 |
+
B-֢״������
|
| 66 |
+
E-֢״������
|
| 67 |
+
I-֢״������
|
| 68 |
+
B-���������
|
| 69 |
+
I-���������
|
| 70 |
+
E-���������
|
| 71 |
+
B-����
|
| 72 |
+
I-����
|
| 73 |
+
E-����
|
| 74 |
+
B-���ʲ�λ
|
| 75 |
+
E-���ʲ�λ
|
| 76 |
+
B-����
|
| 77 |
+
I-����
|
| 78 |
+
E-����
|
| 79 |
+
B-Ӱ����
|
| 80 |
+
E-Ӱ����
|
| 81 |
+
I-���ʲ�λ
|
| 82 |
+
B-ҩ��
|
| 83 |
+
E-ҩ��
|
| 84 |
+
I-ҩ��
|
| 85 |
+
B-ʵ���Ҽ���
|
| 86 |
+
I-ʵ���Ҽ���
|
| 87 |
+
E-ʵ���Ҽ���
|
| 88 |
+
I-Ӱ����
|
| 89 |
+
B-name
|
| 90 |
+
I-name
|
| 91 |
+
E-name
|
| 92 |
+
B-address
|
| 93 |
+
E-address
|
| 94 |
+
B-organization
|
| 95 |
+
E-organization
|
| 96 |
+
B-game
|
| 97 |
+
I-game
|
| 98 |
+
E-game
|
| 99 |
+
I-address
|
| 100 |
+
B-scene
|
| 101 |
+
I-scene
|
| 102 |
+
E-scene
|
| 103 |
+
B-book
|
| 104 |
+
I-book
|
| 105 |
+
E-book
|
| 106 |
+
I-organization
|
| 107 |
+
B-company
|
| 108 |
+
I-company
|
| 109 |
+
E-company
|
| 110 |
+
B-position
|
| 111 |
+
E-position
|
| 112 |
+
I-position
|
| 113 |
+
B-government
|
| 114 |
+
I-government
|
| 115 |
+
E-government
|
| 116 |
+
B-movie
|
| 117 |
+
I-movie
|
| 118 |
+
E-movie
|
| 119 |
+
B-bod
|
| 120 |
+
I-bod
|
| 121 |
+
E-bod
|
| 122 |
+
B-dis
|
| 123 |
+
I-dis
|
| 124 |
+
E-dis
|
| 125 |
+
B-sym
|
| 126 |
+
I-sym
|
| 127 |
+
E-sym
|
| 128 |
+
B-pro
|
| 129 |
+
I-pro
|
| 130 |
+
E-pro
|
| 131 |
+
B-ite
|
| 132 |
+
I-ite
|
| 133 |
+
E-ite
|
| 134 |
+
B-mic
|
| 135 |
+
I-mic
|
| 136 |
+
E-mic
|
| 137 |
+
B-dep
|
| 138 |
+
E-dep
|
| 139 |
+
B-dru
|
| 140 |
+
I-dru
|
| 141 |
+
E-dru
|
| 142 |
+
I-dep
|
| 143 |
+
B-equ
|
| 144 |
+
I-equ
|
| 145 |
+
E-equ
|
| 146 |
+
B-Time
|
| 147 |
+
I-Time
|
| 148 |
+
E-Time
|
| 149 |
+
B-Person
|
| 150 |
+
B-Location
|
| 151 |
+
I-Location
|
| 152 |
+
E-Location
|
| 153 |
+
E-Person
|
| 154 |
+
B-Thing
|
| 155 |
+
E-Thing
|
| 156 |
+
B-Metric
|
| 157 |
+
E-Metric
|
| 158 |
+
I-Person
|
| 159 |
+
I-Thing
|
| 160 |
+
B-Organization
|
| 161 |
+
I-Organization
|
| 162 |
+
E-Organization
|
| 163 |
+
I-Metric
|
| 164 |
+
B-Abstract
|
| 165 |
+
I-Abstract
|
| 166 |
+
E-Abstract
|
| 167 |
+
B-Physical
|
| 168 |
+
I-Physical
|
| 169 |
+
E-Physical
|
| 170 |
+
B-Term
|
| 171 |
+
I-Term
|
| 172 |
+
E-Term
|
| 173 |
+
B-ABstract
|
| 174 |
+
I-ABstract
|
| 175 |
+
E-ABstract
|
| 176 |
+
B-HCCX
|
| 177 |
+
E-HCCX
|
| 178 |
+
I-HCCX
|
| 179 |
+
B-MISC
|
| 180 |
+
E-MISC
|
| 181 |
+
B-HPPX
|
| 182 |
+
E-HPPX
|
| 183 |
+
I-HPPX
|
| 184 |
+
I-MISC
|
| 185 |
+
B-XH
|
| 186 |
+
I-XH
|
| 187 |
+
E-XH
|
| 188 |
+
B-EQU
|
| 189 |
+
I-EQU
|
| 190 |
+
E-EQU
|
| 191 |
+
B-TIME
|
| 192 |
+
E-TIME
|
| 193 |
+
I-TIME
|
| 194 |
+
B-FAC
|
| 195 |
+
I-FAC
|
| 196 |
+
E-FAC
|
| 197 |
+
B-Symptom
|
| 198 |
+
E-Symptom
|
| 199 |
+
B-Medical_Examination
|
| 200 |
+
E-Medical_Examination
|
| 201 |
+
I-Medical_Examination
|
| 202 |
+
B-Drug
|
| 203 |
+
I-Drug
|
| 204 |
+
E-Drug
|
| 205 |
+
B-Drug_Category
|
| 206 |
+
I-Drug_Category
|
| 207 |
+
E-Drug_Category
|
| 208 |
+
I-Symptom
|
| 209 |
+
B-Operation
|
| 210 |
+
E-Operation
|
| 211 |
+
I-Operation
|
| 212 |
+
B-NAME
|
| 213 |
+
I-NAME
|
| 214 |
+
E-NAME
|
| 215 |
+
B-CONT
|
| 216 |
+
I-CONT
|
| 217 |
+
E-CONT
|
| 218 |
+
B-EDU
|
| 219 |
+
I-EDU
|
| 220 |
+
E-EDU
|
| 221 |
+
B-TITLE
|
| 222 |
+
I-TITLE
|
| 223 |
+
E-TITLE
|
| 224 |
+
B-RACE
|
| 225 |
+
E-RACE
|
| 226 |
+
B-PRO
|
| 227 |
+
I-PRO
|
| 228 |
+
E-PRO
|
| 229 |
+
I-RACE
|
| 230 |
+
B-T
|
| 231 |
+
I-T
|
| 232 |
+
E-T
|
main.ipynb
ADDED
|
@@ -0,0 +1,1554 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"id": "96fe094792f58dd",
|
| 6 |
+
"metadata": {
|
| 7 |
+
"collapsed": false
|
| 8 |
+
},
|
| 9 |
+
"source": "### Bert NER 实体抽取任务"
|
| 10 |
+
},
|
| 11 |
+
{
|
| 12 |
+
"metadata": {
|
| 13 |
+
"ExecuteTime": {
|
| 14 |
+
"end_time": "2024-05-04T00:58:02.895669Z",
|
| 15 |
+
"start_time": "2024-05-04T00:57:57.087195Z"
|
| 16 |
+
}
|
| 17 |
+
},
|
| 18 |
+
"cell_type": "code",
|
| 19 |
+
"source": [
|
| 20 |
+
"# 导入库\n",
|
| 21 |
+
"import os\n",
|
| 22 |
+
"import json\n",
|
| 23 |
+
"import torch\n",
|
| 24 |
+
"import pandas as pd\n",
|
| 25 |
+
"from transformers import BertTokenizerFast\n",
|
| 26 |
+
"from torch.utils.data import random_split"
|
| 27 |
+
],
|
| 28 |
+
"id": "4e550f849189dddc",
|
| 29 |
+
"outputs": [],
|
| 30 |
+
"execution_count": 1
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"metadata": {
|
| 34 |
+
"ExecuteTime": {
|
| 35 |
+
"end_time": "2024-05-04T00:58:03.208869Z",
|
| 36 |
+
"start_time": "2024-05-04T00:58:02.897660Z"
|
| 37 |
+
}
|
| 38 |
+
},
|
| 39 |
+
"cell_type": "code",
|
| 40 |
+
"source": "from utils import BertNerModel",
|
| 41 |
+
"id": "356a053dd45cc273",
|
| 42 |
+
"outputs": [],
|
| 43 |
+
"execution_count": 2
|
| 44 |
+
},
|
| 45 |
+
{
|
| 46 |
+
"metadata": {
|
| 47 |
+
"ExecuteTime": {
|
| 48 |
+
"end_time": "2024-05-04T00:58:03.224827Z",
|
| 49 |
+
"start_time": "2024-05-04T00:58:03.210824Z"
|
| 50 |
+
}
|
| 51 |
+
},
|
| 52 |
+
"cell_type": "code",
|
| 53 |
+
"source": [
|
| 54 |
+
"# 设置量\n",
|
| 55 |
+
"count = None # 训练量 None 为所有"
|
| 56 |
+
],
|
| 57 |
+
"id": "ad119d35690af359",
|
| 58 |
+
"outputs": [],
|
| 59 |
+
"execution_count": 3
|
| 60 |
+
},
|
| 61 |
+
{
|
| 62 |
+
"cell_type": "code",
|
| 63 |
+
"id": "8dcccc45da68749f",
|
| 64 |
+
"metadata": {
|
| 65 |
+
"collapsed": false,
|
| 66 |
+
"ExecuteTime": {
|
| 67 |
+
"end_time": "2024-05-04T00:58:03.286621Z",
|
| 68 |
+
"start_time": "2024-05-04T00:58:03.228777Z"
|
| 69 |
+
}
|
| 70 |
+
},
|
| 71 |
+
"source": [
|
| 72 |
+
"# 判断GPU\n",
|
| 73 |
+
"use_cuda = torch.cuda.is_available()\n",
|
| 74 |
+
"use_cuda"
|
| 75 |
+
],
|
| 76 |
+
"outputs": [
|
| 77 |
+
{
|
| 78 |
+
"data": {
|
| 79 |
+
"text/plain": [
|
| 80 |
+
"True"
|
| 81 |
+
]
|
| 82 |
+
},
|
| 83 |
+
"execution_count": 4,
|
| 84 |
+
"metadata": {},
|
| 85 |
+
"output_type": "execute_result"
|
| 86 |
+
}
|
| 87 |
+
],
|
| 88 |
+
"execution_count": 4
|
| 89 |
+
},
|
| 90 |
+
{
|
| 91 |
+
"cell_type": "code",
|
| 92 |
+
"id": "ba07c141b60de27f",
|
| 93 |
+
"metadata": {
|
| 94 |
+
"collapsed": false,
|
| 95 |
+
"ExecuteTime": {
|
| 96 |
+
"end_time": "2024-05-04T00:58:13.503308Z",
|
| 97 |
+
"start_time": "2024-05-04T00:58:03.287621Z"
|
| 98 |
+
}
|
| 99 |
+
},
|
| 100 |
+
"source": [
|
| 101 |
+
"# Read the training corpus from the data_set folder\n",
|
| 102 |
+
"# Every line of every file is parsed as one JSON record\n",
|
| 103 |
+
"train_data = []\n",
|
| 104 |
+
"# Build the folder path with os.path.join so it works on any OS, not only with the Windows separator\n",
|
| 105 |
+
"current_directory = os.path.join(os.getcwd(), 'data_set')\n",
|
| 106 |
+
"for file_name in os.listdir(current_directory):\n",
|
| 107 |
+
"    # Full path of the current directory entry\n",
|
| 108 |
+
"    file_path = os.path.join(current_directory, file_name)\n",
|
| 109 |
+
"    # Only regular files are read; sub-directories are skipped\n",
|
| 110 |
+
"    if os.path.isfile(file_path):\n",
|
| 111 |
+
"        with open(file_path, 'r', encoding='utf-8') as f:\n",
|
| 112 |
+
"            for i in f:\n",
|
| 113 |
+
"                train_data.append(json.loads(i))\n",
|
| 114 |
+
"# Turn the list of parsed records into a pandas DataFrame\n",
|
| 115 |
+
"train_data = pd.DataFrame(train_data)"
|
| 116 |
+
],
|
| 117 |
+
"outputs": [],
|
| 118 |
+
"execution_count": 5
|
| 119 |
+
},
|
| 120 |
+
{
|
| 121 |
+
"cell_type": "code",
|
| 122 |
+
"source": [
|
| 123 |
+
"if count is not None:  # count set: keep only the first `count` rows; None keeps everything\n",
|
| 124 |
+
"    train_data = train_data.head(count)"
|
| 125 |
+
],
|
| 126 |
+
"metadata": {
|
| 127 |
+
"collapsed": false,
|
| 128 |
+
"ExecuteTime": {
|
| 129 |
+
"end_time": "2024-05-04T00:58:13.519266Z",
|
| 130 |
+
"start_time": "2024-05-04T00:58:13.506301Z"
|
| 131 |
+
}
|
| 132 |
+
},
|
| 133 |
+
"id": "4b0b50a97cb77f2c",
|
| 134 |
+
"outputs": [],
|
| 135 |
+
"execution_count": 6
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"cell_type": "code",
|
| 139 |
+
"id": "dbf7e0655d319da7",
|
| 140 |
+
"metadata": {
|
| 141 |
+
"collapsed": false,
|
| 142 |
+
"ExecuteTime": {
|
| 143 |
+
"end_time": "2024-05-04T00:58:13.550183Z",
|
| 144 |
+
"start_time": "2024-05-04T00:58:13.521262Z"
|
| 145 |
+
}
|
| 146 |
+
},
|
| 147 |
+
"source": [
|
| 148 |
+
"train_data.head(5)"
|
| 149 |
+
],
|
| 150 |
+
"outputs": [
|
| 151 |
+
{
|
| 152 |
+
"data": {
|
| 153 |
+
"text/plain": [
|
| 154 |
+
" text \\\n",
|
| 155 |
+
"0 交行14年用过,半年准备提额,却直接被降到1K,半年期间只T过一次三千,其它全部真实消费,第... \n",
|
| 156 |
+
"1 单标我有了,最近visa双标返现活动好 \n",
|
| 157 |
+
"2 建设银行提额很慢的…… \n",
|
| 158 |
+
"3 孙女士在原恒泰农村合作银行存入50万元,同年9月又存款50万元。2014年,恒泰农村合作银行... \n",
|
| 159 |
+
"4 我的怎么显示0.25费率,而且不管分多少期都一样费率,可惜只有69k \n",
|
| 160 |
+
"\n",
|
| 161 |
+
" labels \n",
|
| 162 |
+
"0 [B-BANK, I-BANK, O, O, O, O, O, O, O, O, O, O,... \n",
|
| 163 |
+
"1 [B-PRODUCT, I-PRODUCT, O, O, O, O, O, O, B-PRO... \n",
|
| 164 |
+
"2 [B-BANK, I-BANK, I-BANK, I-BANK, B-COMMENTS_N,... \n",
|
| 165 |
+
"3 [O, O, O, O, O, B-BANK, I-BANK, I-BANK, I-BANK... \n",
|
| 166 |
+
"4 [O, O, O, O, O, O, O, O, O, O, B-COMMENTS_N, I... "
|
| 167 |
+
],
|
| 168 |
+
"text/html": [
|
| 169 |
+
"<div>\n",
|
| 170 |
+
"<style scoped>\n",
|
| 171 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
| 172 |
+
" vertical-align: middle;\n",
|
| 173 |
+
" }\n",
|
| 174 |
+
"\n",
|
| 175 |
+
" .dataframe tbody tr th {\n",
|
| 176 |
+
" vertical-align: top;\n",
|
| 177 |
+
" }\n",
|
| 178 |
+
"\n",
|
| 179 |
+
" .dataframe thead th {\n",
|
| 180 |
+
" text-align: right;\n",
|
| 181 |
+
" }\n",
|
| 182 |
+
"</style>\n",
|
| 183 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
| 184 |
+
" <thead>\n",
|
| 185 |
+
" <tr style=\"text-align: right;\">\n",
|
| 186 |
+
" <th></th>\n",
|
| 187 |
+
" <th>text</th>\n",
|
| 188 |
+
" <th>labels</th>\n",
|
| 189 |
+
" </tr>\n",
|
| 190 |
+
" </thead>\n",
|
| 191 |
+
" <tbody>\n",
|
| 192 |
+
" <tr>\n",
|
| 193 |
+
" <th>0</th>\n",
|
| 194 |
+
" <td>交行14年用过,半年准备提额,却直接被降到1K,半年期间只T过一次三千,其它全部真实消费,第...</td>\n",
|
| 195 |
+
" <td>[B-BANK, I-BANK, O, O, O, O, O, O, O, O, O, O,...</td>\n",
|
| 196 |
+
" </tr>\n",
|
| 197 |
+
" <tr>\n",
|
| 198 |
+
" <th>1</th>\n",
|
| 199 |
+
" <td>单标我有了,最近visa双标返现活动好</td>\n",
|
| 200 |
+
" <td>[B-PRODUCT, I-PRODUCT, O, O, O, O, O, O, B-PRO...</td>\n",
|
| 201 |
+
" </tr>\n",
|
| 202 |
+
" <tr>\n",
|
| 203 |
+
" <th>2</th>\n",
|
| 204 |
+
" <td>建设银行提额很慢的……</td>\n",
|
| 205 |
+
" <td>[B-BANK, I-BANK, I-BANK, I-BANK, B-COMMENTS_N,...</td>\n",
|
| 206 |
+
" </tr>\n",
|
| 207 |
+
" <tr>\n",
|
| 208 |
+
" <th>3</th>\n",
|
| 209 |
+
" <td>孙女士在原恒泰农村合作银行存入50万元,同年9月又存款50万元。2014年,恒泰农村合作银行...</td>\n",
|
| 210 |
+
" <td>[O, O, O, O, O, B-BANK, I-BANK, I-BANK, I-BANK...</td>\n",
|
| 211 |
+
" </tr>\n",
|
| 212 |
+
" <tr>\n",
|
| 213 |
+
" <th>4</th>\n",
|
| 214 |
+
" <td>我的怎么显示0.25费率,而且不管分多少期都一样费率,可惜只有69k</td>\n",
|
| 215 |
+
" <td>[O, O, O, O, O, O, O, O, O, O, B-COMMENTS_N, I...</td>\n",
|
| 216 |
+
" </tr>\n",
|
| 217 |
+
" </tbody>\n",
|
| 218 |
+
"</table>\n",
|
| 219 |
+
"</div>"
|
| 220 |
+
]
|
| 221 |
+
},
|
| 222 |
+
"execution_count": 7,
|
| 223 |
+
"metadata": {},
|
| 224 |
+
"output_type": "execute_result"
|
| 225 |
+
}
|
| 226 |
+
],
|
| 227 |
+
"execution_count": 7
|
| 228 |
+
},
|
| 229 |
+
{
|
| 230 |
+
"metadata": {
|
| 231 |
+
"ExecuteTime": {
|
| 232 |
+
"end_time": "2024-05-04T00:58:19.113339Z",
|
| 233 |
+
"start_time": "2024-05-04T00:58:13.552180Z"
|
| 234 |
+
}
|
| 235 |
+
},
|
| 236 |
+
"cell_type": "code",
|
| 237 |
+
"source": [
|
| 238 |
+
"# Add the E- (end) tag: the last I- tag of a span is rewritten to E-\n",
|
| 239 |
+
"def add_new_lable(lable: list) -> list:\n",
|
| 240 |
+
"    # Returns a new list; the input list is left untouched. The padded 'O' lets the final tag be checked too.\n",
|
| 241 |
+
"    tagged = list(lable)\n",
|
| 242 |
+
"    for idx, (cur, nxt) in enumerate(zip(lable, lable[1:] + ['O'])):\n",
|
| 243 |
+
"        span_ends = nxt == 'O' or nxt[:1] in ('B', 'S')  # next tag is 'O' or starts with B/S (presumably a new entity)\n",
|
| 244 |
+
"        if cur[:1] == 'I' and span_ends:\n",
|
| 245 |
+
"            tagged[idx] = 'E' + cur[1:]\n",
|
| 246 |
+
"    return tagged\n",
|
| 247 |
+
"\n",
|
| 248 |
+
"train_data['labels'] = train_data['labels'].apply(add_new_lable)"
|
| 249 |
+
],
|
| 250 |
+
"id": "5b03d12714b45347",
|
| 251 |
+
"outputs": [],
|
| 252 |
+
"execution_count": 8
|
| 253 |
+
},
|
| 254 |
+
{
|
| 255 |
+
"metadata": {
|
| 256 |
+
"ExecuteTime": {
|
| 257 |
+
"end_time": "2024-05-04T00:58:32.947178Z",
|
| 258 |
+
"start_time": "2024-05-04T00:58:19.114338Z"
|
| 259 |
+
}
|
| 260 |
+
},
|
| 261 |
+
"cell_type": "code",
|
| 262 |
+
"source": [
|
| 263 |
+
"# Normalise tags: keep the text before the first '.', and turn a leading 'S' into 'B'\n",
|
| 264 |
+
"def change_basic_lable(lable: list) -> list:\n",
|
| 265 |
+
"    stripped = (tag.partition('.')[0] for tag in lable)\n",
|
| 266 |
+
"    return ['B' + tag[1:] if tag[0] == 'S' else tag for tag in stripped]\n",
|
| 267 |
+
"\n",
|
| 268 |
+
"train_data['labels'] = train_data['labels'].apply(change_basic_lable)"
|
| 269 |
+
],
|
| 270 |
+
"id": "49f644953db48653",
|
| 271 |
+
"outputs": [],
|
| 272 |
+
"execution_count": 9
|
| 273 |
+
},
|
| 274 |
+
{
|
| 275 |
+
"metadata": {
|
| 276 |
+
"ExecuteTime": {
|
| 277 |
+
"end_time": "2024-05-04T00:58:32.962140Z",
|
| 278 |
+
"start_time": "2024-05-04T00:58:32.949174Z"
|
| 279 |
+
}
|
| 280 |
+
},
|
| 281 |
+
"cell_type": "code",
|
| 282 |
+
"source": "train_data.head(5)",
|
| 283 |
+
"id": "a699748ab7ffea01",
|
| 284 |
+
"outputs": [
|
| 285 |
+
{
|
| 286 |
+
"data": {
|
| 287 |
+
"text/plain": [
|
| 288 |
+
" text \\\n",
|
| 289 |
+
"0 交行14年用过,半年准备提额,却直接被降到1K,半年期间只T过一次三千,其它全部真实消费,第... \n",
|
| 290 |
+
"1 单标我有了,最近visa双标返现活动好 \n",
|
| 291 |
+
"2 建设银行提额很慢的…… \n",
|
| 292 |
+
"3 孙女士在原恒泰农村合作银行存入50万元,同年9月又存款50万元。2014年,恒泰农村合作银行... \n",
|
| 293 |
+
"4 我的怎么显示0.25费率,而且不管分多少期都一样费率,可惜只有69k \n",
|
| 294 |
+
"\n",
|
| 295 |
+
" labels \n",
|
| 296 |
+
"0 [B-BANK, E-BANK, O, O, O, O, O, O, O, O, O, O,... \n",
|
| 297 |
+
"1 [B-PRODUCT, E-PRODUCT, O, O, O, O, O, O, B-PRO... \n",
|
| 298 |
+
"2 [B-BANK, I-BANK, I-BANK, I-BANK, B-COMMENTS_N,... \n",
|
| 299 |
+
"3 [O, O, O, O, O, B-BANK, I-BANK, I-BANK, I-BANK... \n",
|
| 300 |
+
"4 [O, O, O, O, O, O, O, O, O, O, B-COMMENTS_N, E... "
|
| 301 |
+
],
|
| 302 |
+
"text/html": [
|
| 303 |
+
"<div>\n",
|
| 304 |
+
"<style scoped>\n",
|
| 305 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
| 306 |
+
" vertical-align: middle;\n",
|
| 307 |
+
" }\n",
|
| 308 |
+
"\n",
|
| 309 |
+
" .dataframe tbody tr th {\n",
|
| 310 |
+
" vertical-align: top;\n",
|
| 311 |
+
" }\n",
|
| 312 |
+
"\n",
|
| 313 |
+
" .dataframe thead th {\n",
|
| 314 |
+
" text-align: right;\n",
|
| 315 |
+
" }\n",
|
| 316 |
+
"</style>\n",
|
| 317 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
| 318 |
+
" <thead>\n",
|
| 319 |
+
" <tr style=\"text-align: right;\">\n",
|
| 320 |
+
" <th></th>\n",
|
| 321 |
+
" <th>text</th>\n",
|
| 322 |
+
" <th>labels</th>\n",
|
| 323 |
+
" </tr>\n",
|
| 324 |
+
" </thead>\n",
|
| 325 |
+
" <tbody>\n",
|
| 326 |
+
" <tr>\n",
|
| 327 |
+
" <th>0</th>\n",
|
| 328 |
+
" <td>交行14年用过,半年准备提额,却直接被降到1K,半年期间只T过一次三千,其它全部真实消费,第...</td>\n",
|
| 329 |
+
" <td>[B-BANK, E-BANK, O, O, O, O, O, O, O, O, O, O,...</td>\n",
|
| 330 |
+
" </tr>\n",
|
| 331 |
+
" <tr>\n",
|
| 332 |
+
" <th>1</th>\n",
|
| 333 |
+
" <td>单标我有了,最近visa双标返现活动好</td>\n",
|
| 334 |
+
" <td>[B-PRODUCT, E-PRODUCT, O, O, O, O, O, O, B-PRO...</td>\n",
|
| 335 |
+
" </tr>\n",
|
| 336 |
+
" <tr>\n",
|
| 337 |
+
" <th>2</th>\n",
|
| 338 |
+
" <td>建设银行提额很慢的……</td>\n",
|
| 339 |
+
" <td>[B-BANK, I-BANK, I-BANK, I-BANK, B-COMMENTS_N,...</td>\n",
|
| 340 |
+
" </tr>\n",
|
| 341 |
+
" <tr>\n",
|
| 342 |
+
" <th>3</th>\n",
|
| 343 |
+
" <td>孙女士在原恒泰农村合作银行存入50万元,同年9月又存款50万元。2014年,恒泰农村合作银行...</td>\n",
|
| 344 |
+
" <td>[O, O, O, O, O, B-BANK, I-BANK, I-BANK, I-BANK...</td>\n",
|
| 345 |
+
" </tr>\n",
|
| 346 |
+
" <tr>\n",
|
| 347 |
+
" <th>4</th>\n",
|
| 348 |
+
" <td>我的怎么显示0.25费率,而且不管分多少期都一样费率,可惜只有69k</td>\n",
|
| 349 |
+
" <td>[O, O, O, O, O, O, O, O, O, O, B-COMMENTS_N, E...</td>\n",
|
| 350 |
+
" </tr>\n",
|
| 351 |
+
" </tbody>\n",
|
| 352 |
+
"</table>\n",
|
| 353 |
+
"</div>"
|
| 354 |
+
]
|
| 355 |
+
},
|
| 356 |
+
"execution_count": 10,
|
| 357 |
+
"metadata": {},
|
| 358 |
+
"output_type": "execute_result"
|
| 359 |
+
}
|
| 360 |
+
],
|
| 361 |
+
"execution_count": 10
|
| 362 |
+
},
|
| 363 |
+
{
|
| 364 |
+
"metadata": {
|
| 365 |
+
"ExecuteTime": {
|
| 366 |
+
"end_time": "2024-05-04T00:58:32.978098Z",
|
| 367 |
+
"start_time": "2024-05-04T00:58:32.964134Z"
|
| 368 |
+
}
|
| 369 |
+
},
|
| 370 |
+
"cell_type": "code",
|
| 371 |
+
"source": "len(train_data)",
|
| 372 |
+
"id": "293089e6a0661b19",
|
| 373 |
+
"outputs": [
|
| 374 |
+
{
|
| 375 |
+
"data": {
|
| 376 |
+
"text/plain": [
|
| 377 |
+
"581738"
|
| 378 |
+
]
|
| 379 |
+
},
|
| 380 |
+
"execution_count": 11,
|
| 381 |
+
"metadata": {},
|
| 382 |
+
"output_type": "execute_result"
|
| 383 |
+
}
|
| 384 |
+
],
|
| 385 |
+
"execution_count": 11
|
| 386 |
+
},
|
| 387 |
+
{
|
| 388 |
+
"cell_type": "code",
|
| 389 |
+
"source": [
|
| 390 |
+
"# Split the rows 70% / 30% at random, then rebuild a DataFrame for each part\n",
|
| 391 |
+
"split_cols = list(train_data.columns)\n",
|
| 392 |
+
"train_split, test_split = (pd.DataFrame(list(part), columns=split_cols) for part in random_split(train_data.values, [0.7, 0.3]))"
|
| 393 |
+
],
|
| 394 |
+
"metadata": {
|
| 395 |
+
"collapsed": false,
|
| 396 |
+
"ExecuteTime": {
|
| 397 |
+
"end_time": "2024-05-04T00:58:36.137397Z",
|
| 398 |
+
"start_time": "2024-05-04T00:58:32.980092Z"
|
| 399 |
+
}
|
| 400 |
+
},
|
| 401 |
+
"id": "5c1b76f5cbeae3b0",
|
| 402 |
+
"outputs": [],
|
| 403 |
+
"execution_count": 12
|
| 404 |
+
},
|
| 405 |
+
{
|
| 406 |
+
"cell_type": "code",
|
| 407 |
+
"source": [
|
| 408 |
+
"test_split.head()"
|
| 409 |
+
],
|
| 410 |
+
"metadata": {
|
| 411 |
+
"collapsed": false,
|
| 412 |
+
"ExecuteTime": {
|
| 413 |
+
"end_time": "2024-05-04T00:58:36.168309Z",
|
| 414 |
+
"start_time": "2024-05-04T00:58:36.140384Z"
|
| 415 |
+
}
|
| 416 |
+
},
|
| 417 |
+
"id": "d9fda96d7eb8fb3",
|
| 418 |
+
"outputs": [
|
| 419 |
+
{
|
| 420 |
+
"data": {
|
| 421 |
+
"text/plain": [
|
| 422 |
+
" text \\\n",
|
| 423 |
+
"0 据中新网能源频道了解,截至1月8日,卓创资讯监测原油变化率为-0.65%,依此变化率累加上次... \n",
|
| 424 |
+
"1 虽然我们城市化现在每年以一个百分点的速度在这个,这个城市化的规模在扩张, \n",
|
| 425 |
+
"2 回想起它的味道,比小樱桃还要可口!它那“大大”的核还可以用作竹枪的子弹,竹枪是用细细的竹枝做... \n",
|
| 426 |
+
"3 2017新款厚底凉鞋女夏松糕底韩版休闲百搭真皮罗马欧洲站摇摇鞋潮 \n",
|
| 427 |
+
"4 根据连续不间断对梁体和立柱位移的监测数据表明,目前桥梁各重要监控指标稳定,未见病害进一步发展趋势。 \n",
|
| 428 |
+
"\n",
|
| 429 |
+
" labels \n",
|
| 430 |
+
"0 [O, O, O, O, O, O, O, O, O, O, O, O, O, B-T, I... \n",
|
| 431 |
+
"1 [O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ... \n",
|
| 432 |
+
"2 [O, O, O, B-Thing, O, O, O, O, O, O, B-Thing, ... \n",
|
| 433 |
+
"3 [O, O, O, O, O, O, O, O, B-HCCX, E-HCCX, O, O,... \n",
|
| 434 |
+
"4 [O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ... "
|
| 435 |
+
],
|
| 436 |
+
"text/html": [
|
| 437 |
+
"<div>\n",
|
| 438 |
+
"<style scoped>\n",
|
| 439 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
| 440 |
+
" vertical-align: middle;\n",
|
| 441 |
+
" }\n",
|
| 442 |
+
"\n",
|
| 443 |
+
" .dataframe tbody tr th {\n",
|
| 444 |
+
" vertical-align: top;\n",
|
| 445 |
+
" }\n",
|
| 446 |
+
"\n",
|
| 447 |
+
" .dataframe thead th {\n",
|
| 448 |
+
" text-align: right;\n",
|
| 449 |
+
" }\n",
|
| 450 |
+
"</style>\n",
|
| 451 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
| 452 |
+
" <thead>\n",
|
| 453 |
+
" <tr style=\"text-align: right;\">\n",
|
| 454 |
+
" <th></th>\n",
|
| 455 |
+
" <th>text</th>\n",
|
| 456 |
+
" <th>labels</th>\n",
|
| 457 |
+
" </tr>\n",
|
| 458 |
+
" </thead>\n",
|
| 459 |
+
" <tbody>\n",
|
| 460 |
+
" <tr>\n",
|
| 461 |
+
" <th>0</th>\n",
|
| 462 |
+
" <td>据中新网能源频道了解,截至1月8日,卓创资讯监测原油变化率为-0.65%,依此变化率累加上次...</td>\n",
|
| 463 |
+
" <td>[O, O, O, O, O, O, O, O, O, O, O, O, O, B-T, I...</td>\n",
|
| 464 |
+
" </tr>\n",
|
| 465 |
+
" <tr>\n",
|
| 466 |
+
" <th>1</th>\n",
|
| 467 |
+
" <td>虽然我们城市化现在每年以一个百分点的速度在这个,这个城市化的规模在扩张,</td>\n",
|
| 468 |
+
" <td>[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...</td>\n",
|
| 469 |
+
" </tr>\n",
|
| 470 |
+
" <tr>\n",
|
| 471 |
+
" <th>2</th>\n",
|
| 472 |
+
" <td>回想起它的味道,比小樱桃还要可口!它那“大大”的核还可以用作竹枪的子弹,竹枪是用细细的竹枝做...</td>\n",
|
| 473 |
+
" <td>[O, O, O, B-Thing, O, O, O, O, O, O, B-Thing, ...</td>\n",
|
| 474 |
+
" </tr>\n",
|
| 475 |
+
" <tr>\n",
|
| 476 |
+
" <th>3</th>\n",
|
| 477 |
+
" <td>2017新款厚底凉鞋女夏松糕底韩版休闲百搭真皮罗马欧洲站摇摇鞋潮</td>\n",
|
| 478 |
+
" <td>[O, O, O, O, O, O, O, O, B-HCCX, E-HCCX, O, O,...</td>\n",
|
| 479 |
+
" </tr>\n",
|
| 480 |
+
" <tr>\n",
|
| 481 |
+
" <th>4</th>\n",
|
| 482 |
+
" <td>根据连续不间断对梁体和立柱位移的监测数据表明,目前桥梁各重要监控指标稳定,未见病害进一步发展趋势。</td>\n",
|
| 483 |
+
" <td>[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...</td>\n",
|
| 484 |
+
" </tr>\n",
|
| 485 |
+
" </tbody>\n",
|
| 486 |
+
"</table>\n",
|
| 487 |
+
"</div>"
|
| 488 |
+
]
|
| 489 |
+
},
|
| 490 |
+
"execution_count": 13,
|
| 491 |
+
"metadata": {},
|
| 492 |
+
"output_type": "execute_result"
|
| 493 |
+
}
|
| 494 |
+
],
|
| 495 |
+
"execution_count": 13
|
| 496 |
+
},
|
| 497 |
+
{
|
| 498 |
+
"cell_type": "code",
|
| 499 |
+
"source": [
|
| 500 |
+
"# 提取所有的labels类型\n",
|
| 501 |
+
"labels = []\n",
|
| 502 |
+
"for i in train_data.labels:\n",
|
| 503 |
+
" for k in i:\n",
|
| 504 |
+
" if k not in labels:\n",
|
| 505 |
+
" labels.append(k)\n",
|
| 506 |
+
"labels"
|
| 507 |
+
],
|
| 508 |
+
"metadata": {
|
| 509 |
+
"collapsed": false,
|
| 510 |
+
"ExecuteTime": {
|
| 511 |
+
"end_time": "2024-05-04T00:58:46.249289Z",
|
| 512 |
+
"start_time": "2024-05-04T00:58:36.170304Z"
|
| 513 |
+
}
|
| 514 |
+
},
|
| 515 |
+
"id": "a1cbd4f9e9a0325e",
|
| 516 |
+
"outputs": [
|
| 517 |
+
{
|
| 518 |
+
"data": {
|
| 519 |
+
"text/plain": [
|
| 520 |
+
"['B-BANK',\n",
|
| 521 |
+
" 'E-BANK',\n",
|
| 522 |
+
" 'O',\n",
|
| 523 |
+
" 'B-COMMENTS_N',\n",
|
| 524 |
+
" 'E-COMMENTS_N',\n",
|
| 525 |
+
" 'B-COMMENTS_ADJ',\n",
|
| 526 |
+
" 'E-COMMENTS_ADJ',\n",
|
| 527 |
+
" 'B-PRODUCT',\n",
|
| 528 |
+
" 'E-PRODUCT',\n",
|
| 529 |
+
" 'I-PRODUCT',\n",
|
| 530 |
+
" 'I-COMMENTS_N',\n",
|
| 531 |
+
" 'I-BANK',\n",
|
| 532 |
+
" 'I-COMMENTS_ADJ',\n",
|
| 533 |
+
" 'B-product_name',\n",
|
| 534 |
+
" 'I-product_name',\n",
|
| 535 |
+
" 'B-time',\n",
|
| 536 |
+
" 'I-time',\n",
|
| 537 |
+
" 'E-time',\n",
|
| 538 |
+
" 'B-person_name',\n",
|
| 539 |
+
" 'I-person_name',\n",
|
| 540 |
+
" 'E-person_name',\n",
|
| 541 |
+
" 'E-product_name',\n",
|
| 542 |
+
" 'B-org_name',\n",
|
| 543 |
+
" 'I-org_name',\n",
|
| 544 |
+
" 'E-org_name',\n",
|
| 545 |
+
" 'B-location',\n",
|
| 546 |
+
" 'I-location',\n",
|
| 547 |
+
" 'E-location',\n",
|
| 548 |
+
" 'B-company_name',\n",
|
| 549 |
+
" 'I-company_name',\n",
|
| 550 |
+
" 'E-company_name',\n",
|
| 551 |
+
" 'B-GPE',\n",
|
| 552 |
+
" 'I-GPE',\n",
|
| 553 |
+
" 'E-GPE',\n",
|
| 554 |
+
" 'B-PER',\n",
|
| 555 |
+
" 'I-PER',\n",
|
| 556 |
+
" 'E-PER',\n",
|
| 557 |
+
" 'B-LOC',\n",
|
| 558 |
+
" 'I-LOC',\n",
|
| 559 |
+
" 'E-LOC',\n",
|
| 560 |
+
" 'B-ORG',\n",
|
| 561 |
+
" 'I-ORG',\n",
|
| 562 |
+
" 'E-ORG',\n",
|
| 563 |
+
" 'B-body',\n",
|
| 564 |
+
" 'E-body',\n",
|
| 565 |
+
" 'I-body',\n",
|
| 566 |
+
" 'B-symp',\n",
|
| 567 |
+
" 'E-symp',\n",
|
| 568 |
+
" 'I-symp',\n",
|
| 569 |
+
" 'B-chec',\n",
|
| 570 |
+
" 'E-chec',\n",
|
| 571 |
+
" 'I-chec',\n",
|
| 572 |
+
" 'B-dise',\n",
|
| 573 |
+
" 'I-dise',\n",
|
| 574 |
+
" 'E-dise',\n",
|
| 575 |
+
" 'B-cure',\n",
|
| 576 |
+
" 'I-cure',\n",
|
| 577 |
+
" 'E-cure',\n",
|
| 578 |
+
" 'B-身体部位',\n",
|
| 579 |
+
" 'I-身体部位',\n",
|
| 580 |
+
" 'B-检查和检验',\n",
|
| 581 |
+
" 'E-检查和检验',\n",
|
| 582 |
+
" 'I-检查和检验',\n",
|
| 583 |
+
" 'E-身体部位',\n",
|
| 584 |
+
" 'B-症状和体征',\n",
|
| 585 |
+
" 'E-症状和体征',\n",
|
| 586 |
+
" 'I-症状和体征',\n",
|
| 587 |
+
" 'B-疾病和诊断',\n",
|
| 588 |
+
" 'I-疾病和诊断',\n",
|
| 589 |
+
" 'E-疾病和诊断',\n",
|
| 590 |
+
" 'B-治疗',\n",
|
| 591 |
+
" 'I-治疗',\n",
|
| 592 |
+
" 'E-治疗',\n",
|
| 593 |
+
" 'B-解剖部位',\n",
|
| 594 |
+
" 'E-解剖部位',\n",
|
| 595 |
+
" 'B-手术',\n",
|
| 596 |
+
" 'I-手术',\n",
|
| 597 |
+
" 'E-手术',\n",
|
| 598 |
+
" 'B-影像检查',\n",
|
| 599 |
+
" 'E-影像检查',\n",
|
| 600 |
+
" 'I-解剖部位',\n",
|
| 601 |
+
" 'B-药物',\n",
|
| 602 |
+
" 'E-药物',\n",
|
| 603 |
+
" 'I-药物',\n",
|
| 604 |
+
" 'B-实验室检验',\n",
|
| 605 |
+
" 'I-实验室检验',\n",
|
| 606 |
+
" 'E-实验室检验',\n",
|
| 607 |
+
" 'I-影像检查',\n",
|
| 608 |
+
" 'B-name',\n",
|
| 609 |
+
" 'I-name',\n",
|
| 610 |
+
" 'E-name',\n",
|
| 611 |
+
" 'B-address',\n",
|
| 612 |
+
" 'E-address',\n",
|
| 613 |
+
" 'B-organization',\n",
|
| 614 |
+
" 'E-organization',\n",
|
| 615 |
+
" 'B-game',\n",
|
| 616 |
+
" 'I-game',\n",
|
| 617 |
+
" 'E-game',\n",
|
| 618 |
+
" 'I-address',\n",
|
| 619 |
+
" 'B-scene',\n",
|
| 620 |
+
" 'I-scene',\n",
|
| 621 |
+
" 'E-scene',\n",
|
| 622 |
+
" 'B-book',\n",
|
| 623 |
+
" 'I-book',\n",
|
| 624 |
+
" 'E-book',\n",
|
| 625 |
+
" 'I-organization',\n",
|
| 626 |
+
" 'B-company',\n",
|
| 627 |
+
" 'I-company',\n",
|
| 628 |
+
" 'E-company',\n",
|
| 629 |
+
" 'B-position',\n",
|
| 630 |
+
" 'E-position',\n",
|
| 631 |
+
" 'I-position',\n",
|
| 632 |
+
" 'B-government',\n",
|
| 633 |
+
" 'I-government',\n",
|
| 634 |
+
" 'E-government',\n",
|
| 635 |
+
" 'B-movie',\n",
|
| 636 |
+
" 'I-movie',\n",
|
| 637 |
+
" 'E-movie',\n",
|
| 638 |
+
" 'B-bod',\n",
|
| 639 |
+
" 'I-bod',\n",
|
| 640 |
+
" 'E-bod',\n",
|
| 641 |
+
" 'B-dis',\n",
|
| 642 |
+
" 'I-dis',\n",
|
| 643 |
+
" 'E-dis',\n",
|
| 644 |
+
" 'B-sym',\n",
|
| 645 |
+
" 'I-sym',\n",
|
| 646 |
+
" 'E-sym',\n",
|
| 647 |
+
" 'B-pro',\n",
|
| 648 |
+
" 'I-pro',\n",
|
| 649 |
+
" 'E-pro',\n",
|
| 650 |
+
" 'B-ite',\n",
|
| 651 |
+
" 'I-ite',\n",
|
| 652 |
+
" 'E-ite',\n",
|
| 653 |
+
" 'B-mic',\n",
|
| 654 |
+
" 'I-mic',\n",
|
| 655 |
+
" 'E-mic',\n",
|
| 656 |
+
" 'B-dep',\n",
|
| 657 |
+
" 'E-dep',\n",
|
| 658 |
+
" 'B-dru',\n",
|
| 659 |
+
" 'I-dru',\n",
|
| 660 |
+
" 'E-dru',\n",
|
| 661 |
+
" 'I-dep',\n",
|
| 662 |
+
" 'B-equ',\n",
|
| 663 |
+
" 'I-equ',\n",
|
| 664 |
+
" 'E-equ',\n",
|
| 665 |
+
" 'B-Time',\n",
|
| 666 |
+
" 'I-Time',\n",
|
| 667 |
+
" 'E-Time',\n",
|
| 668 |
+
" 'B-Person',\n",
|
| 669 |
+
" 'B-Location',\n",
|
| 670 |
+
" 'I-Location',\n",
|
| 671 |
+
" 'E-Location',\n",
|
| 672 |
+
" 'E-Person',\n",
|
| 673 |
+
" 'B-Thing',\n",
|
| 674 |
+
" 'E-Thing',\n",
|
| 675 |
+
" 'B-Metric',\n",
|
| 676 |
+
" 'E-Metric',\n",
|
| 677 |
+
" 'I-Person',\n",
|
| 678 |
+
" 'I-Thing',\n",
|
| 679 |
+
" 'B-Organization',\n",
|
| 680 |
+
" 'I-Organization',\n",
|
| 681 |
+
" 'E-Organization',\n",
|
| 682 |
+
" 'I-Metric',\n",
|
| 683 |
+
" 'B-Abstract',\n",
|
| 684 |
+
" 'I-Abstract',\n",
|
| 685 |
+
" 'E-Abstract',\n",
|
| 686 |
+
" 'B-Physical',\n",
|
| 687 |
+
" 'I-Physical',\n",
|
| 688 |
+
" 'E-Physical',\n",
|
| 689 |
+
" 'B-Term',\n",
|
| 690 |
+
" 'I-Term',\n",
|
| 691 |
+
" 'E-Term',\n",
|
| 692 |
+
" 'B-ABstract',\n",
|
| 693 |
+
" 'I-ABstract',\n",
|
| 694 |
+
" 'E-ABstract',\n",
|
| 695 |
+
" 'B-HCCX',\n",
|
| 696 |
+
" 'E-HCCX',\n",
|
| 697 |
+
" 'I-HCCX',\n",
|
| 698 |
+
" 'B-MISC',\n",
|
| 699 |
+
" 'E-MISC',\n",
|
| 700 |
+
" 'B-HPPX',\n",
|
| 701 |
+
" 'E-HPPX',\n",
|
| 702 |
+
" 'I-HPPX',\n",
|
| 703 |
+
" 'I-MISC',\n",
|
| 704 |
+
" 'B-XH',\n",
|
| 705 |
+
" 'I-XH',\n",
|
| 706 |
+
" 'E-XH',\n",
|
| 707 |
+
" 'B-EQU',\n",
|
| 708 |
+
" 'I-EQU',\n",
|
| 709 |
+
" 'E-EQU',\n",
|
| 710 |
+
" 'B-TIME',\n",
|
| 711 |
+
" 'E-TIME',\n",
|
| 712 |
+
" 'I-TIME',\n",
|
| 713 |
+
" 'B-FAC',\n",
|
| 714 |
+
" 'I-FAC',\n",
|
| 715 |
+
" 'E-FAC',\n",
|
| 716 |
+
" 'B-Symptom',\n",
|
| 717 |
+
" 'E-Symptom',\n",
|
| 718 |
+
" 'B-Medical_Examination',\n",
|
| 719 |
+
" 'E-Medical_Examination',\n",
|
| 720 |
+
" 'I-Medical_Examination',\n",
|
| 721 |
+
" 'B-Drug',\n",
|
| 722 |
+
" 'I-Drug',\n",
|
| 723 |
+
" 'E-Drug',\n",
|
| 724 |
+
" 'B-Drug_Category',\n",
|
| 725 |
+
" 'I-Drug_Category',\n",
|
| 726 |
+
" 'E-Drug_Category',\n",
|
| 727 |
+
" 'I-Symptom',\n",
|
| 728 |
+
" 'B-Operation',\n",
|
| 729 |
+
" 'E-Operation',\n",
|
| 730 |
+
" 'I-Operation',\n",
|
| 731 |
+
" 'B-NAME',\n",
|
| 732 |
+
" 'I-NAME',\n",
|
| 733 |
+
" 'E-NAME',\n",
|
| 734 |
+
" 'B-CONT',\n",
|
| 735 |
+
" 'I-CONT',\n",
|
| 736 |
+
" 'E-CONT',\n",
|
| 737 |
+
" 'B-EDU',\n",
|
| 738 |
+
" 'I-EDU',\n",
|
| 739 |
+
" 'E-EDU',\n",
|
| 740 |
+
" 'B-TITLE',\n",
|
| 741 |
+
" 'I-TITLE',\n",
|
| 742 |
+
" 'E-TITLE',\n",
|
| 743 |
+
" 'B-RACE',\n",
|
| 744 |
+
" 'E-RACE',\n",
|
| 745 |
+
" 'B-PRO',\n",
|
| 746 |
+
" 'I-PRO',\n",
|
| 747 |
+
" 'E-PRO',\n",
|
| 748 |
+
" 'I-RACE',\n",
|
| 749 |
+
" 'B-T',\n",
|
| 750 |
+
" 'I-T',\n",
|
| 751 |
+
" 'E-T']"
|
| 752 |
+
]
|
| 753 |
+
},
|
| 754 |
+
"execution_count": 14,
|
| 755 |
+
"metadata": {},
|
| 756 |
+
"output_type": "execute_result"
|
| 757 |
+
}
|
| 758 |
+
],
|
| 759 |
+
"execution_count": 14
|
| 760 |
+
},
|
| 761 |
+
{
|
| 762 |
+
"cell_type": "code",
|
| 763 |
+
"source": [
|
| 764 |
+
"# 转换标签字典\n",
|
| 765 |
+
"label_dict = {i: j for i,j in zip(labels, range(len(labels)))}\n",
|
| 766 |
+
"label_dict"
|
| 767 |
+
],
|
| 768 |
+
"metadata": {
|
| 769 |
+
"collapsed": false,
|
| 770 |
+
"ExecuteTime": {
|
| 771 |
+
"end_time": "2024-05-04T00:58:46.265245Z",
|
| 772 |
+
"start_time": "2024-05-04T00:58:46.250285Z"
|
| 773 |
+
}
|
| 774 |
+
},
|
| 775 |
+
"id": "da46e04a666a54d9",
|
| 776 |
+
"outputs": [
|
| 777 |
+
{
|
| 778 |
+
"data": {
|
| 779 |
+
"text/plain": [
|
| 780 |
+
"{'B-BANK': 0,\n",
|
| 781 |
+
" 'E-BANK': 1,\n",
|
| 782 |
+
" 'O': 2,\n",
|
| 783 |
+
" 'B-COMMENTS_N': 3,\n",
|
| 784 |
+
" 'E-COMMENTS_N': 4,\n",
|
| 785 |
+
" 'B-COMMENTS_ADJ': 5,\n",
|
| 786 |
+
" 'E-COMMENTS_ADJ': 6,\n",
|
| 787 |
+
" 'B-PRODUCT': 7,\n",
|
| 788 |
+
" 'E-PRODUCT': 8,\n",
|
| 789 |
+
" 'I-PRODUCT': 9,\n",
|
| 790 |
+
" 'I-COMMENTS_N': 10,\n",
|
| 791 |
+
" 'I-BANK': 11,\n",
|
| 792 |
+
" 'I-COMMENTS_ADJ': 12,\n",
|
| 793 |
+
" 'B-product_name': 13,\n",
|
| 794 |
+
" 'I-product_name': 14,\n",
|
| 795 |
+
" 'B-time': 15,\n",
|
| 796 |
+
" 'I-time': 16,\n",
|
| 797 |
+
" 'E-time': 17,\n",
|
| 798 |
+
" 'B-person_name': 18,\n",
|
| 799 |
+
" 'I-person_name': 19,\n",
|
| 800 |
+
" 'E-person_name': 20,\n",
|
| 801 |
+
" 'E-product_name': 21,\n",
|
| 802 |
+
" 'B-org_name': 22,\n",
|
| 803 |
+
" 'I-org_name': 23,\n",
|
| 804 |
+
" 'E-org_name': 24,\n",
|
| 805 |
+
" 'B-location': 25,\n",
|
| 806 |
+
" 'I-location': 26,\n",
|
| 807 |
+
" 'E-location': 27,\n",
|
| 808 |
+
" 'B-company_name': 28,\n",
|
| 809 |
+
" 'I-company_name': 29,\n",
|
| 810 |
+
" 'E-company_name': 30,\n",
|
| 811 |
+
" 'B-GPE': 31,\n",
|
| 812 |
+
" 'I-GPE': 32,\n",
|
| 813 |
+
" 'E-GPE': 33,\n",
|
| 814 |
+
" 'B-PER': 34,\n",
|
| 815 |
+
" 'I-PER': 35,\n",
|
| 816 |
+
" 'E-PER': 36,\n",
|
| 817 |
+
" 'B-LOC': 37,\n",
|
| 818 |
+
" 'I-LOC': 38,\n",
|
| 819 |
+
" 'E-LOC': 39,\n",
|
| 820 |
+
" 'B-ORG': 40,\n",
|
| 821 |
+
" 'I-ORG': 41,\n",
|
| 822 |
+
" 'E-ORG': 42,\n",
|
| 823 |
+
" 'B-body': 43,\n",
|
| 824 |
+
" 'E-body': 44,\n",
|
| 825 |
+
" 'I-body': 45,\n",
|
| 826 |
+
" 'B-symp': 46,\n",
|
| 827 |
+
" 'E-symp': 47,\n",
|
| 828 |
+
" 'I-symp': 48,\n",
|
| 829 |
+
" 'B-chec': 49,\n",
|
| 830 |
+
" 'E-chec': 50,\n",
|
| 831 |
+
" 'I-chec': 51,\n",
|
| 832 |
+
" 'B-dise': 52,\n",
|
| 833 |
+
" 'I-dise': 53,\n",
|
| 834 |
+
" 'E-dise': 54,\n",
|
| 835 |
+
" 'B-cure': 55,\n",
|
| 836 |
+
" 'I-cure': 56,\n",
|
| 837 |
+
" 'E-cure': 57,\n",
|
| 838 |
+
" 'B-身体部位': 58,\n",
|
| 839 |
+
" 'I-身体部位': 59,\n",
|
| 840 |
+
" 'B-检查和检验': 60,\n",
|
| 841 |
+
" 'E-检查和检验': 61,\n",
|
| 842 |
+
" 'I-检查和检验': 62,\n",
|
| 843 |
+
" 'E-身体部位': 63,\n",
|
| 844 |
+
" 'B-症状和体征': 64,\n",
|
| 845 |
+
" 'E-症状和体征': 65,\n",
|
| 846 |
+
" 'I-症状和体征': 66,\n",
|
| 847 |
+
" 'B-疾病和诊断': 67,\n",
|
| 848 |
+
" 'I-疾病和诊断': 68,\n",
|
| 849 |
+
" 'E-疾病和诊断': 69,\n",
|
| 850 |
+
" 'B-治疗': 70,\n",
|
| 851 |
+
" 'I-治疗': 71,\n",
|
| 852 |
+
" 'E-治疗': 72,\n",
|
| 853 |
+
" 'B-解剖部位': 73,\n",
|
| 854 |
+
" 'E-解剖部位': 74,\n",
|
| 855 |
+
" 'B-手术': 75,\n",
|
| 856 |
+
" 'I-手术': 76,\n",
|
| 857 |
+
" 'E-手术': 77,\n",
|
| 858 |
+
" 'B-影像检查': 78,\n",
|
| 859 |
+
" 'E-影像检查': 79,\n",
|
| 860 |
+
" 'I-解剖部位': 80,\n",
|
| 861 |
+
" 'B-药物': 81,\n",
|
| 862 |
+
" 'E-药物': 82,\n",
|
| 863 |
+
" 'I-药物': 83,\n",
|
| 864 |
+
" 'B-实验室检验': 84,\n",
|
| 865 |
+
" 'I-实验室检验': 85,\n",
|
| 866 |
+
" 'E-实验室检验': 86,\n",
|
| 867 |
+
" 'I-影像检查': 87,\n",
|
| 868 |
+
" 'B-name': 88,\n",
|
| 869 |
+
" 'I-name': 89,\n",
|
| 870 |
+
" 'E-name': 90,\n",
|
| 871 |
+
" 'B-address': 91,\n",
|
| 872 |
+
" 'E-address': 92,\n",
|
| 873 |
+
" 'B-organization': 93,\n",
|
| 874 |
+
" 'E-organization': 94,\n",
|
| 875 |
+
" 'B-game': 95,\n",
|
| 876 |
+
" 'I-game': 96,\n",
|
| 877 |
+
" 'E-game': 97,\n",
|
| 878 |
+
" 'I-address': 98,\n",
|
| 879 |
+
" 'B-scene': 99,\n",
|
| 880 |
+
" 'I-scene': 100,\n",
|
| 881 |
+
" 'E-scene': 101,\n",
|
| 882 |
+
" 'B-book': 102,\n",
|
| 883 |
+
" 'I-book': 103,\n",
|
| 884 |
+
" 'E-book': 104,\n",
|
| 885 |
+
" 'I-organization': 105,\n",
|
| 886 |
+
" 'B-company': 106,\n",
|
| 887 |
+
" 'I-company': 107,\n",
|
| 888 |
+
" 'E-company': 108,\n",
|
| 889 |
+
" 'B-position': 109,\n",
|
| 890 |
+
" 'E-position': 110,\n",
|
| 891 |
+
" 'I-position': 111,\n",
|
| 892 |
+
" 'B-government': 112,\n",
|
| 893 |
+
" 'I-government': 113,\n",
|
| 894 |
+
" 'E-government': 114,\n",
|
| 895 |
+
" 'B-movie': 115,\n",
|
| 896 |
+
" 'I-movie': 116,\n",
|
| 897 |
+
" 'E-movie': 117,\n",
|
| 898 |
+
" 'B-bod': 118,\n",
|
| 899 |
+
" 'I-bod': 119,\n",
|
| 900 |
+
" 'E-bod': 120,\n",
|
| 901 |
+
" 'B-dis': 121,\n",
|
| 902 |
+
" 'I-dis': 122,\n",
|
| 903 |
+
" 'E-dis': 123,\n",
|
| 904 |
+
" 'B-sym': 124,\n",
|
| 905 |
+
" 'I-sym': 125,\n",
|
| 906 |
+
" 'E-sym': 126,\n",
|
| 907 |
+
" 'B-pro': 127,\n",
|
| 908 |
+
" 'I-pro': 128,\n",
|
| 909 |
+
" 'E-pro': 129,\n",
|
| 910 |
+
" 'B-ite': 130,\n",
|
| 911 |
+
" 'I-ite': 131,\n",
|
| 912 |
+
" 'E-ite': 132,\n",
|
| 913 |
+
" 'B-mic': 133,\n",
|
| 914 |
+
" 'I-mic': 134,\n",
|
| 915 |
+
" 'E-mic': 135,\n",
|
| 916 |
+
" 'B-dep': 136,\n",
|
| 917 |
+
" 'E-dep': 137,\n",
|
| 918 |
+
" 'B-dru': 138,\n",
|
| 919 |
+
" 'I-dru': 139,\n",
|
| 920 |
+
" 'E-dru': 140,\n",
|
| 921 |
+
" 'I-dep': 141,\n",
|
| 922 |
+
" 'B-equ': 142,\n",
|
| 923 |
+
" 'I-equ': 143,\n",
|
| 924 |
+
" 'E-equ': 144,\n",
|
| 925 |
+
" 'B-Time': 145,\n",
|
| 926 |
+
" 'I-Time': 146,\n",
|
| 927 |
+
" 'E-Time': 147,\n",
|
| 928 |
+
" 'B-Person': 148,\n",
|
| 929 |
+
" 'B-Location': 149,\n",
|
| 930 |
+
" 'I-Location': 150,\n",
|
| 931 |
+
" 'E-Location': 151,\n",
|
| 932 |
+
" 'E-Person': 152,\n",
|
| 933 |
+
" 'B-Thing': 153,\n",
|
| 934 |
+
" 'E-Thing': 154,\n",
|
| 935 |
+
" 'B-Metric': 155,\n",
|
| 936 |
+
" 'E-Metric': 156,\n",
|
| 937 |
+
" 'I-Person': 157,\n",
|
| 938 |
+
" 'I-Thing': 158,\n",
|
| 939 |
+
" 'B-Organization': 159,\n",
|
| 940 |
+
" 'I-Organization': 160,\n",
|
| 941 |
+
" 'E-Organization': 161,\n",
|
| 942 |
+
" 'I-Metric': 162,\n",
|
| 943 |
+
" 'B-Abstract': 163,\n",
|
| 944 |
+
" 'I-Abstract': 164,\n",
|
| 945 |
+
" 'E-Abstract': 165,\n",
|
| 946 |
+
" 'B-Physical': 166,\n",
|
| 947 |
+
" 'I-Physical': 167,\n",
|
| 948 |
+
" 'E-Physical': 168,\n",
|
| 949 |
+
" 'B-Term': 169,\n",
|
| 950 |
+
" 'I-Term': 170,\n",
|
| 951 |
+
" 'E-Term': 171,\n",
|
| 952 |
+
" 'B-ABstract': 172,\n",
|
| 953 |
+
" 'I-ABstract': 173,\n",
|
| 954 |
+
" 'E-ABstract': 174,\n",
|
| 955 |
+
" 'B-HCCX': 175,\n",
|
| 956 |
+
" 'E-HCCX': 176,\n",
|
| 957 |
+
" 'I-HCCX': 177,\n",
|
| 958 |
+
" 'B-MISC': 178,\n",
|
| 959 |
+
" 'E-MISC': 179,\n",
|
| 960 |
+
" 'B-HPPX': 180,\n",
|
| 961 |
+
" 'E-HPPX': 181,\n",
|
| 962 |
+
" 'I-HPPX': 182,\n",
|
| 963 |
+
" 'I-MISC': 183,\n",
|
| 964 |
+
" 'B-XH': 184,\n",
|
| 965 |
+
" 'I-XH': 185,\n",
|
| 966 |
+
" 'E-XH': 186,\n",
|
| 967 |
+
" 'B-EQU': 187,\n",
|
| 968 |
+
" 'I-EQU': 188,\n",
|
| 969 |
+
" 'E-EQU': 189,\n",
|
| 970 |
+
" 'B-TIME': 190,\n",
|
| 971 |
+
" 'E-TIME': 191,\n",
|
| 972 |
+
" 'I-TIME': 192,\n",
|
| 973 |
+
" 'B-FAC': 193,\n",
|
| 974 |
+
" 'I-FAC': 194,\n",
|
| 975 |
+
" 'E-FAC': 195,\n",
|
| 976 |
+
" 'B-Symptom': 196,\n",
|
| 977 |
+
" 'E-Symptom': 197,\n",
|
| 978 |
+
" 'B-Medical_Examination': 198,\n",
|
| 979 |
+
" 'E-Medical_Examination': 199,\n",
|
| 980 |
+
" 'I-Medical_Examination': 200,\n",
|
| 981 |
+
" 'B-Drug': 201,\n",
|
| 982 |
+
" 'I-Drug': 202,\n",
|
| 983 |
+
" 'E-Drug': 203,\n",
|
| 984 |
+
" 'B-Drug_Category': 204,\n",
|
| 985 |
+
" 'I-Drug_Category': 205,\n",
|
| 986 |
+
" 'E-Drug_Category': 206,\n",
|
| 987 |
+
" 'I-Symptom': 207,\n",
|
| 988 |
+
" 'B-Operation': 208,\n",
|
| 989 |
+
" 'E-Operation': 209,\n",
|
| 990 |
+
" 'I-Operation': 210,\n",
|
| 991 |
+
" 'B-NAME': 211,\n",
|
| 992 |
+
" 'I-NAME': 212,\n",
|
| 993 |
+
" 'E-NAME': 213,\n",
|
| 994 |
+
" 'B-CONT': 214,\n",
|
| 995 |
+
" 'I-CONT': 215,\n",
|
| 996 |
+
" 'E-CONT': 216,\n",
|
| 997 |
+
" 'B-EDU': 217,\n",
|
| 998 |
+
" 'I-EDU': 218,\n",
|
| 999 |
+
" 'E-EDU': 219,\n",
|
| 1000 |
+
" 'B-TITLE': 220,\n",
|
| 1001 |
+
" 'I-TITLE': 221,\n",
|
| 1002 |
+
" 'E-TITLE': 222,\n",
|
| 1003 |
+
" 'B-RACE': 223,\n",
|
| 1004 |
+
" 'E-RACE': 224,\n",
|
| 1005 |
+
" 'B-PRO': 225,\n",
|
| 1006 |
+
" 'I-PRO': 226,\n",
|
| 1007 |
+
" 'E-PRO': 227,\n",
|
| 1008 |
+
" 'I-RACE': 228,\n",
|
| 1009 |
+
" 'B-T': 229,\n",
|
| 1010 |
+
" 'I-T': 230,\n",
|
| 1011 |
+
" 'E-T': 231}"
|
| 1012 |
+
]
|
| 1013 |
+
},
|
| 1014 |
+
"execution_count": 15,
|
| 1015 |
+
"metadata": {},
|
| 1016 |
+
"output_type": "execute_result"
|
| 1017 |
+
}
|
| 1018 |
+
],
|
| 1019 |
+
"execution_count": 15
|
| 1020 |
+
},
|
| 1021 |
+
{
|
| 1022 |
+
"cell_type": "code",
|
| 1023 |
+
"source": [
|
| 1024 |
+
"# 数据集构建\n",
|
| 1025 |
+
"class Dataset(torch.utils.data.Dataset):\n",
|
| 1026 |
+
" def __init__(self, df: pd.DataFrame, label_dict: dict):\n",
|
| 1027 |
+
" \"\"\"\n",
|
| 1028 |
+
" 数据集初始化\n",
|
| 1029 |
+
" :param df: 数据集标签\n",
|
| 1029 |
+
" :param label_dict: label字典\n",
|
| 1031 |
+
" \"\"\"\n",
|
| 1032 |
+
" self.labels = None\n",
|
| 1033 |
+
" self.text = None\n",
|
| 1034 |
+
" self.df_label = None\n",
|
| 1035 |
+
" self.df_text = None\n",
|
| 1036 |
+
" self.label_all_tokens = None\n",
|
| 1037 |
+
" self.bert_token_model = None\n",
|
| 1038 |
+
" self.bert_length = None\n",
|
| 1039 |
+
" self.settings_init()\n",
|
| 1040 |
+
" self.df = df\n",
|
| 1041 |
+
" self.label_dict = label_dict\n",
|
| 1042 |
+
" self.tokenizer = BertTokenizerFast.from_pretrained(self.bert_token_model, do_lower_case=True)\n",
|
| 1043 |
+
" self.data_prepare()\n",
|
| 1044 |
+
" \n",
|
| 1045 |
+
" def settings_init(self):\n",
|
| 1046 |
+
" \"\"\"\n",
|
| 1047 |
+
" 设置 \n",
|
| 1048 |
+
" \"\"\"\n",
|
| 1049 |
+
" self.bert_length = 128 # bert统一长度设置\n",
|
| 1050 |
+
" self.bert_token_model = 'bert-base-chinese' # bert tokenizer 基于的模型, 最好基于本地\n",
|
| 1051 |
+
" self.label_all_tokens = True # 子词延续标签 True为子词提供相同标签 False为用-100做标签\n",
|
| 1052 |
+
" self.df_text = 'text' # 文本列名称\n",
|
| 1053 |
+
" self.df_label = 'labels' # 标签列名称\n",
|
| 1054 |
+
" \n",
|
| 1055 |
+
" def data_prepare(self):\n",
|
| 1056 |
+
" \"\"\"\n",
|
| 1057 |
+
" 数据预处理\n",
|
| 1058 |
+
" \"\"\"\n",
|
| 1059 |
+
" self.text = [self.tokenizer(i, padding='max_length', truncation=True, max_length=self.bert_length, return_tensors='pt') for i in self.df[self.df_text].values.tolist()]\n",
|
| 1060 |
+
" self.labels = [self.labels_id_together(j, k) for j, k in zip(self.text, self.df[self.df_label].values.tolist())]\n",
|
| 1061 |
+
" \n",
|
| 1062 |
+
" def labels_id_together(self, tk, labels: list) -> list :\n",
|
| 1063 |
+
" \"\"\"\n",
|
| 1064 |
+
" 对齐数据集 labels与新ids\n",
|
| 1065 |
+
" :param tk: 字段tokenize后标签\n",
|
| 1066 |
+
" :param labels: 标签\n",
|
| 1067 |
+
" :return: 转换后的labels标签\n",
|
| 1068 |
+
" \"\"\"\n",
|
| 1069 |
+
" word_ids = tk.word_ids()\n",
|
| 1070 |
+
" label_ids = []\n",
|
| 1071 |
+
" k = None\n",
|
| 1072 |
+
" step = 0\n",
|
| 1073 |
+
" for i in word_ids:\n",
|
| 1074 |
+
" if i is None:\n",
|
| 1075 |
+
" label_ids.append(-100)\n",
|
| 1076 |
+
" step += 1\n",
|
| 1077 |
+
" elif step == 0:\n",
|
| 1078 |
+
" label_ids.append(self.label_dict[labels[i]])\n",
|
| 1079 |
+
" k = i\n",
|
| 1080 |
+
" step += 1\n",
|
| 1081 |
+
" else:\n",
|
| 1082 |
+
" if self.label_all_tokens:\n",
|
| 1083 |
+
" label_ids.append(self.label_dict[labels[i]])\n",
|
| 1084 |
+
" else:\n",
|
| 1085 |
+
" if k == i:\n",
|
| 1086 |
+
" label_ids.append(-100)\n",
|
| 1087 |
+
" else:\n",
|
| 1088 |
+
" label_ids.append(self.label_dict[labels[i]])\n",
|
| 1089 |
+
" step += 1\n",
|
| 1090 |
+
" k = i\n",
|
| 1091 |
+
" return label_ids\n",
|
| 1092 |
+
" \n",
|
| 1093 |
+
" def __len__(self):\n",
|
| 1094 |
+
" return len(self.labels)\n",
|
| 1095 |
+
" \n",
|
| 1096 |
+
" def get_batch_text(self, idx):\n",
|
| 1097 |
+
" return self.text[idx]\n",
|
| 1098 |
+
" \n",
|
| 1099 |
+
" def get_batch_label(self, idx):\n",
|
| 1100 |
+
" return torch.LongTensor(self.labels[idx])\n",
|
| 1101 |
+
" \n",
|
| 1102 |
+
" def __getitem__(self, idx):\n",
|
| 1103 |
+
" return self.get_batch_text(idx), self.get_batch_label(idx)"
|
| 1104 |
+
],
|
| 1105 |
+
"metadata": {
|
| 1106 |
+
"collapsed": false,
|
| 1107 |
+
"ExecuteTime": {
|
| 1108 |
+
"end_time": "2024-05-04T00:58:46.297164Z",
|
| 1109 |
+
"start_time": "2024-05-04T00:58:46.267240Z"
|
| 1110 |
+
}
|
| 1111 |
+
},
|
| 1112 |
+
"id": "ea9bab41704b4921",
|
| 1113 |
+
"outputs": [],
|
| 1114 |
+
"execution_count": 16
|
| 1115 |
+
},
|
| 1116 |
+
{
|
| 1117 |
+
"cell_type": "code",
|
| 1118 |
+
"source": [
|
| 1119 |
+
"from tqdm import tqdm\n",
|
| 1120 |
+
"from torch.optim.adamw import AdamW\n",
|
| 1121 |
+
"from torch.utils.data import DataLoader\n",
|
| 1122 |
+
"\n",
|
| 1123 |
+
"learning_rate = 5e-5\n",
|
| 1124 |
+
"epochs = 10\n",
|
| 1125 |
+
"batch_size = 256\n",
|
| 1126 |
+
"\n",
|
| 1127 |
+
"def train_loop(model, df_train, df_val):\n",
|
| 1128 |
+
" # 定义训练和验证集数据\n",
|
| 1129 |
+
" train_dataset = Dataset(df_train, label_dict)\n",
|
| 1130 |
+
" val_dataset = Dataset(df_val, label_dict)\n",
|
| 1131 |
+
" # 批量获取训练和验证集数据\n",
|
| 1132 |
+
" train_dataloader = DataLoader(train_dataset, num_workers=0, batch_size=batch_size, shuffle=True)\n",
|
| 1133 |
+
" val_dataloader = DataLoader(val_dataset, num_workers=0, batch_size=batch_size, shuffle=True)\n",
|
| 1134 |
+
" # 判断是否使用GPU,如果有,尽量使用,可以加快训练速度\n",
|
| 1135 |
+
" use_cuda = torch.cuda.is_available()\n",
|
| 1136 |
+
" device = torch.device(\"cuda\" if use_cuda else \"cpu\")\n",
|
| 1137 |
+
" # 定义优化器\n",
|
| 1138 |
+
" optimizer = AdamW(model.parameters(), lr=learning_rate)\n",
|
| 1139 |
+
"\n",
|
| 1140 |
+
" if use_cuda:\n",
|
| 1141 |
+
" model = model.cuda()\n",
|
| 1142 |
+
" \n",
|
| 1143 |
+
" # 开始训练循环\n",
|
| 1144 |
+
" best_acc = None\n",
|
| 1145 |
+
" best_loss = None\n",
|
| 1146 |
+
" best_val_loss = None\n",
|
| 1147 |
+
" best_val_acc = None\n",
|
| 1148 |
+
" best_model = None\n",
|
| 1149 |
+
" acc_list = []\n",
|
| 1150 |
+
" for epoch_num in range(epochs):\n",
|
| 1151 |
+
"\n",
|
| 1152 |
+
" total_acc_train = 0\n",
|
| 1153 |
+
" total_loss_train = 0\n",
|
| 1154 |
+
" # 训练模型\n",
|
| 1155 |
+
" # 按批量循环训练模型\n",
|
| 1156 |
+
" for train_data, train_label in tqdm(train_dataloader):\n",
|
| 1157 |
+
" # 从train_data中获取mask和input_id\n",
|
| 1158 |
+
" train_label = train_label[0].to(device)\n",
|
| 1159 |
+
" mask = train_data['attention_mask'][0].to(device)\n",
|
| 1160 |
+
" input_id = train_data['input_ids'][0].to(device)\n",
|
| 1161 |
+
" # 梯度清零!!\n",
|
| 1162 |
+
" optimizer.zero_grad()\n",
|
| 1163 |
+
" # 输入模型训练结果:损失及分类概率\n",
|
| 1164 |
+
" loss, logits = model(input_id, mask, train_label)\n",
|
| 1165 |
+
" # 过滤掉特殊token及padding的token\n",
|
| 1166 |
+
" logits_clean = logits[0][train_label != -100]\n",
|
| 1167 |
+
" label_clean = train_label[train_label != -100]\n",
|
| 1168 |
+
" # 获取最大概率值\n",
|
| 1169 |
+
" predictions = logits_clean.argmax(dim=1)\n",
|
| 1170 |
+
" # 计算准确率\n",
|
| 1171 |
+
" acc = (predictions == label_clean).float().mean()\n",
|
| 1172 |
+
" total_acc_train += acc\n",
|
| 1173 |
+
" total_loss_train += loss.item()\n",
|
| 1174 |
+
" # 反向传递\n",
|
| 1175 |
+
" loss.backward()\n",
|
| 1176 |
+
" # 参数更新\n",
|
| 1177 |
+
" optimizer.step()\n",
|
| 1178 |
+
" # 模型评估\n",
|
| 1179 |
+
" model.eval()\n",
|
| 1180 |
+
"\n",
|
| 1181 |
+
" total_acc_val = 0\n",
|
| 1182 |
+
" total_loss_val = 0\n",
|
| 1183 |
+
" for val_data, val_label in val_dataloader:\n",
|
| 1184 |
+
" # 批量获取验证数据\n",
|
| 1185 |
+
" val_label = val_label[0].to(device)\n",
|
| 1186 |
+
" mask = val_data['attention_mask'][0].to(device)\n",
|
| 1187 |
+
" input_id = val_data['input_ids'][0].to(device)\n",
|
| 1188 |
+
" # 输出模型预测结果\n",
|
| 1189 |
+
" loss, logits = model(input_id, mask, val_label)\n",
|
| 1190 |
+
" # 清除无效token对应的结果\n",
|
| 1191 |
+
" logits_clean = logits[0][val_label != -100]\n",
|
| 1192 |
+
" label_clean = val_label[val_label != -100]\n",
|
| 1193 |
+
" # 获取概率值最大的预测\n",
|
| 1194 |
+
" predictions = logits_clean.argmax(dim=1)\n",
|
| 1195 |
+
" # 计算精度\n",
|
| 1196 |
+
" acc = (predictions == label_clean).float().mean()\n",
|
| 1197 |
+
" total_acc_val += acc\n",
|
| 1198 |
+
" total_loss_val += loss.item()\n",
|
| 1199 |
+
"\n",
|
| 1200 |
+
" val_accuracy = total_acc_val / len(df_val)\n",
|
| 1201 |
+
" val_loss = total_loss_val / len(df_val)\n",
|
| 1202 |
+
" model_loss = total_loss_train / len(df_train)\n",
|
| 1203 |
+
" model_acc = total_acc_train / len(df_train)\n",
|
| 1204 |
+
"\n",
|
| 1205 |
+
" print(\n",
|
| 1206 |
+
" f'''Epochs: {epoch_num + 1} | \n",
|
| 1207 |
+
" Loss: {total_loss_train / len(df_train): .3f} | \n",
|
| 1208 |
+
" Accuracy: {total_acc_train / len(df_train): .3f} |\n",
|
| 1209 |
+
" Val_Loss: {total_loss_val / len(df_val): .3f} | \n",
|
| 1210 |
+
" Val_Accuracy: {total_acc_val / len(df_val): .3f}''')\n",
|
| 1211 |
+
" \n",
|
| 1212 |
+
" # 单次选优模型\n",
|
| 1213 |
+
" if best_model is None:\n",
|
| 1214 |
+
" best_loss = model_loss\n",
|
| 1215 |
+
" best_acc = model_acc\n",
|
| 1216 |
+
" best_val_loss = val_loss\n",
|
| 1217 |
+
" best_val_acc = val_accuracy\n",
|
| 1218 |
+
" best_model = model\n",
|
| 1219 |
+
" elif best_loss < model_loss and best_val_loss < val_loss:\n",
|
| 1220 |
+
" best_loss = model_loss\n",
|
| 1221 |
+
" best_acc = model_acc\n",
|
| 1222 |
+
" best_val_loss = val_loss\n",
|
| 1223 |
+
" best_val_acc = val_accuracy\n",
|
| 1224 |
+
" best_model = model\n",
|
| 1225 |
+
" \n",
|
| 1226 |
+
" # acc信息写入\n",
|
| 1227 |
+
" acc_list.append([epoch_num, model_loss, model_acc, val_loss, val_accuracy])\n",
|
| 1228 |
+
" return best_model, acc_list\n"
|
| 1229 |
+
],
|
| 1230 |
+
"metadata": {
|
| 1231 |
+
"collapsed": false,
|
| 1232 |
+
"ExecuteTime": {
|
| 1233 |
+
"end_time": "2024-05-04T00:58:46.327082Z",
|
| 1234 |
+
"start_time": "2024-05-04T00:58:46.299156Z"
|
| 1235 |
+
}
|
| 1236 |
+
},
|
| 1237 |
+
"id": "c12880f22af0ca8f",
|
| 1238 |
+
"outputs": [],
|
| 1239 |
+
"execution_count": 17
|
| 1240 |
+
},
|
| 1241 |
+
{
|
| 1242 |
+
"metadata": {
|
| 1243 |
+
"ExecuteTime": {
|
| 1244 |
+
"end_time": "2024-05-04T02:59:32.402667Z",
|
| 1245 |
+
"start_time": "2024-05-04T00:58:46.331069Z"
|
| 1246 |
+
}
|
| 1247 |
+
},
|
| 1248 |
+
"cell_type": "code",
|
| 1249 |
+
"source": [
|
| 1250 |
+
"model = BertNerModel(labels)\n",
|
| 1251 |
+
"model, info = train_loop(model, train_split, test_split)"
|
| 1252 |
+
],
|
| 1253 |
+
"id": "1af8c7475035fe2",
|
| 1254 |
+
"outputs": [
|
| 1255 |
+
{
|
| 1256 |
+
"name": "stderr",
|
| 1257 |
+
"output_type": "stream",
|
| 1258 |
+
"text": [
|
| 1259 |
+
"Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-chinese and are newly initialized: ['classifier.bias', 'classifier.weight']\n",
|
| 1260 |
+
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
|
| 1261 |
+
"100%|██████████| 1591/1591 [10:54<00:00, 2.43it/s]\n"
|
| 1262 |
+
]
|
| 1263 |
+
},
|
| 1264 |
+
{
|
| 1265 |
+
"name": "stdout",
|
| 1266 |
+
"output_type": "stream",
|
| 1267 |
+
"text": [
|
| 1268 |
+
"Epochs: 1 | \n",
|
| 1269 |
+
" Loss: 0.003 | \n",
|
| 1270 |
+
" Accuracy: 0.003 |\n",
|
| 1271 |
+
" Val_Loss: 0.003 | \n",
|
| 1272 |
+
" Val_Accuracy: 0.003\n"
|
| 1273 |
+
]
|
| 1274 |
+
},
|
| 1275 |
+
{
|
| 1276 |
+
"name": "stderr",
|
| 1277 |
+
"output_type": "stream",
|
| 1278 |
+
"text": [
|
| 1279 |
+
"100%|██████████| 1591/1591 [10:54<00:00, 2.43it/s]\n"
|
| 1280 |
+
]
|
| 1281 |
+
},
|
| 1282 |
+
{
|
| 1283 |
+
"name": "stdout",
|
| 1284 |
+
"output_type": "stream",
|
| 1285 |
+
"text": [
|
| 1286 |
+
"Epochs: 2 | \n",
|
| 1287 |
+
" Loss: 0.002 | \n",
|
| 1288 |
+
" Accuracy: 0.003 |\n",
|
| 1289 |
+
" Val_Loss: 0.002 | \n",
|
| 1290 |
+
" Val_Accuracy: 0.003\n"
|
| 1291 |
+
]
|
| 1292 |
+
},
|
| 1293 |
+
{
|
| 1294 |
+
"name": "stderr",
|
| 1295 |
+
"output_type": "stream",
|
| 1296 |
+
"text": [
|
| 1297 |
+
"100%|██████████| 1591/1591 [11:34<00:00, 2.29it/s]\n"
|
| 1298 |
+
]
|
| 1299 |
+
},
|
| 1300 |
+
{
|
| 1301 |
+
"name": "stdout",
|
| 1302 |
+
"output_type": "stream",
|
| 1303 |
+
"text": [
|
| 1304 |
+
"Epochs: 3 | \n",
|
| 1305 |
+
" Loss: 0.002 | \n",
|
| 1306 |
+
" Accuracy: 0.003 |\n",
|
| 1307 |
+
" Val_Loss: 0.002 | \n",
|
| 1308 |
+
" Val_Accuracy: 0.003\n"
|
| 1309 |
+
]
|
| 1310 |
+
},
|
| 1311 |
+
{
|
| 1312 |
+
"name": "stderr",
|
| 1313 |
+
"output_type": "stream",
|
| 1314 |
+
"text": [
|
| 1315 |
+
"100%|██████████| 1591/1591 [10:36<00:00, 2.50it/s]\n"
|
| 1316 |
+
]
|
| 1317 |
+
},
|
| 1318 |
+
{
|
| 1319 |
+
"name": "stdout",
|
| 1320 |
+
"output_type": "stream",
|
| 1321 |
+
"text": [
|
| 1322 |
+
"Epochs: 4 | \n",
|
| 1323 |
+
" Loss: 0.002 | \n",
|
| 1324 |
+
" Accuracy: 0.003 |\n",
|
| 1325 |
+
" Val_Loss: 0.002 | \n",
|
| 1326 |
+
" Val_Accuracy: 0.003\n"
|
| 1327 |
+
]
|
| 1328 |
+
},
|
| 1329 |
+
{
|
| 1330 |
+
"name": "stderr",
|
| 1331 |
+
"output_type": "stream",
|
| 1332 |
+
"text": [
|
| 1333 |
+
"100%|██████████| 1591/1591 [10:43<00:00, 2.47it/s]\n"
|
| 1334 |
+
]
|
| 1335 |
+
},
|
| 1336 |
+
{
|
| 1337 |
+
"name": "stdout",
|
| 1338 |
+
"output_type": "stream",
|
| 1339 |
+
"text": [
|
| 1340 |
+
"Epochs: 5 | \n",
|
| 1341 |
+
" Loss: 0.002 | \n",
|
| 1342 |
+
" Accuracy: 0.003 |\n",
|
| 1343 |
+
" Val_Loss: 0.002 | \n",
|
| 1344 |
+
" Val_Accuracy: 0.003\n"
|
| 1345 |
+
]
|
| 1346 |
+
},
|
| 1347 |
+
{
|
| 1348 |
+
"name": "stderr",
|
| 1349 |
+
"output_type": "stream",
|
| 1350 |
+
"text": [
|
| 1351 |
+
"100%|██████████| 1591/1591 [11:39<00:00, 2.27it/s]\n"
|
| 1352 |
+
]
|
| 1353 |
+
},
|
| 1354 |
+
{
|
| 1355 |
+
"name": "stdout",
|
| 1356 |
+
"output_type": "stream",
|
| 1357 |
+
"text": [
|
| 1358 |
+
"Epochs: 6 | \n",
|
| 1359 |
+
" Loss: 0.002 | \n",
|
| 1360 |
+
" Accuracy: 0.003 |\n",
|
| 1361 |
+
" Val_Loss: 0.002 | \n",
|
| 1362 |
+
" Val_Accuracy: 0.003\n"
|
| 1363 |
+
]
|
| 1364 |
+
},
|
| 1365 |
+
{
|
| 1366 |
+
"name": "stderr",
|
| 1367 |
+
"output_type": "stream",
|
| 1368 |
+
"text": [
|
| 1369 |
+
"100%|██████████| 1591/1591 [12:20<00:00, 2.15it/s]\n"
|
| 1370 |
+
]
|
| 1371 |
+
},
|
| 1372 |
+
{
|
| 1373 |
+
"name": "stdout",
|
| 1374 |
+
"output_type": "stream",
|
| 1375 |
+
"text": [
|
| 1376 |
+
"Epochs: 7 | \n",
|
| 1377 |
+
" Loss: 0.002 | \n",
|
| 1378 |
+
" Accuracy: 0.003 |\n",
|
| 1379 |
+
" Val_Loss: 0.002 | \n",
|
| 1380 |
+
" Val_Accuracy: 0.003\n"
|
| 1381 |
+
]
|
| 1382 |
+
},
|
| 1383 |
+
{
|
| 1384 |
+
"name": "stderr",
|
| 1385 |
+
"output_type": "stream",
|
| 1386 |
+
"text": [
|
| 1387 |
+
"100%|██████████| 1591/1591 [11:25<00:00, 2.32it/s]\n"
|
| 1388 |
+
]
|
| 1389 |
+
},
|
| 1390 |
+
{
|
| 1391 |
+
"name": "stdout",
|
| 1392 |
+
"output_type": "stream",
|
| 1393 |
+
"text": [
|
| 1394 |
+
"Epochs: 8 | \n",
|
| 1395 |
+
" Loss: 0.002 | \n",
|
| 1396 |
+
" Accuracy: 0.003 |\n",
|
| 1397 |
+
" Val_Loss: 0.002 | \n",
|
| 1398 |
+
" Val_Accuracy: 0.003\n"
|
| 1399 |
+
]
|
| 1400 |
+
},
|
| 1401 |
+
{
|
| 1402 |
+
"name": "stderr",
|
| 1403 |
+
"output_type": "stream",
|
| 1404 |
+
"text": [
|
| 1405 |
+
"100%|██████████| 1591/1591 [11:27<00:00, 2.32it/s]\n"
|
| 1406 |
+
]
|
| 1407 |
+
},
|
| 1408 |
+
{
|
| 1409 |
+
"name": "stdout",
|
| 1410 |
+
"output_type": "stream",
|
| 1411 |
+
"text": [
|
| 1412 |
+
"Epochs: 9 | \n",
|
| 1413 |
+
" Loss: 0.002 | \n",
|
| 1414 |
+
" Accuracy: 0.003 |\n",
|
| 1415 |
+
" Val_Loss: 0.002 | \n",
|
| 1416 |
+
" Val_Accuracy: 0.003\n"
|
| 1417 |
+
]
|
| 1418 |
+
},
|
| 1419 |
+
{
|
| 1420 |
+
"name": "stderr",
|
| 1421 |
+
"output_type": "stream",
|
| 1422 |
+
"text": [
|
| 1423 |
+
"100%|██████████| 1591/1591 [10:59<00:00, 2.41it/s]\n"
|
| 1424 |
+
]
|
| 1425 |
+
},
|
| 1426 |
+
{
|
| 1427 |
+
"name": "stdout",
|
| 1428 |
+
"output_type": "stream",
|
| 1429 |
+
"text": [
|
| 1430 |
+
"Epochs: 10 | \n",
|
| 1431 |
+
" Loss: 0.002 | \n",
|
| 1432 |
+
" Accuracy: 0.003 |\n",
|
| 1433 |
+
" Val_Loss: 0.002 | \n",
|
| 1434 |
+
" Val_Accuracy: 0.003\n"
|
| 1435 |
+
]
|
| 1436 |
+
}
|
| 1437 |
+
],
|
| 1438 |
+
"execution_count": 18
|
| 1439 |
+
},
|
| 1440 |
+
{
|
| 1441 |
+
"cell_type": "code",
|
| 1442 |
+
"source": [
|
| 1443 |
+
"torch.save(model.state_dict(), 'bert-model.pth')"
|
| 1444 |
+
],
|
| 1445 |
+
"metadata": {
|
| 1446 |
+
"collapsed": false,
|
| 1447 |
+
"ExecuteTime": {
|
| 1448 |
+
"end_time": "2024-05-04T02:59:32.951202Z",
|
| 1449 |
+
"start_time": "2024-05-04T02:59:32.404662Z"
|
| 1450 |
+
}
|
| 1451 |
+
},
|
| 1452 |
+
"id": "69b2fdb529e60fdd",
|
| 1453 |
+
"outputs": [],
|
| 1454 |
+
"execution_count": 19
|
| 1455 |
+
},
|
| 1456 |
+
{
|
| 1457 |
+
"cell_type": "code",
|
| 1458 |
+
"source": [
|
| 1459 |
+
"# Save the labels to labels.txt, one label per line\n",
|
| 1460 |
+
"with open('labels.txt', 'w') as f:\n",
|
| 1461 |
+
" for label in labels:\n",
|
| 1462 |
+
" f.writelines(label + '\\n')"
|
| 1463 |
+
],
|
| 1464 |
+
"metadata": {
|
| 1465 |
+
"collapsed": false,
|
| 1466 |
+
"ExecuteTime": {
|
| 1467 |
+
"end_time": "2024-05-04T02:59:32.966165Z",
|
| 1468 |
+
"start_time": "2024-05-04T02:59:32.953195Z"
|
| 1469 |
+
}
|
| 1470 |
+
},
|
| 1471 |
+
"id": "fd668538dc137ade",
|
| 1472 |
+
"outputs": [],
|
| 1473 |
+
"execution_count": 20
|
| 1474 |
+
},
|
| 1475 |
+
{
|
| 1476 |
+
"cell_type": "code",
|
| 1477 |
+
"source": [
|
| 1478 |
+
"import matplotlib.pyplot as plt\n",
|
| 1479 |
+
"\n",
|
| 1480 |
+
"# Each entry of info is [epoch_num, model_loss, model_acc, val_loss, val_accuracy]\n",
|
| 1481 |
+
"\n",
|
| 1482 |
+
"\n",
|
| 1483 |
+
"x = [i[0] for i in info]\n",
|
| 1484 |
+
"y1 = [float(i[1]) for i in info]\n",
|
| 1485 |
+
"y2 = [float(i[2]) for i in info]\n",
|
| 1486 |
+
"y3 = [float(i[3]) for i in info]\n",
|
| 1487 |
+
"y4 = [float(i[4]) for i in info]\n",
|
| 1488 |
+
"\n",
|
| 1489 |
+
"plt.plot(x, y1, color='r', label='loss')\n",
|
| 1490 |
+
"plt.plot(x, y2, color='g', label='acc')\n",
|
| 1491 |
+
"plt.plot(x, y3, color='b', label='val_loss')\n",
|
| 1492 |
+
"plt.plot(x, y4, color='k', label='val_acc')\n",
|
| 1493 |
+
"plt.xlim((0,1))\n",
|
| 1494 |
+
"%matplotlib inline\n",
|
| 1495 |
+
"plt.show()"
|
| 1496 |
+
],
|
| 1497 |
+
"metadata": {
|
| 1498 |
+
"collapsed": false,
|
| 1499 |
+
"ExecuteTime": {
|
| 1500 |
+
"end_time": "2024-05-04T02:59:34.526025Z",
|
| 1501 |
+
"start_time": "2024-05-04T02:59:32.968156Z"
|
| 1502 |
+
}
|
| 1503 |
+
},
|
| 1504 |
+
"id": "d8c16f36d9431367",
|
| 1505 |
+
"outputs": [
|
| 1506 |
+
{
|
| 1507 |
+
"data": {
|
| 1508 |
+
"text/plain": [
|
| 1509 |
+
"<Figure size 640x480 with 1 Axes>"
|
| 1510 |
+
],
|
| 1511 |
+
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlEAAAGdCAYAAAAyviaMAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/H5lhTAAAACXBIWXMAAA9hAAAPYQGoP6dpAABSa0lEQVR4nO3de1xUdf4/8Be3mQEvg0bOgKGQhpppFAZBGpIUrQiyl6x2V1nX0nbd/dXXdjdtU9orrtq3vvq1tZu6u/nNdLsoipaRl1JAQ8xrrBe8oA5mxaAmIDPv3x+nmeHIIMwRmAFez8fjPGY4n8855zOccF6d8zmfj5+ICIiIiIjII/7ebgARERFRR8QQRURERKQBQxQRERGRBgxRRERERBowRBERERFpwBBFREREpAFDFBEREZEGDFFEREREGgR6uwEdjd1ux5kzZ9CjRw/4+fl5uzlERETUAiKCCxcuICIiAv7+rXMNiSHKQ2fOnEFkZKS3m0FEREQanDp1CjfddFOr7IshykM9evQAoJyEnj17erk1RERE1BLV1dWIjIx0fo+3BoYoDzlu4fXs2ZMhioiIqINpza447FhOREREpAFDFBEREZEGDFFEREREGjBEEREREWnAEEVERESkAUMUERERkQYMUUREREQaaApRixcvRlRUFAwGAxISErBz585r1l+9ejUGDx4Mg8GAYcOGIT8/X1UuIpgzZw7Cw8MRHByM1NRUHD58WFUnMzMT/fr1g8FgQHh4OCZOnIgzZ844y48fPw4/P79GS1FRkUdtISIiImoJj0PU22+/jRkzZiAnJwe7d+/G7bffjrS0NJw7d85t/R07duDRRx/FlClTUFpaiqysLGRlZWH//v3OOvPmzcPChQuxZMkSFBcXo1u3bkhLS0NNTY2zTkpKClatWoWysjK88847OHr0KH70ox81Ot5HH32Es2fPOpe4uDiP2kJERETUEn4iIp5skJCQgLvuugv/+7//C0CZkDcyMhK//vWvMXPmzEb1H374YVy6dAnr1q1zrrv77rsRGxuLJUuWQEQQERGBp59+Gr/5zW8AAFarFSaTCcuXL8cjjzzith1r165FVlYWamtrERQUhOPHjyM6OhqlpaWIjY11u01zbWmJ6upqGI1GWK1WjlhORETUQbTF97dHV6Lq6upQUlKC1NRU1w78/ZGamorCwkK32xQWFqrqA0BaWpqzfnl5OSwWi6qO0WhEQkJCk/v8+uuvsWLFCiQlJSEoKEhVlpmZiT59+mDkyJFYu3atR21xp7a2FtXV1aqFiIiIyKMQdf78edhsNphMJtV6k8kEi8XidhuLxXLN+o7XluzzmWeeQbdu3XDDDTfg5MmTWLNmjbOse/fueOGFF7B69WqsX78eI0eORFZWlipINdcWd3Jzc2E0Gp1LZGRkk3WJiIio6+hQT+f99re/RWlpKT788EMEBARg0qRJcNyNDAsLw4wZM5y3G+fOnYuf/vSnmD9//nUdc9asWbBarc7l1KlTrfFRiIiIqIML9KRyWFgYAgICUFlZqVpfWVkJs9nsdhuz2XzN+o7XyspKhIeHq+pc3bcpLCwMYWFhiImJwZAhQxAZGYmioiIkJia6PXZCQgI2bdrU4ra4o9frodfrmywnIiKirsmjK1E6nQ5xcXEoKChwrrPb7SgoKGgyyCQmJqrqA8CmTZuc9aOjo2E2m1V1qqurUVxc3OQ+HccFlD5LTdmzZ48qmDXXFiIiIqKW8uhKFADMmDED2dnZGDFiBOLj4/HSSy/h0qVLmDx5MgBg0qRJ6Nu3L3JzcwEATz75JJKTk/HCCy8gPT0dK1euxGeffYZXX30VAODn54ennnoKf/7zn3HLLbcgOjoas2fPRkREBLKysgAAxcXF2LVrF0aOHIlevXrh6NGjmD17NgYMGOAMQP/4xz+g0+lwxx13AADeffddLF26FK+//rqz7c21hYiIiKjFRINFixZJv379RKfTSX
x8vBQVFTnLkpOTJTs7W1V/1apVEhMTIzqdToYOHSrr169Xldvtdpk9e7aYTCbR6/UyZswYKSsrc5bv3btXUlJSpHfv3qLX6yUqKkqeeOIJqaiocNZZvny5DBkyREJCQqRnz54SHx8vq1evbtT25trSHKvVKgDEarV6tB0RERF5T1t8f3s8TlRXx3GiiIiIOh6vjxNFRERERAqGKCIiIiINGKKIiIiINGCIIiIiItKAIYqIiIhIA4YoIiIiIg0YooiIiIg0YIgiIiIi0oAhioiIiEgDhigiIiIiDTyegJiIiIjI20QEdXV1uHz5MmpqalSv7t5/8803rd4GhigiIiK6LjabrdkQ0xbvvT39L0MUERFRJyEiqK2tbZPgcq3yK1euePVz+/n5ITg4GAaDAcHBwW7fBwQEYMOGDa16XIYoIiLyaVdfbRCI27Km1ntrmytXrigh43INLtcoYcP5/nKNan1tTa0zlNTW1ipl37137MPt+++2dey7pqbm6l9fu9PpdNAb9NAb9DAYDKr3BoMBOoPOtV6vhz74u3rfvdfr1ds663+33rHo9DoYgpX1gUGB8PPzA9D0+blw4QI23MIQ5ROe/uBp6EJ03vljbsH2Ld23N7bxhX/cOvo2HeVc+/I2HeVc+/I2bXneWpUAqAdw5bvX9nrv3TtNgB+Ub/mg716vfn+tsut4X+dfhzrU4QIuaGv3le8WjZs3qQ3yJUOURq/vfh0weLsVREQdiACwo/3DjK09PlwzAtAmgcXde78gP+W9P+Dv53oI33Glxvkz/NyWNbW+o29jD7DjS3yJ1sQQpdHMkTOh76YH4Dv/gXT2bRqu5zbatuko59qXt+ko57q5bex2O2prapVbQd/dHnLcGnLcHmrp+5qamka3lRy3pK5eb7fb4U0BAQFKH5lgAwx6AwzBSr8ZvUGPYEOw632DOo73wYZgZ32DQflZb9AjJCREVbdhf5yQkBDo9Xr4+/u3+n+75Jnq6moYZxtbdZ8MURrNGjULPXv29HYziKiDczym7WlH3ut9X1dX5+2P7gwa1+oM3NrvAwP5tUeth/81ERF9x2aztfsj2pcvX/b+Y9qBge0WYhzv9Xo9r6xQh8cQRUQ+x/GYdlsGF3frvP2YNgBn2PA0lGgNMwaDgVdniDTiXw4RXVN9fX2bjS3T1HtfGEQvKCio3a/O6HQ6Xp0h6kAYoog6CBG5ZjBpq6s29fX1Xv3cjkH02iPENHwNCAjw6ucmIt/HEEWkgXMQvXbsP1NbW+vtjw29Xt9mt5Waeh8UFMSrM0TkkxiiqEOz2+3t8iTT1etsNu8OPOPv799uV2cc7w0GA/z9/ZtvHBFRF8EQRa1CRJxXZ9qjz4zjva88pn29t4+0XJ0hIiLvYojqhBo+pt1et5tqarw/iF5gYGC7jjfjmMuJV2eIiLomhqg25BhEr736zPjaY9rtFWgcr3xMm4iI2hO/dTR65JFHYLPZmr064yuPabdnZ2AOokdERF0BQ5RGGzZs8Ki+4zHt9r7dxMe0iYiI2gZDlEYvvfQSevfu3eIww0H0iIiIOheGKI0mT57MCYiJiIi6MD5WRERERKQBQxQRERGRBgxRRERERBowRBERERFpwBBFREREpAFDFBEREZEGDFFEREREGjBEEREREWmgKUQtXrwYUVFRMBgMSEhIwM6dO69Zf/Xq1Rg8eDAMBgOGDRuG/Px8VbmIYM6cOQgPD0dwcDBSU1Nx+PBhVZ3MzEz069cPBoMB4eHhmDhxIs6cOeMs37JlC8aPH4/w8HB069YNsbGxWLFihWofy5cvh5+fn2oxGAxafgVERETUxXkcot5++23MmDEDOTk52L17N26//XakpaXh3Llzbuvv2LEDjz76KKZMmYLS0lJkZWUhKysL+/fvd9aZN28eFi5ciCVLlqC4uBjdunVDWloaampqnHVSUlKwatUqlJWV4Z133sHRo0fxox/9SHWc4cOH45133sHevXsxefJkTJo0Ce
vWrVO1p2fPnjh79qxzOXHihKe/AiIiIiL4iYh4skFCQgLuuusu/O///i8AwG63IzIyEr/+9a8xc+bMRvUffvhhXLp0SRVm7r77bsTGxmLJkiUQEURERODpp5/Gb37zGwCA1WqFyWTC8uXL8cgjj7htx9q1a5GVlYXa2loEBQW5rZOeng6TyYSlS5cCUK5EPfXUU6iqqvLkI6tUV1fDaDTCarVy2hciIqIOoi2+vz26ElVXV4eSkhKkpqa6duDvj9TUVBQWFrrdprCwUFUfANLS0pz1y8vLYbFYVHWMRiMSEhKa3OfXX3+NFStWICkpqckABShhrHfv3qp1Fy9eRP/+/REZGYnx48fjwIED1/zMtbW1qK6uVi1EREREHoWo8+fPw2azwWQyqdabTCZYLBa321gslmvWd7y2ZJ/PPPMMunXrhhtuuAEnT57EmjVrmmzrqlWrsGvXLkyePNm5btCgQVi6dCnWrFmDN998E3a7HUlJSaioqGhyP7m5uTAajc4lMjKyybpERETUdXSop/N++9vforS0FB9++CECAgIwadIkuLsbuXnzZkyePBmvvfYahg4d6lyfmJiISZMmITY2FsnJyXj33Xdx44034pVXXmnymLNmzYLVanUup06dapPPRkRERB1LoCeVw8LCEBAQgMrKStX6yspKmM1mt9uYzeZr1ne8VlZWIjw8XFUnNja20fHDwsIQExODIUOGIDIyEkVFRUhMTHTW2bp1KzIyMvDiiy9i0qRJ1/w8QUFBuOOOO3DkyJEm6+j1euj1+mvuh4iIiLoej65E6XQ6xMXFoaCgwLnObrejoKBAFWQaSkxMVNUHgE2bNjnrR0dHw2w2q+pUV1ejuLi4yX06jgsofZYctmzZgvT0dPztb3/D1KlTm/08NpsN+/btU4U3IiIiopbw6EoUAMyYMQPZ2dkYMWIE4uPj8dJLL+HSpUvOvkeTJk1C3759kZubCwB48sknkZycjBdeeAHp6elYuXIlPvvsM7z66qsAAD8/Pzz11FP485//jFtuuQXR0dGYPXs2IiIikJWVBQAoLi7Grl27MHLkSPTq1QtHjx7F7NmzMWDAAGfQ2rx5M8aNG4cnn3wSP/zhD539qXQ6nbNz+R//+EfcfffdGDhwIKqqqjB//nycOHECjz322PX9FomIiKjrEQ0WLVok/fr1E51OJ/Hx8VJUVOQsS05OluzsbFX9VatWSUxMjOh0Ohk6dKisX79eVW6322X27NliMplEr9fLmDFjpKyszFm+d+9eSUlJkd69e4ter5eoqCh54oknpKKiwlknOztbADRakpOTnXWeeuopZ7tNJpOMHTtWdu/e7dFnt1qtAkCsVqtH2xEREZH3tMX3t8fjRHV1HCeKiIio4/H6OFFEREREpGCIIiIiItKAIYqIiIhIA4YoIiIiIg0YooiIiIg0YIgiIiIi0oAhioiIiEgDhigiIiIiDRiiiIiIiDRgiCIiIiLSgCGKiIiISAOGKCIiIiINGKKIiIiINGCIIiIiItKAIYqIiIhIA4YoIiIiIg0YooiIiIg0YIgiIiIi0oAhioiIiEgDhigiIiIiDRiiiIiIiDRgiCIiIiLSgCGKiIiISAOGKCIiIiINGKKIiIiINGCIIiIiItKAIYqIiIhIA4YoIiIiIg0YooiIiIg0YIgiIiIi0oAhioiIiEgDhigiIiIiDRiiiIiIiDRgiCIiIiLSgCGKiIiISAOGKCIiIiINGKKIiIiINGCIIiIiItKAIYqIiIhIA00havHixYiKioLBYEBCQgJ27tx5zfqrV6/G4MGDYTAYMGzYMOTn56vKRQRz5sxBeHg4goODkZqaisOHD6vqZGZmol+/fjAYDAgPD8fEiRNx5swZVZ29e/di1KhRMBgMiIyMxLx58zxuCxEREVFLeByi3n77bcyYMQM5OTnYvXs3br/9dqSlpeHcuXNu6+/YsQOPPvoopkyZgtLSUmRlZSErKwv79+931pk3bx4WLl
yIJUuWoLi4GN26dUNaWhpqamqcdVJSUrBq1SqUlZXhnXfewdGjR/GjH/3IWV5dXY0HHngA/fv3R0lJCebPn4/nn38er776qkdtISIiImoR8VB8fLxMnz7d+bPNZpOIiAjJzc11W3/ChAmSnp6uWpeQkCDTpk0TERG73S5ms1nmz5/vLK+qqhK9Xi9vvfVWk+1Ys2aN+Pn5SV1dnYiIvPzyy9KrVy+pra111nnmmWdk0KBBLW5LS1itVgEgx49bW7wNEREReZfj+9tqbb3vb4+uRNXV1aGkpASpqanOdf7+/khNTUVhYaHbbQoLC1X1ASAtLc1Zv7y8HBaLRVXHaDQiISGhyX1+/fXXWLFiBZKSkhAUFOQ8zr333gudTqc6TllZGb755psWtcWd2tpaVFdXqxYAuPlmICUF+O//Bq6680hERERdgEch6vz587DZbDCZTKr1JpMJFovF7TYWi+Wa9R2vLdnnM888g27duuGGG27AyZMnsWbNmmaP0/AYzbXFndzcXBiNRucSGRkJALDbgS1bgKefBmJigCFDgN/9DvjkE6C+vsndERERUSfRoZ7O++1vf4vS0lJ8+OGHCAgIwKRJkyAibXrMWbNmwWq1OpdTp04BAPZ8egH/8z/AmDFAYCDwxRfA/PnAvfcCZjMwaRKwejXw3YUrIiIi6mQCPakcFhaGgIAAVFZWqtZXVlbCbDa73cZsNl+zvuO1srIS4eHhqjqxsbGNjh8WFoaYmBgMGTIEkZGRKCoqQmJiYpPHaXiM5trijl6vh16vb7Q+elQkbh8xAv9v9GhYp6Vi48WRyCsIQX4+8NVXwL/+pSxBQcDo0UBmJpCRAfTv3+ShiIiIqAPx6EqUTqdDXFwcCgoKnOvsdjsKCgqQmJjodpvExERVfQDYtGmTs350dDTMZrOqTnV1NYqLi5vcp+O4gNJnyXGcbdu24cqVK6rjDBo0CL169WpRWzwiAuzaBcyfD+OENDz8WA+8+Z94nPv5TGz9WyGe/lUtYmKAK1eATZuAX/8aiIoChg8HnnsOKC5WbgkSERFRB+VpT/SVK1eKXq+X5cuXy8GDB2Xq1KkSGhoqFotFREQmTpwoM2fOdNbfvn27BAYGyoIFC+TQoUOSk5MjQUFBsm/fPmeduXPnSmhoqKxZs0b27t0r48ePl+joaLl8+bKIiBQVFcmiRYuktLRUjh8/LgUFBZKUlCQDBgyQmpoaEVGe6DOZTDJx4kTZv3+/rFy5UkJCQuSVV17xqC3NcfbuP3BA5F//EpkyRWTAABElVrmWgACR+Hj5Yso8mT/lkNx7zxXx91dXMZmUzd9/X+TiRU/PBBEREbVUWzyd53GIEhFZtGiR9OvXT3Q6ncTHx0tRUZGzLDk5WbKzs1X1V61aJTExMaLT6WTo0KGyfv16VbndbpfZs2eLyWQSvV4vY8aMkbKyMmf53r17JSUlRXr37i16vV6ioqLkiSeekIqKCtV+Pv/8cxk5cqTo9Xrp27evzJ07t1Hbm2tLc5o8CSdPNhuqzt+RKv9Kf0smjDojPXrYVcUGg0h6usiSJSJXfSwiIiK6Tm0RovxE2rhndidTXV0No9EIq9WKnj17Nl3x5Elg61blEb4tW4Bjx1TFdf4GbIt5DHndHsHaijtwvDJEVR4Xp/ShyswEYmMBP79W/yhERERdRou/vz3AEOUhzSfhGqFKABzwH461EU8gzz4WxWf7QcSVmm66CRg3TglUKSmAwdBqH4eIiKhLYIjyAa12EhyhavNmJVSVlzuLKtEH+X7jsDZ0Ij68mIRvr7gGEO3WDbj/fiVQpacDffpcx4chIiLqIhiifEBbnAQAwIkT6itV34WqGujxMe5Dnl8m8oJ+gNN1rtTk5wfcfbdy2y8jAxg6lLf9iIiI3GGI8gFtFqKu5iZUCYBS3IE8ZCAPGSjBCNUm0dGuQHXvvUCDGXCIiIi6NIYoH9BuIepqx4+rQ9Xx4ziNCKzDOOQhAx8hFbVwdZbq2V
Pw4IN+yMgAxo4Fevduv6YSERH5GoYoH+C1EHW1q0LVpePn8BFSnVepzsE1R2CAvx33JAoysgKQmanM9UdERNSVMET5AJ8JUVc7ftwZqOwfb8GuUybkIQNrkYl9GK6qGnPTJWT8QIfMHwYhKUmZ+4+IiKgzY4jyAT4boq7WIFQd33QYeWfuRB4ysAWjcQWuzlK9g7/F9xKtyMzuhbTxBhiNXmsxERFRm2GI8gEdJkRd7btQVf1hET7Y5Ie884lYj3R8jRucVQL96pHc/zgyv1ePjOn9ED00pOn9ERERdSAMUT6gw4aohkSA48dRX7AVhf8+jbwdNyDvQjK+wBBVtdtCjiIjtgIZD4cg/me3IqBnNy81mIiI6PowRPmAThGirvZdqDr89m7kvVOLvH1R+KQ2Hja4Okv1QSXS+3yGjFFVuD87At3HJAAhvFJFREQdA0OUD+iUIepqIvh6z0ls/Hs51n5owMaTQ2AVV2cpPWpwn99mZEbvx7h0wU3j44DERIYqIiLyWQxRPqBLhKirXKkTfLLqLNb+4xvkFYXh2EWTqvwO7EaGfz4yh5fjzvQI+KWMZqgiIiKfwhDlA7piiGpIBDh0ULD2H18j7716FB65EQJ/Z3kETmMc1iEzIB/3xV9E8JgkYPRohioiIvIqhigf0NVD1NXOnQPy1wvy3r6ED7bocak2yFkWgku4H5uQgTykB22C+e4oJVA5QlVwsLeaTUREXQxDlA9giGpaTY0yNFXeWkHe+/U4dTZIVZ6Aou/GU8/DsKAy+N2dwFBFRETtgiHKBzBEtYwI8PnnQF4esHat4LPP/FTl/XEcGchDJtYiGVuh0/kBCd+FqpQU4O67GaqIiKjVMET5AIYobc6cAdavB9auBT76SFBT4wpVPfwuIE02IgN5GIt8hOErQKdTgpTjShVDFRERXQeGKB/AEHX9vv0WKChQAtW6dYDF4irz97MjKWgXMuv+jQzkYRDK4AcwVBER0XVhiPIBDFGty24HSkqUQJWXp9wCbGhgdwsy5X1kXFqJkfgUgbApBXp941BlMLR384mIqINgiPIBDFFt68QJ5epUXh6weTNQV+cqCw2pxdgbP0OG9U08WPUWQmF1FTJUERHRNTBE+QCGqPZz4QLw4YdKoFq3DvjqK1dZYKDg3gFnkBFSgIyKv2PAl0XqjRuGqpQUpdM6QxURUZfFEOUDGKK8w2YDioocT/sBhw6py28dWIuMAYeQYXsfd+9/HQGW0+oKer0yjILjShVDFRFRl8IQ5QMYonzD0aOuQLVtmxKyHMLCBOmjqpHZpxgPfPUWun+6Ud17HWCoIiLqYhiifABDlO+pqgI2blQC1YYNys8OOh2QkiLITDiHDMMmRO5dr4wIenWoMhgahyq9vt0+AxERtS2GKB/AEOXbrlwBtm9XAtXatcoVq4Zuvx3IzBBk3H4ScV9uhP+2LQxVRERdAEOUD2CI6jhEgC++UG775eUBO3YoQyo4hIcD48YBGeMEY/odRkjRx0qg2rIFqKxU76xhqEpJAeLjGaqIiDoQhigfwBDVcZ0/D+TnK4Fq40bg4kVXWXAwkJoKZGQA49IF4dVlrkDVVKhKSnJdqWKoIiLyaQxRPoAhqnOorQW2bnV1Tj95Ul1+111KoMrMBIYPE/j9h6GKiKgjY4jyAQxRnY8IsG+fK1Dt3Kkuj4x0BarRowG97rv7hA1D1blz6o2Cg9Wh6q67GKqIiLyIIcoHMER1fmfPKpMl5+UBmzYBly+7yrp3B9LSlFA1dixw441wdb7yJFTFxyuPDhIRUbtgiPIBDFFdy+XLymTJjlHTz5xxlfn5KbkoI0NZhgxR1jlD1ebNrlD15ZfqHTcMVSkpypUqhioiojbDEOUDGKK6LrsdKC11TZZcWqouv/lm5ZZfRgYwahQQFPRdgYgyxHrDK1XuQtU996hv/zFUERG1GoYoH8AQRQ6nTrkmSy4oUE+WbDQC3/ueEqi+9z2gV68GGzJUERG1O4
YoH8AQRe5cvKj0n3Lc9muYiQIClCtTjs7pAwdetbEIcPCgOlSdP6+uExKiDlUjRjBUERF5gCHKBzBEUXNsNuUJP8dtvwMH1OWDB7sCVWKiErJUGKqIiFodQ5QPYIgiTx075ho1fetWoL7eVXbDDcpTfpmZwAMPAG7/k7Lbldt/jo7qW7e6D1UjR6pDlbNTFhERMUT5AIYouh5WqzJael6eMnr6N9+4yoKClAf1HE/79e/fxE7s9sZXqr76Sl2HoYqISIUhygcwRFFrqa9XJkt2DPJ5+LC6fPhwV6C66y7A37+JHbUkVHXr1vj2H0MVEXUhbfH93dQ/y9e0ePFiREVFwWAwICEhATuvHuL5KqtXr8bgwYNhMBgwbNgw5Ofnq8pFBHPmzEF4eDiCg4ORmpqKww2+UY4fP44pU6YgOjoawcHBGDBgAHJyclDX4HGo559/Hn5+fo2Wbt26OessX768UbnBYNDyKyC6boGBQHIysGAB8J//KENLzZ+vdEL39wf27gX+8hfg7ruBiAjgsceUsPXtt1ftyN8fuO024Fe/Av79b2Wgz717gYULgR/8QLlneOkS8OGHwLPPKuNT9eoFPPggMHcuUFQEXLnild8BEVGHJh5auXKl6HQ6Wbp0qRw4cEAef/xxCQ0NlcrKSrf1t2/fLgEBATJv3jw5ePCgPPfccxIUFCT79u1z1pk7d64YjUZ5//335fPPP5fMzEyJjo6Wy5cvi4jIhg0b5Gc/+5l88MEHcvToUVmzZo306dNHnn76aec+Lly4IGfPnlUtt956q2RnZzvrLFu2THr27KmqY7FYPPr8VqtVAIjVavVoOyJPnD8v8q9/iTz0kEiPHiJKb3NlMRhE0tNFliwRqahowc5sNpG9e0UWLhT5wQ9EbrhBvUNApFs3kbQ0kdxckcJCkbq6Nv+MRETtqS2+vz0OUfHx8TJ9+nTnzzabTSIiIiQ3N9dt/QkTJkh6erpqXUJCgkybNk1EROx2u5jNZpk/f76zvKqqSvR6vbz11ltNtmPevHkSHR3dZPmePXsEgGzbts25btmyZWI0Gq/5+ZrDEEXtrbZWZNMmkV//WiQqqnH+iYsTef55kd27Rez2FuzQEar+539Evv99kd69G++0e3eRBx8UmTtXpKiIoYqIOry2+P726HZeXV0dSkpKkJqa6lzn7++P1NRUFBYWut2msLBQVR8A0tLSnPXLy8thsVhUdYxGIxISEprcJwBYrVb07t27yfLXX38dMTExGDVqlGr9xYsX0b9/f0RGRmL8+PE4cPXz51epra1FdXW1aiFqTzodkJqq3J07dkyZLNlxm8/PDygpAZ5/HrjzTqBfP+CXvwQ2bABqaprYob8/MGwY8P/+H/Duu8qgVp9/DvzP/wDf/z7Qu7cy8NXGjcDMmcqBevdWRg3929+A4mL1I4ZERF2URyHq/PnzsNlsMJlMqvUmkwkWi8XtNhaL5Zr1Ha+e7PPIkSNYtGgRpk2b5ra8pqYGK1aswJQpU1TrBw0ahKVLl2LNmjV48803YbfbkZSUhIqKiiY+MZCbmwuj0ehcIiMjm6xL1Nb8/JTuT88+CxQWKpMlv/EGkJWlPJBXUQH8/e/KsAlhYUqXqGXLGs+HrOLvr/Ri9yRU9erFUEVEXV6gtxvgqdOnT+PBBx/EQw89hMcff9xtnffeew8XLlxAdna2an1iYiISExOdPyclJWHIkCF45ZVX8Kc//cntvmbNmoUZM2Y4f66urmaQIp9hMgE//7my1NQAH3/sGpPq9GngvfeUxc9PyT6OQT5vvfW7yZLdcYQqR7Cy25XLX44n/7ZuVcZm2LhRWQCge3elR7zj6b8771R6zhMRdWIe/SsXFhaGgIAAVFZWqtZXVlbCbDa73cZsNl+zvuO1srIS4eHhqjqxsbGq7c6cOYOUlBQkJSXh1VdfbbKdr7/+OsaNG9fo6tbVgoKCcMcdd+DIkSNN1tHr9dDr9dfcD5EvMBiUK1BjxwIvv6
xMkOwYPmH3buXKVWGhchUrOto1fMK99zYz2Lm/P3D77cry5JNNh6oNG5QFYKgioi7Bo9t5Op0OcXFxKCgocK6z2+0oKChQXeFpKDExUVUfADZt2uSsHx0dDbPZrKpTXV2N4uJi1T5Pnz6N0aNHIy4uDsuWLYN/E4PmlJeXY/PmzY1u5bljs9mwb98+VXgj6gz8/JTckpOj9JmqqACWLFECll4PlJcrfazuvx+48Ubg4YeBFSuAr79uwc4doerJJ5XLXOfPA3v2AC++CIwfD4SGKrf/NmwAnnkGSEhQbgmmpytjOOzaxdt/RNQ5eNoTfeXKlaLX62X58uVy8OBBmTp1qoSGhjqHCpg4caLMnDnTWX/79u0SGBgoCxYskEOHDklOTo7bIQ5CQ0NlzZo1snfvXhk/frxqiIOKigoZOHCgjBkzRioqKlRDFFztueeek4iICKmvr29U9oc//ME5TEJJSYk88sgjYjAY5MCBAy3+/Hw6jzq6ixdF3n9f5Oc/F+nTR/1QXkCAyL33iixYIFJWpvEA9fUipaUiL74oMn68SGho46f/evQQGTtWZN48kZ07Ra5cab0PSETkhk8McSAismjRIunXr5/odDqJj4+XoqIiZ1lycrJqbCYRkVWrVklMTIzodDoZOnSorF+/XlVut9tl9uzZYjKZRK/Xy5gxY6Sswb/gy5YtEwBul4ZsNpvcdNNN8uyzz7pt91NPPeVst8lkkrFjx8ru3bs9+uwMUdSZ2GzKCAbPPisybFjjrBMTI/Kb34hs3XodOcfTULVrF0MVEbW6tvj+5rQvHuK0L9SZHT/u6pi+ZYt6IPPevZXbgRkZQFoaYDRqPIjNpoyo7uhTtW0bUFWlrtOzp7pPVWws+1QR0XXh3Hk+gCGKuorqauCDD5RAtX69ur9UYKCSbRyd06Ojr+NAV4eqrVuVmZobujpU3XEHEBBwHQcloq6GIcoHMERRV1RfrzzZ53jar6xMXX7bba7hE+LjrzFZcks4QtXmza4rVe5C1b33qq9UMVQR0TUwRPkAhigi4PBhV6D69FMl9zj06aM8iJeZqTz912AOcG1sNmXwz4a3/xiqiMhDDFE+gCGKSO3rr5UxN9euVUY1aDgzkl4P3HefEqjGjQNuuqkVDtiSUGU0um7/paQoQzIwVBF1aQxRPoAhiqhpdXXAJ5+4OqcfO6Yuv+MOJVBlZCjjWDU5aronbDZlnKqGoerqOS6NRvWVKoYqoi6HIcoHMEQRtYwIcOiQcoUqL0/pU9XwX5uICFfH9PvuA4KDW+nADFVE5AZDlA9giCLS5tw5ID9fCVQffABcuuQqCwlR+k9lZCi3/ZqZsckzjlDl6Kj+ySeNQ1VoqDpUDR/OUEXUyTBE+QCGKKLrV1Oj5BlH5/SKCleZn5/yhJ/jab/bbmul234O9fWNr1RduKCuw1BF1OkwRPkAhiii1iWi9BN33Pb77DN1ef/+rn5UycnNTJashaehKiVFCVXXNY4DEbU3higfwBBF1LbOnAHWrVMC1UcfKVetHHr0UEZLz8xURk+/4YY2aEB9PVBa6gpVn3zSOFT16tX4ShVDFZFPY4jyAQxRRO3n22+BggLlKtW6dYDF4irz9weSklxXqQYNauXbfg4MVUSdAkOUD2CIIvIOu1251ecYPuHzz9Xlt9zietpv5Mg2nGrPEaoadlS/eFFdp1cv5d6jI1QNG8ZQReRlDFE+gCGKyDecOOG67ffxx+rJknv1Ar73PSVQPfig0qWpzdTXA7t3q69UMVQR+RyGKB/AEEXkey5cAD78UAlU69YBX33lKgsMVO60OW773XxzGzemJaGqd291R/XbbmOoImpjDFE+gCGKyLfZbEBRkWv4hEOH1OW33uoKVAkJ7TBywZUr6lD16afuQ1XDK1UMVUStjiHKBzBEEXUsR464+lFt26aeLDksTBncMyMDeOABoHv3dmjQ1aHqk0/UI48CDFVEbYAhygcwRBF1XN98o0yWnJenTJ
ZcVeUq0+mU6WccndMjI9upUY5Q5eio/umnjUPVDTeoQ9XQoQxVRB5iiPIBDFFEncOVK0pecdz2O3pUXR4b6xo1/c472zGzXLkClJSob/8xVBFdN4YoH8AQRdT5iABffOG67bdjhzKkgkN4uOu235gxylx/7cbTUJWSonT8YqgiUmGI8gEMUUSd3/nzrsmSN25U9wMPDgZSU12TJYeHt3PjrlxRBsxqGKq+/VZdJyxMfaWKoYqIIcoXMEQRdS21tcDWra65/U6eVJffdZfrab/hw9to1PRrYagiahGGKB/AEEXUdYkA+/a5AtXOneryfv1cHdNHjwb0ei80sq5Ouf3n6Ki+fbv7UOUIVI5Q1e7pj6h9MUT5AIYoInI4exZYv14JVJs2AZcvu8q6d1cmS87IUCZLvvFGLzWyrk59pcpdqLrxxsZXqhiqqJNhiPIBDFFE5M7ly8pkyY7O6WfPusr8/YHERNdVqiFDvJhRrg5Vn36qTn+AOlSlpHi5wUStgyHKBzBEEVFz7HZl6CfH8Al79qjLBwxwDZ8wciQQFOSVZirq6oBdu9RXqtyFqoa3/xiqqANiiPIBDFFE5KlTp5Q5/dauVSZLrqtzlRmNymTJmZnKZMm9enmvnQBaFqr69FHf/mOoog6AIcoHMEQR0fW4eFHpP7V2rdKf6ssvXWUBAcCoUa6n/QYO9F47nRyhytFRfccO96Gq4ZWqwYMZqsjnMET5AIYoImotNhtQXOzqR3XggLp8yBBXP6rExHaYLLklamsbX6mqqVHXYagiH8QQ5QMYooiorRw75gpUW7cC9fWusrAw5Sm/jAzlqb8ePbzXTpWWhCqTSR2qBg1iqKJ2xxDlAxiiiKg9WK2uyZLz85XJkx2CgpSH5hxXqfr39147G6mtVQbQcoSqHTsYqsgnMET5AIYoImpv9fXKBR7H036HD6vLhw93Pe03YoSPDUbOUEU+giHKBzBEEZG3lZW5AtX27erJks1mID1dCVSpqe08WXJLOEJVw47qtbXqOmazOlTFxDBU0XVjiPIBDFFE5Eu++ko9WfKFC64ygwEYM0YJVOPGARER3mtnk2pqGl+pYqiiNsAQ5QMYoojIV9XVAdu2ueb2O35cXR4X5xo+ITbWR3NIS0JVeLg6VN1yi49+GPIlDFE+gCGKiDoCEWD/ftfTfsXFyjqHm25ydUxPSVGuWvmkmhql8Y5QVVjIUEWaMET5AIYoIuqIKitdkyV/+KF6DuJu3YAHHlACVXq6MsyTz2KoIo0YonwAQxQRdXSXLyv9uh1XqU6fdpX5+QF33+162u/WW308fzhClaOjemGhel4dQOkM1jBUDRzo4x+K2gJDlA9giCKizkQEKC11Pe23e7e6PDraFahGjQJ0Ou+0s8UuX258pYqhisAQ5RMYooioMzt92jVZckGB+k5Zz57KJMmZmcqkyb17e6+dLdaSUNW3rzpUDRjAUNUJtcX3t6Yh2RYvXoyoqCgYDAYkJCRg586d16y/evVqDB48GAaDAcOGDUN+fr6qXEQwZ84chIeHIzg4GKmpqTjcYDS548ePY8qUKYiOjkZwcDAGDBiAnJwc1DX4Qzh+/Dj8/PwaLUVFRR61hYioK+vbF5g2Tek/9dVXwHvvAT//udJPqroaWLUK+OlPXdPjvfAC8J//eLvV1xAcrDT0+eeVEFVVBXz8MTBnDnDvvcqltdOngRUrgMcfV/pPRUYqH/L114EjR9Q98okaEg+tXLlSdDqdLF26VA4cOCCPP/64hIaGSmVlpdv627dvl4CAAJk3b54cPHhQnnvuOQkKCpJ9+/Y568ydO1eMRqO8//778vnnn0tmZqZER0fL5cuXRURkw4YN8rOf/Uw++OADOXr0qKxZs0b69OkjTz/9tHMf5eXlAkA++ugjOXv2rHOpq6vzqC3NsVqtAkCsVqunvzoiog7LZhMpLBR59lmRYcNElGThWgYNEvnNb0S2bhW5csXbrfXAt9+KfPyxyJw5Iv
feK6LTNf5wffuK/OQnIq+9JnLkiIjd7u1WkwZt8f3tcYiKj4+X6dOnO3+22WwSEREhubm5butPmDBB0tPTVesSEhJk2rRpIiJit9vFbDbL/PnzneVVVVWi1+vlrbfearId8+bNk+joaOfPjhBVWlra5DbNtaUlGKKIiETKy0UWLhS5/36RoCB15ujdW+SnPxVZtUqkw/1T6QhVs2eLjBrV+MMBIjfdpHzA119nqOpA2uL726PbeXV1dSgpKUFqaqpznb+/P1JTU1FYWOh2m8LCQlV9AEhLS3PWLy8vh8ViUdUxGo1ISEhocp8AYLVa0dvNDfnMzEz06dMHI0eOxNq1az1qizu1tbWorq5WLUREXV1UFPDrXyvDJZw/r9zmmzhR6Sf19dfAm28CEyYAYWHA/fcDCxcC5eXebnULBAcrA2f98Y/KyKVVVUrnsNmzlZ71QUFARYXyAR97TOmU3q+f8uHfeAM4epS3/7oQj0LU+fPnYbPZYDKZVOtNJhMsFovbbSwWyzXrO1492eeRI0ewaNEiTJs2zbmue/fueOGFF7B69WqsX78eI0eORFZWlipINdcWd3Jzc2E0Gp1LZGRkk3WJiLqinj2Bhx4C/vlPZTyqbduA3/xGmUf4yhXgo4+AJ58Ebr4ZGDYM+P3vgaIi9Zx/PiskBLjvPs9CVf/+wKRJwNKlwLFjDFWdWKC3G+Cp06dP48EHH8RDDz2Exx9/3Lk+LCwMM2bMcP5811134cyZM5g/fz4yMzM1H2/WrFmq/VZXVzNIERE1ITBQyRajRgHz5yudzh3jUX36qTKK+v79wF//qnROHzdOGULh/vuVQT99niNU3Xef8vO33ypP/Dme/isuBk6dAv71L2UBlI7qDZ/+i47m03+dhEchKiwsDAEBAaisrFStr6yshNlsdruN2Wy+Zn3Ha2VlJcLDw1V1YmNjVdudOXMGKSkpSEpKwquvvtpsexMSErBp06YWt8UdvV4PvV7f7LGIiKixmBjg6aeV5euvgQ0blEC1YQNw7pxysWbpUkCvVyZLzshQgtVNN3m75S0UEqI0fMwY5WdPQ1VKinJvlKGqQ/Lodp5Op0NcXBwKCgqc6+x2OwoKCpCYmOh2m8TERFV9ANi0aZOzfnR0NMxms6pOdXU1iouLVfs8ffo0Ro8ejbi4OCxbtgz+/s03fc+ePapg1lxbiIio7fTuDfzkJ8DKlcCXX7pu80VHK+NR5ecDv/iFkjHi4pRRCUpKOtjdMEeo+tOfgE8+UW7/ffSRcg/znnuU23+OUDVlinKPMyoKyM4Gli1TOo51qA/cxXnaE33lypWi1+tl+fLlcvDgQZk6daqEhoaKxWIREZGJEyfKzJkznfW3b98ugYGBsmDBAjl06JDk5OS4HeIgNDRU1qxZI3v37pXx48erhjioqKiQgQMHypgxY6SiokI1hIHD8uXL5f/+7//k0KFDcujQIfnLX/4i/v7+snTpUo/a0hw+nUdE1LrsdpH9+0Vyc0WSkkT8/BqPMDBtmsi6dcrDcx3axYsimzaJ/P73Ivfc4/7pv379RCZNElm6VOTYMW+3uNPwiSEOREQWLVok/fr1E51OJ/Hx8VJUVOQsS05OluzsbFX9VatWSUxMjOh0Ohk6dKisX79eVW6322X27NliMplEr9fLmDFjpKyszFm+bNkyAeB2cVi+fLkMGTJEQkJCpGfPnhIfHy+rV69u1Pbm2tIchigiorZVWSmybJnI978v0q2bOl+EhIiMHy/yxhsi3/2/e8d2dagKDGwcqvr3F8nOVn4p5eXebW8H1hbf35z2xUOc9oWIqP3U1Chdi9auVfpSVVS4yvz8gPh4ZRqajAzgtts6QdeiS5eAHTtcfap27gTq69V1+vdXd1SPimrvVnZInDvPBzBEERF5hwjw+eeuQPXZZ+ryqCglTGVkAMnJHWCy5JbwJFSlpCiv/fu3fzs7AIYoH8AQRUTkG86cUSZLzstT+m7X1LjKevRQJkvOyA
DGjgVuuMF77WxVLQlVUVHqK1UMVQAYonwCQxQRke/59lslSDnGpGo4mo2/v/JgXEaGcutv0CDvtbPVXbyoDlW7djFUNYEhygcwRBER+Ta7XbnVl5en3Prbu1ddfsstrkB1zz3KAKGdRktCVXS0OlT169fuzfQGhigfwBBFRNSxnDih3PZbuxbYvFmZisahVy/ge99TAtWDDwJGo/fa2SYuXgS2b1eHKptNXaeLhCqGKB/AEEVE1HFduKBMmrx2LbB+PfDVV66ywEDg3ntdT/vdfLP32tlmPAlVjo7qnWSqM4YoH8AQRUTUOdhsykTIjqf9Dh1Slw8d6nraLyEBCAjwTjvb1IULjW//XR2qbr5ZfaWqg4YqhigfwBBFRNQ5HTni6pi+bZs6S9x4I5CergSqBx4Aunf3Xjvb1IUL6itVn33WaUIVQ5QPYIgiIur8vvkG2LhRCVT5+YDV6irT6YD77nNdpeogGUKbloSqAQPUocpHZ49miPIBDFFERF3LlSvAp5+6nvY7elRdHhvretrvzjuVIRU6repqdagqKekwoYohygcwRBERdV0iwBdfuAJVYaEypIJDeDgwbpwSqMaMAYKDvdfWdnF1qPrsM/UvBHCFqpQUZSh5L4UqhigfwBBFREQO588rt/vWrgU++EB5+M0hOBhITVUC1bhxgNnsvXa2m5aEqoED1Veq+vZtp6YxRHkdQxQREblTW6vkBkfn9JMn1eV33eUaPmH48E4wWXJLVFcr90Ib3v7zUqhiiPIBDFFERNQcEWWkdEeg2rlTXd6vn6tj+ujRgF7vlWa2v5aEqltucQWq5ORWC1UMUT6AIYqIiDx19qwyuGdeHrBpE3D5squse3cgLU0JVOnpQFiY99rZ7qxWdajavfvaoWr0aCAiQtOhGKJ8AEMUERFdj8uXgYIC11Wqs2ddZf7+QGKi62m/wYO7yG0/B09ClaOjegtDFUOUD2CIIiKi1mK3KznB8bTfnj3q8gEDXIFq5EggKMgrzfSeloSqmBj17b8mQhVDlA9giCIiorZy8qQyWXJeHvDxx0BdnassNFSZLDkjQ5ksuVcvrzXTe6qq1KGqtPTaoWr0aGXcCTBE+QSGKCIiag8XLij9p/LylP5UX37pKgsIUCZLdnROHzjQe+30qqtD1e7dSq/+hgYNAkaPRnV8PIxTpjBEeRNDFBERtTebDSgudvWjOnBAXT5kiOu23913d9LJkluiqgr45BP1larvYk41ACPAEOVNDFFERORtx465AtXWrUB9vassLAwYO1YJVWlpQI8e3mun1zUIVdWbN8NYWsoQ5U0MUURE5EuqqpTR0teuBTZsUCZPdtDplG5Bjtt+/ft7q5Xexz5RPoAhioiIfFV9vTLrytq1ylWqw4fV5cOHu0ZNHzGik0+WfBWGKB/AEEVERB1FWZkrUG3frn6QzWxW5vTLyFDm+AsJ8V472wNDlA9giCIioo7oq6+UyZLz8oCNG5Wn/xwMBiVIZWQowUrjoOA+jSHKBzBEERFRR1dXp3RId3ROP35cXT5ihKsfVWxs5xg1nSHKBzBEERFRZyIC7N/vGjV95071UEuRkcrVqcxMpZO6weC1pl4XhigfwBBFRESdWWWlMrjn2rXKYJ/ffusq69YNeOABJVClpwM33ui9dnqKIcoHMEQREVFXcfkysHmzq3P6mTOuMj8/ZWBPx9N+t97q27f9GKJ8AEMUERF1RSLKAOCOQLV7t7o8OtoVqO691/cmS2aI8gEMUUREREBFhWuy5IICoLbWVdazp2uy5O99D+jd23vtdGCI8gEMUURERGqXLrkmS163Djh3zlUWEACMHOma2++WW7zTRoYoH8AQRURE1DS7XXnCz/G03/796vJBg1yBKjERCAxsn3YxRPkAhigiIqKWO37cFai2bgWuXHGV9e6tTJacmalMltyWX6sMUT6AIYqIiEib6mrXZMn5+cDXX7vKgoKA5GRX5/SoqNY+NkOU1zFEERERXb
/6eqCw0PW0X1mZuvy221yBKj7++idLZojyAQxRREREre8//3FNQ/Ppp4DN5iozmZTBPTMygPvvVwb99BRDlA9giCIiImpbX38NbNigBKoNG5TbgA56PTBmjGtuv759W7ZPhigfwBBFRETUfurqgE8+cXVOLy9Xl995p+tpvzvuaHrU9Lb4/tZ0h3Hx4sWIioqCwWBAQkICdu7cec36q1evxuDBg2EwGDBs2DDk5+erykUEc+bMQXh4OIKDg5GamorDhw87y48fP44pU6YgOjoawcHBGDBgAHJyclBXV+ess2XLFowfPx7h4eHo1q0bYmNjsWLFCtVxli9fDj8/P9Vi6KgzKRIREXUBOp1y5emll4CjR5UhE3JzleER/PyUkdP/8AcgLk6ZLPmJJ5RO6zU1bd82j0PU22+/jRkzZiAnJwe7d+/G7bffjrS0NJxrOLJWAzt27MCjjz6KKVOmoLS0FFlZWcjKysL+BgNHzJs3DwsXLsSSJUtQXFyMbt26IS0tDTXf/Qa++OIL2O12vPLKKzhw4ABefPFFLFmyBM8++6zqOMOHD8c777yDvXv3YvLkyZg0aRLWrVunak/Pnj1x9uxZ53LixAlPfwVERETkBX5+wNChwMyZwI4dgMUCLF0KfP/7Sj+p06eBV15R+k/dcAOQlaWUV1a2UYPEQ/Hx8TJ9+nTnzzabTSIiIiQ3N9dt/QkTJkh6erpqXUJCgkybNk1EROx2u5jNZpk/f76zvKqqSvR6vbz11ltNtmPevHkSHR19zbaOHTtWJk+e7Px52bJlYjQar7lNc6xWqwAQq9V6XfshIiKi1nP5skh+vsgvfiFy000iymx/yuLnJxIX1/rf3x5diaqrq0NJSQlSU1Od6/z9/ZGamorCwkK32xQWFqrqA0BaWpqzfnl5OSwWi6qO0WhEQkJCk/sEAKvVit7NTMbjrs7FixfRv39/REZGYvz48Thw4MA191FbW4vq6mrVQkRERL7FYFDm6Xv5ZeDkSWWy5D/8ARgxQolSJSWtf0yPQtT58+dhs9lgMplU600mEywWi9ttLBbLNes7Xj3Z55EjR7Bo0SJMmzatybauWrUKu3btwuTJk53rBg0ahKVLl2LNmjV48803YbfbkZSUhIqKiib3k5ubC6PR6FwiIyObrEtERETe5+cHxMYCc+YAu3Ypt/lefrn1j3OdQ1e1v9OnT+PBBx/EQw89hMcff9xtnc2bN2Py5Ml47bXXMHToUOf6xMRETJo0CbGxsUhOTsa7776LG2+8Ea+88kqTx5s1axasVqtzOXXqVKt/JiIiImo7ERHAT37S+vv1KESFhYUhICAAlVf10KqsrITZbHa7jdlsvmZ9x2tL9nnmzBmkpKQgKSkJr776qtvjbd26FRkZGXjxxRcxadKka36eoKAg3HHHHThy5EiTdfR6PXr27KlaiIiIiDwKUTqdDnFxcSgoKHCus9vtKCgoQGJiotttEhMTVfUBYNOmTc760dHRMJvNqjrV1dUoLi5W7fP06dMYPXo04uLisGzZMvi7Gf99y5YtSE9Px9/+9jdMnTq12c9js9mwb98+hIeHN1uXiIiIqKFATzeYMWMGsrOzMWLECMTHx+Oll17CpUuXnH2PJk2ahL59+yI3NxcA8OSTTyI5ORkvvPAC0tPTsXLlSnz22WfOK0l+fn546qmn8Oc//xm33HILoqOjMXv2bERERCArKwuAK0D1798fCxYswJdffulsj+Nq1ebNmzFu3Dg8+eST+OEPf+jsT6XT6Zydy//4xz/i7rvvxsCBA1FVVYX58+fjxIkTeOyxxzT++oiIiKjL0vJI36JFi6Rfv36i0+kkPj5eioqKnGXJycmSnZ2tqr9q1SqJiYkRnU4nQ4cOlfXr16vK7Xa7zJ49W0wmk+j1ehkzZoyUlZU5y5ctWyYA3C4O2dnZbsuTk5OddZ566ilnu00mk4wdO1Z2797t0WfnEAdEREQdT1t8f3PaFw9x2hciIqKOx2
emfSEiIiLq6hiiiIiIiDRgiCIiIiLSgCGKiIiISAOGKCIiIiINGKKIiIiINGCIIiIiItKAIYqIiIhIA4YoIiIiIg0YooiIiIg0YIgiIiIi0oAhioiIiEgDhigiIiIiDRiiiIiIiDRgiCIiIiLSgCGKiIiISAOGKCIiIiINGKKIiIiINGCIIiIiItKAIYqIiIhIA4YoIiIiIg0YooiIiIg0YIgiIiIi0oAhioiIiEgDhigiIiIiDRiiiIiIiDRgiCIiIiLSgCGKiIiISAOGKCIiIiINGKKIiIiINGCIIiIiItKAIYqIiIhIA4YoIiIiIg0YooiIiIg0YIgiIiIi0oAhioiIiEgDhigiIiIiDRiiiIiIiDRgiCIiIiLSQFOIWrx4MaKiomAwGJCQkICdO3des/7q1asxePBgGAwGDBs2DPn5+apyEcGcOXMQHh6O4OBgpKam4vDhw87y48ePY8qUKYiOjkZwcDAGDBiAnJwc1NXVqfazd+9ejBo1CgaDAZGRkZg3b57HbSEiIiJqCY9D1Ntvv40ZM2YgJycHu3fvxu233460tDScO3fObf0dO3bg0UcfxZQpU1BaWoqsrCxkZWVh//79zjrz5s3DwoULsWTJEhQXF6Nbt25IS0tDTU0NAOCLL76A3W7HK6+8ggMHDuDFF1/EkiVL8Oyzzzr3UV1djQceeAD9+/dHSUkJ5s+fj+effx6vvvqqR20hIiIiahHxUHx8vEyfPt35s81mk4iICMnNzXVbf8KECZKenq5al5CQINOmTRMREbvdLmazWebPn+8sr6qqEr1eL2+99VaT7Zg3b55ER0c7f3755ZelV69eUltb61z3zDPPyKBBg1rclpawWq0CQKxWa4u3ISIiIu9qi+9vj65E1dXVoaSkBKmpqc51/v7+SE1NRWFhodttCgsLVfUBIC0tzVm/vLwcFotFVcdoNCIhIaHJfQKA1WpF7969Vce59957odPpVMcpKyvDN99806K2EBEREbWURyHq/PnzsNlsMJlMqvUmkwkWi8XtNhaL5Zr1Ha+e7PPIkSNYtGgRpk2b1uxxGh6juba4U1tbi+rqatVCRERE1OGezjt9+jQefPBBPPTQQ3j88cfb/Hi5ubkwGo3OJTIyss2PSURERL7PoxAVFhaGgIAAVFZWqtZXVlbCbDa73cZsNl+zvuO1Jfs8c+YMUlJSkJSUpOowfq3jNDxGc21xZ9asWbBarc7l1KlTTdYlIiKirsOjEKXT6RAXF4eCggLnOrvdjoKCAiQmJrrdJjExUVUfADZt2uSsHx0dDbPZrKpTXV2N4uJi1T5Pnz6N0aNHIy4uDsuWLYO/v7rpiYmJ2LZtG65cuaI6zqBBg9CrV68WtcUdvV6Pnj17qhYiIiIij5/OW7lypej1elm+fLkcPHhQpk6dKqGhoWKxWEREZOLEiTJz5kxn/e3bt0tgYKAsWLBADh06JDk5ORIUFCT79u1z1pk7d66EhobKmjVrZO/evTJ+/HiJjo6Wy5cvi4hIRUWFDBw4UMaMGSMVFRVy9uxZ5+JQVVUlJpNJJk6cKPv375eVK1dKSEiIvPLKKx61pTl8Oo+IiKjjaYvvb49DlIjIokWLpF+/fqLT6SQ+Pl6KioqcZcnJyZKdna2qv2rVKomJiRGdTidDhw6V9evXq8rtdrvMnj1bTCaT6PV6GTNmjJSVlTnLly1bJgDcLg19/vnnMnLkSNHr9dK3b1+ZO3duo7Y315bmMEQRERF1PG3x/e0nIuKtq2AdUXV1NYxGI6xWK2/tERERdRBt8f3d4Z7OIyIiIvIFDFFEREREGjBEEREREWnAEEVERESkAUMUERERkQYMUUREREQaMEQRERERacAQRURERKQBQxQRERGRBgxRRERERBowRBERERFpwBBFREREpAFDFBEREZEGDFFEREREGjBEEREREWnAEEVERESkAUMUERERkQYMUUREREQaMEQRERERacAQRURERKQBQxQRERGRBg
xRRERERBowRBERERFpwBBFREREpAFDFBEREZEGDFFEREREGjBEEREREWnAEEVERESkAUMUERERkQYMUUREREQaMEQRERERacAQRURERKQBQxQRERGRBgxRRERERBowRBERERFpwBBFREREpAFDFBEREZEGDFFEREREGjBEEREREWnAEEVERESkgaYQtXjxYkRFRcFgMCAhIQE7d+68Zv3Vq1dj8ODBMBgMGDZsGPLz81XlIoI5c+YgPDwcwcHBSE1NxeHDh1V1/vKXvyApKQkhISEIDQ1tdIzly5fDz8/P7XLu3DkAwJYtW9yWWywWLb8GIiIi6sI8DlFvv/02ZsyYgZycHOzevRu333470tLSnEHlajt27MCjjz6KKVOmoLS0FFlZWcjKysL+/fuddebNm4eFCxdiyZIlKC4uRrdu3ZCWloaamhpnnbq6Ojz00EP4xS9+4fY4Dz/8MM6ePata0tLSkJycjD59+qjqlpWVqepdXU5ERETULPFQfHy8TJ8+3fmzzWaTiIgIyc3NdVt/woQJkp6erlqXkJAg06ZNExERu90uZrNZ5s+f7yyvqqoSvV4vb731VqP9LVu2TIxGY7PtPHfunAQFBck///lP57rNmzcLAPnmm2+a3b4pVqtVAIjVatW8DyIiImpfbfH97dGVqLq6OpSUlCA1NdW5zt/fH6mpqSgsLHS7TWFhoao+AKSlpTnrl5eXw2KxqOoYjUYkJCQ0uc+W+Oc//4mQkBD86Ec/alQWGxuL8PBw3H///di+ffs191NbW4vq6mrVQkRERORRiDp//jxsNhtMJpNqvclkarJfkcViuWZ9x6sn+2yJN954Az/+8Y8RHBzsXBceHo4lS5bgnXfewTvvvIPIyEiMHj0au3fvbnI/ubm5MBqNziUyMlJzm4iIiKjzCPR2A9pCYWEhDh06hH/961+q9YMGDcKgQYOcPyclJeHo0aN48cUXG9V1mDVrFmbMmOH8ubq6mkGKiIiIPLsSFRYWhoCAAFRWVqrWV1ZWwmw2u93GbDZfs77j1ZN9Nuf1119HbGws4uLimq0bHx+PI0eONFmu1+vRs2dP1UJERETk0ZUonU6HuLg4FBQUICsrCwBgt9tRUFCAX/3qV263SUxMREFBAZ566innuk2bNiExMREAEB0dDbPZjIKCAsTGxgJQrvYUFxc3+STetVy8eBGrVq1Cbm5ui+rv2bMH4eHhLd6/iDjbSERERB2D43vb8T3eKjztib5y5UrR6/WyfPlyOXjwoEydOlVCQ0PFYrGIiMjEiRNl5syZzvrbt2+XwMBAWbBggRw6dEhycnIkKChI9u3b56wzd+5cCQ0NlTVr1sjevXtl/PjxEh0dLZcvX3bWOXHihJSWlsof/vAH6d69u5SWlkppaalcuHBB1b7XX39dDAaD2yfwXnzxRXn//ffl8OHDsm/fPnnyySfF399fPvrooxZ//qNHjwoALly4cOHChUsHXI4ePdri7/zmeNwn6uGHH8aXX36JOXPmwGKxIDY2Fhs3bnR2DD958iT8/V13CZOSkvB///d/eO655/Dss8/illtuwfvvv4/bbrvNWed3v/sdLl26hKlTp6KqqgojR47Exo0bYTAYnHXmzJmDf/zjH86f77jjDgDA5s2bMXr0aOf6N954Az/4wQ/cDshZV1eHp59+GqdPn0ZISAiGDx+Ojz76CCkpKS3+/L1793Z+TqPR2OLtqPU5+qedOnWKt1m9jOfCd/Bc+BaeD99htVrRr18/5/d4a/ATac3rWp1fdXU1jEYjrFYr/yC8jOfCd/Bc+A6eC9/C8+E72uJccO48IiIiIg0YooiIiIg0YIjykF6vR05ODvR6vbeb0uXxXPgOngvfwXPhW3g+fEdbnAv2iSIiIiLSgFeiiIiIiDRgiCIiIiLSgCGKiIiISAOGKCIiIiINGKLcWLx4MaKiomAwGJCQkICdO3des/7q1asxePBgGAwGDBs2DPn5+e3U0s7Pk3Px2m
uvYdSoUejVqxd69eqF1NTUZs8dtZynfxcOK1euhJ+fn3O+Tbp+np6LqqoqTJ8+HeHh4dDr9YiJieG/U63E03Px0ksvYdCgQQgODkZkZCT+67/+CzU1Ne3U2s5r27ZtyMjIQEREBPz8/PD+++83u82WLVtw5513Qq/XY+DAgVi+fLnnB261CWQ6iZUrV4pOp5OlS5fKgQMH5PHHH5fQ0FCprKx0W3/79u0SEBAg8+bNk4MHD8pzzz3XaG5A0sbTc/HjH/9YFi9eLKWlpXLo0CH52c9+JkajUSoqKtq55Z2Pp+fCoby8XPr27SujRo2S8ePHt09jOzlPz0Vtba2MGDFCxo4dK59++qmUl5fLli1bZM+ePe3c8s7H03OxYsUK0ev1smLFCikvL5cPPvhAwsPD5b/+67/aueWdT35+vvz+97+Xd999VwDIe++9d836x44dk5CQEJkxY4YcPHhQFi1aJAEBAbJx40aPjssQdZX4+HiZPn2682ebzSYRERGSm5vrtv6ECRMkPT1dtS4hIUGmTZvWpu3sCjw9F1err6+XHj16yD/+8Y+2amKXoeVc1NfXS1JSkrz++uuSnZ3NENVKPD0Xf//73+Xmm2+Wurq69mpil+HpuZg+fbrcd999qnUzZsyQe+65p03b2dW0JET97ne/k6FDh6rWPfzww5KWlubRsXg7r4G6ujqUlJQgNTXVuc7f3x+pqakoLCx0u01hYaGqPgCkpaU1WZ9aRsu5uNq3336LK1eutOpkk12R1nPxxz/+EX369MGUKVPao5ldgpZzsXbtWiQmJmL69OkwmUy47bbb8Ne//hU2m629mt0paTkXSUlJKCkpcd7yO3bsGPLz8zF27Nh2aTO5tNZ3d2BrNqqjO3/+PGw2G0wmk2q9yWTCF1984XYbi8Xitr7FYmmzdnYFWs7F1Z555hlEREQ0+kMhz2g5F59++ineeOMN7Nmzpx1a2HVoORfHjh3Dxx9/jJ/85CfIz8/HkSNH8Mtf/hJXrlxBTk5OezS7U9JyLn784x/j/PnzGDlyJEQE9fX1eOKJJ/Dss8+2R5Opgaa+u6urq3H58mUEBwe3aD+8EkWd0ty5c7Fy5Uq89957MBgM3m5Ol3LhwgVMnDgRr732GsLCwrzdnC7PbrejT58+ePXVVxEXF4eHH34Yv//977FkyRJvN63L2bJlC/7617/i5Zdfxu7du/Huu+9i/fr1+NOf/uTtppFGvBLVQFhYGAICAlBZWalaX1lZCbPZ7HYbs9nsUX1qGS3nwmHBggWYO3cuPvroIwwfPrwtm9kleHoujh49iuPHjyMjI8O5zm63AwACAwNRVlaGAQMGtG2jOyktfxfh4eEICgpCQECAc92QIUNgsVhQV1cHnU7Xpm3urLSci9mzZ2PixIl47LHHAADDhg3DpUuXMHXqVPz+97+Hvz+va7SXpr67e/bs2eKrUACvRKnodDrExcWhoKDAuc5ut6OgoACJiYlut0lMTFTVB4BNmzY1WZ9aRsu5AIB58+bhT3/6EzZu3IgRI0a0R1M7PU/PxeDBg7Fv3z7s2bPHuWRmZiIlJQV79uxBZGRkeza/U9Hyd3HPPffgyJEjziALAP/5z38QHh7OAHUdtJyLb7/9tlFQcoRb4TS27arVvrs96/Pe+a1cuVL0er0sX75cDh48KFOnTpXQ0FCxWCwiIjJx4kSZOXOms/727dslMDBQFixYIIcOHZKcnBwOcdBKPD0Xc+fOFZ1OJ//+97/l7NmzzuXChQve+gidhqfn4mp8Oq/1eHouTp48KT169JBf/epXUlZWJuvWrZM+ffrIn//8Z299hE7D03ORk5MjPXr0kLfeekuOHTsmH374oQwYMEAmTJjgrY/QaVy4cEFKS0ultLRUAMh///d/S2lpqZw4cUJERGbOnCkTJ0501ncMcfDb3/5WDh06JIsXL+YQB61l0aJF0q9fP9HpdBIfHy9FRUXOsuTkZMnOzlbVX7VqlcTExIhOp5
OhQ4fK+vXr27nFnZcn56J///4CoNGSk5PT/g3vhDz9u2iIIap1eXouduzYIQkJCaLX6+Xmm2+Wv/zlL1JfX9/Ore6cPDkXV65ckeeff14GDBggBoNBIiMj5Ze//KV888037d/wTmbz5s1u//13/P6zs7MlOTm50TaxsbGi0+nk5ptvlmXLlnl8XD8RXkMkIiIi8hT7RBERERFpwBBFREREpAFDFBEREZEGDFFEREREGjBEEREREWnAEEVERESkAUMUERERkQYMUUREREQaMEQRERERacAQRURERKQBQxQRERGRBgxRRERERBr8f/TiguRCYmrOAAAAAElFTkSuQmCC"
|
| 1512 |
+
},
|
| 1513 |
+
"metadata": {},
|
| 1514 |
+
"output_type": "display_data"
|
| 1515 |
+
}
|
| 1516 |
+
],
|
| 1517 |
+
"execution_count": 21
|
| 1518 |
+
},
|
| 1519 |
+
{
|
| 1520 |
+
"metadata": {
|
| 1521 |
+
"ExecuteTime": {
|
| 1522 |
+
"end_time": "2024-05-04T02:59:34.541951Z",
|
| 1523 |
+
"start_time": "2024-05-04T02:59:34.527987Z"
|
| 1524 |
+
}
|
| 1525 |
+
},
|
| 1526 |
+
"cell_type": "code",
|
| 1527 |
+
"source": "",
|
| 1528 |
+
"id": "a1f0ed25b0e7353b",
|
| 1529 |
+
"outputs": [],
|
| 1530 |
+
"execution_count": 21
|
| 1531 |
+
}
|
| 1532 |
+
],
|
| 1533 |
+
"metadata": {
|
| 1534 |
+
"kernelspec": {
|
| 1535 |
+
"display_name": "Python 3 (ipykernel)",
|
| 1536 |
+
"language": "python",
|
| 1537 |
+
"name": "python3"
|
| 1538 |
+
},
|
| 1539 |
+
"language_info": {
|
| 1540 |
+
"codemirror_mode": {
|
| 1541 |
+
"name": "ipython",
|
| 1542 |
+
"version": 3
|
| 1543 |
+
},
|
| 1544 |
+
"file_extension": ".py",
|
| 1545 |
+
"mimetype": "text/x-python",
|
| 1546 |
+
"name": "python",
|
| 1547 |
+
"nbconvert_exporter": "python",
|
| 1548 |
+
"pygments_lexer": "ipython3",
|
| 1549 |
+
"version": "3.9.18"
|
| 1550 |
+
}
|
| 1551 |
+
},
|
| 1552 |
+
"nbformat": 4,
|
| 1553 |
+
"nbformat_minor": 5
|
| 1554 |
+
}
|
use.ipynb
ADDED
|
@@ -0,0 +1,999 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"id": "initial_id",
|
| 6 |
+
"metadata": {
|
| 7 |
+
"collapsed": true,
|
| 8 |
+
"ExecuteTime": {
|
| 9 |
+
"end_time": "2024-05-09T08:24:27.682520Z",
|
| 10 |
+
"start_time": "2024-05-09T08:24:23.650272Z"
|
| 11 |
+
}
|
| 12 |
+
},
|
| 13 |
+
"source": [
|
| 14 |
+
"# 导入库\n",
|
| 15 |
+
"import torch\n",
|
| 16 |
+
"from transformers import BertTokenizerFast"
|
| 17 |
+
],
|
| 18 |
+
"outputs": [],
|
| 19 |
+
"execution_count": 1
|
| 20 |
+
},
|
| 21 |
+
{
|
| 22 |
+
"metadata": {
|
| 23 |
+
"ExecuteTime": {
|
| 24 |
+
"end_time": "2024-05-09T08:24:27.871979Z",
|
| 25 |
+
"start_time": "2024-05-09T08:24:27.683484Z"
|
| 26 |
+
}
|
| 27 |
+
},
|
| 28 |
+
"cell_type": "code",
|
| 29 |
+
"source": "from utils import BertNerModel",
|
| 30 |
+
"id": "bed4b6400ac293d8",
|
| 31 |
+
"outputs": [],
|
| 32 |
+
"execution_count": 2
|
| 33 |
+
},
|
| 34 |
+
{
|
| 35 |
+
"metadata": {
|
| 36 |
+
"ExecuteTime": {
|
| 37 |
+
"end_time": "2024-05-09T08:24:27.886940Z",
|
| 38 |
+
"start_time": "2024-05-09T08:24:27.872977Z"
|
| 39 |
+
}
|
| 40 |
+
},
|
| 41 |
+
"cell_type": "code",
|
| 42 |
+
"source": [
|
| 43 |
+
"# 加载模型\n",
|
| 44 |
+
"with open('labels.txt', 'r') as f:\n",
|
| 45 |
+
" labels = [i.rstrip('\\n') for i in f]\n",
|
| 46 |
+
"labels"
|
| 47 |
+
],
|
| 48 |
+
"id": "614210e3e65da37f",
|
| 49 |
+
"outputs": [
|
| 50 |
+
{
|
| 51 |
+
"data": {
|
| 52 |
+
"text/plain": [
|
| 53 |
+
"['B-BANK',\n",
|
| 54 |
+
" 'E-BANK',\n",
|
| 55 |
+
" 'O',\n",
|
| 56 |
+
" 'B-COMMENTS_N',\n",
|
| 57 |
+
" 'E-COMMENTS_N',\n",
|
| 58 |
+
" 'B-COMMENTS_ADJ',\n",
|
| 59 |
+
" 'E-COMMENTS_ADJ',\n",
|
| 60 |
+
" 'B-PRODUCT',\n",
|
| 61 |
+
" 'E-PRODUCT',\n",
|
| 62 |
+
" 'I-PRODUCT',\n",
|
| 63 |
+
" 'I-COMMENTS_N',\n",
|
| 64 |
+
" 'I-BANK',\n",
|
| 65 |
+
" 'I-COMMENTS_ADJ',\n",
|
| 66 |
+
" 'B-product_name',\n",
|
| 67 |
+
" 'I-product_name',\n",
|
| 68 |
+
" 'B-time',\n",
|
| 69 |
+
" 'I-time',\n",
|
| 70 |
+
" 'E-time',\n",
|
| 71 |
+
" 'B-person_name',\n",
|
| 72 |
+
" 'I-person_name',\n",
|
| 73 |
+
" 'E-person_name',\n",
|
| 74 |
+
" 'E-product_name',\n",
|
| 75 |
+
" 'B-org_name',\n",
|
| 76 |
+
" 'I-org_name',\n",
|
| 77 |
+
" 'E-org_name',\n",
|
| 78 |
+
" 'B-location',\n",
|
| 79 |
+
" 'I-location',\n",
|
| 80 |
+
" 'E-location',\n",
|
| 81 |
+
" 'B-company_name',\n",
|
| 82 |
+
" 'I-company_name',\n",
|
| 83 |
+
" 'E-company_name',\n",
|
| 84 |
+
" 'B-GPE',\n",
|
| 85 |
+
" 'I-GPE',\n",
|
| 86 |
+
" 'E-GPE',\n",
|
| 87 |
+
" 'B-PER',\n",
|
| 88 |
+
" 'I-PER',\n",
|
| 89 |
+
" 'E-PER',\n",
|
| 90 |
+
" 'B-LOC',\n",
|
| 91 |
+
" 'I-LOC',\n",
|
| 92 |
+
" 'E-LOC',\n",
|
| 93 |
+
" 'B-ORG',\n",
|
| 94 |
+
" 'I-ORG',\n",
|
| 95 |
+
" 'E-ORG',\n",
|
| 96 |
+
" 'B-body',\n",
|
| 97 |
+
" 'E-body',\n",
|
| 98 |
+
" 'I-body',\n",
|
| 99 |
+
" 'B-symp',\n",
|
| 100 |
+
" 'E-symp',\n",
|
| 101 |
+
" 'I-symp',\n",
|
| 102 |
+
" 'B-chec',\n",
|
| 103 |
+
" 'E-chec',\n",
|
| 104 |
+
" 'I-chec',\n",
|
| 105 |
+
" 'B-dise',\n",
|
| 106 |
+
" 'I-dise',\n",
|
| 107 |
+
" 'E-dise',\n",
|
| 108 |
+
" 'B-cure',\n",
|
| 109 |
+
" 'I-cure',\n",
|
| 110 |
+
" 'E-cure',\n",
|
| 111 |
+
" 'B-身体部位',\n",
|
| 112 |
+
" 'I-身体部位',\n",
|
| 113 |
+
" 'B-检查和检验',\n",
|
| 114 |
+
" 'E-检查和检验',\n",
|
| 115 |
+
" 'I-检查和检验',\n",
|
| 116 |
+
" 'E-身体部位',\n",
|
| 117 |
+
" 'B-症状和体征',\n",
|
| 118 |
+
" 'E-症状和体征',\n",
|
| 119 |
+
" 'I-症状和体征',\n",
|
| 120 |
+
" 'B-疾病和诊断',\n",
|
| 121 |
+
" 'I-疾病和诊断',\n",
|
| 122 |
+
" 'E-疾病和诊断',\n",
|
| 123 |
+
" 'B-治疗',\n",
|
| 124 |
+
" 'I-治疗',\n",
|
| 125 |
+
" 'E-治疗',\n",
|
| 126 |
+
" 'B-解剖部位',\n",
|
| 127 |
+
" 'E-解剖部位',\n",
|
| 128 |
+
" 'B-手术',\n",
|
| 129 |
+
" 'I-手术',\n",
|
| 130 |
+
" 'E-手术',\n",
|
| 131 |
+
" 'B-影像检查',\n",
|
| 132 |
+
" 'E-影像检查',\n",
|
| 133 |
+
" 'I-解剖部位',\n",
|
| 134 |
+
" 'B-药物',\n",
|
| 135 |
+
" 'E-药物',\n",
|
| 136 |
+
" 'I-药物',\n",
|
| 137 |
+
" 'B-实验室检验',\n",
|
| 138 |
+
" 'I-实验室检验',\n",
|
| 139 |
+
" 'E-实验室检验',\n",
|
| 140 |
+
" 'I-影像检查',\n",
|
| 141 |
+
" 'B-name',\n",
|
| 142 |
+
" 'I-name',\n",
|
| 143 |
+
" 'E-name',\n",
|
| 144 |
+
" 'B-address',\n",
|
| 145 |
+
" 'E-address',\n",
|
| 146 |
+
" 'B-organization',\n",
|
| 147 |
+
" 'E-organization',\n",
|
| 148 |
+
" 'B-game',\n",
|
| 149 |
+
" 'I-game',\n",
|
| 150 |
+
" 'E-game',\n",
|
| 151 |
+
" 'I-address',\n",
|
| 152 |
+
" 'B-scene',\n",
|
| 153 |
+
" 'I-scene',\n",
|
| 154 |
+
" 'E-scene',\n",
|
| 155 |
+
" 'B-book',\n",
|
| 156 |
+
" 'I-book',\n",
|
| 157 |
+
" 'E-book',\n",
|
| 158 |
+
" 'I-organization',\n",
|
| 159 |
+
" 'B-company',\n",
|
| 160 |
+
" 'I-company',\n",
|
| 161 |
+
" 'E-company',\n",
|
| 162 |
+
" 'B-position',\n",
|
| 163 |
+
" 'E-position',\n",
|
| 164 |
+
" 'I-position',\n",
|
| 165 |
+
" 'B-government',\n",
|
| 166 |
+
" 'I-government',\n",
|
| 167 |
+
" 'E-government',\n",
|
| 168 |
+
" 'B-movie',\n",
|
| 169 |
+
" 'I-movie',\n",
|
| 170 |
+
" 'E-movie',\n",
|
| 171 |
+
" 'B-bod',\n",
|
| 172 |
+
" 'I-bod',\n",
|
| 173 |
+
" 'E-bod',\n",
|
| 174 |
+
" 'B-dis',\n",
|
| 175 |
+
" 'I-dis',\n",
|
| 176 |
+
" 'E-dis',\n",
|
| 177 |
+
" 'B-sym',\n",
|
| 178 |
+
" 'I-sym',\n",
|
| 179 |
+
" 'E-sym',\n",
|
| 180 |
+
" 'B-pro',\n",
|
| 181 |
+
" 'I-pro',\n",
|
| 182 |
+
" 'E-pro',\n",
|
| 183 |
+
" 'B-ite',\n",
|
| 184 |
+
" 'I-ite',\n",
|
| 185 |
+
" 'E-ite',\n",
|
| 186 |
+
" 'B-mic',\n",
|
| 187 |
+
" 'I-mic',\n",
|
| 188 |
+
" 'E-mic',\n",
|
| 189 |
+
" 'B-dep',\n",
|
| 190 |
+
" 'E-dep',\n",
|
| 191 |
+
" 'B-dru',\n",
|
| 192 |
+
" 'I-dru',\n",
|
| 193 |
+
" 'E-dru',\n",
|
| 194 |
+
" 'I-dep',\n",
|
| 195 |
+
" 'B-equ',\n",
|
| 196 |
+
" 'I-equ',\n",
|
| 197 |
+
" 'E-equ',\n",
|
| 198 |
+
" 'B-Time',\n",
|
| 199 |
+
" 'I-Time',\n",
|
| 200 |
+
" 'E-Time',\n",
|
| 201 |
+
" 'B-Person',\n",
|
| 202 |
+
" 'B-Location',\n",
|
| 203 |
+
" 'I-Location',\n",
|
| 204 |
+
" 'E-Location',\n",
|
| 205 |
+
" 'E-Person',\n",
|
| 206 |
+
" 'B-Thing',\n",
|
| 207 |
+
" 'E-Thing',\n",
|
| 208 |
+
" 'B-Metric',\n",
|
| 209 |
+
" 'E-Metric',\n",
|
| 210 |
+
" 'I-Person',\n",
|
| 211 |
+
" 'I-Thing',\n",
|
| 212 |
+
" 'B-Organization',\n",
|
| 213 |
+
" 'I-Organization',\n",
|
| 214 |
+
" 'E-Organization',\n",
|
| 215 |
+
" 'I-Metric',\n",
|
| 216 |
+
" 'B-Abstract',\n",
|
| 217 |
+
" 'I-Abstract',\n",
|
| 218 |
+
" 'E-Abstract',\n",
|
| 219 |
+
" 'B-Physical',\n",
|
| 220 |
+
" 'I-Physical',\n",
|
| 221 |
+
" 'E-Physical',\n",
|
| 222 |
+
" 'B-Term',\n",
|
| 223 |
+
" 'I-Term',\n",
|
| 224 |
+
" 'E-Term',\n",
|
| 225 |
+
" 'B-ABstract',\n",
|
| 226 |
+
" 'I-ABstract',\n",
|
| 227 |
+
" 'E-ABstract',\n",
|
| 228 |
+
" 'B-HCCX',\n",
|
| 229 |
+
" 'E-HCCX',\n",
|
| 230 |
+
" 'I-HCCX',\n",
|
| 231 |
+
" 'B-MISC',\n",
|
| 232 |
+
" 'E-MISC',\n",
|
| 233 |
+
" 'B-HPPX',\n",
|
| 234 |
+
" 'E-HPPX',\n",
|
| 235 |
+
" 'I-HPPX',\n",
|
| 236 |
+
" 'I-MISC',\n",
|
| 237 |
+
" 'B-XH',\n",
|
| 238 |
+
" 'I-XH',\n",
|
| 239 |
+
" 'E-XH',\n",
|
| 240 |
+
" 'B-EQU',\n",
|
| 241 |
+
" 'I-EQU',\n",
|
| 242 |
+
" 'E-EQU',\n",
|
| 243 |
+
" 'B-TIME',\n",
|
| 244 |
+
" 'E-TIME',\n",
|
| 245 |
+
" 'I-TIME',\n",
|
| 246 |
+
" 'B-FAC',\n",
|
| 247 |
+
" 'I-FAC',\n",
|
| 248 |
+
" 'E-FAC',\n",
|
| 249 |
+
" 'B-Symptom',\n",
|
| 250 |
+
" 'E-Symptom',\n",
|
| 251 |
+
" 'B-Medical_Examination',\n",
|
| 252 |
+
" 'E-Medical_Examination',\n",
|
| 253 |
+
" 'I-Medical_Examination',\n",
|
| 254 |
+
" 'B-Drug',\n",
|
| 255 |
+
" 'I-Drug',\n",
|
| 256 |
+
" 'E-Drug',\n",
|
| 257 |
+
" 'B-Drug_Category',\n",
|
| 258 |
+
" 'I-Drug_Category',\n",
|
| 259 |
+
" 'E-Drug_Category',\n",
|
| 260 |
+
" 'I-Symptom',\n",
|
| 261 |
+
" 'B-Operation',\n",
|
| 262 |
+
" 'E-Operation',\n",
|
| 263 |
+
" 'I-Operation',\n",
|
| 264 |
+
" 'B-NAME',\n",
|
| 265 |
+
" 'I-NAME',\n",
|
| 266 |
+
" 'E-NAME',\n",
|
| 267 |
+
" 'B-CONT',\n",
|
| 268 |
+
" 'I-CONT',\n",
|
| 269 |
+
" 'E-CONT',\n",
|
| 270 |
+
" 'B-EDU',\n",
|
| 271 |
+
" 'I-EDU',\n",
|
| 272 |
+
" 'E-EDU',\n",
|
| 273 |
+
" 'B-TITLE',\n",
|
| 274 |
+
" 'I-TITLE',\n",
|
| 275 |
+
" 'E-TITLE',\n",
|
| 276 |
+
" 'B-RACE',\n",
|
| 277 |
+
" 'E-RACE',\n",
|
| 278 |
+
" 'B-PRO',\n",
|
| 279 |
+
" 'I-PRO',\n",
|
| 280 |
+
" 'E-PRO',\n",
|
| 281 |
+
" 'I-RACE',\n",
|
| 282 |
+
" 'B-T',\n",
|
| 283 |
+
" 'I-T',\n",
|
| 284 |
+
" 'E-T']"
|
| 285 |
+
]
|
| 286 |
+
},
|
| 287 |
+
"execution_count": 3,
|
| 288 |
+
"metadata": {},
|
| 289 |
+
"output_type": "execute_result"
|
| 290 |
+
}
|
| 291 |
+
],
|
| 292 |
+
"execution_count": 3
|
| 293 |
+
},
|
| 294 |
+
{
|
| 295 |
+
"metadata": {},
|
| 296 |
+
"cell_type": "markdown",
|
| 297 |
+
"source": "",
|
| 298 |
+
"id": "91f0e81784b80263"
|
| 299 |
+
},
|
| 300 |
+
{
|
| 301 |
+
"metadata": {
|
| 302 |
+
"ExecuteTime": {
|
| 303 |
+
"end_time": "2024-05-09T08:24:29.299162Z",
|
| 304 |
+
"start_time": "2024-05-09T08:24:27.888935Z"
|
| 305 |
+
}
|
| 306 |
+
},
|
| 307 |
+
"cell_type": "code",
|
| 308 |
+
"source": [
|
| 309 |
+
"model = BertNerModel(labels)\n",
|
| 310 |
+
"model.load_state_dict(torch.load(\"bert-model.pth\"))"
|
| 311 |
+
],
|
| 312 |
+
"id": "2129b37797e3c37a",
|
| 313 |
+
"outputs": [
|
| 314 |
+
{
|
| 315 |
+
"name": "stderr",
|
| 316 |
+
"output_type": "stream",
|
| 317 |
+
"text": [
|
| 318 |
+
"Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-chinese and are newly initialized: ['classifier.bias', 'classifier.weight']\n",
|
| 319 |
+
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
|
| 320 |
+
]
|
| 321 |
+
},
|
| 322 |
+
{
|
| 323 |
+
"data": {
|
| 324 |
+
"text/plain": [
|
| 325 |
+
"<All keys matched successfully>"
|
| 326 |
+
]
|
| 327 |
+
},
|
| 328 |
+
"execution_count": 4,
|
| 329 |
+
"metadata": {},
|
| 330 |
+
"output_type": "execute_result"
|
| 331 |
+
}
|
| 332 |
+
],
|
| 333 |
+
"execution_count": 4
|
| 334 |
+
},
|
| 335 |
+
{
|
| 336 |
+
"metadata": {
|
| 337 |
+
"ExecuteTime": {
|
| 338 |
+
"end_time": "2024-05-09T08:24:29.314124Z",
|
| 339 |
+
"start_time": "2024-05-09T08:24:29.302155Z"
|
| 340 |
+
}
|
| 341 |
+
},
|
| 342 |
+
"cell_type": "code",
|
| 343 |
+
"source": "tr_label_dict = {j: i for i,j in zip(labels, range(len(labels)))}",
|
| 344 |
+
"id": "89256fa2c48b0519",
|
| 345 |
+
"outputs": [],
|
| 346 |
+
"execution_count": 5
|
| 347 |
+
},
|
| 348 |
+
{
|
| 349 |
+
"metadata": {
|
| 350 |
+
"ExecuteTime": {
|
| 351 |
+
"end_time": "2024-05-09T08:24:29.376988Z",
|
| 352 |
+
"start_time": "2024-05-09T08:24:29.316121Z"
|
| 353 |
+
}
|
| 354 |
+
},
|
| 355 |
+
"cell_type": "code",
|
| 356 |
+
"source": [
|
| 357 |
+
"tokenizer = BertTokenizerFast.from_pretrained('bert-base-chinese', do_lower_case=True)\n",
|
| 358 |
+
"label_all_tokens = True\n",
|
| 359 |
+
"\n",
|
| 360 |
+
"def align_word_ids(texts):\n",
|
| 361 |
+
" tokenized_inputs = tokenizer(texts, padding='max_length', max_length=512, truncation=True)\n",
|
| 362 |
+
" word_ids = tokenized_inputs.word_ids()\n",
|
| 363 |
+
" previous_word_idx = None\n",
|
| 364 |
+
" label_ids = []\n",
|
| 365 |
+
" for word_idx in word_ids:\n",
|
| 366 |
+
" if word_idx is None:\n",
|
| 367 |
+
" label_ids.append(-100)\n",
|
| 368 |
+
"\n",
|
| 369 |
+
" elif word_idx != previous_word_idx:\n",
|
| 370 |
+
" try:\n",
|
| 371 |
+
" label_ids.append(1)\n",
|
| 372 |
+
" except:\n",
|
| 373 |
+
" label_ids.append(-100)\n",
|
| 374 |
+
" else:\n",
|
| 375 |
+
" try:\n",
|
| 376 |
+
" label_ids.append(1 if label_all_tokens else -100)\n",
|
| 377 |
+
" except:\n",
|
| 378 |
+
" label_ids.append(-100)\n",
|
| 379 |
+
" previous_word_idx = word_idx\n",
|
| 380 |
+
" return label_ids\n",
|
| 381 |
+
"\n",
|
| 382 |
+
"def evaluate_one_text(model, sentence):\n",
|
| 383 |
+
"\n",
|
| 384 |
+
" use_cuda = torch.cuda.is_available()\n",
|
| 385 |
+
" device = torch.device(\"cuda\" if use_cuda else \"cpu\")\n",
|
| 386 |
+
"\n",
|
| 387 |
+
" if use_cuda:\n",
|
| 388 |
+
" model = model.cuda()\n",
|
| 389 |
+
"\n",
|
| 390 |
+
" text = tokenizer(sentence, padding='max_length', max_length = 512, truncation=True, return_tensors=\"pt\")\n",
|
| 391 |
+
" mask = text['attention_mask'][0].unsqueeze(0).to(device)\n",
|
| 392 |
+
" input_id = text['input_ids'][0].unsqueeze(0).to(device)\n",
|
| 393 |
+
" label_ids = torch.Tensor(align_word_ids(sentence)).unsqueeze(0).to(device)\n",
|
| 394 |
+
"\n",
|
| 395 |
+
" logits = model(input_id, mask, None)\n",
|
| 396 |
+
" logits_clean = logits[0][label_ids != -100]\n",
|
| 397 |
+
"\n",
|
| 398 |
+
" predictions = logits_clean.argmax(dim=1).tolist()\n",
|
| 399 |
+
" prediction_label = [tr_label_dict[i] for i in predictions]\n",
|
| 400 |
+
" print(sentence)\n",
|
| 401 |
+
" print(prediction_label)"
|
| 402 |
+
],
|
| 403 |
+
"id": "12a17ea429a5710c",
|
| 404 |
+
"outputs": [],
|
| 405 |
+
"execution_count": 6
|
| 406 |
+
},
|
| 407 |
+
{
|
| 408 |
+
"metadata": {
|
| 409 |
+
"ExecuteTime": {
|
| 410 |
+
"end_time": "2024-05-09T08:24:29.408903Z",
|
| 411 |
+
"start_time": "2024-05-09T08:24:29.377983Z"
|
| 412 |
+
}
|
| 413 |
+
},
|
| 414 |
+
"cell_type": "code",
|
| 415 |
+
"source": "align_word_ids('悉尼遭袭2名死伤中国公民为留学生')",
|
| 416 |
+
"id": "31f15809e7f8913c",
|
| 417 |
+
"outputs": [
|
| 418 |
+
{
|
| 419 |
+
"data": {
|
| 420 |
+
"text/plain": [
|
| 421 |
+
"[-100,\n",
|
| 422 |
+
" 1,\n",
|
| 423 |
+
" 1,\n",
|
| 424 |
+
" 1,\n",
|
| 425 |
+
" 1,\n",
|
| 426 |
+
" 1,\n",
|
| 427 |
+
" 1,\n",
|
| 428 |
+
" 1,\n",
|
| 429 |
+
" 1,\n",
|
| 430 |
+
" 1,\n",
|
| 431 |
+
" 1,\n",
|
| 432 |
+
" 1,\n",
|
| 433 |
+
" 1,\n",
|
| 434 |
+
" 1,\n",
|
| 435 |
+
" 1,\n",
|
| 436 |
+
" 1,\n",
|
| 437 |
+
" 1,\n",
|
| 438 |
+
" -100,\n",
|
| 439 |
+
" -100,\n",
|
| 440 |
+
" -100,\n",
|
| 441 |
+
" -100,\n",
|
| 442 |
+
" -100,\n",
|
| 443 |
+
" -100,\n",
|
| 444 |
+
" -100,\n",
|
| 445 |
+
" -100,\n",
|
| 446 |
+
" -100,\n",
|
| 447 |
+
" -100,\n",
|
| 448 |
+
" -100,\n",
|
| 449 |
+
" -100,\n",
|
| 450 |
+
" -100,\n",
|
| 451 |
+
" -100,\n",
|
| 452 |
+
" -100,\n",
|
| 453 |
+
" -100,\n",
|
| 454 |
+
" -100,\n",
|
| 455 |
+
" -100,\n",
|
| 456 |
+
" -100,\n",
|
| 457 |
+
" -100,\n",
|
| 458 |
+
" -100,\n",
|
| 459 |
+
" -100,\n",
|
| 460 |
+
" -100,\n",
|
| 461 |
+
" -100,\n",
|
| 462 |
+
" -100,\n",
|
| 463 |
+
" -100,\n",
|
| 464 |
+
" -100,\n",
|
| 465 |
+
" -100,\n",
|
| 466 |
+
" -100,\n",
|
| 467 |
+
" -100,\n",
|
| 468 |
+
" -100,\n",
|
| 469 |
+
" -100,\n",
|
| 470 |
+
" -100,\n",
|
| 471 |
+
" -100,\n",
|
| 472 |
+
" -100,\n",
|
| 473 |
+
" -100,\n",
|
| 474 |
+
" -100,\n",
|
| 475 |
+
" -100,\n",
|
| 476 |
+
" -100,\n",
|
| 477 |
+
" -100,\n",
|
| 478 |
+
" -100,\n",
|
| 479 |
+
" -100,\n",
|
| 480 |
+
" -100,\n",
|
| 481 |
+
" -100,\n",
|
| 482 |
+
" -100,\n",
|
| 483 |
+
" -100,\n",
|
| 484 |
+
" -100,\n",
|
| 485 |
+
" -100,\n",
|
| 486 |
+
" -100,\n",
|
| 487 |
+
" -100,\n",
|
| 488 |
+
" -100,\n",
|
| 489 |
+
" -100,\n",
|
| 490 |
+
" -100,\n",
|
| 491 |
+
" -100,\n",
|
| 492 |
+
" -100,\n",
|
| 493 |
+
" -100,\n",
|
| 494 |
+
" -100,\n",
|
| 495 |
+
" -100,\n",
|
| 496 |
+
" -100,\n",
|
| 497 |
+
" -100,\n",
|
| 498 |
+
" -100,\n",
|
| 499 |
+
" -100,\n",
|
| 500 |
+
" -100,\n",
|
| 501 |
+
" -100,\n",
|
| 502 |
+
" -100,\n",
|
| 503 |
+
" -100,\n",
|
| 504 |
+
" -100,\n",
|
| 505 |
+
" -100,\n",
|
| 506 |
+
" -100,\n",
|
| 507 |
+
" -100,\n",
|
| 508 |
+
" -100,\n",
|
| 509 |
+
" -100,\n",
|
| 510 |
+
" -100,\n",
|
| 511 |
+
" -100,\n",
|
| 512 |
+
" -100,\n",
|
| 513 |
+
" -100,\n",
|
| 514 |
+
" -100,\n",
|
| 515 |
+
" -100,\n",
|
| 516 |
+
" -100,\n",
|
| 517 |
+
" -100,\n",
|
| 518 |
+
" -100,\n",
|
| 519 |
+
" -100,\n",
|
| 520 |
+
" -100,\n",
|
| 521 |
+
" -100,\n",
|
| 522 |
+
" -100,\n",
|
| 523 |
+
" -100,\n",
|
| 524 |
+
" -100,\n",
|
| 525 |
+
" -100,\n",
|
| 526 |
+
" -100,\n",
|
| 527 |
+
" -100,\n",
|
| 528 |
+
" -100,\n",
|
| 529 |
+
" -100,\n",
|
| 530 |
+
" -100,\n",
|
| 531 |
+
" -100,\n",
|
| 532 |
+
" -100,\n",
|
| 533 |
+
" -100,\n",
|
| 534 |
+
" -100,\n",
|
| 535 |
+
" -100,\n",
|
| 536 |
+
" -100,\n",
|
| 537 |
+
" -100,\n",
|
| 538 |
+
" -100,\n",
|
| 539 |
+
" -100,\n",
|
| 540 |
+
" -100,\n",
|
| 541 |
+
" -100,\n",
|
| 542 |
+
" -100,\n",
|
| 543 |
+
" -100,\n",
|
| 544 |
+
" -100,\n",
|
| 545 |
+
" -100,\n",
|
| 546 |
+
" -100,\n",
|
| 547 |
+
" -100,\n",
|
| 548 |
+
" -100,\n",
|
| 549 |
+
" -100,\n",
|
| 550 |
+
" -100,\n",
|
| 551 |
+
" -100,\n",
|
| 552 |
+
" -100,\n",
|
| 553 |
+
" -100,\n",
|
| 554 |
+
" -100,\n",
|
| 555 |
+
" -100,\n",
|
| 556 |
+
" -100,\n",
|
| 557 |
+
" -100,\n",
|
| 558 |
+
" -100,\n",
|
| 559 |
+
" -100,\n",
|
| 560 |
+
" -100,\n",
|
| 561 |
+
" -100,\n",
|
| 562 |
+
" -100,\n",
|
| 563 |
+
" -100,\n",
|
| 564 |
+
" -100,\n",
|
| 565 |
+
" -100,\n",
|
| 566 |
+
" -100,\n",
|
| 567 |
+
" -100,\n",
|
| 568 |
+
" -100,\n",
|
| 569 |
+
" -100,\n",
|
| 570 |
+
" -100,\n",
|
| 571 |
+
" -100,\n",
|
| 572 |
+
" -100,\n",
|
| 573 |
+
" -100,\n",
|
| 574 |
+
" -100,\n",
|
| 575 |
+
" -100,\n",
|
| 576 |
+
" -100,\n",
|
| 577 |
+
" -100,\n",
|
| 578 |
+
" -100,\n",
|
| 579 |
+
" -100,\n",
|
| 580 |
+
" -100,\n",
|
| 581 |
+
" -100,\n",
|
| 582 |
+
" -100,\n",
|
| 583 |
+
" -100,\n",
|
| 584 |
+
" -100,\n",
|
| 585 |
+
" -100,\n",
|
| 586 |
+
" -100,\n",
|
| 587 |
+
" -100,\n",
|
| 588 |
+
" -100,\n",
|
| 589 |
+
" -100,\n",
|
| 590 |
+
" -100,\n",
|
| 591 |
+
" -100,\n",
|
| 592 |
+
" -100,\n",
|
| 593 |
+
" -100,\n",
|
| 594 |
+
" -100,\n",
|
| 595 |
+
" -100,\n",
|
| 596 |
+
" -100,\n",
|
| 597 |
+
" -100,\n",
|
| 598 |
+
" -100,\n",
|
| 599 |
+
" -100,\n",
|
| 600 |
+
" -100,\n",
|
| 601 |
+
" -100,\n",
|
| 602 |
+
" -100,\n",
|
| 603 |
+
" -100,\n",
|
| 604 |
+
" -100,\n",
|
| 605 |
+
" -100,\n",
|
| 606 |
+
" -100,\n",
|
| 607 |
+
" -100,\n",
|
| 608 |
+
" -100,\n",
|
| 609 |
+
" -100,\n",
|
| 610 |
+
" -100,\n",
|
| 611 |
+
" -100,\n",
|
| 612 |
+
" -100,\n",
|
| 613 |
+
" -100,\n",
|
| 614 |
+
" -100,\n",
|
| 615 |
+
" -100,\n",
|
| 616 |
+
" -100,\n",
|
| 617 |
+
" -100,\n",
|
| 618 |
+
" -100,\n",
|
| 619 |
+
" -100,\n",
|
| 620 |
+
" -100,\n",
|
| 621 |
+
" -100,\n",
|
| 622 |
+
" -100,\n",
|
| 623 |
+
" -100,\n",
|
| 624 |
+
" -100,\n",
|
| 625 |
+
" -100,\n",
|
| 626 |
+
" -100,\n",
|
| 627 |
+
" -100,\n",
|
| 628 |
+
" -100,\n",
|
| 629 |
+
" -100,\n",
|
| 630 |
+
" -100,\n",
|
| 631 |
+
" -100,\n",
|
| 632 |
+
" -100,\n",
|
| 633 |
+
" -100,\n",
|
| 634 |
+
" -100,\n",
|
| 635 |
+
" -100,\n",
|
| 636 |
+
" -100,\n",
|
| 637 |
+
" -100,\n",
|
| 638 |
+
" -100,\n",
|
| 639 |
+
" -100,\n",
|
| 640 |
+
" -100,\n",
|
| 641 |
+
" -100,\n",
|
| 642 |
+
" -100,\n",
|
| 643 |
+
" -100,\n",
|
| 644 |
+
" -100,\n",
|
| 645 |
+
" -100,\n",
|
| 646 |
+
" -100,\n",
|
| 647 |
+
" -100,\n",
|
| 648 |
+
" -100,\n",
|
| 649 |
+
" -100,\n",
|
| 650 |
+
" -100,\n",
|
| 651 |
+
" -100,\n",
|
| 652 |
+
" -100,\n",
|
| 653 |
+
" -100,\n",
|
| 654 |
+
" -100,\n",
|
| 655 |
+
" -100,\n",
|
| 656 |
+
" -100,\n",
|
| 657 |
+
" -100,\n",
|
| 658 |
+
" -100,\n",
|
| 659 |
+
" -100,\n",
|
| 660 |
+
" -100,\n",
|
| 661 |
+
" -100,\n",
|
| 662 |
+
" -100,\n",
|
| 663 |
+
" -100,\n",
|
| 664 |
+
" -100,\n",
|
| 665 |
+
" -100,\n",
|
| 666 |
+
" -100,\n",
|
| 667 |
+
" -100,\n",
|
| 668 |
+
" -100,\n",
|
| 669 |
+
" -100,\n",
|
| 670 |
+
" -100,\n",
|
| 671 |
+
" -100,\n",
|
| 672 |
+
" -100,\n",
|
| 673 |
+
" -100,\n",
|
| 674 |
+
" -100,\n",
|
| 675 |
+
" -100,\n",
|
| 676 |
+
" -100,\n",
|
| 677 |
+
" -100,\n",
|
| 678 |
+
" -100,\n",
|
| 679 |
+
" -100,\n",
|
| 680 |
+
" -100,\n",
|
| 681 |
+
" -100,\n",
|
| 682 |
+
" -100,\n",
|
| 683 |
+
" -100,\n",
|
| 684 |
+
" -100,\n",
|
| 685 |
+
" -100,\n",
|
| 686 |
+
" -100,\n",
|
| 687 |
+
" -100,\n",
|
| 688 |
+
" -100,\n",
|
| 689 |
+
" -100,\n",
|
| 690 |
+
" -100,\n",
|
| 691 |
+
" -100,\n",
|
| 692 |
+
" -100,\n",
|
| 693 |
+
" -100,\n",
|
| 694 |
+
" -100,\n",
|
| 695 |
+
" -100,\n",
|
| 696 |
+
" -100,\n",
|
| 697 |
+
" -100,\n",
|
| 698 |
+
" -100,\n",
|
| 699 |
+
" -100,\n",
|
| 700 |
+
" -100,\n",
|
| 701 |
+
" -100,\n",
|
| 702 |
+
" -100,\n",
|
| 703 |
+
" -100,\n",
|
| 704 |
+
" -100,\n",
|
| 705 |
+
" -100,\n",
|
| 706 |
+
" -100,\n",
|
| 707 |
+
" -100,\n",
|
| 708 |
+
" -100,\n",
|
| 709 |
+
" -100,\n",
|
| 710 |
+
" -100,\n",
|
| 711 |
+
" -100,\n",
|
| 712 |
+
" -100,\n",
|
| 713 |
+
" -100,\n",
|
| 714 |
+
" -100,\n",
|
| 715 |
+
" -100,\n",
|
| 716 |
+
" -100,\n",
|
| 717 |
+
" -100,\n",
|
| 718 |
+
" -100,\n",
|
| 719 |
+
" -100,\n",
|
| 720 |
+
" -100,\n",
|
| 721 |
+
" -100,\n",
|
| 722 |
+
" -100,\n",
|
| 723 |
+
" -100,\n",
|
| 724 |
+
" -100,\n",
|
| 725 |
+
" -100,\n",
|
| 726 |
+
" -100,\n",
|
| 727 |
+
" -100,\n",
|
| 728 |
+
" -100,\n",
|
| 729 |
+
" -100,\n",
|
| 730 |
+
" -100,\n",
|
| 731 |
+
" -100,\n",
|
| 732 |
+
" -100,\n",
|
| 733 |
+
" -100,\n",
|
| 734 |
+
" -100,\n",
|
| 735 |
+
" -100,\n",
|
| 736 |
+
" -100,\n",
|
| 737 |
+
" -100,\n",
|
| 738 |
+
" -100,\n",
|
| 739 |
+
" -100,\n",
|
| 740 |
+
" -100,\n",
|
| 741 |
+
" -100,\n",
|
| 742 |
+
" -100,\n",
|
| 743 |
+
" -100,\n",
|
| 744 |
+
" -100,\n",
|
| 745 |
+
" -100,\n",
|
| 746 |
+
" -100,\n",
|
| 747 |
+
" -100,\n",
|
| 748 |
+
" -100,\n",
|
| 749 |
+
" -100,\n",
|
| 750 |
+
" -100,\n",
|
| 751 |
+
" -100,\n",
|
| 752 |
+
" -100,\n",
|
| 753 |
+
" -100,\n",
|
| 754 |
+
" -100,\n",
|
| 755 |
+
" -100,\n",
|
| 756 |
+
" -100,\n",
|
| 757 |
+
" -100,\n",
|
| 758 |
+
" -100,\n",
|
| 759 |
+
" -100,\n",
|
| 760 |
+
" -100,\n",
|
| 761 |
+
" -100,\n",
|
| 762 |
+
" -100,\n",
|
| 763 |
+
" -100,\n",
|
| 764 |
+
" -100,\n",
|
| 765 |
+
" -100,\n",
|
| 766 |
+
" -100,\n",
|
| 767 |
+
" -100,\n",
|
| 768 |
+
" -100,\n",
|
| 769 |
+
" -100,\n",
|
| 770 |
+
" -100,\n",
|
| 771 |
+
" -100,\n",
|
| 772 |
+
" -100,\n",
|
| 773 |
+
" -100,\n",
|
| 774 |
+
" -100,\n",
|
| 775 |
+
" -100,\n",
|
| 776 |
+
" -100,\n",
|
| 777 |
+
" -100,\n",
|
| 778 |
+
" -100,\n",
|
| 779 |
+
" -100,\n",
|
| 780 |
+
" -100,\n",
|
| 781 |
+
" -100,\n",
|
| 782 |
+
" -100,\n",
|
| 783 |
+
" -100,\n",
|
| 784 |
+
" -100,\n",
|
| 785 |
+
" -100,\n",
|
| 786 |
+
" -100,\n",
|
| 787 |
+
" -100,\n",
|
| 788 |
+
" -100,\n",
|
| 789 |
+
" -100,\n",
|
| 790 |
+
" -100,\n",
|
| 791 |
+
" -100,\n",
|
| 792 |
+
" -100,\n",
|
| 793 |
+
" -100,\n",
|
| 794 |
+
" -100,\n",
|
| 795 |
+
" -100,\n",
|
| 796 |
+
" -100,\n",
|
| 797 |
+
" -100,\n",
|
| 798 |
+
" -100,\n",
|
| 799 |
+
" -100,\n",
|
| 800 |
+
" -100,\n",
|
| 801 |
+
" -100,\n",
|
| 802 |
+
" -100,\n",
|
| 803 |
+
" -100,\n",
|
| 804 |
+
" -100,\n",
|
| 805 |
+
" -100,\n",
|
| 806 |
+
" -100,\n",
|
| 807 |
+
" -100,\n",
|
| 808 |
+
" -100,\n",
|
| 809 |
+
" -100,\n",
|
| 810 |
+
" -100,\n",
|
| 811 |
+
" -100,\n",
|
| 812 |
+
" -100,\n",
|
| 813 |
+
" -100,\n",
|
| 814 |
+
" -100,\n",
|
| 815 |
+
" -100,\n",
|
| 816 |
+
" -100,\n",
|
| 817 |
+
" -100,\n",
|
| 818 |
+
" -100,\n",
|
| 819 |
+
" -100,\n",
|
| 820 |
+
" -100,\n",
|
| 821 |
+
" -100,\n",
|
| 822 |
+
" -100,\n",
|
| 823 |
+
" -100,\n",
|
| 824 |
+
" -100,\n",
|
| 825 |
+
" -100,\n",
|
| 826 |
+
" -100,\n",
|
| 827 |
+
" -100,\n",
|
| 828 |
+
" -100,\n",
|
| 829 |
+
" -100,\n",
|
| 830 |
+
" -100,\n",
|
| 831 |
+
" -100,\n",
|
| 832 |
+
" -100,\n",
|
| 833 |
+
" -100,\n",
|
| 834 |
+
" -100,\n",
|
| 835 |
+
" -100,\n",
|
| 836 |
+
" -100,\n",
|
| 837 |
+
" -100,\n",
|
| 838 |
+
" -100,\n",
|
| 839 |
+
" -100,\n",
|
| 840 |
+
" -100,\n",
|
| 841 |
+
" -100,\n",
|
| 842 |
+
" -100,\n",
|
| 843 |
+
" -100,\n",
|
| 844 |
+
" -100,\n",
|
| 845 |
+
" -100,\n",
|
| 846 |
+
" -100,\n",
|
| 847 |
+
" -100,\n",
|
| 848 |
+
" -100,\n",
|
| 849 |
+
" -100,\n",
|
| 850 |
+
" -100,\n",
|
| 851 |
+
" -100,\n",
|
| 852 |
+
" -100,\n",
|
| 853 |
+
" -100,\n",
|
| 854 |
+
" -100,\n",
|
| 855 |
+
" -100,\n",
|
| 856 |
+
" -100,\n",
|
| 857 |
+
" -100,\n",
|
| 858 |
+
" -100,\n",
|
| 859 |
+
" -100,\n",
|
| 860 |
+
" -100,\n",
|
| 861 |
+
" -100,\n",
|
| 862 |
+
" -100,\n",
|
| 863 |
+
" -100,\n",
|
| 864 |
+
" -100,\n",
|
| 865 |
+
" -100,\n",
|
| 866 |
+
" -100,\n",
|
| 867 |
+
" -100,\n",
|
| 868 |
+
" -100,\n",
|
| 869 |
+
" -100,\n",
|
| 870 |
+
" -100,\n",
|
| 871 |
+
" -100,\n",
|
| 872 |
+
" -100,\n",
|
| 873 |
+
" -100,\n",
|
| 874 |
+
" -100,\n",
|
| 875 |
+
" -100,\n",
|
| 876 |
+
" -100,\n",
|
| 877 |
+
" -100,\n",
|
| 878 |
+
" -100,\n",
|
| 879 |
+
" -100,\n",
|
| 880 |
+
" -100,\n",
|
| 881 |
+
" -100,\n",
|
| 882 |
+
" -100,\n",
|
| 883 |
+
" -100,\n",
|
| 884 |
+
" -100,\n",
|
| 885 |
+
" -100,\n",
|
| 886 |
+
" -100,\n",
|
| 887 |
+
" -100,\n",
|
| 888 |
+
" -100,\n",
|
| 889 |
+
" -100,\n",
|
| 890 |
+
" -100,\n",
|
| 891 |
+
" -100,\n",
|
| 892 |
+
" -100,\n",
|
| 893 |
+
" -100,\n",
|
| 894 |
+
" -100,\n",
|
| 895 |
+
" -100,\n",
|
| 896 |
+
" -100,\n",
|
| 897 |
+
" -100,\n",
|
| 898 |
+
" -100,\n",
|
| 899 |
+
" -100,\n",
|
| 900 |
+
" -100,\n",
|
| 901 |
+
" -100,\n",
|
| 902 |
+
" -100,\n",
|
| 903 |
+
" -100,\n",
|
| 904 |
+
" -100,\n",
|
| 905 |
+
" -100,\n",
|
| 906 |
+
" -100,\n",
|
| 907 |
+
" -100,\n",
|
| 908 |
+
" -100,\n",
|
| 909 |
+
" -100,\n",
|
| 910 |
+
" -100,\n",
|
| 911 |
+
" -100,\n",
|
| 912 |
+
" -100,\n",
|
| 913 |
+
" -100,\n",
|
| 914 |
+
" -100,\n",
|
| 915 |
+
" -100,\n",
|
| 916 |
+
" -100,\n",
|
| 917 |
+
" -100,\n",
|
| 918 |
+
" -100,\n",
|
| 919 |
+
" -100,\n",
|
| 920 |
+
" -100,\n",
|
| 921 |
+
" -100,\n",
|
| 922 |
+
" -100,\n",
|
| 923 |
+
" -100,\n",
|
| 924 |
+
" -100,\n",
|
| 925 |
+
" -100,\n",
|
| 926 |
+
" -100,\n",
|
| 927 |
+
" -100,\n",
|
| 928 |
+
" -100,\n",
|
| 929 |
+
" -100,\n",
|
| 930 |
+
" -100,\n",
|
| 931 |
+
" -100,\n",
|
| 932 |
+
" -100]"
|
| 933 |
+
]
|
| 934 |
+
},
|
| 935 |
+
"execution_count": 7,
|
| 936 |
+
"metadata": {},
|
| 937 |
+
"output_type": "execute_result"
|
| 938 |
+
}
|
| 939 |
+
],
|
| 940 |
+
"execution_count": 7
|
| 941 |
+
},
|
| 942 |
+
{
|
| 943 |
+
"metadata": {
|
| 944 |
+
"ExecuteTime": {
|
| 945 |
+
"end_time": "2024-05-09T08:24:30.907891Z",
|
| 946 |
+
"start_time": "2024-05-09T08:24:29.411863Z"
|
| 947 |
+
}
|
| 948 |
+
},
|
| 949 |
+
"cell_type": "code",
|
| 950 |
+
"source": "evaluate_one_text(model, '悉尼遭袭2名死伤中国公民为留学生')",
|
| 951 |
+
"id": "acc1f02571cf9e6c",
|
| 952 |
+
"outputs": [
|
| 953 |
+
{
|
| 954 |
+
"name": "stdout",
|
| 955 |
+
"output_type": "stream",
|
| 956 |
+
"text": [
|
| 957 |
+
"悉尼遭袭2名死伤中国公民为留学生\n",
|
| 958 |
+
"['B-LOC', 'E-LOC', 'O', 'O', 'O', 'O', 'O', 'O', 'B-LOC', 'E-LOC', 'O', 'O', 'O', 'O', 'O', 'O']\n"
|
| 959 |
+
]
|
| 960 |
+
}
|
| 961 |
+
],
|
| 962 |
+
"execution_count": 8
|
| 963 |
+
},
|
| 964 |
+
{
|
| 965 |
+
"metadata": {
|
| 966 |
+
"ExecuteTime": {
|
| 967 |
+
"end_time": "2024-05-09T08:24:30.923815Z",
|
| 968 |
+
"start_time": "2024-05-09T08:24:30.909863Z"
|
| 969 |
+
}
|
| 970 |
+
},
|
| 971 |
+
"cell_type": "code",
|
| 972 |
+
"source": "",
|
| 973 |
+
"id": "b84039ea25ce0baf",
|
| 974 |
+
"outputs": [],
|
| 975 |
+
"execution_count": 8
|
| 976 |
+
}
|
| 977 |
+
],
|
| 978 |
+
"metadata": {
|
| 979 |
+
"kernelspec": {
|
| 980 |
+
"display_name": "Python 3",
|
| 981 |
+
"language": "python",
|
| 982 |
+
"name": "python3"
|
| 983 |
+
},
|
| 984 |
+
"language_info": {
|
| 985 |
+
"codemirror_mode": {
|
| 986 |
+
"name": "ipython",
|
| 987 |
+
"version": 2
|
| 988 |
+
},
|
| 989 |
+
"file_extension": ".py",
|
| 990 |
+
"mimetype": "text/x-python",
|
| 991 |
+
"name": "python",
|
| 992 |
+
"nbconvert_exporter": "python",
|
| 993 |
+
"pygments_lexer": "ipython2",
|
| 994 |
+
"version": "2.7.6"
|
| 995 |
+
}
|
| 996 |
+
},
|
| 997 |
+
"nbformat": 4,
|
| 998 |
+
"nbformat_minor": 5
|
| 999 |
+
}
|