ychu612 commited on
Commit
c60fb31
·
verified ·
1 Parent(s): ab2c24f

Add BERTopic model

Browse files
Files changed (2) hide show
  1. topic_embeddings.safetensors +2 -2
  2. topics.json +267 -72
topic_embeddings.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2c4c45c60b7cc628f1c2c0649f7f24410681dd4311509c9b488a592968a29e8a
3
- size 4696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2c28ec9466fef80686ce45ca38abca661e39d31b4f55deec86ff61cc4934442
3
+ size 7768
topics.json CHANGED
@@ -2,201 +2,330 @@
2
  "topic_representations": {
3
  "-1": [
4
  [
5
- "babi",
6
- 0.09373660495975625
7
  ],
8
  [
9
- "not",
10
- 0.06547987563838238
11
  ],
12
  [
13
- "hospit",
14
- 0.06503728501911928
15
  ],
16
  [
17
- "mother",
18
- 0.05168877653896505
19
  ],
20
  [
21
  "day",
22
- 0.05161452234199994
23
  ],
24
  [
25
- "child",
26
- 0.05143891600576672
27
  ],
28
  [
29
- "after",
30
- 0.04430203835999481
31
  ],
32
  [
33
- "start",
34
- 0.04245709224017999
35
  ],
36
  [
37
- "told",
38
- 0.04202612262741185
39
  ],
40
  [
41
- "birth",
42
- 0.040413799941205225
43
  ]
44
  ],
45
  "0": [
46
  [
47
  "babi",
48
- 0.1582895593176507
49
  ],
50
  [
51
  "mother",
52
- 0.09470282609947286
 
 
 
 
53
  ],
54
  [
55
  "hospit",
56
- 0.06895616372454667
57
  ],
58
  [
59
  "not",
60
- 0.06650114006006944
61
  ],
62
  [
63
- "nurs",
64
- 0.05849916746755427
65
  ],
66
  [
67
- "told",
68
- 0.05582609593105063
69
  ],
70
  [
71
- "clinic",
72
- 0.05402935979582151
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  ],
74
  [
75
- "went",
76
- 0.053790785090644444
77
  ],
78
  [
79
- "check",
80
- 0.05297749048784422
81
  ],
82
  [
83
- "month",
84
- 0.0515997344506757
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  ]
86
  ],
87
- "1": [
 
 
 
 
88
  [
89
  "babi",
90
- 0.14078770808667349
91
  ],
92
  [
93
- "breath",
94
- 0.12812695309495128
95
  ],
96
  [
97
- "born",
98
- 0.11501548849156486
99
  ],
100
  [
101
- "day",
102
- 0.1074519891873241
103
  ],
104
  [
105
- "prematur",
106
- 0.08525336793099957
 
 
 
 
 
 
 
 
 
 
 
 
107
  ],
108
  [
109
  "month",
110
- 0.07867615760643391
 
 
 
 
 
 
111
  ],
112
  [
113
- "die",
114
- 0.07057759824976823
115
  ],
116
  [
117
- "could",
118
- 0.06502825762013714
119
  ],
120
  [
121
- "incub",
122
- 0.0631137433794647
123
  ],
124
  [
125
- "not",
126
- 0.06293601967430601
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  ]
128
  ]
129
  },
130
  "topics": [
131
- 1,
132
  -1,
133
- 1,
 
134
  -1,
135
  -1,
 
 
136
  -1,
137
  0,
138
- 1,
139
  0,
140
  1,
141
- -1,
142
- -1,
143
- -1,
144
- -1,
145
  1,
146
  0,
147
  -1,
 
 
 
 
 
 
148
  -1,
149
  -1,
150
  -1,
151
  -1,
152
  -1,
 
 
153
  -1,
154
  -1,
155
  1,
156
  0,
157
  0,
158
- -1,
 
 
 
159
  0,
 
160
  -1,
161
  -1,
 
 
 
162
  -1,
 
163
  -1,
 
164
  -1,
165
  -1,
166
  0,
 
 
167
  -1,
168
  -1,
169
  -1,
170
  -1,
 
 
171
  -1,
 
172
  -1,
173
  -1,
174
- 0,
175
- 0,
176
  1,
177
  -1,
178
  -1,
 
179
  -1,
180
  -1,
181
  -1,
 
 
182
  -1,
183
  0,
 
184
  0,
 
 
185
  0,
 
 
186
  -1,
187
  -1,
 
 
 
 
188
  -1,
189
  0,
190
  1,
 
 
 
191
  -1,
 
 
 
 
 
192
  -1,
193
  -1,
194
  0,
 
 
195
  1,
196
- 0,
197
  1,
 
 
 
198
  -1,
199
  -1,
 
 
200
  -1,
201
  -1,
202
  -1,
@@ -204,38 +333,90 @@
204
  -1,
205
  -1,
206
  0,
 
207
  -1,
 
 
 
 
 
 
 
 
 
 
 
 
 
208
  -1,
209
  -1,
210
  -1,
 
211
  -1,
212
  1,
 
 
 
 
213
  -1,
 
 
 
214
  -1,
215
  -1,
 
216
  0,
217
  -1,
218
  -1,
219
  -1,
 
 
 
 
220
  0,
221
  -1,
222
  -1,
223
  -1,
 
224
  -1,
225
  -1,
 
 
 
 
 
 
 
 
 
226
  -1,
227
  -1,
228
  -1,
 
 
 
 
 
 
 
 
229
  -1,
230
  -1,
 
 
 
231
  -1,
232
  -1,
 
 
233
  -1
234
  ],
235
  "topic_sizes": {
236
- "1": 11,
237
- "-1": 74,
238
- "0": 18
 
 
239
  },
240
  "topic_mapper": [
241
  [
@@ -248,19 +429,33 @@
248
  0,
249
  0,
250
  0,
251
- 0
252
  ],
253
  [
254
  1,
255
  1,
256
  1,
 
 
 
 
 
 
257
  1
 
 
 
 
 
 
258
  ]
259
  ],
260
  "topic_labels": {
261
- "-1": "-1_babi_not_hospit_mother",
262
- "0": "0_babi_mother_hospit_not",
263
- "1": "1_babi_breath_born_day"
 
 
264
  },
265
  "custom_labels": null,
266
  "_outliers": 1,
 
2
  "topic_representations": {
3
  "-1": [
4
  [
5
+ "hospit",
6
+ 0.059371734257215494
7
  ],
8
  [
9
+ "child",
10
+ 0.05266834744315983
11
  ],
12
  [
13
+ "not",
14
+ 0.050200232020959974
15
  ],
16
  [
17
+ "pas",
18
+ 0.04498954397263199
19
  ],
20
  [
21
  "day",
22
+ 0.04360954258671476
23
  ],
24
  [
25
+ "clinic",
26
+ 0.04210265454054396
27
  ],
28
  [
29
+ "start",
30
+ 0.036584473470681264
31
  ],
32
  [
33
+ "call",
34
+ 0.03604661061893277
35
  ],
36
  [
37
+ "doctor",
38
+ 0.0358391282493848
39
  ],
40
  [
41
+ "mother",
42
+ 0.03534314417800652
43
  ]
44
  ],
45
  "0": [
46
  [
47
  "babi",
48
+ 0.1252870133128255
49
  ],
50
  [
51
  "mother",
52
+ 0.054333762712986164
53
+ ],
54
+ [
55
+ "day",
56
+ 0.05202673075340394
57
  ],
58
  [
59
  "hospit",
60
+ 0.04991878904964433
61
  ],
62
  [
63
  "not",
64
+ 0.04906610116422352
65
  ],
66
  [
67
+ "after",
68
+ 0.03746762909105888
69
  ],
70
  [
71
+ "born",
72
+ 0.036857056607399005
73
  ],
74
  [
75
+ "respond",
76
+ 0.035367314967115954
77
+ ],
78
+ [
79
+ "doctor",
80
+ 0.030494617685190382
81
+ ],
82
+ [
83
+ "birth",
84
+ 0.029730599102025974
85
+ ]
86
+ ],
87
+ "1": [
88
+ [
89
+ "child",
90
+ 0.15520806873724294
91
  ],
92
  [
93
+ "hospit",
94
+ 0.05914856514996918
95
  ],
96
  [
97
+ "mother",
98
+ 0.05816557790431584
99
  ],
100
  [
101
+ "not",
102
+ 0.056266565907403476
103
+ ],
104
+ [
105
+ "die",
106
+ 0.0509795020089475
107
+ ],
108
+ [
109
+ "doctor",
110
+ 0.04409694157080915
111
+ ],
112
+ [
113
+ "day",
114
+ 0.04353689341765372
115
+ ],
116
+ [
117
+ "clinic",
118
+ 0.039697375936421087
119
+ ],
120
+ [
121
+ "famili",
122
+ 0.038969189132851065
123
+ ],
124
+ [
125
+ "took",
126
+ 0.034551655902595416
127
  ]
128
  ],
129
+ "2": [
130
+ [
131
+ "hiv",
132
+ 0.08433021595239126
133
+ ],
134
  [
135
  "babi",
136
+ 0.06786017386849262
137
  ],
138
  [
139
+ "got",
140
+ 0.05531866116677906
141
  ],
142
  [
143
+ "mother",
144
+ 0.05436782539509934
145
  ],
146
  [
147
+ "not",
148
+ 0.04816129575579367
149
  ],
150
  [
151
+ "respond",
152
+ 0.045798160019412476
153
+ ],
154
+ [
155
+ "child",
156
+ 0.045095922364590015
157
+ ],
158
+ [
159
+ "treatment",
160
+ 0.04353539385566118
161
+ ],
162
+ [
163
+ "hospit",
164
+ 0.04181341866271754
165
  ],
166
  [
167
  "month",
168
+ 0.037743061480095666
169
+ ]
170
+ ],
171
+ "3": [
172
+ [
173
+ "swollen",
174
+ 0.08616199201618167
175
  ],
176
  [
177
+ "hospit",
178
+ 0.08554820722255431
179
  ],
180
  [
181
+ "also",
182
+ 0.06275443394752532
183
  ],
184
  [
185
+ "start",
186
+ 0.06227437600598956
187
  ],
188
  [
189
+ "clinic",
190
+ 0.06014948110678601
191
+ ],
192
+ [
193
+ "took",
194
+ 0.05879178405493138
195
+ ],
196
+ [
197
+ "day",
198
+ 0.05597210151874125
199
+ ],
200
+ [
201
+ "stool",
202
+ 0.05474213662585949
203
+ ],
204
+ [
205
+ "taken",
206
+ 0.05467324170466143
207
+ ],
208
+ [
209
+ "leg",
210
+ 0.05001626227361538
211
  ]
212
  ]
213
  },
214
  "topics": [
215
+ 0,
216
  -1,
217
+ 0,
218
+ 0,
219
  -1,
220
  -1,
221
+ 0,
222
+ 0,
223
  -1,
224
  0,
 
225
  0,
226
  1,
 
 
 
 
227
  1,
228
  0,
229
  -1,
230
+ 1,
231
+ 1,
232
+ -1,
233
+ 0,
234
+ 1,
235
+ 3,
236
  -1,
237
  -1,
238
  -1,
239
  -1,
240
  -1,
241
+ 0,
242
+ 0,
243
  -1,
244
  -1,
245
  1,
246
  0,
247
  0,
248
+ 1,
249
+ 2,
250
+ 1,
251
+ 0,
252
  0,
253
+ 2,
254
  -1,
255
  -1,
256
+ 0,
257
+ 0,
258
+ 0,
259
  -1,
260
+ 0,
261
  -1,
262
+ 2,
263
  -1,
264
  -1,
265
  0,
266
+ 2,
267
+ 1,
268
  -1,
269
  -1,
270
  -1,
271
  -1,
272
+ 1,
273
+ 1,
274
  -1,
275
+ 0,
276
  -1,
277
  -1,
 
 
278
  1,
279
  -1,
280
  -1,
281
+ 3,
282
  -1,
283
  -1,
284
  -1,
285
+ 1,
286
+ 1,
287
  -1,
288
  0,
289
+ -1,
290
  0,
291
+ -1,
292
+ -1,
293
  0,
294
+ 2,
295
+ 2,
296
  -1,
297
  -1,
298
+ 1,
299
+ 2,
300
+ 2,
301
+ 2,
302
  -1,
303
  0,
304
  1,
305
+ 0,
306
+ 1,
307
+ 1,
308
  -1,
309
+ 0,
310
+ 0,
311
+ 3,
312
+ 0,
313
+ 0,
314
  -1,
315
  -1,
316
  0,
317
+ -1,
318
+ 3,
319
  1,
320
+ 2,
321
  1,
322
+ 1,
323
+ 0,
324
+ 0,
325
  -1,
326
  -1,
327
+ 1,
328
+ 0,
329
  -1,
330
  -1,
331
  -1,
 
333
  -1,
334
  -1,
335
  0,
336
+ 3,
337
  -1,
338
+ 1,
339
+ 3,
340
+ -1,
341
+ -1,
342
+ -1,
343
+ -1,
344
+ 0,
345
+ 0,
346
+ 0,
347
+ 2,
348
+ 3,
349
+ 0,
350
+ 0,
351
  -1,
352
  -1,
353
  -1,
354
+ 0,
355
  -1,
356
  1,
357
+ 0,
358
+ 1,
359
+ 0,
360
+ 1,
361
  -1,
362
+ 3,
363
+ -1,
364
+ 0,
365
  -1,
366
  -1,
367
+ 2,
368
  0,
369
  -1,
370
  -1,
371
  -1,
372
+ -1,
373
+ 1,
374
+ -1,
375
+ -1,
376
  0,
377
  -1,
378
  -1,
379
  -1,
380
+ 1,
381
  -1,
382
  -1,
383
+ 0,
384
+ 1,
385
+ -1,
386
+ 0,
387
+ 0,
388
+ 3,
389
+ -1,
390
+ 1,
391
+ 2,
392
  -1,
393
  -1,
394
  -1,
395
+ 3,
396
+ -1,
397
+ 2,
398
+ 1,
399
+ 1,
400
+ 1,
401
+ 0,
402
+ 1,
403
  -1,
404
  -1,
405
+ 1,
406
+ 0,
407
+ 0,
408
  -1,
409
  -1,
410
+ 3,
411
+ 1,
412
  -1
413
  ],
414
  "topic_sizes": {
415
+ "0": 52,
416
+ "-1": 85,
417
+ "1": 36,
418
+ "3": 11,
419
+ "2": 14
420
  },
421
  "topic_mapper": [
422
  [
 
429
  0,
430
  0,
431
  0,
432
+ 2
433
  ],
434
  [
435
  1,
436
  1,
437
  1,
438
+ 0
439
+ ],
440
+ [
441
+ 2,
442
+ 2,
443
+ 2,
444
  1
445
+ ],
446
+ [
447
+ 3,
448
+ 3,
449
+ 3,
450
+ 3
451
  ]
452
  ],
453
  "topic_labels": {
454
+ "-1": "-1_hospit_child_not_pas",
455
+ "0": "0_babi_mother_day_hospit",
456
+ "1": "1_child_hospit_mother_not",
457
+ "2": "2_hiv_babi_got_mother",
458
+ "3": "3_swollen_hospit_also_start"
459
  },
460
  "custom_labels": null,
461
  "_outliers": 1,