EphAsad commited on
Commit
a1dc114
·
verified ·
1 Parent(s): 81668a9

train: update extended schema, aliases, signals from gold tests

Browse files
Files changed (2) hide show
  1. models/genus_xgb.json +2 -2
  2. models/genus_xgb_meta.json +282 -248
models/genus_xgb.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:af676e86223286f6a32391219b3054ebe309199bbc31bf39f3248edc16114b9d
3
- size 22523681
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6530346e18bdb61e778f887fef7ca33e8e0b56e040ce95287c373073db726cfb
3
+ size 34613851
models/genus_xgb_meta.json CHANGED
@@ -1,260 +1,294 @@
1
  {
2
  "genus_to_idx": {
3
- "Weissella": 0,
4
- "Vibrio": 1,
5
- "Spiroplasma": 2,
6
- "Serratia": 3,
7
- "Saccharopolyspora": 4,
8
- "Saccharomyces": 5,
9
- "Rothia": 6,
10
- "Pseudomonas": 7,
11
- "Prevotella": 8,
12
- "Photorhabdus": 9,
13
- "Oerskovia": 10,
14
- "Nocardia": 11,
15
- "Morganella": 12,
16
- "Moraxella": 13,
17
- "Micrococcus": 14,
18
- "Methylobacterium": 15,
19
- "Massilia": 16,
20
- "Leptospira": 17,
21
- "Leifsonia": 18,
22
- "Kingella": 19,
23
- "Helicobacter": 20,
24
- "Hafnia": 21,
25
- "Gemella": 22,
26
- "Frankia": 23,
27
- "Finegoldia": 24,
28
- "Enterobacter": 25,
29
- "Eikenella": 26,
30
- "Cryptococcus": 27,
31
- "Chromobacterium": 28,
32
- "Cellulomonas": 29,
33
- "Cardiobacterium": 30,
34
- "Candida": 31,
35
- "Brucella": 32,
36
- "Brevibacterium": 33,
37
- "Bordetella": 34,
38
- "Bilophila": 35,
39
- "Bartonella": 36,
40
- "Borrelia": 37,
41
- "Bacteroides": 38,
42
- "Azotobacter": 39,
43
- "Abiotrophia": 40,
44
- "Acidaminococcus": 41,
45
- "Actinobacillus": 42,
46
- "Actinomyces": 43,
47
- "Alcaligenes": 44,
48
- "Yokenella": 45,
49
- "Yersinia": 46,
50
- "Veillonella": 47,
51
- "Trueperella": 48,
52
- "Thermoactinomyces": 49,
53
- "Streptomyces": 50,
54
- "Streptococcus": 51,
55
- "Stenotrophomonas": 52,
56
- "Staphylococcus": 53,
57
- "Sporolactobacillus": 54,
58
- "Spirillum": 55,
59
- "Sphingomonas": 56,
60
- "Shigella": 57,
61
- "Shewanella": 58,
62
- "Salmonella": 59,
63
- "Ruminococcus": 60,
64
- "Rhodococcus": 61,
65
- "Rhizobium": 62,
66
  "Raoultella": 63,
67
- "Ralstonia": 64,
68
- "Psychrobacter": 65,
69
- "Providencia": 66,
70
- "Proteus": 67,
71
- "Porphyromonas": 68,
72
- "Pluralibacter": 69,
73
- "Peptostreptococcus": 70,
74
- "Peptoniphilus": 71,
75
- "Pasteurella": 72,
76
- "Parvimonas": 73,
77
- "Paenibacillus": 74,
78
- "Neisseria": 75,
79
- "Mycobacterium": 76,
80
- "Myroides": 77,
81
- "Lactobacillus": 78,
82
- "Leclercia": 79,
83
- "Legionella": 80,
84
- "Leuconostoc": 81,
85
- "Listeria": 82,
86
- "Kosakonia": 83,
87
- "Kocuria": 84,
88
- "Kluyvera": 85,
89
- "Klebsiella": 86,
90
- "Haemophilus": 87,
91
- "Geobacillus": 88,
92
- "Gardnerella": 89,
93
- "Fusobacterium": 90,
94
- "Flavobacterium": 91,
95
- "Exiguobacterium": 92,
96
- "Eubacterium": 93,
97
- "Escherichia": 94,
98
- "Erysipelothrix": 95,
99
- "Enterococcus": 96,
100
- "Elizabethkingia": 97,
101
- "Edwardsiella": 98,
102
- "Cutibacterium": 99,
103
- "Cupriavidus": 100,
104
- "Cronobacter": 101,
105
- "Corynebacterium": 102,
106
- "Comamonas": 103,
107
- "Clostridium": 104,
108
- "Clostridioides": 105,
109
- "Citrobacter": 106,
110
- "Cedecea": 107,
111
- "Capnocytophaga": 108,
112
- "Campylobacter": 109,
113
- "Burkholderia": 110,
114
- "Bergeyella": 111,
115
- "Bacillus": 112,
116
- "Arcanobacterium": 113,
117
- "Anaerococcus": 114,
118
- "Alicyclobacillus": 115,
119
- "Aggregatibacter": 116,
120
- "Aeromonas": 117,
121
- ":": 118,
122
- "Aerococcus": 119,
123
- "Acinetobacter": 120,
124
- "Acidovorax": 121,
125
- "Achromobacter": 122
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  },
127
  "idx_to_genus": {
128
- "0": "Weissella",
129
- "1": "Vibrio",
130
- "2": "Spiroplasma",
131
- "3": "Serratia",
132
- "4": "Saccharopolyspora",
133
- "5": "Saccharomyces",
134
- "6": "Rothia",
135
- "7": "Pseudomonas",
136
- "8": "Prevotella",
137
- "9": "Photorhabdus",
138
- "10": "Oerskovia",
139
- "11": "Nocardia",
140
- "12": "Morganella",
141
- "13": "Moraxella",
142
- "14": "Micrococcus",
143
- "15": "Methylobacterium",
144
- "16": "Massilia",
145
- "17": "Leptospira",
146
- "18": "Leifsonia",
147
- "19": "Kingella",
148
- "20": "Helicobacter",
149
- "21": "Hafnia",
150
- "22": "Gemella",
151
- "23": "Frankia",
152
- "24": "Finegoldia",
153
- "25": "Enterobacter",
154
- "26": "Eikenella",
155
- "27": "Cryptococcus",
156
- "28": "Chromobacterium",
157
- "29": "Cellulomonas",
158
- "30": "Cardiobacterium",
159
- "31": "Candida",
160
- "32": "Brucella",
161
- "33": "Brevibacterium",
162
- "34": "Bordetella",
163
- "35": "Bilophila",
164
- "36": "Bartonella",
165
- "37": "Borrelia",
166
- "38": "Bacteroides",
167
- "39": "Azotobacter",
168
- "40": "Abiotrophia",
169
- "41": "Acidaminococcus",
170
- "42": "Actinobacillus",
171
- "43": "Actinomyces",
172
- "44": "Alcaligenes",
173
- "45": "Yokenella",
174
- "46": "Yersinia",
175
- "47": "Veillonella",
176
- "48": "Trueperella",
177
- "49": "Thermoactinomyces",
178
- "50": "Streptomyces",
179
- "51": "Streptococcus",
180
- "52": "Stenotrophomonas",
181
- "53": "Staphylococcus",
182
- "54": "Sporolactobacillus",
183
- "55": "Spirillum",
184
- "56": "Sphingomonas",
185
- "57": "Shigella",
186
- "58": "Shewanella",
187
- "59": "Salmonella",
188
- "60": "Ruminococcus",
189
- "61": "Rhodococcus",
190
- "62": "Rhizobium",
191
  "63": "Raoultella",
192
- "64": "Ralstonia",
193
- "65": "Psychrobacter",
194
- "66": "Providencia",
195
- "67": "Proteus",
196
- "68": "Porphyromonas",
197
- "69": "Pluralibacter",
198
- "70": "Peptostreptococcus",
199
- "71": "Peptoniphilus",
200
- "72": "Pasteurella",
201
- "73": "Parvimonas",
202
- "74": "Paenibacillus",
203
- "75": "Neisseria",
204
- "76": "Mycobacterium",
205
- "77": "Myroides",
206
- "78": "Lactobacillus",
207
- "79": "Leclercia",
208
- "80": "Legionella",
209
- "81": "Leuconostoc",
210
- "82": "Listeria",
211
- "83": "Kosakonia",
212
- "84": "Kocuria",
213
- "85": "Kluyvera",
214
- "86": "Klebsiella",
215
- "87": "Haemophilus",
216
- "88": "Geobacillus",
217
- "89": "Gardnerella",
218
- "90": "Fusobacterium",
219
- "91": "Flavobacterium",
220
- "92": "Exiguobacterium",
221
- "93": "Eubacterium",
222
- "94": "Escherichia",
223
- "95": "Erysipelothrix",
224
- "96": "Enterococcus",
225
- "97": "Elizabethkingia",
226
- "98": "Edwardsiella",
227
- "99": "Cutibacterium",
228
- "100": "Cupriavidus",
229
- "101": "Cronobacter",
230
- "102": "Corynebacterium",
231
- "103": "Comamonas",
232
- "104": "Clostridium",
233
- "105": "Clostridioides",
234
- "106": "Citrobacter",
235
- "107": "Cedecea",
236
- "108": "Capnocytophaga",
237
- "109": "Campylobacter",
238
- "110": "Burkholderia",
239
- "111": "Bergeyella",
240
- "112": "Bacillus",
241
- "113": "Arcanobacterium",
242
- "114": "Anaerococcus",
243
- "115": "Alicyclobacillus",
244
- "116": "Aggregatibacter",
245
- "117": "Aeromonas",
246
- "118": ":",
247
- "119": "Aerococcus",
248
- "120": "Acinetobacter",
249
- "121": "Acidovorax",
250
- "122": "Achromobacter"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
251
  },
252
  "n_features": 73,
253
- "num_classes": 123,
254
  "metrics": {
255
- "train_accuracy": 0.9825085324232082,
256
- "valid_accuracy": 0.9642248722316865,
257
- "best_iteration": 295
258
  },
259
  "feature_schema_path": "data/feature_schema.json",
260
  "feature_names": [
 
1
  {
2
  "genus_to_idx": {
3
+ "Staphylococcus": 0,
4
+ "Salmonella": 1,
5
+ "Listeria": 2,
6
+ "Enterobacter": 3,
7
+ "Pseudomonas": 4,
8
+ "Streptococcus": 5,
9
+ "Enterococcus": 6,
10
+ "Bacillus": 7,
11
+ "Shigella": 8,
12
+ "Escherichia": 9,
13
+ "Klebsiella": 10,
14
+ "Proteus": 11,
15
+ "Vibrio": 12,
16
+ "Neisseria": 13,
17
+ "Campylobacter": 14,
18
+ "Clostridium": 15,
19
+ "Corynebacterium": 16,
20
+ "Legionella": 17,
21
+ "Mycobacterium": 18,
22
+ "Bacteroides": 19,
23
+ "Micrococcus": 20,
24
+ "Erysipelothrix": 21,
25
+ "Haemophilus": 22,
26
+ "Aeromonas": 23,
27
+ "Yersinia": 24,
28
+ "Acinetobacter": 25,
29
+ "Serratia": 26,
30
+ "Morganella": 27,
31
+ "Providencia": 28,
32
+ "Burkholderia": 29,
33
+ "Helicobacter": 30,
34
+ "Actinomyces": 31,
35
+ "Nocardia": 32,
36
+ "Pasteurella": 33,
37
+ "Citrobacter": 34,
38
+ "Leptospira": 35,
39
+ "Alcaligenes": 36,
40
+ "Shewanella": 37,
41
+ "Edwardsiella": 38,
42
+ "Chromobacterium": 39,
43
+ "Lactobacillus": 40,
44
+ "Propionibacterium": 41,
45
+ "Peptostreptococcus": 42,
46
+ "Veillonella": 43,
47
+ "Fusobacterium": 44,
48
+ "Eubacterium": 45,
49
+ "Halomonas": 46,
50
+ "Psychrobacter": 47,
51
+ "Rhodococcus": 48,
52
+ "Mycoplasma": 49,
53
+ "Bordetella": 50,
54
+ "Stenotrophomonas": 51,
55
+ "Ralstonia": 52,
56
+ "Achromobacter": 53,
57
+ "Brucella": 54,
58
+ "Arthrobacter": 55,
59
+ "Flavobacterium": 56,
60
+ "Oerskovia": 57,
61
+ "Sphingomonas": 58,
62
+ "Comamonas": 59,
63
+ "Thermococcus": 60,
64
+ "Elizabethkingia": 61,
65
+ "Hafnia": 62,
66
  "Raoultella": 63,
67
+ "Ochrobactrum": 64,
68
+ "Roseomonas": 65,
69
+ "Actinobacillus": 66,
70
+ "Gemella": 67,
71
+ "Rothia": 68,
72
+ "Carnobacterium": 69,
73
+ "Plesiomonas": 70,
74
+ "Janthinobacterium": 71,
75
+ "Paenibacillus": 72,
76
+ "Moraxella": 73,
77
+ "Aerococcus": 74,
78
+ "Kocuria": 75,
79
+ "Leuconostoc": 76,
80
+ "Arcanobacterium": 77,
81
+ "Gardnerella": 78,
82
+ "Porphyromonas": 79,
83
+ "Prevotella": 80,
84
+ "Pediococcus": 81,
85
+ "Weissella": 82,
86
+ "Lactococcus": 83,
87
+ "Microbacterium": 84,
88
+ "Clostridioides": 85,
89
+ "Cronobacter": 86,
90
+ "Rhizobium": 87,
91
+ "Azotobacter": 88,
92
+ "Spirillum": 89,
93
+ "Candida": 90,
94
+ "Cryptococcus": 91,
95
+ "Saccharomyces": 92,
96
+ "Rickettsia": 93,
97
+ "Borrelia": 94,
98
+ "Chlamydia": 95,
99
+ "Acidaminococcus": 96,
100
+ "Bartonella": 97,
101
+ "Coxiella": 98,
102
+ "Kingella": 99,
103
+ "Eikenella": 100,
104
+ "Bilophila": 101,
105
+ "Anaerococcus": 102,
106
+ "Finegoldia": 103,
107
+ "Parvimonas": 104,
108
+ "Ruminococcus": 105,
109
+ "Cutibacterium": 106,
110
+ "Exiguobacterium": 107,
111
+ "Kluyvera": 108,
112
+ "Pluralibacter": 109,
113
+ "Massilia": 110,
114
+ "Methylobacterium": 111,
115
+ "Cupriavidus": 112,
116
+ "Acidovorax": 113,
117
+ "Geobacillus": 114,
118
+ "Trueperella": 115,
119
+ "Streptomyces": 116,
120
+ "Thermoactinomyces": 117,
121
+ "Capnocytophaga": 118,
122
+ "Cardiobacterium": 119,
123
+ "Yokenella": 120,
124
+ "Brevibacterium": 121,
125
+ "Peptoniphilus": 122,
126
+ "Weisella": 123,
127
+ "Saccharopolyspora": 124,
128
+ "Frankia": 125,
129
+ "Spiroplasma": 126,
130
+ "Cedecea": 127,
131
+ "Photorhabdus": 128,
132
+ "Abiotrophia": 129,
133
+ "Cellulomonas": 130,
134
+ "Leifsonia": 131,
135
+ "Alicyclobacillus": 132,
136
+ "Sporolactobacillus": 133,
137
+ "Leclercia": 134,
138
+ "Kosakonia": 135,
139
+ "Bergeyella": 136,
140
+ "Myroides": 137,
141
+ "Aggregatibacter": 138,
142
+ ":": 139
143
  },
144
  "idx_to_genus": {
145
+ "0": "Staphylococcus",
146
+ "1": "Salmonella",
147
+ "2": "Listeria",
148
+ "3": "Enterobacter",
149
+ "4": "Pseudomonas",
150
+ "5": "Streptococcus",
151
+ "6": "Enterococcus",
152
+ "7": "Bacillus",
153
+ "8": "Shigella",
154
+ "9": "Escherichia",
155
+ "10": "Klebsiella",
156
+ "11": "Proteus",
157
+ "12": "Vibrio",
158
+ "13": "Neisseria",
159
+ "14": "Campylobacter",
160
+ "15": "Clostridium",
161
+ "16": "Corynebacterium",
162
+ "17": "Legionella",
163
+ "18": "Mycobacterium",
164
+ "19": "Bacteroides",
165
+ "20": "Micrococcus",
166
+ "21": "Erysipelothrix",
167
+ "22": "Haemophilus",
168
+ "23": "Aeromonas",
169
+ "24": "Yersinia",
170
+ "25": "Acinetobacter",
171
+ "26": "Serratia",
172
+ "27": "Morganella",
173
+ "28": "Providencia",
174
+ "29": "Burkholderia",
175
+ "30": "Helicobacter",
176
+ "31": "Actinomyces",
177
+ "32": "Nocardia",
178
+ "33": "Pasteurella",
179
+ "34": "Citrobacter",
180
+ "35": "Leptospira",
181
+ "36": "Alcaligenes",
182
+ "37": "Shewanella",
183
+ "38": "Edwardsiella",
184
+ "39": "Chromobacterium",
185
+ "40": "Lactobacillus",
186
+ "41": "Propionibacterium",
187
+ "42": "Peptostreptococcus",
188
+ "43": "Veillonella",
189
+ "44": "Fusobacterium",
190
+ "45": "Eubacterium",
191
+ "46": "Halomonas",
192
+ "47": "Psychrobacter",
193
+ "48": "Rhodococcus",
194
+ "49": "Mycoplasma",
195
+ "50": "Bordetella",
196
+ "51": "Stenotrophomonas",
197
+ "52": "Ralstonia",
198
+ "53": "Achromobacter",
199
+ "54": "Brucella",
200
+ "55": "Arthrobacter",
201
+ "56": "Flavobacterium",
202
+ "57": "Oerskovia",
203
+ "58": "Sphingomonas",
204
+ "59": "Comamonas",
205
+ "60": "Thermococcus",
206
+ "61": "Elizabethkingia",
207
+ "62": "Hafnia",
208
  "63": "Raoultella",
209
+ "64": "Ochrobactrum",
210
+ "65": "Roseomonas",
211
+ "66": "Actinobacillus",
212
+ "67": "Gemella",
213
+ "68": "Rothia",
214
+ "69": "Carnobacterium",
215
+ "70": "Plesiomonas",
216
+ "71": "Janthinobacterium",
217
+ "72": "Paenibacillus",
218
+ "73": "Moraxella",
219
+ "74": "Aerococcus",
220
+ "75": "Kocuria",
221
+ "76": "Leuconostoc",
222
+ "77": "Arcanobacterium",
223
+ "78": "Gardnerella",
224
+ "79": "Porphyromonas",
225
+ "80": "Prevotella",
226
+ "81": "Pediococcus",
227
+ "82": "Weissella",
228
+ "83": "Lactococcus",
229
+ "84": "Microbacterium",
230
+ "85": "Clostridioides",
231
+ "86": "Cronobacter",
232
+ "87": "Rhizobium",
233
+ "88": "Azotobacter",
234
+ "89": "Spirillum",
235
+ "90": "Candida",
236
+ "91": "Cryptococcus",
237
+ "92": "Saccharomyces",
238
+ "93": "Rickettsia",
239
+ "94": "Borrelia",
240
+ "95": "Chlamydia",
241
+ "96": "Acidaminococcus",
242
+ "97": "Bartonella",
243
+ "98": "Coxiella",
244
+ "99": "Kingella",
245
+ "100": "Eikenella",
246
+ "101": "Bilophila",
247
+ "102": "Anaerococcus",
248
+ "103": "Finegoldia",
249
+ "104": "Parvimonas",
250
+ "105": "Ruminococcus",
251
+ "106": "Cutibacterium",
252
+ "107": "Exiguobacterium",
253
+ "108": "Kluyvera",
254
+ "109": "Pluralibacter",
255
+ "110": "Massilia",
256
+ "111": "Methylobacterium",
257
+ "112": "Cupriavidus",
258
+ "113": "Acidovorax",
259
+ "114": "Geobacillus",
260
+ "115": "Trueperella",
261
+ "116": "Streptomyces",
262
+ "117": "Thermoactinomyces",
263
+ "118": "Capnocytophaga",
264
+ "119": "Cardiobacterium",
265
+ "120": "Yokenella",
266
+ "121": "Brevibacterium",
267
+ "122": "Peptoniphilus",
268
+ "123": "Weisella",
269
+ "124": "Saccharopolyspora",
270
+ "125": "Frankia",
271
+ "126": "Spiroplasma",
272
+ "127": "Cedecea",
273
+ "128": "Photorhabdus",
274
+ "129": "Abiotrophia",
275
+ "130": "Cellulomonas",
276
+ "131": "Leifsonia",
277
+ "132": "Alicyclobacillus",
278
+ "133": "Sporolactobacillus",
279
+ "134": "Leclercia",
280
+ "135": "Kosakonia",
281
+ "136": "Bergeyella",
282
+ "137": "Myroides",
283
+ "138": "Aggregatibacter",
284
+ "139": ":"
285
  },
286
  "n_features": 73,
287
+ "num_classes": 140,
288
  "metrics": {
289
+ "train_accuracy": 0.9869916267942583,
290
+ "valid_accuracy": 0.9509569377990431,
291
+ "best_iteration": 270
292
  },
293
  "feature_schema_path": "data/feature_schema.json",
294
  "feature_names": [