0shunyata committed on
Commit
a2a8ba9
·
1 Parent(s): ed9833d

json file

Browse files
Files changed (2) hide show
  1. README.md +0 -11
  2. enhanced_training_report.json +340 -0
README.md CHANGED
@@ -1,14 +1,3 @@
1
- ---
2
- title: IAB URL Classifier
3
- emoji: 🏷️
4
- colorFrom: blue
5
- colorTo: purple
6
- sdk: gradio
7
- sdk_version: 4.44.0
8
- app_file: app.py
9
- pinned: false
10
- license: mit
11
- ---
12
 
13
  # 🏷️ IAB URL Classifier
14
 
 
 
 
 
 
 
 
 
 
 
 
 
1
 
2
  # 🏷️ IAB URL Classifier
3
 
enhanced_training_report.json ADDED
@@ -0,0 +1,340 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "timestamp": "2025-07-09T05:14:46.599135",
3
+ "model_info": {
4
+ "model_name": "distilbert-base-uncased",
5
+ "num_labels": 32,
6
+ "label_names": [
7
+ "Attractions",
8
+ "Automotive",
9
+ "Books And Literature",
10
+ "Business And Finance",
11
+ "Careers",
12
+ "Education",
13
+ "Entertainment",
14
+ "Events",
15
+ "Family And Relationships",
16
+ "Fine Art",
17
+ "Food & Drink",
18
+ "Healthy Living",
19
+ "Hobbies & Interests",
20
+ "Holidays",
21
+ "Home & Garden",
22
+ "Law",
23
+ "Medical Health",
24
+ "Personal Celebrations & Life Events",
25
+ "Personal Finance",
26
+ "Pets",
27
+ "Politics",
28
+ "Pop Culture",
29
+ "Real Estate",
30
+ "Religion & Spirituality",
31
+ "Science",
32
+ "Shopping",
33
+ "Sports",
34
+ "Style & Fashion",
35
+ "Technology & Computing",
36
+ "Travel",
37
+ "Video Gaming",
38
+ "Weather"
39
+ ],
40
+ "class_imbalance_handling": true,
41
+ "weighted_loss": true,
42
+ "threshold_optimization": true
43
+ },
44
+ "dataset_analysis": {
45
+ "dataset_stats": {
46
+ "total_samples": 4526,
47
+ "unique_urls": 4012,
48
+ "duplicate_urls": 514
49
+ },
50
+ "label_analysis": {
51
+ "unique_labels_count": 32,
52
+ "total_label_instances": 11268,
53
+ "avg_labels_per_sample": 2.4896155545735748,
54
+ "most_common_labels": [
55
+ [
56
+ "Entertainment",
57
+ 1159
58
+ ],
59
+ [
60
+ "Events",
61
+ 908
62
+ ],
63
+ [
64
+ "Technology & Computing",
65
+ 768
66
+ ],
67
+ [
68
+ "Hobbies & Interests",
69
+ 652
70
+ ],
71
+ [
72
+ "Family And Relationships",
73
+ 625
74
+ ],
75
+ [
76
+ "Education",
77
+ 593
78
+ ],
79
+ [
80
+ "Healthy Living",
81
+ 567
82
+ ],
83
+ [
84
+ "Attractions",
85
+ 514
86
+ ],
87
+ [
88
+ "Medical Health",
89
+ 501
90
+ ],
91
+ [
92
+ "Sports",
93
+ 429
94
+ ]
95
+ ],
96
+ "least_common_labels": [
97
+ [
98
+ "Science",
99
+ 159
100
+ ],
101
+ [
102
+ "Weather",
103
+ 153
104
+ ],
105
+ [
106
+ "Law",
107
+ 144
108
+ ],
109
+ [
110
+ "Real Estate",
111
+ 125
112
+ ],
113
+ [
114
+ "Personal Celebrations & Life Events",
115
+ 90
116
+ ],
117
+ [
118
+ "Style & Fashion",
119
+ 84
120
+ ],
121
+ [
122
+ "Religion & Spirituality",
123
+ 76
124
+ ],
125
+ [
126
+ "Automotive",
127
+ 75
128
+ ],
129
+ [
130
+ "Holidays",
131
+ 54
132
+ ],
133
+ [
134
+ "Pets",
135
+ 50
136
+ ]
137
+ ],
138
+ "label_frequencies": {
139
+ "Entertainment": 0.25607600530269553,
140
+ "Events": 0.20061864781263808,
141
+ "Technology & Computing": 0.16968625718073355,
142
+ "Hobbies & Interests": 0.14405656208572692,
143
+ "Family And Relationships": 0.13809102960671674,
144
+ "Education": 0.13102076889085285,
145
+ "Healthy Living": 0.12527618205921343,
146
+ "Attractions": 0.11356606274856386,
147
+ "Medical Health": 0.11069376933274415,
148
+ "Sports": 0.09478568272205037,
149
+ "Pop Culture": 0.09213433495360142,
150
+ "Travel": 0.08506407423773751,
151
+ "Business And Finance": 0.08241272646928856,
152
+ "Shopping": 0.0793194874060981,
153
+ "Careers": 0.07092355280600972,
154
+ "Food & Drink": 0.06473707467962882,
155
+ "Video Gaming": 0.06407423773751657,
156
+ "Politics": 0.06208572691117985,
157
+ "Personal Finance": 0.04882898806893504,
158
+ "Fine Art": 0.04838709677419355,
159
+ "Books And Literature": 0.04706142288996907,
160
+ "Home & Garden": 0.037560760053026956,
161
+ "Science": 0.03513035793194874,
162
+ "Weather": 0.03380468404772426,
163
+ "Law": 0.031816173221387536,
164
+ "Real Estate": 0.02761820592134335,
165
+ "Personal Celebrations & Life Events": 0.019885108263367212,
166
+ "Style & Fashion": 0.01855943437914273,
167
+ "Religion & Spirituality": 0.016791869200176758,
168
+ "Automotive": 0.01657092355280601,
169
+ "Holidays": 0.011931064958020326,
170
+ "Pets": 0.01104728236853734
171
+ },
172
+ "positive_weights": {
173
+ "Healthy Living": 6.982363315696649,
174
+ "Medical Health": 8.033932135728543,
175
+ "Sports": 9.55011655011655,
176
+ "Food & Drink": 14.447098976109215,
177
+ "Video Gaming": 14.606896551724137,
178
+ "Entertainment": 2.9050905953408113,
179
+ "Events": 3.984581497797357,
180
+ "Pop Culture": 9.853717026378897,
181
+ "Automotive": 59.346666666666664,
182
+ "Hobbies & Interests": 5.941717791411043,
183
+ "Home & Garden": 25.623529411764707,
184
+ "Real Estate": 35.208,
185
+ "Family And Relationships": 6.2416,
186
+ "Travel": 10.755844155844155,
187
+ "Shopping": 11.607242339832869,
188
+ "Attractions": 7.80544747081712,
189
+ "Fine Art": 19.666666666666668,
190
+ "Personal Finance": 19.479638009049772,
191
+ "Weather": 28.58169934640523,
192
+ "Careers": 13.09968847352025,
193
+ "Law": 30.430555555555557,
194
+ "Politics": 15.1067615658363,
195
+ "Personal Celebrations & Life Events": 49.28888888888889,
196
+ "Technology & Computing": 4.893229166666667,
197
+ "Business And Finance": 11.134048257372655,
198
+ "Science": 27.465408805031448,
199
+ "Style & Fashion": 52.88095238095238,
200
+ "Religion & Spirituality": 58.55263157894737,
201
+ "Education": 6.632377740303541,
202
+ "Pets": 89.52,
203
+ "Holidays": 82.81481481481481,
204
+ "Books And Literature": 20.248826291079812
205
+ },
206
+ "labels_per_sample_stats": {
207
+ "mean": 2.4896155545735748,
208
+ "median": 3.0,
209
+ "max": 5,
210
+ "min": 1
211
+ }
212
+ },
213
+ "text_analysis": {
214
+ "avg_text_length": 5106.437251436147,
215
+ "median_text_length": 3066.5,
216
+ "max_text_length": 49881,
217
+ "min_text_length": 3,
218
+ "avg_word_count": 845.3506407423773,
219
+ "median_word_count": 516.0
220
+ },
221
+ "imbalance_analysis": {
222
+ "min_frequency": 0.01104728236853734,
223
+ "max_frequency": 0.25607600530269553,
224
+ "frequency_std": 0.05803898127679166,
225
+ "imbalance_ratio": 23.18,
226
+ "labels_below_1_percent": 0,
227
+ "labels_below_5_percent": 14,
228
+ "highly_imbalanced_labels": []
229
+ }
230
+ },
231
+ "training_info": {
232
+ "learning_rate": 2e-05,
233
+ "epochs": 7,
234
+ "batch_size": 16,
235
+ "total_train_steps": 693,
236
+ "training_time": 755.6297,
237
+ "positive_weights": [
238
+ 7.676712328767123,
239
+ 57.648148148148145,
240
+ 20.113333333333333,
241
+ 11.37109375,
242
+ 13.730232558139535,
243
+ 6.46933962264151,
244
+ 3.0292620865139948,
245
+ 4.026984126984127,
246
+ 6.022172949002218,
247
+ 19.171974522292995,
248
+ 13.395454545454545,
249
+ 6.762254901960785,
250
+ 5.6533613445378155,
251
+ 78.175,
252
+ 24.747967479674795,
253
+ 31.989583333333332,
254
+ 7.871148459383753,
255
+ 53.60344827586207,
256
+ 19.044303797468356,
257
+ 92.1470588235294,
258
+ 14.756218905472636,
259
+ 9.699324324324325,
260
+ 35.40229885057471,
261
+ 57.648148148148145,
262
+ 30.67,
263
+ 11.77016129032258,
264
+ 10.112280701754386,
265
+ 50.91803278688525,
266
+ 4.843173431734317,
267
+ 10.996212121212121,
268
+ 15.24102564102564,
269
+ 28.324074074074073
270
+ ],
271
+ "early_stopping": true
272
+ },
273
+ "performance_metrics": {
274
+ "standard_threshold_results": {
275
+ "eval_loss": 0.6122565865516663,
276
+ "eval_exact_match_accuracy": 0.18211920529801323,
277
+ "eval_hamming_loss": 0.10589128035320089,
278
+ "eval_jaccard_score": 0.5157878698441613,
279
+ "eval_f1_macro": 0.5588610579747092,
280
+ "eval_f1_micro": 0.5671178793006204,
281
+ "eval_precision_macro": 0.44220884981664305,
282
+ "eval_precision_micro": 0.4143828559653822,
283
+ "eval_recall_macro": 0.8946385429046899,
284
+ "eval_recall_micro": 0.8981688253684681,
285
+ "eval_any_match_accuracy": 0.9768211920529801,
286
+ "eval_avg_label_overlap": 0.9020419426048565,
287
+ "eval_runtime": 11.9365,
288
+ "eval_samples_per_second": 75.902,
289
+ "eval_steps_per_second": 2.43,
290
+ "epoch": 7.0
291
+ },
292
+ "optimized_threshold_results": {
293
+ "exact_match_accuracy": 0.4735099337748344,
294
+ "any_match_accuracy": 0.8841059602649006,
295
+ "hamming_loss": 0.045012417218543044,
296
+ "jaccard_score": 0.6567486597287922,
297
+ "f1_macro": 0.7200289961647619,
298
+ "f1_micro": 0.7168583206769363,
299
+ "precision_macro": 0.755967885800307,
300
+ "precision_micro": 0.6970464135021097,
301
+ "recall_macro": 0.7382120760450825,
302
+ "recall_micro": 0.7378293881196963
303
+ },
304
+ "optimal_thresholds": [
305
+ 0.7500000000000002,
306
+ 0.6000000000000002,
307
+ 0.8000000000000002,
308
+ 0.7500000000000002,
309
+ 0.7000000000000002,
310
+ 0.7000000000000002,
311
+ 0.6500000000000001,
312
+ 0.7000000000000002,
313
+ 0.6000000000000002,
314
+ 0.6000000000000002,
315
+ 0.8500000000000002,
316
+ 0.6000000000000002,
317
+ 0.7000000000000002,
318
+ 0.7000000000000002,
319
+ 0.8000000000000002,
320
+ 0.8000000000000002,
321
+ 0.7000000000000002,
322
+ 0.6000000000000002,
323
+ 0.8500000000000002,
324
+ 0.8000000000000002,
325
+ 0.8000000000000002,
326
+ 0.7500000000000002,
327
+ 0.8500000000000002,
328
+ 0.7500000000000002,
329
+ 0.8000000000000002,
330
+ 0.6500000000000001,
331
+ 0.8000000000000002,
332
+ 0.8000000000000002,
333
+ 0.6000000000000002,
334
+ 0.6000000000000002,
335
+ 0.8000000000000002,
336
+ 0.5000000000000001
337
+ ],
338
+ "training_loss": 0.7469475348446448
339
+ }
340
+ }