Tokyosaurus committed on
Commit
f83ba0a
·
verified ·
1 Parent(s): 740947c

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -190
app.py CHANGED
@@ -14,6 +14,7 @@ Usage (Hugging Face Spaces):
14
 
15
  import os
16
  import traceback
 
17
  import gradio as gr
18
  import numpy as np
19
  import pandas as pd
@@ -31,6 +32,7 @@ HF_USERNAME = "Tokyosaurus"
31
  # Model version - points to v2 repos (balanced + sentence-extracted dataset)
32
  MODEL_VERSION = "v2"
33
 
 
34
  class Config:
35
 
36
  # Model registry
@@ -138,6 +140,8 @@ class Config:
138
  # Labels
139
  BINARY_LABELS = {0: "Non-Gaslighting", 1: "Gaslighting"}
140
 
 
 
141
  TACTIC_LABELS = {
142
  0: "Non-Gaslighting",
143
  1: "Distortion & Denial",
@@ -157,66 +161,6 @@ class Config:
157
  "target is incapable of understanding or making valid judgments.",
158
  }
159
 
160
- CONFUSION_MATRICES = {
161
- "roberta-tagalog": {
162
- "binary": {
163
- "labels": ["Non-Gaslighting", "Gaslighting"],
164
- "matrix": [
165
- [59, 12],
166
- [19, 49],
167
- ],
168
- },
169
- "tactic": {
170
- "labels": ["Non-Gaslighting", "D&D", "T&M", "C&I", "KI"],
171
- "matrix": [
172
- [56, 3, 9, 2, 1],
173
- [ 5,10, 0, 0, 2],
174
- [ 5, 0, 9, 1, 2],
175
- [ 0, 1, 0,16, 0],
176
- [ 8, 1, 4, 0, 4],
177
- ],
178
- },
179
- },
180
- "mbert": {
181
- "binary": {
182
- "labels": ["Non-Gaslighting", "Gaslighting"],
183
- "matrix": [
184
- [66, 5],
185
- [20,48],
186
- ],
187
- },
188
- "tactic": {
189
- "labels": ["Non-Gaslighting", "D&D", "T&M", "C&I", "KI"],
190
- "matrix": [
191
- [62, 5, 0, 3, 1],
192
- [ 5, 8, 0, 1, 3],
193
- [ 9, 0, 3, 4, 1],
194
- [ 1, 1, 0,14, 1],
195
- [ 9, 2, 1, 2, 3],
196
- ],
197
- },
198
- },
199
- "xlm-roberta": {
200
- "binary": {
201
- "labels": ["Non-Gaslighting", "Gaslighting"],
202
- "matrix": [
203
- [60,11],
204
- [19,49],
205
- ],
206
- },
207
- "tactic": {
208
- "labels": ["Non-Gaslighting", "D&D", "T&M", "C&I", "KI"],
209
- "matrix": [
210
- [59, 7, 1, 3, 1],
211
- [ 6,10, 0, 1, 0],
212
- [ 7, 3, 1, 5, 1],
213
- [ 1, 0, 1,15, 0],
214
- [ 9, 0, 0, 5, 3],
215
- ],
216
- },
217
- },
218
- }
219
-
220
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
221
  MAX_LENGTH = 128
222
 
@@ -264,6 +208,7 @@ class ModelCache:
264
  print(f" {info['display']} ready")
265
  return entry
266
 
 
267
  _cache = ModelCache()
268
 
269
 
@@ -334,30 +279,16 @@ def predict_sequential(text: str, model_key: str):
334
  models["tactic"]["model"],
335
  text,
336
  )
337
-
338
- # --- OVERRIDE LOGIC ---
339
- if t_pred == 0: # Tactic model says it's actually Non-Gaslighting
340
- is_gas = False
341
- binary_label = "Non-Gaslighting"
342
- b_conf = float(t_probs[0])
343
-
344
- # Safe math for Gradio pie chart
345
- binary_prob_dict = {
346
- "Non-Gaslighting": float(t_probs[0]),
347
- "Gaslighting": max(0.0, 1.0 - float(t_probs[0])),
348
- }
349
- tactic_section = "_Tactic model overruled binary model: Text classified as Non-Gaslighting._"
350
- # ----------------------
351
- else:
352
- tactic_label = Config.TACTIC_LABELS[t_pred]
353
- tactic_desc = Config.TACTIC_DESCRIPTIONS.get(
354
- t_pred, "_No description available._"
355
- )
356
- tactic_prob_dict = {
357
- Config.TACTIC_LABELS[i]: float(t_probs[i])
358
- for i in range(1, 5)
359
- }
360
- tactic_section = f"""
361
  ### Tactic: {tactic_label}
362
  **Confidence:** {t_conf:.1%}
363
 
@@ -395,6 +326,7 @@ def predict_sequential(text: str, model_key: str):
395
  except Exception as e:
396
  return f"Error: {e}\n\n{traceback.format_exc()}", None, None
397
 
 
398
  # ---------------------------------------------------------------------------
399
  # BATCH PREDICTION
400
  # ---------------------------------------------------------------------------
@@ -414,25 +346,14 @@ def batch_predict(file, model_key: str):
414
 
415
  for text in df["sentence"].astype(str):
416
  b_probs, b_pred, b_conf = _infer(b_tok, b_mod, text)
 
 
417
 
418
  if b_pred == 1:
419
  t_probs, t_pred, t_conf = _infer(t_tok, t_mod, text)
420
-
421
- # --- OVERRIDE LOGIC ---
422
- if t_pred == 0:
423
- b_labels.append("Non-Gaslighting")
424
- b_confs.append(f"{t_conf:.1%}")
425
- t_labels.append("N/A")
426
- t_confs.append("N/A")
427
- else:
428
- b_labels.append("Gaslighting")
429
- b_confs.append(f"{b_conf:.1%}")
430
- t_labels.append(Config.TACTIC_LABELS[t_pred])
431
- t_confs.append(f"{t_conf:.1%}")
432
- # ----------------------
433
  else:
434
- b_labels.append("Non-Gaslighting")
435
- b_confs.append(f"{b_conf:.1%}")
436
  t_labels.append("N/A")
437
  t_confs.append("N/A")
438
 
@@ -444,7 +365,8 @@ def batch_predict(file, model_key: str):
444
 
445
  except Exception as e:
446
  return pd.DataFrame({"Error": [str(e)], "Traceback": [traceback.format_exc()]})
447
-
 
448
  # ---------------------------------------------------------------------------
449
  # EXAMPLE TEXTS (political Taglish, aligned with training domain)
450
  # ---------------------------------------------------------------------------
@@ -465,71 +387,6 @@ EXAMPLES = {
465
  }
466
 
467
 
468
- # ---------------------------------------------------------------------------
469
- # CONFUSION MATRIX HELPER
470
- # ---------------------------------------------------------------------------
471
-
472
- def _cm_to_html(labels, matrix):
473
- """Render a confusion matrix as a colour-coded HTML table.
474
-
475
- Diagonal cells (correct predictions) are shaded green.
476
- Off-diagonal cells are shaded red, scaled by count magnitude.
477
- """
478
- n = len(labels)
479
- # find max off-diagonal for colour scaling
480
- max_off = max(
481
- matrix[r][c]
482
- for r in range(n) for c in range(n) if r != c
483
- ) or 1
484
-
485
- header_cells = "".join(
486
- f'<th style="padding:6px 10px;background:#374151;color:#fff;'
487
- f'text-align:center;font-size:12px;">{lbl}</th>'
488
- for lbl in labels
489
- )
490
- rows_html = ""
491
- for r, row_lbl in enumerate(labels):
492
- row_label_cell = (
493
- f'<td style="padding:6px 10px;font-weight:bold;white-space:nowrap;'
494
- f'background:#1f2937;color:#fff;font-size:12px;">True: {row_lbl}</td>'
495
- )
496
- cells = ""
497
- for c, val in enumerate(matrix[r]):
498
- if r == c: # correct prediction
499
- intensity = min(255, 120 + int(val * 4))
500
- bg = f"rgb(34,{intensity},34)"
501
- fg = "#fff"
502
- else: # error cell
503
- alpha = val / max_off
504
- r_ch = int(180 + 75 * alpha)
505
- bg = f"rgb({r_ch},50,50)"
506
- fg = "#fff" if alpha > 0.3 else "#ccc"
507
- cells += (
508
- f'<td style="padding:6px 10px;text-align:center;'
509
- f'background:{bg};color:{fg};font-weight:bold;font-size:13px;">'
510
- f'{val}</td>'
511
- )
512
- rows_html += f"<tr>{row_label_cell}{cells}</tr>"
513
-
514
- table = f"""
515
- <div style="overflow-x:auto;margin:8px 0;">
516
- <table style="border-collapse:collapse;font-family:monospace;width:100%;">
517
- <thead>
518
- <tr>
519
- <th style="padding:6px 10px;background:#111827;color:#fff;text-align:left;
520
- font-size:12px;">Actual \ Predicted</th>
521
- {header_cells}
522
- </tr>
523
- </thead>
524
- <tbody>
525
- {rows_html}
526
- </tbody>
527
- </table>
528
- </div>
529
- """
530
- return table
531
-
532
-
533
  # ---------------------------------------------------------------------------
534
  # MODEL PROFILES
535
  # ---------------------------------------------------------------------------
@@ -752,7 +609,8 @@ def create_interface():
752
  gr.Dataframe(pd.DataFrame(binary_rows), wrap=True)
753
 
754
  gr.Markdown("""
755
- > Confusion matrices (2×2 per model) are shown in the per-model accordions below.
 
756
 
757
  **Key findings - Binary:**
758
  - **mBERT** achieves the best binary Gas-F1 (0.7934) and Macro-F1 (0.8171) on the test set
@@ -782,7 +640,8 @@ def create_interface():
782
  gr.Dataframe(pd.DataFrame(tactic_rows), wrap=True)
783
 
784
  gr.Markdown("""
785
- > Confusion matrices (5×5 per model) are shown in the per-model accordions below.
 
786
 
787
  **Key findings - Tactic:**
788
  - **RoBERTa-Tagalog** leads tactic Macro-F1 (0.6111) - best overall tactic classifier
@@ -809,15 +668,6 @@ def create_interface():
809
  for k, info in Config.MODELS.items():
810
  p = info["performance"]
811
  strengths, limitations = _model_profile(k)
812
- cm_data = Config.CONFUSION_MATRICES[k]
813
- bin_cm_html = _cm_to_html(
814
- cm_data["binary"]["labels"],
815
- cm_data["binary"]["matrix"],
816
- )
817
- tac_cm_html = _cm_to_html(
818
- cm_data["tactic"]["labels"],
819
- cm_data["tactic"]["matrix"],
820
- )
821
  with gr.Accordion(f"{info['display']}", open=False):
822
  gr.Markdown(f"""
823
  **{info['display']}**
@@ -831,12 +681,7 @@ def create_interface():
831
  | Gas. Recall | {p['val_binary_gas_r']:.4f} | {p['test_binary_gas_r']:.4f} |
832
  | Gas. F1 | {p['val_binary_gas_f1']:.4f} | {p['test_binary_gas_f1']:.4f} |
833
  | ROC-AUC | {p['val_binary_roc_auc']:.4f} | {p['test_binary_roc_auc']:.4f} |
834
-
835
- **Binary Confusion Matrix (Test In-Domain)**
836
- """)
837
- gr.HTML(bin_cm_html)
838
- gr.Markdown(f"""
839
- ---
840
 
841
  **Tactic Classification**
842
 
@@ -847,12 +692,7 @@ def create_interface():
847
  | F1 Trivialization & Min. | {p['val_tactic_f1_tm']:.4f} | {p['test_tactic_f1_tm']:.4f} |
848
  | F1 Coercion & Intimidation | {p['val_tactic_f1_ci']:.4f} | {p['test_tactic_f1_ci']:.4f} |
849
  | F1 Knowledge Invalidation | {p['val_tactic_f1_ki']:.4f} | {p['test_tactic_f1_ki']:.4f} |
850
-
851
- **Tactic Confusion Matrix (Test In-Domain)**
852
- """)
853
- gr.HTML(tac_cm_html)
854
- gr.Markdown(f"""
855
- ---
856
 
857
  **Strengths:** {strengths}
858
 
@@ -971,7 +811,7 @@ def create_interface():
971
  | Binary | Gas. Precision, Gas. Recall, Gas. F1, Macro-F1, ROC-AUC, Confusion matrix (2x2) |
972
  | Tactic | Per-class P / R / F1 (D&D, T&M, C&I, KI), Macro-F1, Confusion matrix (5x5) |
973
 
974
- Confusion matrices are displayed per model in the Model Performance tab (per-model accordions).
975
 
976
  ### Technical Details
977
  - Framework: PyTorch + Hugging Face Transformers
@@ -1026,4 +866,4 @@ if __name__ == "__main__":
1026
  print("\nLaunching Gradio ...\n" + "=" * 70)
1027
 
1028
  app = create_interface()
1029
- app.launch()
 
14
 
15
  import os
16
  import traceback
17
+
18
  import gradio as gr
19
  import numpy as np
20
  import pandas as pd
 
32
  # Model version - points to v2 repos (balanced + sentence-extracted dataset)
33
  MODEL_VERSION = "v2"
34
 
35
+
36
  class Config:
37
 
38
  # Model registry
 
140
  # Labels
141
  BINARY_LABELS = {0: "Non-Gaslighting", 1: "Gaslighting"}
142
 
143
+ # Tactic model outputs 5 classes (0 = Non-Gaslighting, 1-4 = tactics).
144
+ # For display we only show the tactic name when class > 0.
145
  TACTIC_LABELS = {
146
  0: "Non-Gaslighting",
147
  1: "Distortion & Denial",
 
161
  "target is incapable of understanding or making valid judgments.",
162
  }
163
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
165
  MAX_LENGTH = 128
166
 
 
208
  print(f" {info['display']} ready")
209
  return entry
210
 
211
+
212
  _cache = ModelCache()
213
 
214
 
 
279
  models["tactic"]["model"],
280
  text,
281
  )
282
+ tactic_label = Config.TACTIC_LABELS[t_pred]
283
+ tactic_desc = Config.TACTIC_DESCRIPTIONS.get(
284
+ t_pred, "_No description available._"
285
+ )
286
+ # Only expose tactic probabilities for the 4 gaslighting classes
287
+ tactic_prob_dict = {
288
+ Config.TACTIC_LABELS[i]: float(t_probs[i])
289
+ for i in range(1, 5)
290
+ }
291
+ tactic_section = f"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
292
  ### Tactic: {tactic_label}
293
  **Confidence:** {t_conf:.1%}
294
 
 
326
  except Exception as e:
327
  return f"Error: {e}\n\n{traceback.format_exc()}", None, None
328
 
329
+
330
  # ---------------------------------------------------------------------------
331
  # BATCH PREDICTION
332
  # ---------------------------------------------------------------------------
 
346
 
347
  for text in df["sentence"].astype(str):
348
  b_probs, b_pred, b_conf = _infer(b_tok, b_mod, text)
349
+ b_labels.append(Config.BINARY_LABELS[b_pred])
350
+ b_confs.append(f"{b_conf:.1%}")
351
 
352
  if b_pred == 1:
353
  t_probs, t_pred, t_conf = _infer(t_tok, t_mod, text)
354
+ t_labels.append(Config.TACTIC_LABELS[t_pred])
355
+ t_confs.append(f"{t_conf:.1%}")
 
 
 
 
 
 
 
 
 
 
 
356
  else:
 
 
357
  t_labels.append("N/A")
358
  t_confs.append("N/A")
359
 
 
365
 
366
  except Exception as e:
367
  return pd.DataFrame({"Error": [str(e)], "Traceback": [traceback.format_exc()]})
368
+
369
+
370
  # ---------------------------------------------------------------------------
371
  # EXAMPLE TEXTS (political Taglish, aligned with training domain)
372
  # ---------------------------------------------------------------------------
 
387
  }
388
 
389
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
390
  # ---------------------------------------------------------------------------
391
  # MODEL PROFILES
392
  # ---------------------------------------------------------------------------
 
609
  gr.Dataframe(pd.DataFrame(binary_rows), wrap=True)
610
 
611
  gr.Markdown("""
612
+ > Confusion matrices (2x2 per model) are saved as
613
+ > `confusion_matrix_test_id.csv` under each model's output folder.
614
 
615
  **Key findings - Binary:**
616
  - **mBERT** achieves the best binary Gas-F1 (0.7934) and Macro-F1 (0.8171) on the test set
 
640
  gr.Dataframe(pd.DataFrame(tactic_rows), wrap=True)
641
 
642
  gr.Markdown("""
643
+ > Confusion matrices (5x5 per model) are saved as
644
+ > `confusion_matrix_test_id.csv` under each model's output folder.
645
 
646
  **Key findings - Tactic:**
647
  - **RoBERTa-Tagalog** leads tactic Macro-F1 (0.6111) - best overall tactic classifier
 
668
  for k, info in Config.MODELS.items():
669
  p = info["performance"]
670
  strengths, limitations = _model_profile(k)
 
 
 
 
 
 
 
 
 
671
  with gr.Accordion(f"{info['display']}", open=False):
672
  gr.Markdown(f"""
673
  **{info['display']}**
 
681
  | Gas. Recall | {p['val_binary_gas_r']:.4f} | {p['test_binary_gas_r']:.4f} |
682
  | Gas. F1 | {p['val_binary_gas_f1']:.4f} | {p['test_binary_gas_f1']:.4f} |
683
  | ROC-AUC | {p['val_binary_roc_auc']:.4f} | {p['test_binary_roc_auc']:.4f} |
684
+ | Confusion matrix | - | confusion_matrix_test_id.csv (2x2) |
 
 
 
 
 
685
 
686
  **Tactic Classification**
687
 
 
692
  | F1 Trivialization & Min. | {p['val_tactic_f1_tm']:.4f} | {p['test_tactic_f1_tm']:.4f} |
693
  | F1 Coercion & Intimidation | {p['val_tactic_f1_ci']:.4f} | {p['test_tactic_f1_ci']:.4f} |
694
  | F1 Knowledge Invalidation | {p['val_tactic_f1_ki']:.4f} | {p['test_tactic_f1_ki']:.4f} |
695
+ | Confusion matrix | - | confusion_matrix_test_id.csv (5x5) |
 
 
 
 
 
696
 
697
  **Strengths:** {strengths}
698
 
 
811
  | Binary | Gas. Precision, Gas. Recall, Gas. F1, Macro-F1, ROC-AUC, Confusion matrix (2x2) |
812
  | Tactic | Per-class P / R / F1 (D&D, T&M, C&I, KI), Macro-F1, Confusion matrix (5x5) |
813
 
814
+ Confusion matrices are saved per model under model_outputs/<task>_<model>/confusion_matrix_test_id.csv.
815
 
816
  ### Technical Details
817
  - Framework: PyTorch + Hugging Face Transformers
 
866
  print("\nLaunching Gradio ...\n" + "=" * 70)
867
 
868
  app = create_interface()
869
+ app.launch()