Spaces:

Tokyosaurus
/

Sentence_Political_Gaslighting_Detector

Paused

App Files Community

Tokyosaurus committed on Mar 8

Commit

f83ba0a

verified ·

1 Parent(s): 740947c

Upload app.py

Browse files

Files changed (1) hide show

app.py +30 -190

app.py CHANGED Viewed

@@ -14,6 +14,7 @@ Usage (Hugging Face Spaces):
 import os
 import traceback
 import gradio as gr
 import numpy as np
 import pandas as pd
@@ -31,6 +32,7 @@ HF_USERNAME = "Tokyosaurus"
 # Model version - points to v2 repos (balanced + sentence-extracted dataset)
 MODEL_VERSION = "v2"
 class Config:
     # Model registry
@@ -138,6 +140,8 @@ class Config:
     # Labels
     BINARY_LABELS = {0: "Non-Gaslighting", 1: "Gaslighting"}
     TACTIC_LABELS = {
         0: "Non-Gaslighting",
         1: "Distortion & Denial",
@@ -157,66 +161,6 @@ class Config:
            "target is incapable of understanding or making valid judgments.",
     }
-    CONFUSION_MATRICES = {
-        "roberta-tagalog": {
-            "binary": {
-                "labels": ["Non-Gaslighting", "Gaslighting"],
-                "matrix": [
-                    [59, 12],
-                    [19, 49],
-                ],
-            },
-            "tactic": {
-                "labels": ["Non-Gaslighting", "D&D", "T&M", "C&I", "KI"],
-                "matrix": [
-                    [56, 3, 9, 2, 1],
-                    [ 5,10, 0, 0, 2],
-                    [ 5, 0, 9, 1, 2],
-                    [ 0, 1, 0,16, 0],
-                    [ 8, 1, 4, 0, 4],
-                ],
-            },
-        },
-        "mbert": {
-            "binary": {
-                "labels": ["Non-Gaslighting", "Gaslighting"],
-                "matrix": [
-                    [66, 5],
-                    [20,48],
-                ],
-            },
-            "tactic": {
-                "labels": ["Non-Gaslighting", "D&D", "T&M", "C&I", "KI"],
-                "matrix": [
-                    [62, 5, 0, 3, 1],
-                    [ 5, 8, 0, 1, 3],
-                    [ 9, 0, 3, 4, 1],
-                    [ 1, 1, 0,14, 1],
-                    [ 9, 2, 1, 2, 3],
-                ],
-            },
-        },
-        "xlm-roberta": {
-            "binary": {
-                "labels": ["Non-Gaslighting", "Gaslighting"],
-                "matrix": [
-                    [60,11],
-                    [19,49],
-                ],
-            },
-            "tactic": {
-                "labels": ["Non-Gaslighting", "D&D", "T&M", "C&I", "KI"],
-                "matrix": [
-                    [59, 7, 1, 3, 1],
-                    [ 6,10, 0, 1, 0],
-                    [ 7, 3, 1, 5, 1],
-                    [ 1, 0, 1,15, 0],
-                    [ 9, 0, 0, 5, 3],
-                ],
-            },
-        },
-    }
     DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
     MAX_LENGTH = 128
@@ -264,6 +208,7 @@ class ModelCache:
         print(f"  {info['display']} ready")
         return entry
 _cache = ModelCache()
@@ -334,30 +279,16 @@ def predict_sequential(text: str, model_key: str):
                 models["tactic"]["model"],
                 text,
             )
-            # --- OVERRIDE LOGIC ---
-            if t_pred == 0:  # Tactic model says it's actually Non-Gaslighting
-                is_gas = False
-                binary_label = "Non-Gaslighting"
-                b_conf = float(t_probs[0])
-                # Safe math for Gradio pie chart
-                binary_prob_dict = {
-                    "Non-Gaslighting": float(t_probs[0]),
-                    "Gaslighting":     max(0.0, 1.0 - float(t_probs[0])),
-                }
-                tactic_section = "_Tactic model overruled binary model: Text classified as Non-Gaslighting._"
-            # ----------------------
-            else:
-                tactic_label = Config.TACTIC_LABELS[t_pred]
-                tactic_desc  = Config.TACTIC_DESCRIPTIONS.get(
-                    t_pred, "_No description available._"
-                )
-                tactic_prob_dict = {
-                    Config.TACTIC_LABELS[i]: float(t_probs[i])
-                    for i in range(1, 5)
-                }
-                tactic_section = f"""
 ### Tactic: {tactic_label}
 **Confidence:** {t_conf:.1%}
@@ -395,6 +326,7 @@ def predict_sequential(text: str, model_key: str):
     except Exception as e:
         return f"Error: {e}\n\n{traceback.format_exc()}", None, None
 # ---------------------------------------------------------------------------
 # BATCH PREDICTION
 # ---------------------------------------------------------------------------
@@ -414,25 +346,14 @@ def batch_predict(file, model_key: str):
         for text in df["sentence"].astype(str):
             b_probs, b_pred, b_conf = _infer(b_tok, b_mod, text)
             if b_pred == 1:
                 t_probs, t_pred, t_conf = _infer(t_tok, t_mod, text)
-                # --- OVERRIDE LOGIC ---
-                if t_pred == 0:
-                    b_labels.append("Non-Gaslighting")
-                    b_confs.append(f"{t_conf:.1%}")
-                    t_labels.append("N/A")
-                    t_confs.append("N/A")
-                else:
-                    b_labels.append("Gaslighting")
-                    b_confs.append(f"{b_conf:.1%}")
-                    t_labels.append(Config.TACTIC_LABELS[t_pred])
-                    t_confs.append(f"{t_conf:.1%}")
-                # ----------------------
             else:
-                b_labels.append("Non-Gaslighting")
-                b_confs.append(f"{b_conf:.1%}")
                 t_labels.append("N/A")
                 t_confs.append("N/A")
@@ -444,7 +365,8 @@ def batch_predict(file, model_key: str):
     except Exception as e:
         return pd.DataFrame({"Error": [str(e)], "Traceback": [traceback.format_exc()]})
 # ---------------------------------------------------------------------------
 # EXAMPLE TEXTS (political Taglish, aligned with training domain)
 # ---------------------------------------------------------------------------
@@ -465,71 +387,6 @@ EXAMPLES = {
 }
-# ---------------------------------------------------------------------------
-# CONFUSION MATRIX HELPER
-# ---------------------------------------------------------------------------
-def _cm_to_html(labels, matrix):
-    """Render a confusion matrix as a colour-coded HTML table.
-    Diagonal cells (correct predictions) are shaded green.
-    Off-diagonal cells are shaded red, scaled by count magnitude.
-    """
-    n = len(labels)
-    # find max off-diagonal for colour scaling
-    max_off = max(
-        matrix[r][c]
-        for r in range(n) for c in range(n) if r != c
-    ) or 1
-    header_cells = "".join(
-        f'<th style="padding:6px 10px;background:#374151;color:#fff;'
-        f'text-align:center;font-size:12px;">{lbl}</th>'
-        for lbl in labels
-    )
-    rows_html = ""
-    for r, row_lbl in enumerate(labels):
-        row_label_cell = (
-            f'<td style="padding:6px 10px;font-weight:bold;white-space:nowrap;'
-            f'background:#1f2937;color:#fff;font-size:12px;">True: {row_lbl}</td>'
-        )
-        cells = ""
-        for c, val in enumerate(matrix[r]):
-            if r == c:                                      # correct prediction
-                intensity = min(255, 120 + int(val * 4))
-                bg = f"rgb(34,{intensity},34)"
-                fg = "#fff"
-            else:                                           # error cell
-                alpha = val / max_off
-                r_ch = int(180 + 75 * alpha)
-                bg = f"rgb({r_ch},50,50)"
-                fg = "#fff" if alpha > 0.3 else "#ccc"
-            cells += (
-                f'<td style="padding:6px 10px;text-align:center;'
-                f'background:{bg};color:{fg};font-weight:bold;font-size:13px;">'
-                f'{val}</td>'
-            )
-        rows_html += f"<tr>{row_label_cell}{cells}</tr>"
-    table = f"""
-<div style="overflow-x:auto;margin:8px 0;">
-<table style="border-collapse:collapse;font-family:monospace;width:100%;">
-  <thead>
-    <tr>
-      <th style="padding:6px 10px;background:#111827;color:#fff;text-align:left;
-                 font-size:12px;">Actual \ Predicted</th>
-      {header_cells}
-    </tr>
-  </thead>
-  <tbody>
-    {rows_html}
-  </tbody>
-</table>
-</div>
-"""
-    return table
 # ---------------------------------------------------------------------------
 # MODEL PROFILES
 # ---------------------------------------------------------------------------
@@ -752,7 +609,8 @@ def create_interface():
                 gr.Dataframe(pd.DataFrame(binary_rows), wrap=True)
                 gr.Markdown("""
-                > Confusion matrices (2×2 per model) are shown in the per-model accordions below.
                 **Key findings - Binary:**
                 - **mBERT** achieves the best binary Gas-F1 (0.7934) and Macro-F1 (0.8171) on the test set
@@ -782,7 +640,8 @@ def create_interface():
                 gr.Dataframe(pd.DataFrame(tactic_rows), wrap=True)
                 gr.Markdown("""
-                > Confusion matrices (5×5 per model) are shown in the per-model accordions below.
                 **Key findings - Tactic:**
                 - **RoBERTa-Tagalog** leads tactic Macro-F1 (0.6111) - best overall tactic classifier
@@ -809,15 +668,6 @@ def create_interface():
                 for k, info in Config.MODELS.items():
                     p = info["performance"]
                     strengths, limitations = _model_profile(k)
-                    cm_data = Config.CONFUSION_MATRICES[k]
-                    bin_cm_html = _cm_to_html(
-                        cm_data["binary"]["labels"],
-                        cm_data["binary"]["matrix"],
-                    )
-                    tac_cm_html = _cm_to_html(
-                        cm_data["tactic"]["labels"],
-                        cm_data["tactic"]["matrix"],
-                    )
                     with gr.Accordion(f"{info['display']}", open=False):
                         gr.Markdown(f"""
 **{info['display']}**
@@ -831,12 +681,7 @@ def create_interface():
 | Gas. Recall | {p['val_binary_gas_r']:.4f} | {p['test_binary_gas_r']:.4f} |
 | Gas. F1 | {p['val_binary_gas_f1']:.4f} | {p['test_binary_gas_f1']:.4f} |
 | ROC-AUC | {p['val_binary_roc_auc']:.4f} | {p['test_binary_roc_auc']:.4f} |
-**Binary Confusion Matrix (Test In-Domain)**
-""")
-                        gr.HTML(bin_cm_html)
-                        gr.Markdown(f"""
----
 **Tactic Classification**
@@ -847,12 +692,7 @@ def create_interface():
 | F1 Trivialization & Min. | {p['val_tactic_f1_tm']:.4f} | {p['test_tactic_f1_tm']:.4f} |
 | F1 Coercion & Intimidation | {p['val_tactic_f1_ci']:.4f} | {p['test_tactic_f1_ci']:.4f} |
 | F1 Knowledge Invalidation | {p['val_tactic_f1_ki']:.4f} | {p['test_tactic_f1_ki']:.4f} |
-**Tactic Confusion Matrix (Test In-Domain)**
-""")
-                        gr.HTML(tac_cm_html)
-                        gr.Markdown(f"""
----
 **Strengths:** {strengths}
@@ -971,7 +811,7 @@ def create_interface():
                 | Binary | Gas. Precision, Gas. Recall, Gas. F1, Macro-F1, ROC-AUC, Confusion matrix (2x2) |
                 | Tactic | Per-class P / R / F1 (D&D, T&M, C&I, KI), Macro-F1, Confusion matrix (5x5) |
-                Confusion matrices are displayed per model in the Model Performance tab (per-model accordions).
                 ### Technical Details
                 - Framework: PyTorch + Hugging Face Transformers
@@ -1026,4 +866,4 @@ if __name__ == "__main__":
     print("\nLaunching Gradio ...\n" + "=" * 70)
     app = create_interface()
-    app.launch()

 import os
 import traceback
 import gradio as gr
 import numpy as np
 import pandas as pd
 # Model version - points to v2 repos (balanced + sentence-extracted dataset)
 MODEL_VERSION = "v2"
 class Config:
     # Model registry
     # Labels
     BINARY_LABELS = {0: "Non-Gaslighting", 1: "Gaslighting"}
+    # Tactic model outputs 5 classes (0 = Non-Gaslighting, 1-4 = tactics).
+    # For display we only show the tactic name when class > 0.
     TACTIC_LABELS = {
         0: "Non-Gaslighting",
         1: "Distortion & Denial",
            "target is incapable of understanding or making valid judgments.",
     }
     DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
     MAX_LENGTH = 128
         print(f"  {info['display']} ready")
         return entry
 _cache = ModelCache()
                 models["tactic"]["model"],
                 text,
             )
+            tactic_label = Config.TACTIC_LABELS[t_pred]
+            tactic_desc  = Config.TACTIC_DESCRIPTIONS.get(
+                t_pred, "_No description available._"
+            )
+            # Only expose tactic probabilities for the 4 gaslighting classes
+            tactic_prob_dict = {
+                Config.TACTIC_LABELS[i]: float(t_probs[i])
+                for i in range(1, 5)
+            }
+            tactic_section = f"""
 ### Tactic: {tactic_label}
 **Confidence:** {t_conf:.1%}
     except Exception as e:
         return f"Error: {e}\n\n{traceback.format_exc()}", None, None
 # ---------------------------------------------------------------------------
 # BATCH PREDICTION
 # ---------------------------------------------------------------------------
         for text in df["sentence"].astype(str):
             b_probs, b_pred, b_conf = _infer(b_tok, b_mod, text)
+            b_labels.append(Config.BINARY_LABELS[b_pred])
+            b_confs.append(f"{b_conf:.1%}")
             if b_pred == 1:
                 t_probs, t_pred, t_conf = _infer(t_tok, t_mod, text)
+                t_labels.append(Config.TACTIC_LABELS[t_pred])
+                t_confs.append(f"{t_conf:.1%}")
             else:
                 t_labels.append("N/A")
                 t_confs.append("N/A")
     except Exception as e:
         return pd.DataFrame({"Error": [str(e)], "Traceback": [traceback.format_exc()]})
 # ---------------------------------------------------------------------------
 # EXAMPLE TEXTS (political Taglish, aligned with training domain)
 # ---------------------------------------------------------------------------
 }
 # ---------------------------------------------------------------------------
 # MODEL PROFILES
 # ---------------------------------------------------------------------------
                 gr.Dataframe(pd.DataFrame(binary_rows), wrap=True)
                 gr.Markdown("""
+                > Confusion matrices (2x2 per model) are saved as
+                > `confusion_matrix_test_id.csv` under each model's output folder.
                 **Key findings - Binary:**
                 - **mBERT** achieves the best binary Gas-F1 (0.7934) and Macro-F1 (0.8171) on the test set
                 gr.Dataframe(pd.DataFrame(tactic_rows), wrap=True)
                 gr.Markdown("""
+                > Confusion matrices (5x5 per model) are saved as
+                > `confusion_matrix_test_id.csv` under each model's output folder.
                 **Key findings - Tactic:**
                 - **RoBERTa-Tagalog** leads tactic Macro-F1 (0.6111) - best overall tactic classifier
                 for k, info in Config.MODELS.items():
                     p = info["performance"]
                     strengths, limitations = _model_profile(k)
                     with gr.Accordion(f"{info['display']}", open=False):
                         gr.Markdown(f"""
 **{info['display']}**
 | Gas. Recall | {p['val_binary_gas_r']:.4f} | {p['test_binary_gas_r']:.4f} |
 | Gas. F1 | {p['val_binary_gas_f1']:.4f} | {p['test_binary_gas_f1']:.4f} |
 | ROC-AUC | {p['val_binary_roc_auc']:.4f} | {p['test_binary_roc_auc']:.4f} |
+| Confusion matrix | - | confusion_matrix_test_id.csv (2x2) |
 **Tactic Classification**
 | F1 Trivialization & Min. | {p['val_tactic_f1_tm']:.4f} | {p['test_tactic_f1_tm']:.4f} |
 | F1 Coercion & Intimidation | {p['val_tactic_f1_ci']:.4f} | {p['test_tactic_f1_ci']:.4f} |
 | F1 Knowledge Invalidation | {p['val_tactic_f1_ki']:.4f} | {p['test_tactic_f1_ki']:.4f} |
+| Confusion matrix | - | confusion_matrix_test_id.csv (5x5) |
 **Strengths:** {strengths}
                 | Binary | Gas. Precision, Gas. Recall, Gas. F1, Macro-F1, ROC-AUC, Confusion matrix (2x2) |
                 | Tactic | Per-class P / R / F1 (D&D, T&M, C&I, KI), Macro-F1, Confusion matrix (5x5) |
+                Confusion matrices are saved per model under model_outputs/<task>_<model>/confusion_matrix_test_id.csv.
                 ### Technical Details
                 - Framework: PyTorch + Hugging Face Transformers
     print("\nLaunching Gradio ...\n" + "=" * 70)
     app = create_interface()
+    app.launch()