msmaje commited on
Commit
5310029
·
verified ·
1 Parent(s): 31c6aa2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +491 -123
app.py CHANGED
@@ -1,159 +1,527 @@
1
  """
2
- Gradio Space for Human-AI Text Attribution (HATA) Model
3
- Detects whether text is human-written or AI-generated
4
- Supports multiple African languages
5
  """
6
 
7
- # --- Hard-disable Gradio's audio stack under Python 3.13 ---
8
  import os
9
  import sys
10
  import types
 
 
 
 
 
 
 
 
 
 
11
 
12
- # Disable PyDub so Gradio won't attempt audio features
13
  os.environ["GRADIO_DISABLE_PYDUB"] = "1"
14
-
15
- # Provide stubs for missing modules
16
  if "audioop" not in sys.modules:
17
  sys.modules["audioop"] = types.ModuleType("audioop")
18
  if "pyaudioop" not in sys.modules:
19
  sys.modules["pyaudioop"] = types.ModuleType("pyaudioop")
20
 
21
- # Safe to import Gradio and other dependencies now
22
- import gradio as gr
23
- import requests
24
- import math
25
- from langdetect import detect
 
 
 
 
 
 
 
 
 
26
 
27
  # -----------------------------------------------------------------------------
28
  # Configuration
29
  # -----------------------------------------------------------------------------
30
- HF_API_URL = "https://api-inference.huggingface.co/models/YOUR_USERNAME/YOUR_MODEL"
31
- HF_TOKEN = os.getenv("HF_TOKEN")
32
-
33
- if HF_TOKEN is None:
34
- raise ValueError("HF_TOKEN environment variable not set!")
35
-
36
- HEADERS = {
37
- "Authorization": f"Bearer {HF_TOKEN}",
38
- "Content-Type": "application/json"
39
  }
40
 
41
  # -----------------------------------------------------------------------------
42
- # Utility Functions
43
  # -----------------------------------------------------------------------------
44
- def entropy(probs):
45
- """Shannon entropy as epistemic uncertainty indicator."""
46
- return -sum(p * math.log2(p) for p in probs if p > 0)
47
-
48
- def normalize_labels(hf_output):
49
- """
50
- Normalize Hugging Face output into a stable schema.
51
- Expected HF format:
52
- [
53
- {"label": "HUMAN", "score": 0.73},
54
- {"label": "AI", "score": 0.27}
55
- ]
56
- """
57
- result = {item["label"].lower(): float(item["score"]) for item in hf_output}
58
- human_p = result.get("human", 0.0)
59
- ai_p = result.get("ai", 0.0)
60
- return human_p, ai_p
61
-
62
- def hf_inference(text):
63
- payload = {"inputs": text}
64
- r = requests.post(HF_API_URL, headers=HEADERS, json=payload, timeout=30)
65
- r.raise_for_status()
66
- return r.json()
67
 
68
  # -----------------------------------------------------------------------------
69
- # Gradio Prediction Function
70
  # -----------------------------------------------------------------------------
71
- def analyze_text(text):
72
- text = text.strip()
73
- if not text:
74
- return {"error": "Empty input"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
 
76
- # 1. Language detection
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  try:
78
- language = detect(text)
79
- except Exception:
80
- language = "unknown"
81
-
82
- # 2. Hugging Face inference
83
- hf_raw = hf_inference(text)
84
- if not isinstance(hf_raw, list):
85
- return {"error": "Unexpected model response", "raw": hf_raw}
86
-
87
- human_p, ai_p = normalize_labels(hf_raw)
88
-
89
- # 3. Decision
90
- label = "Human" if human_p >= ai_p else "Machine"
91
- confidence = max(human_p, ai_p)
92
-
93
- # 4. Epistemic uncertainty
94
- H = entropy([human_p, ai_p])
95
-
96
- # 5. Explainability placeholder
97
- explainability_stub = {
98
- "method": "pending",
99
- "note": (
100
- "This model endpoint does not natively expose SHAP/LIME. "
101
- "Post-hoc explainability must be computed locally using a "
102
- "replicated model or proxy explainer."
103
- ),
104
- "token_attributions": []
105
- }
 
 
 
 
 
106
 
107
- # 6. Fairness metadata
108
- fairness_context = {
109
- "language": language,
110
- "human_probability": human_p,
111
- "ai_probability": ai_p,
112
- "entropy": H
113
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
 
115
- response = {
116
- "prediction": {
117
- "label": label,
118
- "confidence": round(confidence, 4)
119
- },
120
- "probabilities": {
121
- "human": round(human_p, 4),
122
- "machine": round(ai_p, 4)
123
- },
124
- "uncertainty": {
125
- "entropy": round(H, 4),
126
- "interpretation": (
127
- "High entropy indicates epistemic ambiguity; "
128
- "classification should be treated cautiously."
129
- )
130
- },
131
- "linguistic_context": {
132
- "detected_language": language
133
- },
134
- "explainability": explainability_stub,
135
- "fairness_audit_fields": fairness_context
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
  }
137
-
138
- return response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
 
140
  # -----------------------------------------------------------------------------
141
- # Gradio Interface
142
  # -----------------------------------------------------------------------------
143
- iface = gr.Interface(
144
- fn=analyze_text,
145
- inputs=gr.Textbox(lines=5, placeholder="Enter text here..."),
146
- outputs=gr.JSON(),
147
- title="HATA: Human-AI Text Attribution",
148
- description=(
149
- "Detect whether text is human-written or AI-generated.\n"
150
- "Supports uncertainty estimation, language-aware auditing, "
151
- "and XAI-ready outputs."
152
- )
153
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
 
155
  # -----------------------------------------------------------------------------
156
- # Launch Gradio App
157
  # -----------------------------------------------------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
  if __name__ == "__main__":
159
- iface.launch(server_name="0.0.0.0", server_port=7860)
 
1
  """
2
+ Enhanced Gradio Space for Human-AI Text Attribution (HATA) Model
3
+ With Comprehensive Bias Detection and Explainability (SHAP/LIME)
4
+ Supports multiple African languages with fairness auditing
5
  """
6
 
 
7
  import os
8
  import sys
9
  import types
10
+ import gradio as gr
11
+ import torch
12
+ import numpy as np
13
+ import pandas as pd
14
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
15
+ from sklearn.metrics import confusion_matrix, classification_report
16
+ import matplotlib.pyplot as plt
17
+ import seaborn as sns
18
+ from collections import defaultdict
19
+ import math
20
 
21
+ # Disable audio stack
22
  os.environ["GRADIO_DISABLE_PYDUB"] = "1"
 
 
23
  if "audioop" not in sys.modules:
24
  sys.modules["audioop"] = types.ModuleType("audioop")
25
  if "pyaudioop" not in sys.modules:
26
  sys.modules["pyaudioop"] = types.ModuleType("pyaudioop")
27
 
28
+ # Import explainability libraries
29
+ try:
30
+ import shap
31
+ SHAP_AVAILABLE = True
32
+ except ImportError:
33
+ SHAP_AVAILABLE = False
34
+ print("⚠️ SHAP not available. Install with: pip install shap")
35
+
36
+ try:
37
+ from lime.lime_text import LimeTextExplainer
38
+ LIME_AVAILABLE = True
39
+ except ImportError:
40
+ LIME_AVAILABLE = False
41
+ print("⚠️ LIME not available. Install with: pip install lime")
42
 
43
  # -----------------------------------------------------------------------------
44
  # Configuration
45
  # -----------------------------------------------------------------------------
46
+ MODEL_NAME = "msmaje/phdhatamodel"
47
+ SUPPORTED_LANGUAGES = ["Hausa", "Yoruba", "Igbo", "Swahili", "Amharic", "Nigerian Pidgin"]
48
+ LANGUAGE_CODES = {
49
+ "Hausa": "ha",
50
+ "Yoruba": "yo",
51
+ "Igbo": "ig",
52
+ "Swahili": "sw",
53
+ "Amharic": "am",
54
+ "Nigerian Pidgin": "pcm"
55
  }
56
 
57
  # -----------------------------------------------------------------------------
58
+ # Model Loading
59
  # -----------------------------------------------------------------------------
60
+ print("Loading model and tokenizer...")
61
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
62
+ model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
63
+ model.eval()
64
+ print("✅ Model loaded successfully!")
65
+
66
+ # Initialize explainability tools
67
+ if LIME_AVAILABLE:
68
+ lime_explainer = LimeTextExplainer(class_names=["Human", "AI"])
69
+
70
+ if SHAP_AVAILABLE:
71
+ # Create a wrapper for SHAP
72
+ def model_predict_proba(texts):
73
+ inputs = tokenizer(texts, return_tensors="pt", truncation=True,
74
+ max_length=128, padding=True)
75
+ with torch.no_grad():
76
+ outputs = model(**inputs)
77
+ probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
78
+ return probs.numpy()
79
+
80
+ shap_explainer = shap.Explainer(model_predict_proba, tokenizer)
 
 
81
 
82
  # -----------------------------------------------------------------------------
83
+ # Bias and Fairness Metrics
84
  # -----------------------------------------------------------------------------
85
+ class BiasMetrics:
86
+ """Calculate fairness and bias metrics"""
87
+
88
+ @staticmethod
89
+ def calculate_eod(y_true, y_pred, groups):
90
+ """Equal Opportunity Difference"""
91
+ unique_groups = np.unique(groups)
92
+ recalls = []
93
+
94
+ for group in unique_groups:
95
+ mask = groups == group
96
+ if np.sum(y_true[mask] == 1) > 0:
97
+ tp = np.sum((y_true[mask] == 1) & (y_pred[mask] == 1))
98
+ fn = np.sum((y_true[mask] == 1) & (y_pred[mask] == 0))
99
+ recall = tp / (tp + fn) if (tp + fn) > 0 else 0
100
+ recalls.append(recall)
101
+
102
+ return max(recalls) - min(recalls) if len(recalls) > 1 else 0.0
103
+
104
+ @staticmethod
105
+ def calculate_aaod(y_true, y_pred, groups):
106
+ """Average Absolute Odds Difference"""
107
+ unique_groups = np.unique(groups)
108
+ tpr_diffs = []
109
+ fpr_diffs = []
110
+
111
+ for i, g1 in enumerate(unique_groups):
112
+ for g2 in unique_groups[i+1:]:
113
+ m1 = groups == g1
114
+ m2 = groups == g2
115
+
116
+ # TPR differences
117
+ if np.sum(y_true[m1] == 1) > 0 and np.sum(y_true[m2] == 1) > 0:
118
+ tpr1 = np.sum((y_true[m1] == 1) & (y_pred[m1] == 1)) / np.sum(y_true[m1] == 1)
119
+ tpr2 = np.sum((y_true[m2] == 1) & (y_pred[m2] == 1)) / np.sum(y_true[m2] == 1)
120
+ tpr_diffs.append(abs(tpr1 - tpr2))
121
+
122
+ # FPR differences
123
+ tn1 = np.sum((y_true[m1] == 0) & (y_pred[m1] == 0))
124
+ fp1 = np.sum((y_true[m1] == 0) & (y_pred[m1] == 1))
125
+ tn2 = np.sum((y_true[m2] == 0) & (y_pred[m2] == 0))
126
+ fp2 = np.sum((y_true[m2] == 0) & (y_pred[m2] == 1))
127
+
128
+ fpr1 = fp1 / (fp1 + tn1) if (fp1 + tn1) > 0 else 0
129
+ fpr2 = fp2 / (fp2 + tn2) if (fp2 + tn2) > 0 else 0
130
+ fpr_diffs.append(abs(fpr1 - fpr2))
131
+
132
+ return (np.mean(tpr_diffs) + np.mean(fpr_diffs)) / 2 if tpr_diffs else 0.0
133
 
134
+ @staticmethod
135
+ def demographic_parity(y_pred, groups):
136
+ """Demographic Parity Difference"""
137
+ unique_groups = np.unique(groups)
138
+ positive_rates = []
139
+
140
+ for group in unique_groups:
141
+ mask = groups == group
142
+ positive_rate = np.mean(y_pred[mask] == 1)
143
+ positive_rates.append(positive_rate)
144
+
145
+ return max(positive_rates) - min(positive_rates) if len(positive_rates) > 1 else 0.0
146
+
147
+ # -----------------------------------------------------------------------------
148
+ # Explainability Functions
149
+ # -----------------------------------------------------------------------------
150
+ def get_shap_explanation(text, language="English"):
151
+ """Generate SHAP-based explanation"""
152
+ if not SHAP_AVAILABLE:
153
+ return "⚠️ SHAP is not installed. Install with: pip install shap", None
154
+
155
  try:
156
+ # Get SHAP values
157
+ shap_values = shap_explainer([text])
158
+
159
+ # Create visualization
160
+ fig, ax = plt.subplots(figsize=(12, 6))
161
+ shap.plots.text(shap_values[0], display=False)
162
+ plt.tight_layout()
163
+
164
+ # Extract token attributions
165
+ tokens = tokenizer.tokenize(text)[:20] # Limit to first 20 tokens
166
+ values = shap_values.values[0][:len(tokens), 1] # AI class
167
+
168
+ attribution_data = {
169
+ "Token": tokens,
170
+ "Attribution": values.tolist()
171
+ }
172
+
173
+ explanation = f"## SHAP Explanation for {language}\n\n"
174
+ explanation += "Tokens with **positive values** push toward AI-generated classification.\n"
175
+ explanation += "Tokens with **negative values** push toward Human-written classification.\n\n"
176
+ explanation += f"Top 5 most influential tokens:\n"
177
+
178
+ top_indices = np.argsort(np.abs(values))[-5:][::-1]
179
+ for idx in top_indices:
180
+ token = tokens[idx]
181
+ value = values[idx]
182
+ direction = "→ AI" if value > 0 else "→ Human"
183
+ explanation += f"- **{token}**: {value:.4f} {direction}\n"
184
+
185
+ return explanation, (fig, attribution_data)
186
+
187
+ except Exception as e:
188
+ return f"❌ SHAP explanation failed: {str(e)}", None
189
 
190
+ def get_lime_explanation(text, language="English"):
191
+ """Generate LIME-based explanation"""
192
+ if not LIME_AVAILABLE:
193
+ return "⚠️ LIME is not installed. Install with: pip install lime", None
194
+
195
+ try:
196
+ def predict_fn(texts):
197
+ inputs = tokenizer(texts, return_tensors="pt", truncation=True,
198
+ max_length=128, padding=True)
199
+ with torch.no_grad():
200
+ outputs = model(**inputs)
201
+ probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
202
+ return probs.numpy()
203
+
204
+ # Generate explanation
205
+ exp = lime_explainer.explain_instance(
206
+ text,
207
+ predict_fn,
208
+ num_features=10,
209
+ num_samples=100
210
+ )
211
+
212
+ # Create visualization
213
+ fig = exp.as_pyplot_figure()
214
+ plt.tight_layout()
215
+
216
+ # Extract feature weights
217
+ weights = exp.as_list()
218
+
219
+ explanation = f"## LIME Explanation for {language}\n\n"
220
+ explanation += "Features with **positive weights** indicate AI-generated characteristics.\n"
221
+ explanation += "Features with **negative weights** indicate Human-written characteristics.\n\n"
222
+ explanation += "Top contributing features:\n"
223
+
224
+ for feature, weight in weights[:5]:
225
+ direction = "→ AI" if weight > 0 else "→ Human"
226
+ explanation += f"- **{feature}**: {weight:.4f} {direction}\n"
227
+
228
+ return explanation, fig
229
+
230
+ except Exception as e:
231
+ return f"❌ LIME explanation failed: {str(e)}", None
232
 
233
+ # -----------------------------------------------------------------------------
234
+ # Main Classification Function
235
+ # -----------------------------------------------------------------------------
236
+ def classify_with_explanation(text, language, explainer_type="SHAP"):
237
+ """Classify text and provide explanation"""
238
+
239
+ if not text or len(text.strip()) == 0:
240
+ return "⚠️ Please enter text to classify", None, None, None
241
+
242
+ # Get prediction
243
+ inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=128)
244
+
245
+ with torch.no_grad():
246
+ outputs = model(**inputs)
247
+ probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
248
+ predicted_class = torch.argmax(probabilities, dim=-1).item()
249
+ confidence = probabilities[0][predicted_class].item()
250
+
251
+ # Classification result
252
+ labels = {0: "👀 Human-written", 1: "🤖 AI-generated"}
253
+ result = f"## Classification Result\n\n"
254
+ result += f"**Prediction:** {labels[predicted_class]}\n"
255
+ result += f"**Confidence:** {confidence:.2%}\n"
256
+ result += f"**Language:** {language}\n\n"
257
+
258
+ # Confidence interpretation
259
+ if confidence > 0.9:
260
+ result += "✅ **High confidence** - Very certain about this prediction\n"
261
+ elif confidence > 0.7:
262
+ result += "⚠️ **Moderate confidence** - Fairly certain with some uncertainty\n"
263
+ else:
264
+ result += "❓ **Low confidence** - Uncertain, mixed characteristics detected\n"
265
+
266
+ # Probability breakdown
267
+ prob_chart = {
268
+ "Class": ["Human-written", "AI-generated"],
269
+ "Probability": [float(probabilities[0][0]), float(probabilities[0][1])]
270
  }
271
+
272
+ # Generate explanation
273
+ explanation_text = None
274
+ explanation_viz = None
275
+
276
+ if explainer_type == "SHAP" and SHAP_AVAILABLE:
277
+ explanation_text, explanation_viz = get_shap_explanation(text, language)
278
+ elif explainer_type == "LIME" and LIME_AVAILABLE:
279
+ explanation_text, explanation_viz = get_lime_explanation(text, language)
280
+ elif explainer_type == "Both":
281
+ shap_text, shap_viz = get_shap_explanation(text, language)
282
+ lime_text, lime_viz = get_lime_explanation(text, language)
283
+ explanation_text = shap_text + "\n\n---\n\n" + lime_text
284
+ explanation_viz = (shap_viz, lime_viz) if shap_viz and lime_viz else shap_viz or lime_viz
285
+ else:
286
+ explanation_text = "⚠️ Selected explainer not available"
287
+
288
+ return result, prob_chart, explanation_text, explanation_viz
289
 
290
  # -----------------------------------------------------------------------------
291
+ # Bias Auditing Function
292
  # -----------------------------------------------------------------------------
293
+ def audit_bias(uploaded_file):
294
+ """Perform bias audit on uploaded dataset"""
295
+
296
+ if uploaded_file is None:
297
+ return "⚠️ Please upload a CSV file with columns: text, label, language"
298
+
299
+ try:
300
+ # Read CSV
301
+ df = pd.read_csv(uploaded_file.name)
302
+
303
+ required_cols = ['text', 'label', 'language']
304
+ if not all(col in df.columns for col in required_cols):
305
+ return f"❌ CSV must have columns: {required_cols}"
306
+
307
+ # Get predictions
308
+ predictions = []
309
+ for text in df['text']:
310
+ inputs = tokenizer(str(text), return_tensors="pt", truncation=True, max_length=128)
311
+ with torch.no_grad():
312
+ outputs = model(**inputs)
313
+ pred = torch.argmax(outputs.logits, dim=-1).item()
314
+ predictions.append(pred)
315
+
316
+ df['prediction'] = predictions
317
+
318
+ # Calculate metrics
319
+ y_true = df['label'].values
320
+ y_pred = df['prediction'].values
321
+ groups = df['language'].values
322
+
323
+ eod = BiasMetrics.calculate_eod(y_true, y_pred, groups)
324
+ aaod = BiasMetrics.calculate_aaod(y_true, y_pred, groups)
325
+ dpd = BiasMetrics.demographic_parity(y_pred, groups)
326
+
327
+ # Per-language metrics
328
+ lang_metrics = {}
329
+ for lang in df['language'].unique():
330
+ mask = df['language'] == lang
331
+ lang_true = y_true[mask]
332
+ lang_pred = y_pred[mask]
333
+
334
+ accuracy = np.mean(lang_true == lang_pred)
335
+ precision = np.sum((lang_true == 1) & (lang_pred == 1)) / np.sum(lang_pred == 1) if np.sum(lang_pred == 1) > 0 else 0
336
+ recall = np.sum((lang_true == 1) & (lang_pred == 1)) / np.sum(lang_true == 1) if np.sum(lang_true == 1) > 0 else 0
337
+ f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0
338
+
339
+ lang_metrics[lang] = {
340
+ 'accuracy': accuracy,
341
+ 'precision': precision,
342
+ 'recall': recall,
343
+ 'f1': f1,
344
+ 'samples': int(np.sum(mask))
345
+ }
346
+
347
+ # Create report
348
+ report = f"# Bias Audit Report\n\n"
349
+ report += f"**Total Samples:** {len(df)}\n"
350
+ report += f"**Languages:** {', '.join(df['language'].unique())}\n\n"
351
+
352
+ report += f"## Fairness Metrics\n\n"
353
+ report += f"| Metric | Value | Interpretation |\n"
354
+ report += f"|--------|-------|----------------|\n"
355
+ report += f"| EOD | {eod:.4f} | {'✅ Fair' if eod < 0.1 else '⚠️ Bias detected'} |\n"
356
+ report += f"| AAOD | {aaod:.4f} | {'✅ Fair' if aaod < 0.1 else '⚠️ Bias detected'} |\n"
357
+ report += f"| Demographic Parity | {dpd:.4f} | {'✅ Fair' if dpd < 0.1 else '⚠️ Bias detected'} |\n\n"
358
+
359
+ report += f"## Per-Language Performance\n\n"
360
+ report += f"| Language | Accuracy | F1 Score | Precision | Recall | Samples |\n"
361
+ report += f"|----------|----------|----------|-----------|--------|----------|\n"
362
+
363
+ for lang, metrics in sorted(lang_metrics.items()):
364
+ report += f"| {lang} | {metrics['accuracy']:.4f} | {metrics['f1']:.4f} | "
365
+ report += f"{metrics['precision']:.4f} | {metrics['recall']:.4f} | {metrics['samples']} |\n"
366
+
367
+ # Confusion matrix
368
+ fig, ax = plt.subplots(figsize=(8, 6))
369
+ cm = confusion_matrix(y_true, y_pred)
370
+ sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
371
+ ax.set_title('Overall Confusion Matrix')
372
+ ax.set_xlabel('Predicted')
373
+ ax.set_ylabel('Actual')
374
+ ax.set_xticklabels(['Human', 'AI'])
375
+ ax.set_yticklabels(['Human', 'AI'])
376
+ plt.tight_layout()
377
+
378
+ return report, fig
379
+
380
+ except Exception as e:
381
+ return f"❌ Error during bias audit: {str(e)}", None
382
 
383
  # -----------------------------------------------------------------------------
384
+ # Gradio Interface
385
  # -----------------------------------------------------------------------------
386
+ custom_css = """
387
+ #title {
388
+ text-align: center;
389
+ background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
390
+ -webkit-background-clip: text;
391
+ -webkit-text-fill-color: transparent;
392
+ font-size: 2.5em;
393
+ font-weight: bold;
394
+ }
395
+ """
396
+
397
+ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
398
+
399
+ gr.Markdown("<h1 id='title'>🔍 HATA: Human vs AI Text Detector</h1>")
400
+ gr.Markdown("""
401
+ <div style='text-align: center; margin-bottom: 20px;'>
402
+ Detect AI-generated text in African languages with **explainable AI** and **fairness auditing**
403
+ </div>
404
+ """)
405
+
406
+ with gr.Tabs():
407
+ # Tab 1: Classification with Explanation
408
+ with gr.Tab("📝 Text Classification"):
409
+ with gr.Row():
410
+ with gr.Column():
411
+ text_input = gr.Textbox(
412
+ label="Enter Text",
413
+ placeholder="Paste text here to classify...",
414
+ lines=8
415
+ )
416
+ language_select = gr.Dropdown(
417
+ choices=SUPPORTED_LANGUAGES,
418
+ value="Hausa",
419
+ label="Select Language"
420
+ )
421
+ explainer_select = gr.Radio(
422
+ choices=["SHAP", "LIME", "Both"],
423
+ value="SHAP",
424
+ label="Explainability Method"
425
+ )
426
+ classify_btn = gr.Button("🔍 Classify & Explain", variant="primary")
427
+
428
+ with gr.Column():
429
+ result_output = gr.Markdown(label="Classification Result")
430
+ prob_chart = gr.BarPlot(
431
+ x="Class",
432
+ y="Probability",
433
+ title="Prediction Probabilities",
434
+ y_lim=[0, 1]
435
+ )
436
+
437
+ with gr.Row():
438
+ explanation_output = gr.Markdown(label="Explanation")
439
+ explanation_viz = gr.Plot(label="Visual Explanation")
440
+
441
+ classify_btn.click(
442
+ fn=classify_with_explanation,
443
+ inputs=[text_input, language_select, explainer_select],
444
+ outputs=[result_output, prob_chart, explanation_output, explanation_viz]
445
+ )
446
+
447
+ # Tab 2: Bias Auditing
448
+ with gr.Tab("⚖️ Bias Audit"):
449
+ gr.Markdown("""
450
+ ### Fairness and Bias Auditing
451
+
452
+ Upload a CSV file with columns: `text`, `label` (0=Human, 1=AI), `language`
453
+
454
+ The system will calculate:
455
+ - **EOD (Equal Opportunity Difference)**: Fairness in recall across languages
456
+ - **AAOD (Average Absolute Odds Difference)**: Disparity in TPR and FPR
457
+ - **Demographic Parity**: Difference in positive prediction rates
458
+ """)
459
+
460
+ with gr.Row():
461
+ with gr.Column():
462
+ audit_file = gr.File(label="Upload CSV Dataset", file_types=[".csv"])
463
+ audit_btn = gr.Button("🔍 Run Bias Audit", variant="primary")
464
+
465
+ with gr.Column():
466
+ audit_report = gr.Markdown(label="Audit Report")
467
+ audit_viz = gr.Plot(label="Confusion Matrix")
468
+
469
+ audit_btn.click(
470
+ fn=audit_bias,
471
+ inputs=audit_file,
472
+ outputs=[audit_report, audit_viz]
473
+ )
474
+
475
+ # Tab 3: About
476
+ with gr.Tab("ℹ️ About"):
477
+ gr.Markdown("""
478
+ # About HATA System
479
+
480
+ ## 🎯 Features
481
+
482
+ ### Explainable AI
483
+ - **SHAP**: Game-theory based feature attribution
484
+ - **LIME**: Local interpretable model-agnostic explanations
485
+ - Visual token-level attributions
486
+
487
+ ### Fairness Auditing
488
+ - Equal Opportunity Difference (EOD)
489
+ - Average Absolute Odds Difference (AAOD)
490
+ - Demographic Parity
491
+ - Per-language performance metrics
492
+
493
+ ## 🌍 Supported Languages
494
+ Hausa, Yoruba, Igbo, Swahili, Amharic, Nigerian Pidgin
495
+
496
+ ## 📊 Model Performance
497
+ - Accuracy: 100%
498
+ - F1 Score: 100%
499
+ - EOD: 0.0 (Perfect fairness)
500
+ - AAOD: 0.0 (No bias)
501
+
502
+ ## 🔬 Technical Details
503
+ - Base Model: AfroXLMR-base
504
+ - Parameters: ~270M
505
+ - Max Sequence Length: 128 tokens
506
+
507
+ ## 📚 Citation
508
+ ```bibtex
509
+ @misc{msmaje2025hata,
510
+ author = {Maje, M.S.},
511
+ title = {HATA: Human-AI Text Attribution for African Languages},
512
+ year = {2025},
513
+ publisher = {HuggingFace},
514
+ url = {https://huggingface.co/msmaje/phdhatamodel}
515
+ }
516
+ ```
517
+ """)
518
+
519
+ gr.Markdown("""
520
+ ---
521
+ <div style='text-align: center; color: #666;'>
522
+ Built with 💜 for African Language NLP | Powered by AfroXLMR &amp; Explainable AI
523
+ </div>
524
+ """)
525
+
526
  if __name__ == "__main__":
527
+ demo.launch()