msmaje committed
Commit 22f8fd9 · verified · 1 Parent(s): daa9498

Update app.py

Files changed (1): app.py (+72 -280)
app.py CHANGED
@@ -12,10 +12,9 @@ import torch
 import numpy as np
 import pandas as pd
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
-from sklearn.metrics import confusion_matrix, classification_report
+from sklearn.metrics import confusion_matrix
 import matplotlib.pyplot as plt
 import seaborn as sns
-from collections import defaultdict
 import math
 
 # Disable audio stack
@@ -68,29 +67,22 @@ if LIME_AVAILABLE:
     lime_explainer = LimeTextExplainer(class_names=["Human", "AI"])
 
 if SHAP_AVAILABLE:
-    # Create a wrapper for SHAP
     def model_predict_proba(texts):
-        inputs = tokenizer(texts, return_tensors="pt", truncation=True,
-                           max_length=128, padding=True)
+        inputs = tokenizer(texts, return_tensors="pt", truncation=True, max_length=128, padding=True)
         with torch.no_grad():
            outputs = model(**inputs)
         probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
         return probs.numpy()
-
     shap_explainer = shap.Explainer(model_predict_proba, tokenizer)
 
 # -----------------------------------------------------------------------------
 # Bias and Fairness Metrics
 # -----------------------------------------------------------------------------
 class BiasMetrics:
-    """Calculate fairness and bias metrics"""
-
     @staticmethod
     def calculate_eod(y_true, y_pred, groups):
-        """Equal Opportunity Difference"""
         unique_groups = np.unique(groups)
         recalls = []
-
         for group in unique_groups:
             mask = groups == group
             if np.sum(y_true[mask] == 1) > 0:
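
Note: both SHAP and LIME drive the model through a plain "texts in, probabilities out" callable like `model_predict_proba` above. A standalone sketch of that contract, using a hypothetical stand-in checkpoint rather than this Space's own model:

```python
# Sketch of the explainer prediction contract (stand-in model, illustration only).
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

MODEL_ID = "distilbert-base-uncased-finetuned-sst-2-english"  # hypothetical stand-in
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)

def model_predict_proba(texts):
    # SHAP/LIME pass a batch of raw strings and expect an (n, num_classes) array back.
    inputs = tokenizer(list(texts), return_tensors="pt", truncation=True,
                       max_length=128, padding=True)
    with torch.no_grad():
        logits = model(**inputs).logits
    return torch.nn.functional.softmax(logits, dim=-1).numpy()

print(model_predict_proba(["hello world"]).shape)  # -> (1, 2)
```
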
@@ -98,135 +90,84 @@ class BiasMetrics:
                 tp = np.sum((y_true[mask] == 1) & (y_pred[mask] == 1))
                 fn = np.sum((y_true[mask] == 1) & (y_pred[mask] == 0))
                 recall = tp / (tp + fn) if (tp + fn) > 0 else 0
                 recalls.append(recall)
-
         return max(recalls) - min(recalls) if len(recalls) > 1 else 0.0
-
+
     @staticmethod
     def calculate_aaod(y_true, y_pred, groups):
-        """Average Absolute Odds Difference"""
         unique_groups = np.unique(groups)
         tpr_diffs = []
         fpr_diffs = []
-
         for i, g1 in enumerate(unique_groups):
             for g2 in unique_groups[i+1:]:
                 m1 = groups == g1
                 m2 = groups == g2
-
-                # TPR differences
                 if np.sum(y_true[m1] == 1) > 0 and np.sum(y_true[m2] == 1) > 0:
                     tpr1 = np.sum((y_true[m1] == 1) & (y_pred[m1] == 1)) / np.sum(y_true[m1] == 1)
                     tpr2 = np.sum((y_true[m2] == 1) & (y_pred[m2] == 1)) / np.sum(y_true[m2] == 1)
                     tpr_diffs.append(abs(tpr1 - tpr2))
-
-                # FPR differences
                 tn1 = np.sum((y_true[m1] == 0) & (y_pred[m1] == 0))
                 fp1 = np.sum((y_true[m1] == 0) & (y_pred[m1] == 1))
                 tn2 = np.sum((y_true[m2] == 0) & (y_pred[m2] == 0))
                 fp2 = np.sum((y_true[m2] == 0) & (y_pred[m2] == 1))
-
                 fpr1 = fp1 / (fp1 + tn1) if (fp1 + tn1) > 0 else 0
                 fpr2 = fp2 / (fp2 + tn2) if (fp2 + tn2) > 0 else 0
                 fpr_diffs.append(abs(fpr1 - fpr2))
-
         return (np.mean(tpr_diffs) + np.mean(fpr_diffs)) / 2 if tpr_diffs else 0.0
 
     @staticmethod
     def demographic_parity(y_pred, groups):
-        """Demographic Parity Difference"""
         unique_groups = np.unique(groups)
         positive_rates = []
-
         for group in unique_groups:
             mask = groups == group
-            positive_rate = np.mean(y_pred[mask] == 1)
-            positive_rates.append(positive_rate)
-
+            positive_rates.append(np.mean(y_pred[mask] == 1))
         return max(positive_rates) - min(positive_rates) if len(positive_rates) > 1 else 0.0
 
 # -----------------------------------------------------------------------------
 # Explainability Functions
 # -----------------------------------------------------------------------------
 def get_shap_explanation(text, language="English"):
-    """Generate SHAP-based explanation"""
     if not SHAP_AVAILABLE:
-        return "⚠️ SHAP is not installed. Install with: pip install shap", None
-
+        return "⚠️ SHAP not installed", None
     try:
-        # Get SHAP values
         shap_values = shap_explainer([text])
-
-        # Create visualization
-        fig, ax = plt.subplots(figsize=(12, 6))
+        fig, ax = plt.subplots(figsize=(12,6))
         shap.plots.text(shap_values[0], display=False)
         plt.tight_layout()
-
-        # Extract token attributions
-        tokens = tokenizer.tokenize(text)[:20]  # Limit to first 20 tokens
-        values = shap_values.values[0][:len(tokens), 1]  # AI class
-
-        attribution_data = {
-            "Token": tokens,
-            "Attribution": values.tolist()
-        }
-
+        tokens = tokenizer.tokenize(text)[:20]
+        values = shap_values.values[0][:len(tokens),1]
+        attribution_data = {"Token": tokens, "Attribution": values.tolist()}
         explanation = f"## SHAP Explanation for {language}\n\n"
-        explanation += "Tokens with **positive values** push toward AI-generated classification.\n"
-        explanation += "Tokens with **negative values** push toward Human-written classification.\n\n"
-        explanation += f"Top 5 most influential tokens:\n"
-
+        explanation += "Top 5 tokens influencing AI/Human prediction:\n"
         top_indices = np.argsort(np.abs(values))[-5:][::-1]
         for idx in top_indices:
             token = tokens[idx]
             value = values[idx]
             direction = "→ AI" if value > 0 else "→ Human"
             explanation += f"- **{token}**: {value:.4f} {direction}\n"
-
         return explanation, (fig, attribution_data)
-
     except Exception as e:
         return f"❌ SHAP explanation failed: {str(e)}", None
 
 def get_lime_explanation(text, language="English"):
-    """Generate LIME-based explanation"""
     if not LIME_AVAILABLE:
-        return "⚠️ LIME is not installed. Install with: pip install lime", None
-
+        return "⚠️ LIME not installed", None
     try:
         def predict_fn(texts):
-            inputs = tokenizer(texts, return_tensors="pt", truncation=True,
-                               max_length=128, padding=True)
+            inputs = tokenizer(texts, return_tensors="pt", truncation=True, max_length=128, padding=True)
             with torch.no_grad():
                 outputs = model(**inputs)
             probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
             return probs.numpy()
-
-        # Generate explanation
-        exp = lime_explainer.explain_instance(
-            text,
-            predict_fn,
-            num_features=10,
-            num_samples=100
-        )
-
-        # Create visualization
+        exp = lime_explainer.explain_instance(text, predict_fn, num_features=10, num_samples=100)
         fig = exp.as_pyplot_figure()
         plt.tight_layout()
-
-        # Extract feature weights
         weights = exp.as_list()
-
-        explanation = f"## LIME Explanation for {language}\n\n"
-        explanation += "Features with **positive weights** indicate AI-generated characteristics.\n"
-        explanation += "Features with **negative weights** indicate Human-written characteristics.\n\n"
-        explanation += "Top contributing features:\n"
-
+        explanation = f"## LIME Explanation for {language}\n\nTop contributing features:\n"
         for feature, weight in weights[:5]:
             direction = "→ AI" if weight > 0 else "→ Human"
             explanation += f"- **{feature}**: {weight:.4f} {direction}\n"
-
         return explanation, fig
-
     except Exception as e:
         return f"❌ LIME explanation failed: {str(e)}", None
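
For reference, the three fairness metrics above reduce to gaps in recall, TPR/FPR, and positive-rate across language groups. A toy sanity check, assuming `BiasMetrics` from the file above is in scope (the arrays are invented for illustration):

```python
# Toy sanity check for BiasMetrics (hypothetical arrays, not from the commit).
import numpy as np

y_true = np.array([1, 1, 0, 0, 1, 1, 0, 0])
y_pred = np.array([1, 0, 0, 1, 1, 1, 0, 0])
groups = np.array(["Hausa"] * 4 + ["Yoruba"] * 4)

# Hausa: recall 0.5, FPR 0.5; Yoruba: recall 1.0, FPR 0.0
print(BiasMetrics.calculate_eod(y_true, y_pred, groups))    # 1.0 - 0.5 = 0.5
print(BiasMetrics.calculate_aaod(y_true, y_pred, groups))   # (0.5 + 0.5) / 2 = 0.5
print(BiasMetrics.demographic_parity(y_pred, groups))       # |0.5 - 0.5| = 0.0
```
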
@@ -234,149 +175,90 @@ def get_lime_explanation(text, language="English"):
 # Main Classification Function
 # -----------------------------------------------------------------------------
 def classify_with_explanation(text, language, explainer_type="SHAP"):
-    """Classify text and provide explanation"""
-
-    if not text or len(text.strip()) == 0:
-        return "⚠️ Please enter text to classify", None, None, None
-
-    # Get prediction
+    if not text or len(text.strip())==0:
+        return "⚠️ Please enter text", None, None, None
     inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=128)
-
     with torch.no_grad():
         outputs = model(**inputs)
-    probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
-    predicted_class = torch.argmax(probabilities, dim=-1).item()
-    confidence = probabilities[0][predicted_class].item()
-
-    # Classification result
+    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
+    pred_class = torch.argmax(probs, dim=-1).item()
+    confidence = probs[0][pred_class].item()
     labels = {0: "👤 Human-written", 1: "🤖 AI-generated"}
-    result = f"## Classification Result\n\n"
-    result += f"**Prediction:** {labels[predicted_class]}\n"
-    result += f"**Confidence:** {confidence:.2%}\n"
-    result += f"**Language:** {language}\n\n"
-
-    # Confidence interpretation
-    if confidence > 0.9:
-        result += "✅ **High confidence** - Very certain about this prediction\n"
-    elif confidence > 0.7:
-        result += "⚠️ **Moderate confidence** - Fairly certain with some uncertainty\n"
+    result = f"## Classification Result\n**Prediction:** {labels[pred_class]}\n**Confidence:** {confidence:.2%}\n**Language:** {language}\n\n"
+    if confidence>0.9:
+        result += "✅ High confidence\n"
+    elif confidence>0.7:
+        result += "⚠️ Moderate confidence\n"
     else:
-        result += "❓ **Low confidence** - Uncertain, mixed characteristics detected\n"
-
-    # Probability breakdown
-    prob_chart = {
-        "Class": ["Human-written", "AI-generated"],
-        "Probability": [float(probabilities[0][0]), float(probabilities[0][1])]
-    }
-
-    # Generate explanation
-    explanation_text = None
-    explanation_viz = None
-
-    if explainer_type == "SHAP" and SHAP_AVAILABLE:
+        result += "❓ Low confidence\n"
+    prob_chart = {"Class":["Human-written","AI-generated"],"Probability":[float(probs[0][0]), float(probs[0][1])]}
+    explanation_text, explanation_viz = None, None
+    if explainer_type=="SHAP" and SHAP_AVAILABLE:
         explanation_text, explanation_viz = get_shap_explanation(text, language)
-    elif explainer_type == "LIME" and LIME_AVAILABLE:
+    elif explainer_type=="LIME" and LIME_AVAILABLE:
         explanation_text, explanation_viz = get_lime_explanation(text, language)
-    elif explainer_type == "Both":
+    elif explainer_type=="Both":
         shap_text, shap_viz = get_shap_explanation(text, language)
         lime_text, lime_viz = get_lime_explanation(text, language)
         explanation_text = shap_text + "\n\n---\n\n" + lime_text
         explanation_viz = (shap_viz, lime_viz) if shap_viz and lime_viz else shap_viz or lime_viz
     else:
         explanation_text = "⚠️ Selected explainer not available"
-
     return result, prob_chart, explanation_text, explanation_viz
 
 # -----------------------------------------------------------------------------
 # Bias Auditing Function
 # -----------------------------------------------------------------------------
 def audit_bias(uploaded_file):
-    """Perform bias audit on uploaded dataset"""
-
     if uploaded_file is None:
-        return "⚠️ Please upload a CSV file with columns: text, label, language"
-
+        return "⚠️ Please upload a CSV file", None
     try:
-        # Read CSV
         df = pd.read_csv(uploaded_file.name)
-
-        required_cols = ['text', 'label', 'language']
+        required_cols = ['text','label','language']
         if not all(col in df.columns for col in required_cols):
-            return f"❌ CSV must have columns: {required_cols}"
-
-        # Get predictions
-        predictions = []
+            return f"❌ CSV must have columns: {required_cols}", None
+        preds = []
         for text in df['text']:
             inputs = tokenizer(str(text), return_tensors="pt", truncation=True, max_length=128)
             with torch.no_grad():
                 outputs = model(**inputs)
             pred = torch.argmax(outputs.logits, dim=-1).item()
-            predictions.append(pred)
-
-        df['prediction'] = predictions
-
-        # Calculate metrics
+            preds.append(pred)
+        df['prediction'] = preds
         y_true = df['label'].values
         y_pred = df['prediction'].values
         groups = df['language'].values
-
         eod = BiasMetrics.calculate_eod(y_true, y_pred, groups)
         aaod = BiasMetrics.calculate_aaod(y_true, y_pred, groups)
         dpd = BiasMetrics.demographic_parity(y_pred, groups)
-
-        # Per-language metrics
         lang_metrics = {}
         for lang in df['language'].unique():
-            mask = df['language'] == lang
+            mask = df['language']==lang
             lang_true = y_true[mask]
             lang_pred = y_pred[mask]
-
-            accuracy = np.mean(lang_true == lang_pred)
-            precision = np.sum((lang_true == 1) & (lang_pred == 1)) / np.sum(lang_pred == 1) if np.sum(lang_pred == 1) > 0 else 0
-            recall = np.sum((lang_true == 1) & (lang_pred == 1)) / np.sum(lang_true == 1) if np.sum(lang_true == 1) > 0 else 0
-            f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0
-
-            lang_metrics[lang] = {
-                'accuracy': accuracy,
-                'precision': precision,
-                'recall': recall,
-                'f1': f1,
-                'samples': int(np.sum(mask))
-            }
-
-        # Create report
-        report = f"# Bias Audit Report\n\n"
-        report += f"**Total Samples:** {len(df)}\n"
-        report += f"**Languages:** {', '.join(df['language'].unique())}\n\n"
-
-        report += f"## Fairness Metrics\n\n"
-        report += f"| Metric | Value | Interpretation |\n"
-        report += f"|--------|-------|----------------|\n"
-        report += f"| EOD | {eod:.4f} | {'✅ Fair' if eod < 0.1 else '⚠️ Bias detected'} |\n"
-        report += f"| AAOD | {aaod:.4f} | {'✅ Fair' if aaod < 0.1 else '⚠️ Bias detected'} |\n"
-        report += f"| Demographic Parity | {dpd:.4f} | {'✅ Fair' if dpd < 0.1 else '⚠️ Bias detected'} |\n\n"
-
-        report += f"## Per-Language Performance\n\n"
-        report += f"| Language | Accuracy | F1 Score | Precision | Recall | Samples |\n"
-        report += f"|----------|----------|----------|-----------|--------|----------|\n"
-
+            accuracy = np.mean(lang_true==lang_pred)
+            precision = np.sum((lang_true==1)&(lang_pred==1))/np.sum(lang_pred==1) if np.sum(lang_pred==1)>0 else 0
+            recall = np.sum((lang_true==1)&(lang_pred==1))/np.sum(lang_true==1) if np.sum(lang_true==1)>0 else 0
+            f1 = 2*precision*recall/(precision+recall) if (precision+recall)>0 else 0
+            lang_metrics[lang] = {'accuracy':accuracy,'precision':precision,'recall':recall,'f1':f1,'samples':int(np.sum(mask))}
+        report = f"# Bias Audit Report\nTotal Samples: {len(df)}\nLanguages: {', '.join(df['language'].unique())}\n\n"
+        report += f"## Fairness Metrics\n| Metric | Value | Interpretation |\n|--------|-------|----------------|\n"
+        report += f"| EOD | {eod:.4f} | {'✅ Fair' if eod<0.1 else '⚠️ Bias detected'} |\n"
+        report += f"| AAOD | {aaod:.4f} | {'✅ Fair' if aaod<0.1 else '⚠️ Bias detected'} |\n"
+        report += f"| Demographic Parity | {dpd:.4f} | {'✅ Fair' if dpd<0.1 else '⚠️ Bias detected'} |\n\n"
+        report += f"## Per-Language Performance\n| Language | Accuracy | F1 Score | Precision | Recall | Samples |\n|----------|----------|----------|-----------|--------|----------|\n"
         for lang, metrics in sorted(lang_metrics.items()):
-            report += f"| {lang} | {metrics['accuracy']:.4f} | {metrics['f1']:.4f} | "
-            report += f"{metrics['precision']:.4f} | {metrics['recall']:.4f} | {metrics['samples']} |\n"
-
-        # Confusion matrix
-        fig, ax = plt.subplots(figsize=(8, 6))
+            report += f"| {lang} | {metrics['accuracy']:.4f} | {metrics['f1']:.4f} | {metrics['precision']:.4f} | {metrics['recall']:.4f} | {metrics['samples']} |\n"
+        fig, ax = plt.subplots(figsize=(8,6))
         cm = confusion_matrix(y_true, y_pred)
         sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
         ax.set_title('Overall Confusion Matrix')
         ax.set_xlabel('Predicted')
         ax.set_ylabel('Actual')
-        ax.set_xticklabels(['Human', 'AI'])
-        ax.set_yticklabels(['Human', 'AI'])
+        ax.set_xticklabels(['Human','AI'])
+        ax.set_yticklabels(['Human','AI'])
         plt.tight_layout()
-
         return report, fig
-
     except Exception as e:
         return f"❌ Error during bias audit: {str(e)}", None
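
`audit_bias` expects a three-column CSV. A minimal sketch of a valid upload for local testing (the sample rows are invented for illustration):

```python
# Build a tiny audit CSV in the shape audit_bias() expects (label: 0=Human, 1=AI).
import pandas as pd

pd.DataFrame({
    "text": ["Ina kwana?", "Bawo ni?", "This is a generated sample."],
    "label": [0, 0, 1],
    "language": ["Hausa", "Yoruba", "Hausa"],
}).to_csv("audit_sample.csv", index=False)
```
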
@@ -395,140 +277,50 @@ custom_css = """
 """
 
 with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
-
     gr.Markdown("<h1 id='title'>🔍 HATA: Human vs AI Text Detector</h1>")
-    gr.Markdown("""
-    <div style='text-align: center; margin-bottom: 20px;'>
-    Detect AI-generated text in African languages with **explainable AI** and **fairness auditing**
-    </div>
-    """)
-
+    gr.Markdown("<div style='text-align: center; margin-bottom: 20px;'>Detect AI-generated text in African languages with explainable AI and fairness auditing</div>")
     with gr.Tabs():
-        # Tab 1: Classification with Explanation
+        # Tab 1: Classification
         with gr.Tab("📝 Text Classification"):
             with gr.Row():
                 with gr.Column():
-                    text_input = gr.Textbox(
-                        label="Enter Text",
-                        placeholder="Paste text here to classify...",
-                        lines=8
-                    )
-                    language_select = gr.Dropdown(
-                        choices=SUPPORTED_LANGUAGES,
-                        value="Hausa",
-                        label="Select Language"
-                    )
-                    explainer_select = gr.Radio(
-                        choices=["SHAP", "LIME", "Both"],
-                        value="SHAP",
-                        label="Explainability Method"
-                    )
+                    text_input = gr.Textbox(label="Enter Text", placeholder="Paste text here...", lines=8)
+                    language_select = gr.Dropdown(choices=SUPPORTED_LANGUAGES, value="Hausa", label="Select Language")
+                    explainer_select = gr.Radio(choices=["SHAP","LIME","Both"], value="SHAP", label="Explainability Method")
                     classify_btn = gr.Button("🔍 Classify & Explain", variant="primary")
-
                 with gr.Column():
                     result_output = gr.Markdown(label="Classification Result")
-                    prob_chart = gr.BarPlot(
-                        x="Class",
-                        y="Probability",
-                        title="Prediction Probabilities",
-                        y_lim=[0, 1]
-                    )
-
+                    prob_chart = gr.BarPlot(x="Class", y="Probability", title="Prediction Probabilities", y_lim=[0,1])
             with gr.Row():
                 explanation_output = gr.Markdown(label="Explanation")
                 explanation_viz = gr.Plot(label="Visual Explanation")
-
-            classify_btn.click(
-                fn=classify_with_explanation,
-                inputs=[text_input, language_select, explainer_select],
-                outputs=[result_output, prob_chart, explanation_output, explanation_viz]
-            )
-
-        # Tab 2: Bias Auditing
+            classify_btn.click(fn=classify_with_explanation, inputs=[text_input, language_select, explainer_select], outputs=[result_output, prob_chart, explanation_output, explanation_viz])
+        # Tab 2: Bias Audit
         with gr.Tab("⚖️ Bias Audit"):
-            gr.Markdown("""
-            ### Fairness and Bias Auditing
-
-            Upload a CSV file with columns: `text`, `label` (0=Human, 1=AI), `language`
-
-            The system will calculate:
-            - **EOD (Equal Opportunity Difference)**: Fairness in recall across languages
-            - **AAOD (Average Absolute Odds Difference)**: Disparity in TPR and FPR
-            - **Demographic Parity**: Difference in positive prediction rates
-            """)
-
+            gr.Markdown("Upload a CSV with columns: text,label (0=Human,1=AI),language")
             with gr.Row():
                 with gr.Column():
-                    audit_file = gr.File(label="Upload CSV Dataset", file_types=[".csv"])
+                    audit_file = gr.File(label="Upload CSV", file_types=[".csv"])
                     audit_btn = gr.Button("🔍 Run Bias Audit", variant="primary")
-
                 with gr.Column():
                     audit_report = gr.Markdown(label="Audit Report")
                     audit_viz = gr.Plot(label="Confusion Matrix")
-
-            audit_btn.click(
-                fn=audit_bias,
-                inputs=audit_file,
-                outputs=[audit_report, audit_viz]
-            )
-
+            audit_btn.click(fn=audit_bias, inputs=audit_file, outputs=[audit_report, audit_viz])
         # Tab 3: About
         with gr.Tab("ℹ️ About"):
             gr.Markdown("""
             # About HATA System
-
-            ## 🎯 Features
-
-            ### Explainable AI
-            - **SHAP**: Game-theory based feature attribution
-            - **LIME**: Local interpretable model-agnostic explanations
-            - Visual token-level attributions
-
-            ### Fairness Auditing
-            - Equal Opportunity Difference (EOD)
-            - Average Absolute Odds Difference (AAOD)
-            - Demographic Parity
-            - Per-language performance metrics
-
-            ## 🌍 Supported Languages
-            Hausa, Yoruba, Igbo, Swahili, Amharic, Nigerian Pidgin
-
-            ## 📊 Model Performance
-            - Accuracy: 100%
-            - F1 Score: 100%
-            - EOD: 0.0 (Perfect fairness)
-            - AAOD: 0.0 (No bias)
-
-            ## 🔬 Technical Details
+            - SHAP & LIME Explainability
+            - Bias auditing across languages
+            - Supported Languages: Hausa, Yoruba, Igbo, Swahili, Amharic, Nigerian Pidgin
             - Base Model: AfroXLMR-base
-            - Parameters: ~270M
-            - Max Sequence Length: 128 tokens
-
-            ## 📚 Citation
-            ```bibtex
-            @misc{msmaje2025hata,
-                author = {Maje, M.S.},
-                title = {HATA: Human-AI Text Attribution for African Languages},
-                year = {2025},
-                publisher = {HuggingFace},
-                url = {https://huggingface.co/msmaje/phdhatamodel}
-            }
-            ```
+            - Citation: [HuggingFace Model](https://huggingface.co/msmaje/phdhatamodel)
             """)
-
-    gr.Markdown("""
-    ---
-    <div style='text-align: center; color: #666;'>
-    Built with 💜 for African Language NLP | Powered by AfroXLMR & Explainable AI
-    </div>
-    """)
+            gr.Markdown("<div style='text-align: center; color:#666;'>Built with 💜 for African Language NLP</div>")
 
+# -----------------------------------------------------------------------------
+# Launch
+# -----------------------------------------------------------------------------
 if __name__ == "__main__":
     demo.queue(api_open=False)
-    demo.launch(
-        server_name="0.0.0.0",
-        server_port=7860,
-        show_error=True,
-        share=True  # <-- important for Spaces
-    )
-
+    demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True, share=True)
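
The BarPlot wiring can also be exercised on its own. A minimal sketch (a hypothetical mini-demo, independent of this Space), returning a pandas DataFrame since that is the tabular value type `gr.BarPlot` handles most predictably:

```python
# Standalone sketch of the BarPlot output wiring (hypothetical mini-demo).
import gradio as gr
import pandas as pd

def fake_probabilities(text):
    # Fixed numbers purely for illustration.
    return pd.DataFrame({"Class": ["Human-written", "AI-generated"],
                         "Probability": [0.3, 0.7]})

with gr.Blocks() as sketch:
    box = gr.Textbox(label="Enter Text")
    plot = gr.BarPlot(x="Class", y="Probability", y_lim=[0, 1])
    box.submit(fake_probabilities, inputs=box, outputs=plot)

if __name__ == "__main__":
    sketch.launch()
```
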