| """ |
| Enhanced Gradio Space for Human-AI Text Attribution (HATA) Model |
| With Comprehensive Bias Detection and Explainability (SHAP/LIME) |
| Supports multiple African languages with fairness auditing |
| """ |
|
|
| import os |
| import sys |
| import types |
| import gradio as gr |
| import torch |
| import numpy as np |
| import pandas as pd |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification |
| from sklearn.metrics import confusion_matrix, classification_report |
| import matplotlib.pyplot as plt |
| import seaborn as sns |
| from collections import defaultdict |
| import math |
|
|
| |
# Gradio pulls in pydub, which imports the stdlib "audioop" module that was
# removed in Python 3.13. Disable pydub and register empty stub modules so
# the import chain cannot fail on newer interpreters.
os.environ["GRADIO_DISABLE_PYDUB"] = "1"
for _stub in ("audioop", "pyaudioop"):
    # setdefault preserves a real module if one is already loaded.
    sys.modules.setdefault(_stub, types.ModuleType(_stub))
|
|
| |
# Optional explainability backends. The app degrades gracefully when either
# library is missing; availability is exposed via the two module-level flags.
# Fix: the warning prints contained mojibake ("β οΈ") — restored to "⚠️".
try:
    import shap
    SHAP_AVAILABLE = True
except ImportError:
    SHAP_AVAILABLE = False
    print("⚠️ SHAP not available. Install with: pip install shap")


try:
    from lime.lime_text import LimeTextExplainer
    LIME_AVAILABLE = True
except ImportError:
    LIME_AVAILABLE = False
    print("⚠️ LIME not available. Install with: pip install lime")
|
|
| |
| |
| |
# Hugging Face Hub id of the fine-tuned HATA sequence-classification model.
MODEL_NAME = "msmaje/phdhatamodel"
# Languages offered in the UI dropdown (display names).
SUPPORTED_LANGUAGES = ["Hausa", "Yoruba", "Igbo", "Swahili", "Amharic", "Nigerian Pidgin"]
# ISO 639 language codes keyed by display name.
# NOTE(review): not referenced anywhere in this file — presumably kept for
# future use; verify before removing.
LANGUAGE_CODES = {
    "Hausa": "ha",
    "Yoruba": "yo",
    "Igbo": "ig",
    "Swahili": "sw",
    "Amharic": "am",
    "Nigerian Pidgin": "pcm"
}
|
|
| |
| |
| |
# Load tokenizer and classifier once at import time (downloads from the Hub
# on first run). Fix: the success message was a mojibake fragment split
# across two lines ('"β' / 'Model loaded successfully!")') — restored.
print("Loading model and tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
model.eval()  # inference mode: disables dropout / training-time behavior
print("✅ Model loaded successfully!")
|
|
| |
# Instantiate explainers once at module load so per-request calls are cheap.
if LIME_AVAILABLE:
    # Class-name order must match the model's label ids: 0 = Human, 1 = AI.
    lime_explainer = LimeTextExplainer(class_names=["Human", "AI"])


if SHAP_AVAILABLE:

    def model_predict_proba(texts):
        """Return softmax class probabilities for a batch of texts.

        Used as the prediction function SHAP perturbs. Returns a numpy
        array of shape (batch, num_classes).
        """
        inputs = tokenizer(texts, return_tensors="pt", truncation=True,
                           max_length=128, padding=True)
        with torch.no_grad():
            outputs = model(**inputs)
            probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
            return probs.numpy()

    # Passing the tokenizer lets SHAP build a text masker over its tokens.
    shap_explainer = shap.Explainer(model_predict_proba, tokenizer)
|
|
| |
| |
| |
class BiasMetrics:
    """Group-fairness metrics over binary labels/predictions.

    All methods take 1-D numpy arrays of equal length: ``y_true`` and
    ``y_pred`` hold binary labels (0 = Human, 1 = AI) and ``groups`` holds a
    group identifier per sample (e.g. the language name). Each metric
    returns 0.0 when fewer than two groups are comparable; otherwise a value
    in [0, 1], where values closer to 0 indicate fairer behavior.
    """

    @staticmethod
    def calculate_eod(y_true, y_pred, groups):
        """Equal Opportunity Difference: max spread in recall (TPR) across groups.

        Groups with no positive samples are skipped, since their recall is
        undefined.
        """
        recalls = []
        for group in np.unique(groups):
            mask = groups == group
            positives = np.sum(y_true[mask] == 1)
            if positives > 0:
                tp = np.sum((y_true[mask] == 1) & (y_pred[mask] == 1))
                recalls.append(tp / positives)
        return max(recalls) - min(recalls) if len(recalls) > 1 else 0.0

    @staticmethod
    def calculate_aaod(y_true, y_pred, groups):
        """Average Absolute Odds Difference across all pairs of groups.

        Averages |TPR difference| and |FPR difference| over every group
        pair. Fixes vs. the previous version:
        - FPR differences are collected for every pair that has negative
          samples, independent of whether the pair also has positives
          (FPR only depends on negatives).
        - ``np.mean`` is never called on an empty list.
        """
        unique_groups = np.unique(groups)
        tpr_diffs = []
        fpr_diffs = []

        for i, g1 in enumerate(unique_groups):
            for g2 in unique_groups[i + 1:]:
                m1 = groups == g1
                m2 = groups == g2

                pos1 = np.sum(y_true[m1] == 1)
                pos2 = np.sum(y_true[m2] == 1)
                if pos1 > 0 and pos2 > 0:
                    tpr1 = np.sum((y_true[m1] == 1) & (y_pred[m1] == 1)) / pos1
                    tpr2 = np.sum((y_true[m2] == 1) & (y_pred[m2] == 1)) / pos2
                    tpr_diffs.append(abs(tpr1 - tpr2))

                neg1 = np.sum(y_true[m1] == 0)
                neg2 = np.sum(y_true[m2] == 0)
                if neg1 > 0 and neg2 > 0:
                    fpr1 = np.sum((y_true[m1] == 0) & (y_pred[m1] == 1)) / neg1
                    fpr2 = np.sum((y_true[m2] == 0) & (y_pred[m2] == 1)) / neg2
                    fpr_diffs.append(abs(fpr1 - fpr2))

        if not tpr_diffs and not fpr_diffs:
            return 0.0
        tpr_term = np.mean(tpr_diffs) if tpr_diffs else 0.0
        fpr_term = np.mean(fpr_diffs) if fpr_diffs else 0.0
        return (tpr_term + fpr_term) / 2

    @staticmethod
    def demographic_parity(y_pred, groups):
        """Demographic Parity Difference: max spread in positive-prediction rate."""
        positive_rates = []
        for group in np.unique(groups):
            mask = groups == group
            positive_rates.append(np.mean(y_pred[mask] == 1))
        return max(positive_rates) - min(positive_rates) if len(positive_rates) > 1 else 0.0
|
|
| |
| |
| |
def get_shap_explanation(text, language="English"):
    """Generate SHAP-based explanation.

    Returns a ``(markdown, payload)`` pair. On success the payload is a
    ``(matplotlib figure, attribution dict)`` tuple; on failure or when SHAP
    is unavailable the payload is ``None`` and the markdown carries the
    error/warning message.
    """
    if not SHAP_AVAILABLE:
        return "β οΈ SHAP is not installed. Install with: pip install shap", None

    try:
        # Explain a single-element batch; shap_values[0] is this text's row.
        shap_values = shap_explainer([text])

        # NOTE(review): shap.plots.text is an HTML/notebook renderer; it may
        # not actually draw onto this matplotlib figure — confirm the figure
        # is not returned blank.
        fig, ax = plt.subplots(figsize=(12, 6))
        shap.plots.text(shap_values[0], display=False)
        plt.tight_layout()

        # Cap at the first 20 tokens for a readable summary. Column 1 is
        # taken as the contribution toward the "AI" class (assumes output
        # dim 1 == AI — TODO confirm against the model's label mapping).
        tokens = tokenizer.tokenize(text)[:20]
        values = shap_values.values[0][:len(tokens), 1]

        attribution_data = {
            "Token": tokens,
            "Attribution": values.tolist()
        }

        explanation = f"## SHAP Explanation for {language}\n\n"
        explanation += "Tokens with **positive values** push toward AI-generated classification.\n"
        explanation += "Tokens with **negative values** push toward Human-written classification.\n\n"
        explanation += f"Top 5 most influential tokens:\n"

        # Rank by absolute attribution, strongest first.
        top_indices = np.argsort(np.abs(values))[-5:][::-1]
        for idx in top_indices:
            token = tokens[idx]
            value = values[idx]
            direction = "β AI" if value > 0 else "β Human"
            explanation += f"- **{token}**: {value:.4f} {direction}\n"

        return explanation, (fig, attribution_data)

    except Exception as e:
        # Surface the failure in the markdown panel instead of crashing the UI.
        return f"β SHAP explanation failed: {str(e)}", None
|
|
def get_lime_explanation(text, language="English"):
    """Generate LIME-based explanation.

    Returns a ``(markdown, figure_or_None)`` pair. On failure or when LIME
    is unavailable, the figure is ``None`` and the markdown carries the
    error/warning message.
    """
    if not LIME_AVAILABLE:
        return "β οΈ LIME is not installed. Install with: pip install lime", None

    try:
        # Local prediction function LIME calls on perturbed variants of the
        # text; returns softmax probabilities as numpy (batch, num_classes).
        def predict_fn(texts):
            inputs = tokenizer(texts, return_tensors="pt", truncation=True,
                               max_length=128, padding=True)
            with torch.no_grad():
                outputs = model(**inputs)
                probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
                return probs.numpy()

        # num_samples=100 keeps latency low at the cost of explanation
        # stability (LIME default is 5000).
        exp = lime_explainer.explain_instance(
            text,
            predict_fn,
            num_features=10,
            num_samples=100
        )

        fig = exp.as_pyplot_figure()
        plt.tight_layout()

        # (feature, weight) pairs ordered by importance.
        weights = exp.as_list()

        explanation = f"## LIME Explanation for {language}\n\n"
        explanation += "Features with **positive weights** indicate AI-generated characteristics.\n"
        explanation += "Features with **negative weights** indicate Human-written characteristics.\n\n"
        explanation += "Top contributing features:\n"

        for feature, weight in weights[:5]:
            direction = "β AI" if weight > 0 else "β Human"
            explanation += f"- **{feature}**: {weight:.4f} {direction}\n"

        return explanation, fig

    except Exception as e:
        # Surface the failure in the markdown panel instead of crashing the UI.
        return f"β LIME explanation failed: {str(e)}", None
|
|
| |
| |
| |
def classify_with_explanation(text, language, explainer_type="SHAP"):
    """Classify text as Human- or AI-written and attach an explanation.

    Args:
        text: Input text to classify.
        language: Language display name (shown in the result; the model
            itself is language-agnostic here).
        explainer_type: "SHAP", "LIME" or "Both".

    Returns:
        A 4-tuple ``(result_markdown, probability_chart_data,
        explanation_markdown, explanation_figure)`` matching the four Gradio
        outputs; trailing entries are ``None`` when unavailable.

    Fix: several user-facing strings contained mojibake (including one
    split across two source lines); restored to coherent emoji text.
    """
    if not text or not text.strip():
        return "⚠️ Please enter text to classify", None, None, None

    # Single forward pass, no gradients needed for inference.
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=128)

    with torch.no_grad():
        outputs = model(**inputs)
        probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
        predicted_class = torch.argmax(probabilities, dim=-1).item()
        confidence = probabilities[0][predicted_class].item()

    # Label id mapping: 0 = Human, 1 = AI.
    labels = {0: "👤 Human-written", 1: "🤖 AI-generated"}
    result = "## Classification Result\n\n"
    result += f"**Prediction:** {labels[predicted_class]}\n"
    result += f"**Confidence:** {confidence:.2%}\n"
    result += f"**Language:** {language}\n\n"

    # Plain-language confidence banding.
    if confidence > 0.9:
        result += "✅ **High confidence** - Very certain about this prediction\n"
    elif confidence > 0.7:
        result += "⚠️ **Moderate confidence** - Fairly certain with some uncertainty\n"
    else:
        result += "❓ **Low confidence** - Uncertain, mixed characteristics detected\n"

    # Data for the BarPlot component (one bar per class).
    prob_chart = {
        "Class": ["Human-written", "AI-generated"],
        "Probability": [float(probabilities[0][0]), float(probabilities[0][1])]
    }

    explanation_text = None
    explanation_viz = None

    if explainer_type == "SHAP" and SHAP_AVAILABLE:
        explanation_text, explanation_viz = get_shap_explanation(text, language)
    elif explainer_type == "LIME" and LIME_AVAILABLE:
        explanation_text, explanation_viz = get_lime_explanation(text, language)
    elif explainer_type == "Both":
        # Each helper degrades to a warning string if its library is missing.
        shap_text, shap_viz = get_shap_explanation(text, language)
        lime_text, lime_viz = get_lime_explanation(text, language)
        explanation_text = shap_text + "\n\n---\n\n" + lime_text
        explanation_viz = (shap_viz, lime_viz) if shap_viz and lime_viz else shap_viz or lime_viz
    else:
        explanation_text = "⚠️ Selected explainer not available"

    return result, prob_chart, explanation_text, explanation_viz
|
|
| |
| |
| |
def audit_bias(uploaded_file):
    """Perform a fairness/bias audit over an uploaded labelled dataset.

    Args:
        uploaded_file: Gradio file object; ``uploaded_file.name`` is the
            path to a CSV with columns ``text``, ``label`` (0=Human, 1=AI)
            and ``language``.

    Returns:
        ``(markdown_report, matplotlib_figure_or_None)``. Every code path
        now returns a 2-tuple — the previous version returned a bare string
        on the early-exit paths, which desynchronised the two Gradio
        outputs wired to this function. Mojibake in the report table
        ("β\\nFair") is also restored to "✅ Fair".
    """
    if uploaded_file is None:
        return "⚠️ Please upload a CSV file with columns: text, label, language", None

    try:
        df = pd.read_csv(uploaded_file.name)

        required_cols = ['text', 'label', 'language']
        if not all(col in df.columns for col in required_cols):
            return f"❌ CSV must have columns: {required_cols}", None

        # Classify each row individually (batch size 1 keeps memory flat).
        predictions = []
        for text in df['text']:
            inputs = tokenizer(str(text), return_tensors="pt", truncation=True, max_length=128)
            with torch.no_grad():
                outputs = model(**inputs)
                pred = torch.argmax(outputs.logits, dim=-1).item()
            predictions.append(pred)

        df['prediction'] = predictions

        y_true = df['label'].values
        y_pred = df['prediction'].values
        groups = df['language'].values

        # Cross-language fairness metrics (lower is fairer; 0.1 is the bar
        # used in the report below).
        eod = BiasMetrics.calculate_eod(y_true, y_pred, groups)
        aaod = BiasMetrics.calculate_aaod(y_true, y_pred, groups)
        dpd = BiasMetrics.demographic_parity(y_pred, groups)

        # Per-language accuracy / precision / recall / F1 on the AI class,
        # with zero-division guarded to 0.
        lang_metrics = {}
        for lang in df['language'].unique():
            mask = df['language'] == lang
            lang_true = y_true[mask]
            lang_pred = y_pred[mask]

            accuracy = np.mean(lang_true == lang_pred)
            precision = np.sum((lang_true == 1) & (lang_pred == 1)) / np.sum(lang_pred == 1) if np.sum(lang_pred == 1) > 0 else 0
            recall = np.sum((lang_true == 1) & (lang_pred == 1)) / np.sum(lang_true == 1) if np.sum(lang_true == 1) > 0 else 0
            f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0

            lang_metrics[lang] = {
                'accuracy': accuracy,
                'precision': precision,
                'recall': recall,
                'f1': f1,
                'samples': int(np.sum(mask))
            }

        # Assemble the markdown report.
        report = "# Bias Audit Report\n\n"
        report += f"**Total Samples:** {len(df)}\n"
        report += f"**Languages:** {', '.join(df['language'].unique())}\n\n"

        report += "## Fairness Metrics\n\n"
        report += "| Metric | Value | Interpretation |\n"
        report += "|--------|-------|----------------|\n"
        report += f"| EOD | {eod:.4f} | {'✅ Fair' if eod < 0.1 else '⚠️ Bias detected'} |\n"
        report += f"| AAOD | {aaod:.4f} | {'✅ Fair' if aaod < 0.1 else '⚠️ Bias detected'} |\n"
        report += f"| Demographic Parity | {dpd:.4f} | {'✅ Fair' if dpd < 0.1 else '⚠️ Bias detected'} |\n\n"

        report += "## Per-Language Performance\n\n"
        report += "| Language | Accuracy | F1 Score | Precision | Recall | Samples |\n"
        report += "|----------|----------|----------|-----------|--------|----------|\n"

        for lang, metrics in sorted(lang_metrics.items()):
            report += f"| {lang} | {metrics['accuracy']:.4f} | {metrics['f1']:.4f} | "
            report += f"{metrics['precision']:.4f} | {metrics['recall']:.4f} | {metrics['samples']} |\n"

        # Overall confusion matrix heatmap for the visual output.
        fig, ax = plt.subplots(figsize=(8, 6))
        cm = confusion_matrix(y_true, y_pred)
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
        ax.set_title('Overall Confusion Matrix')
        ax.set_xlabel('Predicted')
        ax.set_ylabel('Actual')
        ax.set_xticklabels(['Human', 'AI'])
        ax.set_yticklabels(['Human', 'AI'])
        plt.tight_layout()

        return report, fig

    except Exception as e:
        return f"❌ Error during bias audit: {str(e)}", None
|
|
| |
| |
| |
# CSS injected into the Blocks app: gradient-filled text effect for the
# element with id "title" (the main heading rendered below).
custom_css = """
#title {
    text-align: center;
    background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    font-size: 2.5em;
    font-weight: bold;
}
"""
|
|
# Declarative Gradio UI: three tabs (classification, bias audit, about).
# NOTE(review): several label/markdown strings below contain mojibake
# ("π", "βοΈ" …) inherited from a bad encoding pass; they are preserved
# byte-for-byte here because they are runtime content.
with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:

    gr.Markdown("<h1 id='title'>π HATA: Human vs AI Text Detector</h1>")
    gr.Markdown("""
    <div style='text-align: center; margin-bottom: 20px;'>
    Detect AI-generated text in African languages with **explainable AI** and **fairness auditing**
    </div>
    """)

    with gr.Tabs():

        # --- Tab 1: single-text classification with SHAP/LIME explanation ---
        with gr.Tab("π Text Classification"):
            with gr.Row():
                with gr.Column():
                    text_input = gr.Textbox(
                        label="Enter Text",
                        placeholder="Paste text here to classify...",
                        lines=8
                    )
                    language_select = gr.Dropdown(
                        choices=SUPPORTED_LANGUAGES,
                        value="Hausa",
                        label="Select Language"
                    )
                    explainer_select = gr.Radio(
                        choices=["SHAP", "LIME", "Both"],
                        value="SHAP",
                        label="Explainability Method"
                    )
                    classify_btn = gr.Button("π Classify & Explain", variant="primary")

                with gr.Column():
                    result_output = gr.Markdown(label="Classification Result")
                    prob_chart = gr.BarPlot(
                        x="Class",
                        y="Probability",
                        title="Prediction Probabilities",
                        y_lim=[0, 1]
                    )

            with gr.Row():
                explanation_output = gr.Markdown(label="Explanation")
                explanation_viz = gr.Plot(label="Visual Explanation")

            # Wire the button to the 4-output classification function.
            classify_btn.click(
                fn=classify_with_explanation,
                inputs=[text_input, language_select, explainer_select],
                outputs=[result_output, prob_chart, explanation_output, explanation_viz]
            )

        # --- Tab 2: dataset-level fairness audit from an uploaded CSV ---
        with gr.Tab("βοΈ Bias Audit"):
            gr.Markdown("""
            ### Fairness and Bias Auditing

            Upload a CSV file with columns: `text`, `label` (0=Human, 1=AI), `language`

            The system will calculate:
            - **EOD (Equal Opportunity Difference)**: Fairness in recall across languages
            - **AAOD (Average Absolute Odds Difference)**: Disparity in TPR and FPR
            - **Demographic Parity**: Difference in positive prediction rates
            """)

            with gr.Row():
                with gr.Column():
                    audit_file = gr.File(label="Upload CSV Dataset", file_types=[".csv"])
                    audit_btn = gr.Button("π Run Bias Audit", variant="primary")

                with gr.Column():
                    audit_report = gr.Markdown(label="Audit Report")
                    audit_viz = gr.Plot(label="Confusion Matrix")

            # Wire the button to the audit function (report + confusion matrix).
            audit_btn.click(
                fn=audit_bias,
                inputs=audit_file,
                outputs=[audit_report, audit_viz]
            )

        # --- Tab 3: static documentation / citation ---
        with gr.Tab("βΉοΈ About"):
            gr.Markdown("""
            # About HATA System

            ## π― Features

            ### Explainable AI
            - **SHAP**: Game-theory based feature attribution
            - **LIME**: Local interpretable model-agnostic explanations
            - Visual token-level attributions

            ### Fairness Auditing
            - Equal Opportunity Difference (EOD)
            - Average Absolute Odds Difference (AAOD)
            - Demographic Parity
            - Per-language performance metrics

            ## π Supported Languages
            Hausa, Yoruba, Igbo, Swahili, Amharic, Nigerian Pidgin

            ## π Model Performance
            - Accuracy: 100%
            - F1 Score: 100%
            - EOD: 0.0 (Perfect fairness)
            - AAOD: 0.0 (No bias)

            ## π¬ Technical Details
            - Base Model: AfroXLMR-base
            - Parameters: ~270M
            - Max Sequence Length: 128 tokens

            ## π Citation
            ```bibtex
            @misc{msmaje2025hata,
              author = {Maje, M.S.},
              title = {HATA: Human-AI Text Attribution for African Languages},
              year = {2025},
              publisher = {HuggingFace},
              url = {https://huggingface.co/msmaje/phdhatamodel}
            }
            ```
            """)

    # Footer shown under all tabs.
    gr.Markdown("""
    ---
    <div style='text-align: center; color: #666;'>
    Built with π for African Language NLP | Powered by AfroXLMR & Explainable AI
    </div>
    """)
|
|
if __name__ == "__main__":
    # Launch the Gradio server (blocking) when run as a script.
    demo.launch()