| """ |
| Gradio Space for Human-AI Text Attribution (HATA) Model |
| Detects whether text is human-written or AI-generated |
| Supports multiple African languages |
| """ |
|
|
| |
| import os |
| import sys |
| import types |
|
|
| |
# Ask Gradio to skip its pydub-backed audio support.
# NOTE(review): confirm the installed Gradio version actually reads this
# variable; nothing in this file depends on it.
os.environ["GRADIO_DISABLE_PYDUB"] = "1"

# ``audioop`` was removed from the standard library in Python 3.13, so a later
# ``import audioop`` (presumably issued by pydub, with ``pyaudioop`` as its
# fallback) would fail there.  Register empty placeholder modules, but ONLY
# for names that cannot be found: "not yet imported" is not the same as
# "missing", and replacing a real module with an empty stub would break any
# code that uses it.
import importlib.util

for _shim_name in ("audioop", "pyaudioop"):
    if _shim_name in sys.modules:
        # Already imported (or already stubbed) -- leave it untouched.
        continue
    if importlib.util.find_spec(_shim_name) is None:
        sys.modules[_shim_name] = types.ModuleType(_shim_name)
|
|
| |
| import gradio as gr |
| import requests |
| import math |
| from langdetect import detect |
|
|
| |
| |
| |
# Endpoint that receives the classification requests.
# NOTE: "YOUR_USERNAME/YOUR_MODEL" is a placeholder and must be replaced with
# the real model repository id before deployment.
HF_API_URL = "https://api-inference.huggingface.co/models/YOUR_USERNAME/YOUR_MODEL"

# Access token read from the environment.  Surrounding whitespace (for
# example a trailing newline pasted into a secret) is stripped, and an empty
# value is treated exactly like a missing one so that a malformed
# "Bearer " header is never sent.
HF_TOKEN = (os.getenv("HF_TOKEN") or "").strip() or None

# Fail fast at import time: nothing below can work without credentials.
if HF_TOKEN is None:
    raise ValueError("HF_TOKEN environment variable not set!")

# Headers attached to every request sent to HF_API_URL.
HEADERS = {
    "Authorization": f"Bearer {HF_TOKEN}",
    "Content-Type": "application/json",
}
|
|
| |
| |
| |
def entropy(probs):
    """Return the Shannon entropy, in bits, of the probabilities in *probs*.

    Used as an epistemic-uncertainty indicator: 0 means all mass sits on one
    outcome, larger values mean the mass is spread more evenly.

    Args:
        probs: Iterable of probabilities.  Entries that are not strictly
            positive are skipped, since ``p * log2(p)`` tends to 0 as p -> 0.

    Returns:
        The entropy as a float.  An empty or fully degenerate distribution
        yields ``0.0``.
    """
    total = -sum(p * math.log2(p) for p in probs if p > 0)
    # Negating a zero sum produces -0.0 (e.g. for [1.0, 0.0]), which would be
    # displayed as "-0.0"; ``or`` maps every zero (0, 0.0, -0.0) to +0.0.
    return total or 0.0
|
|
def normalize_labels(hf_output):
    """Extract the human and AI probabilities from a classifier response.

    Accepted shapes::

        [{"label": "HUMAN", "score": 0.73}, {"label": "AI", "score": 0.27}]
        [[{"label": "HUMAN", "score": 0.73}, {"label": "AI", "score": 0.27}]]

    The second (nested) form carries one inner list per input; only the
    first inner list is read because a single text is classified per call.

    Args:
        hf_output: List of ``{"label", "score"}`` dicts, optionally wrapped
            in one outer list.

    Returns:
        ``(human_p, ai_p)`` as floats.  Labels are matched case-insensitively
        against ``"human"`` and ``"ai"``; a label that is absent contributes
        ``0.0``.

    Raises:
        KeyError, TypeError, ValueError, AttributeError: If an entry is not a
            mapping with a string ``"label"`` and a float-convertible
            ``"score"``.
    """
    entries = hf_output
    # Unwrap the per-input nesting when present.
    if entries and isinstance(entries[0], (list, tuple)):
        entries = entries[0]

    scores = {entry["label"].lower(): float(entry["score"]) for entry in entries}
    return scores.get("human", 0.0), scores.get("ai", 0.0)
|
|
def hf_inference(text):
    """Send *text* to the model endpoint and return the decoded JSON reply.

    The request is a POST to ``HF_API_URL`` with ``HEADERS`` and a JSON body
    of the form ``{"inputs": text}``, limited to a 30-second timeout.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status
            (raised by ``raise_for_status``).
    """
    response = requests.post(
        HF_API_URL,
        json={"inputs": text},
        headers=HEADERS,
        timeout=30,
    )
    response.raise_for_status()
    return response.json()
|
|
| |
| |
| |
def analyze_text(text):
    """Classify *text* as human- or machine-written and build a JSON report.

    Args:
        text: The text to analyse.  ``None`` is treated like an empty string.

    Returns:
        On success, a dict with the keys ``prediction`` (label and
        confidence), ``probabilities``, ``uncertainty`` (Shannon entropy of
        the two probabilities), ``linguistic_context`` (detected language),
        ``explainability`` (placeholder, no attributions are computed) and
        ``fairness_audit_fields`` (unrounded values for auditing).

        On failure, a dict with an ``"error"`` key: ``"Empty input"`` for
        blank text, ``"Model request failed"`` (plus ``"details"``) when the
        request or its JSON decoding fails, and ``"Unexpected model
        response"`` (plus ``"raw"``) when the reply cannot be interpreted.
        Failures are reported this way rather than raised so the UI always
        receives a JSON-serialisable value.
    """
    # Guard against None as well as blank strings; calling .strip() on None
    # would raise AttributeError instead of producing the error dict.
    text = (text or "").strip()
    if not text:
        return {"error": "Empty input"}

    # Language detection is best-effort: any failure (e.g. text without
    # detectable features) must not prevent the classification itself.
    try:
        language = detect(text)
    except Exception:
        language = "unknown"

    # Network, HTTP-status and JSON-decoding problems become an error dict,
    # matching how every other failure in this function is reported.
    try:
        hf_raw = hf_inference(text)
    except (requests.RequestException, ValueError) as exc:
        return {"error": "Model request failed", "details": str(exc)}

    if not isinstance(hf_raw, list):
        return {"error": "Unexpected model response", "raw": hf_raw}

    try:
        human_p, ai_p = normalize_labels(hf_raw)
    except (KeyError, TypeError, ValueError, AttributeError):
        # The reply is a list, but its entries do not have the expected shape.
        return {"error": "Unexpected model response", "raw": hf_raw}

    # If neither expected label was present, both values are 0.0 and any
    # verdict would be fabricated ("Human" at confidence 0), so report it.
    if human_p == 0.0 and ai_p == 0.0:
        return {"error": "Unexpected model response", "raw": hf_raw}

    # Ties are resolved in favour of "Human".
    label = "Human" if human_p >= ai_p else "Machine"
    confidence = max(human_p, ai_p)

    H = entropy([human_p, ai_p])

    # Placeholder section: this function computes no token attributions.
    explainability_stub = {
        "method": "pending",
        "note": (
            "This model endpoint does not natively expose SHAP/LIME. "
            "Post-hoc explainability must be computed locally using a "
            "replicated model or proxy explainer."
        ),
        "token_attributions": [],
    }

    # Unrounded values, kept separately from the rounded display fields.
    fairness_context = {
        "language": language,
        "human_probability": human_p,
        "ai_probability": ai_p,
        "entropy": H,
    }

    return {
        "prediction": {
            "label": label,
            "confidence": round(confidence, 4),
        },
        "probabilities": {
            "human": round(human_p, 4),
            "machine": round(ai_p, 4),
        },
        "uncertainty": {
            "entropy": round(H, 4),
            "interpretation": (
                "High entropy indicates epistemic ambiguity; "
                "classification should be treated cautiously."
            ),
        },
        "linguistic_context": {
            "detected_language": language,
        },
        "explainability": explainability_stub,
        "fairness_audit_fields": fairness_context,
    }
|
|
| |
| |
| |
# Gradio front end: a five-line text box whose content is passed to
# analyze_text, with the returned dict rendered as JSON.
iface = gr.Interface(
    title="HATA: Human-AI Text Attribution",
    description=(
        "Detect whether text is human-written or AI-generated.\n"
        "Supports uncertainty estimation, language-aware auditing, "
        "and XAI-ready outputs."
    ),
    fn=analyze_text,
    inputs=gr.Textbox(placeholder="Enter text here...", lines=5),
    outputs=gr.JSON(),
)
|
|
| |
| |
| |
# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    # "0.0.0.0" listens on every network interface; the port is fixed at 7860.
    iface.launch(server_port=7860, server_name="0.0.0.0")
|
|