""" Gradio Space for Human-AI Text Attribution (HATA) Model Detects whether text is human-written or AI-generated Supports multiple African languages """ # --- Deterministic suppression of Gradio audio stack under Python 3.13 --- import os import sys import types os.environ["GRADIO_DISABLE_PYDUB"] = "1" # Provide stubs so that pydub cannot fail on audioop / pyaudioop if "audioop" not in sys.modules: sys.modules["audioop"] = types.ModuleType("audioop") if "pyaudioop" not in sys.modules: sys.modules["pyaudioop"] = types.ModuleType("pyaudioop") # Now it is safe to import Gradio and the rest of the stack import gradio as gr import torch import numpy as np from transformers import AutoTokenizer, AutoModelForSequenceClassification # ---------------------------------------------------------------------- # Model configuration # ---------------------------------------------------------------------- MODEL_NAME = "distilbert-base-multilingual-cased" # replace with your fine-tuned HATA checkpoint if available DEVICE = "cuda" if torch.cuda.is_available() else "cpu" tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME) model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=2) model.to(DEVICE) model.eval() LABELS = ["Human-written", "AI-generated"] # ---------------------------------------------------------------------- # Inference routine # ---------------------------------------------------------------------- @torch.no_grad() def hata_predict(text: str): if not text or not text.strip(): return {"Human-written": 0.0, "AI-generated": 0.0} inputs = tokenizer( text, return_tensors="pt", truncation=True, padding=True, max_length=512, ).to(DEVICE) outputs = model(**inputs) logits = outputs.logits.squeeze(0) probs = torch.softmax(logits, dim=-1).cpu().numpy() return {LABELS[i]: float(probs[i]) for i in range(len(LABELS))} # ---------------------------------------------------------------------- # Gradio interface # 
# ----------------------------------------------------------------------
with gr.Blocks(title="Multilingual HATA System") as demo:
    gr.Markdown(
        """
        # Multilingual Human–AI Text Attribution (HATA)

        This system estimates whether an input passage is **human-written** or
        **AI-generated**, with a focus on multilingual and African-language use
        cases (e.g., Hausa, Yoruba, Igbo, Pidgin).

        The backend is a Transformer-based classifier fine-tuned for attribution.
        """
    )

    with gr.Row():
        # Left column: free-text entry plus the analysis trigger.
        with gr.Column(scale=3):
            passage_box = gr.Textbox(
                label="Input Text",
                placeholder="Paste a paragraph in Hausa, Yoruba, Igbo, Pidgin, or English...",
                lines=8,
            )
            analyze_button = gr.Button("Analyze")

        # Right column: per-label probability read-out.
        with gr.Column(scale=2):
            result_label = gr.Label(label="Attribution Probabilities")

    # Wire the button to the inference routine defined above.
    analyze_button.click(
        fn=hata_predict,
        inputs=passage_box,
        outputs=result_label,
    )

# ----------------------------------------------------------------------
# Entry point
# ----------------------------------------------------------------------
if __name__ == "__main__":
    # Blocking call: serves the app until interrupted.
    demo.launch()