"""
Gradio Space for the Human-AI Text Attribution (HATA) model.

Detects whether text is human-written or AI-generated.
Supports multiple African languages.
"""
|
|
|
|
|
|
|
|
import importlib.util
import os
import sys
import types
|
|
|
|
|
# Set before Gradio is imported so the flag is visible at import time.
# NOTE(review): presumably read by Gradio to skip its pydub-based audio
# path -- confirm the installed Gradio version honours this variable.
os.environ["GRADIO_DISABLE_PYDUB"] = "1"

# `audioop` was removed from the standard library in Python 3.13, and
# `pyaudioop` is the fallback name some audio packages try next (presumably
# pydub here -- TODO confirm).  Register an empty placeholder module for each
# name that cannot be found so a later `import audioop` does not raise.
# A real, importable module is left alone rather than shadowed by the stub.
for _stub_name in ("audioop", "pyaudioop"):
    if _stub_name in sys.modules:
        # Already imported (or already stubbed); nothing to do.  This check
        # must come first: find_spec() raises ValueError for a module in
        # sys.modules whose __spec__ is None, which is the case for stubs.
        continue
    if importlib.util.find_spec(_stub_name) is None:
        sys.modules[_stub_name] = types.ModuleType(_stub_name)
del _stub_name
|
|
|
|
|
|
|
|
import gradio as gr |
|
|
import torch |
|
|
import numpy as np |
|
|
from transformers import AutoTokenizer, AutoModelForSequenceClassification |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Output classes, in the order of the model's logits (index 0, index 1).
LABELS = ["Human-written", "AI-generated"]

# Checkpoint to load.  The default is unchanged; set the HATA_MODEL_NAME
# environment variable to load a different checkpoint without editing this
# file.  An empty value falls back to the default.
# NOTE(review): the default is a base checkpoint, not one fine-tuned for
# attribution, so the classification head created below is presumably
# freshly initialised and its probabilities not meaningful -- confirm and
# point HATA_MODEL_NAME at the fine-tuned weights.
MODEL_NAME = os.environ.get("HATA_MODEL_NAME") or "distilbert-base-multilingual-cased"

# Run on the GPU when one is available, otherwise on the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Size the classification head from LABELS so the two cannot drift apart.
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME, num_labels=len(LABELS)
)
model.to(DEVICE)
model.eval()  # inference only
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@torch.no_grad()
def hata_predict(text: str) -> dict[str, float]:
    """Return the model's probability for each attribution label.

    Args:
        text: Passage to classify.  ``None``, empty, or whitespace-only
            input is not sent to the model.

    Returns:
        A mapping from each entry of ``LABELS`` to a float.  For blank input
        every value is ``0.0``; otherwise the values are the softmax of the
        model's logits for the single input sequence.
    """
    # Guard clause: nothing to classify, so report zero for every label.
    if not text or not text.strip():
        return dict.fromkeys(LABELS, 0.0)

    # Tokenise as a batch of one, truncated to at most 512 tokens, and move
    # the resulting tensors to the same device as the model.
    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=512,
    ).to(DEVICE)

    # Drop the batch dimension, then normalise the logits into probabilities.
    logits = model(**inputs).logits.squeeze(0)
    probs = torch.softmax(logits, dim=-1).cpu().numpy()

    # Pair labels with probabilities by position; float() converts the
    # NumPy scalars into plain Python floats.
    return {label: float(prob) for label, prob in zip(LABELS, probs)}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Introductory text rendered as Markdown at the top of the page.
_INTRO_MARKDOWN = """
# Multilingual Human–AI Text Attribution (HATA)

This system estimates whether an input passage is **human-written** or
**AI-generated**, with a focus on multilingual and African-language use
cases (e.g., Hausa, Yoruba, Igbo, Pidgin).

The backend is a Transformer-based classifier fine-tuned for attribution.
"""

# Page layout: intro text on top, then one row holding the input column
# (text box and button, scale 3) beside the output column (label, scale 2).
with gr.Blocks(title="Multilingual HATA System") as demo:
    gr.Markdown(_INTRO_MARKDOWN)

    with gr.Row():
        with gr.Column(scale=3):
            text_input = gr.Textbox(
                lines=8,
                label="Input Text",
                placeholder="Paste a paragraph in Hausa, Yoruba, Igbo, Pidgin, or English...",
            )
            submit_btn = gr.Button("Analyze")
        with gr.Column(scale=2):
            output = gr.Label(label="Attribution Probabilities")

    # Clicking the button sends the text box contents to hata_predict and
    # shows the returned label -> probability mapping in the label widget.
    submit_btn.click(hata_predict, inputs=text_input, outputs=output)


# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|
|
|