# AIDetector — app.py (Hugging Face Space by VictorM-Coder)
# Snapshot of commit 1feb8eb (verified), 3.67 kB.
import html
import re

import gradio as gr
import pandas as pd
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# ---------------------------------------------------------------
# Model setup: strongest available public detector (RoBERTa-based
# GPT-2 output detector published by OpenAI/community).
# ---------------------------------------------------------------
MODEL_NAME = "openai-community/roberta-base-openai-detector"

# Pick the compute device first, then the parameter dtype: bf16 halves
# memory on GPUs that support it, everything else stays in fp32.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda" and torch.cuda.is_bf16_supported():
    _param_dtype = torch.bfloat16
else:
    _param_dtype = torch.float32

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME, torch_dtype=_param_dtype
)
model.to(device)
model.eval()  # inference only — disable dropout etc.
# -----------------------------
# PARAGRAPH SPLITTER
# -----------------------------
def paragraph_split(text):
    """Return the non-blank lines of *text*, stripped of surrounding whitespace.

    Paragraphs are delimited by single newlines; blank lines are dropped.
    """
    stripped = (chunk.strip() for chunk in text.split("\n"))
    return [chunk for chunk in stripped if chunk]
# -----------------------------
# AI DETECTION FUNCTION
# -----------------------------
def classify_text(text):
    """Score each paragraph of *text* as AI- or human-written.

    Paragraphs are newline-separated (see ``paragraph_split``). Returns a
    3-tuple of (document-level summary string, per-paragraph highlighted
    HTML, pandas DataFrame with columns Paragraph/Classification/
    Confidence); the last two are None when the input is empty.
    """
    if not text.strip():
        return "⚠️ Please enter some text.", None, None
    paragraphs = paragraph_split(text)
    if not paragraphs:
        return "⚠️ No paragraphs detected.", None, None

    # Tokenize all paragraphs as one padded batch.
    # FIX: truncate at tokenizer.model_max_length (512 for RoBERTa), not
    # config.max_position_embeddings (514). RoBERTa offsets position ids
    # past the padding index, so a 514-token input overruns the position
    # embedding table and raises an IndexError on long paragraphs.
    inputs = tokenizer(
        paragraphs,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=tokenizer.model_max_length,
    ).to(device)

    with torch.no_grad():
        logits = model(**inputs).logits
        probs = F.softmax(logits, dim=-1).cpu()
    preds = torch.argmax(probs, dim=-1)

    # FIX: resolve the "AI" class index from the model config instead of
    # hard-coding 1. The published detector's id2label is
    # {0: "Fake", 1: "Real"} — i.e. index 0 is the machine-generated
    # class, the opposite of the original hard-coded assumption. Fall
    # back to 1 (the original behavior) if the config only carries
    # generic LABEL_n names.
    ai_index = next(
        (i for i, name in model.config.id2label.items()
         if name.lower() == "fake"),
        1,
    )

    results = []
    highlighted_paragraphs = []
    for i, p in enumerate(paragraphs):
        pred_label = preds[i].item()
        confidence = probs[i, pred_label].item()
        label = "AI" if pred_label == ai_index else "Human"
        results.append([p, label, f"{confidence:.2f}"])
        # FIX: escape user text before embedding it in raw HTML —
        # gr.HTML renders markup verbatim, so unescaped pasted input
        # is an XSS vector.
        color = "red" if label == "AI" else "green"
        highlighted_paragraphs.append(
            f"<p style='color:{color}; font-weight:bold; margin-bottom:10px'>"
            f"{html.escape(p)}</p>"
        )

    # Document-level score: mean probability mass on the AI class,
    # averaged over paragraphs, expressed as a percentage.
    ai_likelihood = torch.mean(probs, dim=0)[ai_index].item() * 100
    highlighted_html = "\n".join(highlighted_paragraphs)
    df = pd.DataFrame(results, columns=["Paragraph", "Classification", "Confidence"])
    return f"⚖️ Document AI Likelihood: {ai_likelihood:.1f}%", highlighted_html, df
# -----------------------------
# GRADIO UI
# -----------------------------
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 Writenix Advanced AI Detection (Paragraph-Level)")

    with gr.Row():
        essay_box = gr.Textbox(
            placeholder="Paste your essay, article, or content here…",
            lines=14,
            label="Enter text",
        )

    run_button = gr.Button("🚀 Detect AI")
    overall_score = gr.Label(label="Overall AI Likelihood")
    paragraph_view = gr.HTML()
    results_table = gr.Dataframe(
        headers=["Paragraph", "Classification", "Confidence"], wrap=True
    )

    # Wire the button to the classifier: one input box, three outputs.
    run_button.click(
        classify_text,
        inputs=essay_box,
        outputs=[overall_score, paragraph_view, results_table],
    )

if __name__ == "__main__":
    demo.launch()