# AIDetector — app.py (Hugging Face Space by VictorM-Coder)
# Snapshot of commit 1feb8eb (verified), 3.67 kB.
import html
import re

import gradio as gr
import pandas as pd
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# ---------------------------------------------------------------
# Model setup: strongest available public detector (RoBERTa-based
# GPT-2 output detector published by OpenAI/community).
# ---------------------------------------------------------------
MODEL_NAME = "openai-community/roberta-base-openai-detector"

# Pick the compute device first, then the parameter dtype: bf16 halves
# memory on GPUs that support it, everything else stays in fp32.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda" and torch.cuda.is_bf16_supported():
    _param_dtype = torch.bfloat16
else:
    _param_dtype = torch.float32

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME, torch_dtype=_param_dtype
)
model.to(device)
model.eval()  # inference only — disable dropout etc.
# -----------------------------
# PARAGRAPH SPLITTER
# -----------------------------
def paragraph_split(text):
    """Return the non-blank lines of *text*, stripped of surrounding whitespace.

    Paragraphs are delimited by single newlines; blank lines are dropped.
    """
    stripped = (chunk.strip() for chunk in text.split("\n"))
    return [chunk for chunk in stripped if chunk]
# -----------------------------
# AI DETECTION FUNCTION
# -----------------------------
def classify_text(text):
    """Score each paragraph of *text* as AI- or human-written.

    Paragraphs are newline-separated (see ``paragraph_split``). Returns a
    3-tuple of (document-level summary string, per-paragraph highlighted
    HTML, pandas DataFrame with columns Paragraph/Classification/
    Confidence); the last two are None when the input is empty.
    """
    if not text.strip():
        return "⚠️ Please enter some text.", None, None
    paragraphs = paragraph_split(text)
    if not paragraphs:
        return "⚠️ No paragraphs detected.", None, None

    # Tokenize all paragraphs as one padded batch.
    # FIX: truncate at tokenizer.model_max_length (512 for RoBERTa), not
    # config.max_position_embeddings (514). RoBERTa offsets position ids
    # past the padding index, so a 514-token input overruns the position
    # embedding table and raises an IndexError on long paragraphs.
    inputs = tokenizer(
        paragraphs,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=tokenizer.model_max_length,
    ).to(device)

    with torch.no_grad():
        logits = model(**inputs).logits
        probs = F.softmax(logits, dim=-1).cpu()
    preds = torch.argmax(probs, dim=-1)

    # FIX: resolve the "AI" class index from the model config instead of
    # hard-coding 1. The published detector's id2label is
    # {0: "Fake", 1: "Real"} — i.e. index 0 is the machine-generated
    # class, the opposite of the original hard-coded assumption. Fall
    # back to 1 (the original behavior) if the config only carries
    # generic LABEL_n names.
    ai_index = next(
        (i for i, name in model.config.id2label.items()
         if name.lower() == "fake"),
        1,
    )

    results = []
    highlighted_paragraphs = []
    for i, p in enumerate(paragraphs):
        pred_label = preds[i].item()
        confidence = probs[i, pred_label].item()
        label = "AI" if pred_label == ai_index else "Human"
        results.append([p, label, f"{confidence:.2f}"])
        # FIX: escape user text before embedding it in raw HTML —
        # gr.HTML renders markup verbatim, so unescaped pasted input
        # is an XSS vector.
        color = "red" if label == "AI" else "green"
        highlighted_paragraphs.append(
            f"<p style='color:{color}; font-weight:bold; margin-bottom:10px'>"
            f"{html.escape(p)}</p>"
        )

    # Document-level score: mean probability mass on the AI class,
    # averaged over paragraphs, expressed as a percentage.
    ai_likelihood = torch.mean(probs, dim=0)[ai_index].item() * 100
    highlighted_html = "\n".join(highlighted_paragraphs)
    df = pd.DataFrame(results, columns=["Paragraph", "Classification", "Confidence"])
    return f"⚖️ Document AI Likelihood: {ai_likelihood:.1f}%", highlighted_html, df
# -----------------------------
# GRADIO UI
# -----------------------------
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 Writenix Advanced AI Detection (Paragraph-Level)")

    with gr.Row():
        essay_box = gr.Textbox(
            placeholder="Paste your essay, article, or content here…",
            lines=14,
            label="Enter text",
        )

    run_button = gr.Button("🚀 Detect AI")
    overall_score = gr.Label(label="Overall AI Likelihood")
    paragraph_view = gr.HTML()
    results_table = gr.Dataframe(
        headers=["Paragraph", "Classification", "Confidence"], wrap=True
    )

    # Wire the button to the classifier: one input box, three outputs.
    run_button.click(
        classify_text,
        inputs=essay_box,
        outputs=[overall_score, paragraph_view, results_table],
    )

if __name__ == "__main__":
    demo.launch()