import gradio as gr
from transformers import pipeline

# ----------------------------
# Load TEXT detector (upgradeable)
# ----------------------------
TEXT_MODEL_ID = "wangkevin02/AI_Detect_Model"  # swap if you try another model
text_pipe = pipeline("text-classification", model=TEXT_MODEL_ID)
def _canonical(label: str) -> str | None:
    """Map raw label names to 'AI' or 'HUMAN' when possible."""
    if not label:
        return None
    l = label.strip().lower()
    # Common explicit names
    if any(k in l for k in ["ai", "machine", "generated", "fake", "synthetic", "gpt"]):
        return "AI"
    if any(k in l for k in ["human", "real", "authentic", "organic"]):
        return "HUMAN"
    # Try LABEL_X -> use id2label if present
    if l.startswith("label_"):
        try:
            idx = int(l.split("_")[-1])
        except ValueError:
            return None
        id2label = getattr(text_pipe.model.config, "id2label", None)
        if isinstance(id2label, dict) and idx in id2label:
            return _canonical(str(id2label[idx]))
    # Sometimes labels are just "0"/"1"
    if l in {"0", "1"}:
        id2label = getattr(text_pipe.model.config, "id2label", None)
        if isinstance(id2label, dict):
            mapped = id2label.get(int(l))
            if mapped:
                return _canonical(str(mapped))
    return None
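
# Illustrative mappings (the label strings below are examples, not guaranteed
# outputs of the model loaded above):
#   _canonical("Fake")     -> "AI"         (keyword match)
#   _canonical("Human")    -> "HUMAN"      (keyword match)
#   _canonical("LABEL_1")  -> whatever config.id2label[1] resolves to, or None
#   _canonical("mystery")  -> None         (nothing matched)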
def _aggregate_probs(raw_results):
    """
    Convert pipeline outputs into {'AI': p, 'HUMAN': p, 'raw': {...}} robustly.

    Ensures both keys exist and sum <= 1.0 (may be < 1 if labels don't map).
    """
    # text-classification with top_k=None returns a list of dicts,
    # e.g. [{'label': 'AI', 'score': 0.82}, {'label': 'HUMAN', 'score': 0.18}]
    if isinstance(raw_results, list) and raw_results and isinstance(raw_results[0], dict):
        label_scores = {d["label"]: float(d["score"]) for d in raw_results}
    elif isinstance(raw_results, list) and raw_results and isinstance(raw_results[0], list):
        # return_all_scores=True style: [[{label, score}, {label, score}, ...]]
        label_scores = {d["label"]: float(d["score"]) for d in raw_results[0]}
    else:
        label_scores = {}

    ai_p = 0.0
    human_p = 0.0
    for lbl, sc in label_scores.items():
        canon = _canonical(lbl)
        if canon == "AI":
            ai_p += sc
        elif canon == "HUMAN":
            human_p += sc

    # If nothing accumulated a nonzero score, fall back to the top label.
    # (Note this only helps when the top label itself is mappable; fully
    # unmapped labels leave both probabilities at 0 and surface via 'raw'.)
    if ai_p == 0.0 and human_p == 0.0 and label_scores:
        top_lbl = max(label_scores, key=label_scores.get)
        top_sc = label_scores[top_lbl]
        canon = _canonical(top_lbl)
        if canon == "AI":
            ai_p = top_sc
            human_p = 1.0 - top_sc
        elif canon == "HUMAN":
            human_p = top_sc
            ai_p = 1.0 - top_sc
    return {"AI": round(ai_p, 6), "HUMAN": round(human_p, 6), "raw": label_scores}
def _verdict(ai_p: float, human_p: float, n_words: int) -> str:
    conf = max(ai_p, human_p)
    if n_words < 120:
        band = "LOW (short text)"
    elif conf < 0.60:
        band = "LOW (uncertain)"
    elif conf < 0.80:
        band = "MEDIUM"
    else:
        band = "HIGH"
    if ai_p > human_p:
        return f"🤖 Likely AI - Confidence: {band}"
    elif human_p > ai_p:
        return f"🙂 Likely Human - Confidence: {band}"
    else:
        return "❓ Uncertain - Confidence: LOW"
def detect_text(input_text: str):
    text = (input_text or "").strip()
    if not text:
        return {}, "❌ Please enter some text."
    try:
        # Get ALL label scores so we can map correctly; truncate to the model's
        # max length so very long inputs don't raise a tensor-size error.
        results = text_pipe(text, top_k=None, truncation=True)
        agg = _aggregate_probs(results)
        ai_p, human_p = float(agg["AI"]), float(agg["HUMAN"])
        # Round for display, but keep the raw mapping available too
        probs_out = {
            "AI-generated": round(ai_p, 4),
            "Human-written": round(human_p, 4),
        }
        # Optional: include raw labels so you can debug mappings in the UI
        # probs_out.update({f"raw::{k}": round(v, 4) for k, v in agg["raw"].items()})
        verdict = _verdict(ai_p, human_p, n_words=len(text.split()))
        return probs_out, verdict
    except Exception as e:
        return {}, f"❌ Error: {e}"
# ----------------------------
# (Optional) IMAGE detector - won't crash if the model is unavailable
# ----------------------------
try:
    from PIL import Image  # noqa: F401 - only verifies Pillow is installed

    image_pipe = pipeline("image-classification", model="umm-maybe/ai-vs-human-images")
except Exception:
    image_pipe = None
def detect_image(img):
    if image_pipe is None:
        return {}, "⚠️ Image detector not available on this Space."
    try:
        results = image_pipe(img)
        label_scores = {d["label"]: float(d["score"]) for d in results}
        best = max(label_scores, key=label_scores.get)
        if any(k in best.lower() for k in ["ai", "fake", "generated", "synthetic"]):
            return label_scores, "🤖 This image looks AI-generated"
        else:
            return label_scores, "📷 This image looks Human/Real"
    except Exception as e:
        return {}, f"❌ Error: {e}"
# ----------------------------
# UI
# ----------------------------
with gr.Blocks() as demo:
    gr.Markdown(
        "# 🔍 AI Content Detector\n"
        "Detect whether **text** (and optionally images) is AI-generated or human-made."
    )
    with gr.Tab("📝 Text"):
        txt = gr.Textbox(label="Enter text", lines=10, placeholder="Paste text here…")
        out_probs = gr.Label(label="Probabilities")
        out_verdict = gr.Textbox(label="Verdict", interactive=False)
        btn = gr.Button("Analyze", variant="primary")
        btn.click(detect_text, inputs=txt, outputs=[out_probs, out_verdict])
    with gr.Tab("📷 Image"):
        img_in = gr.Image(type="pil", label="Upload an image")
        img_probs = gr.Label(label="Probabilities")
        img_verdict = gr.Textbox(label="Verdict", interactive=False)
        btn2 = gr.Button("Analyze Image")
        btn2.click(detect_image, inputs=img_in, outputs=[img_probs, img_verdict])
if __name__ == "__main__":
    demo.launch()
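
# ----------------------------
# Suggested requirements.txt for this Space (an assumption; the Space's actual
# dependency file is not shown above):
#   gradio
#   transformers
#   torch
#   pillow
# ----------------------------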