import gradio as gr
import torch
import torch.nn.functional as F
from transformers import AutoModelForSequenceClassification, AutoTokenizer, GPT2LMHeadModel, GPT2TokenizerFast
from nltk.tokenize import sent_tokenize
import nltk

# --- CONFIGURATION ---
MODEL_NAME = "ShivamVN/My-Ai-Text-Detector"

# --- SETUP ---
# sent_tokenize needs the punkt data; newer NLTK releases also look for punkt_tab.
nltk.download('punkt')
nltk.download('punkt_tab')

print("Initializing App...")

# Detect hardware
device = "cuda" if torch.cuda.is_available() else "cpu"

# 1. Load RoBERTa
print(f"Loading {MODEL_NAME}...")
try:
    clf_model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME).to(device)
    clf_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
except Exception as e:
    print(f"Error loading RoBERTa: {e}")

# 2. Load GPT-2
print("Loading GPT-2...")
try:
    ppl_model = GPT2LMHeadModel.from_pretrained("gpt2").to(device)
    ppl_tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
except Exception as e:
    print(f"Error loading GPT-2: {e}")

# --- CORE FUNCTIONS ---
def get_roberta_prob(text):
    """Return the classifier's probability that `text` is AI-generated."""
    if not text.strip():
        return 0.0
    inputs = clf_tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512).to(device)
    with torch.no_grad():
        outputs = clf_model(**inputs)
    return F.softmax(outputs.logits, dim=-1).cpu().numpy()[0][1]
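
# NOTE: the trailing [0][1] index assumes label id 1 is the "AI" class; if you swap
# in a different checkpoint, check clf_model.config.id2label to confirm the mapping.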

def get_perplexity(text):
    """Return GPT-2 perplexity of `text`; lower means more predictable (more AI-like)."""
    if not text.strip():
        return 0.0
    # Truncate to GPT-2's 1024-token context window to avoid errors on long inputs.
    encodings = ppl_tokenizer(text, return_tensors="pt", truncation=True, max_length=1024)
    input_ids = encodings.input_ids.to(device)
    with torch.no_grad():
        outputs = ppl_model(input_ids, labels=input_ids)
    return torch.exp(outputs.loss).item()
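
# Perplexity is exp(mean token negative log-likelihood): a loss of 3.0 gives
# PPL ≈ e^3 ≈ 20 (very predictable, AI-like), while 5.0 gives PPL ≈ 148 (more
# surprising, human-like). The 40 / 100 thresholds used below sit between these.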

# ==========================================
# TEMPLATE 1: ONLY MODEL (Fast Check)
# ==========================================
def template_model_only(text):
    if not text:
        return "Please enter text."
    ai_prob = get_roberta_prob(text)
    label = "AI-GENERATED" if ai_prob > 0.5 else "HUMAN-WRITTEN"
    emoji = "🔴" if ai_prob > 0.5 else "🟢"
    # Report confidence in the predicted label, not the raw AI probability.
    confidence = ai_prob * 100 if ai_prob > 0.5 else (1 - ai_prob) * 100
    return f"# {emoji} {label}\n**Confidence:** {confidence:.2f}%"

# ==========================================
# TEMPLATE 2: FULL SYSTEM (Advanced Logic)
# ==========================================
def template_full_system(text):
    if not text:
        return "Please enter text."
    sentences = sent_tokenize(text)
    if not sentences:
        return "No text detected."

    # 1. SLIDING WINDOW: score overlapping 2-sentence chunks so each sentence
    # collects a score from every window that contains it.
    window_size = 2
    sentence_raw_scores = {i: [] for i in range(len(sentences))}
    for i in range(len(sentences) - window_size + 1):
        chunk = " ".join(sentences[i : i + window_size])
        prob = get_roberta_prob(chunk)
        for j in range(window_size):
            sentence_raw_scores[i + j].append(prob)
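
    # Example: with 4 sentences and window_size = 2 the chunks are (s0 s1), (s1 s2),
    # (s2 s3); interior sentences collect two scores, the first and last only one.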

    # 2. HYBRID LOGIC
    log_output = f"{'SENTENCE':<60} | {'SCORE':<5} | {'PPL':<4} | {'VERDICT'}\n"
    log_output += "-" * 95 + "\n"
    total_ai = 0
    for i, sent in enumerate(sentences):
        scores = sentence_raw_scores[i]
        if not scores:
            # Fewer sentences than the window: score the sentence on its own
            # (the old [0.0] fallback silently marked such texts human).
            scores = [get_roberta_prob(sent)]
        # Window statistics
        min_s = min(scores)
        max_s = max(scores)
        avg_s = sum(scores) / len(scores)  # average, used for the uncertain-case display

        # Classifier verdict: commit only when every window agrees
        status = "UNCERTAIN"
        if min_s > 0.80:
            status = "AI"
        elif max_s < 0.20:
            status = "HUMAN"

        # Perplexity check
        ppl = get_perplexity(sent)
        # Final decision: low perplexity confirms the classifier or breaks ties
        final = "HUMAN"
        if status == "UNCERTAIN":
            if ppl < 40:
                final = "AI"
        elif status == "AI":
            if ppl < 100:
                final = "AI"
        if final == "AI":
            total_ai += 1

        # Display logic: when uncertain, show the average window score (e.g. 50%)
        # instead of the max (e.g. 99%), which would overstate confidence.
        display_score = avg_s if status == "UNCERTAIN" else max_s

        # Formatting: keep the sentence column exactly 60 chars to match the header.
        disp_sent = (sent[:58] + "..") if len(sent) > 60 else sent.ljust(60)
        score_val = f"{display_score * 100:.0f}%"
        ppl_val = f"{int(ppl)}"
        log_output += f"{disp_sent} | {score_val:<5} | {ppl_val:<4} | {final}\n"

    # 3. SUMMARY
    ai_percent = (total_ai / len(sentences)) * 100
    verdict = "🟢 LIKELY HUMAN"
    if ai_percent > 40:
        verdict = "🔴 LIKELY AI"
    return f"# {verdict}\n**AI Sentences:** {ai_percent:.1f}%\n\n```text\n{log_output}\n```"

# ==========================================
# USER INTERFACE
# ==========================================
with gr.Blocks() as demo:
    gr.Markdown("# 🕵️‍♂️ AI Text Detector Suite")
    gr.Markdown(f"Current Model: `{MODEL_NAME}`")
    with gr.Tabs():
        with gr.TabItem("Template 1: Only Model"):
            gr.Markdown("### ⚡ Fast Check")
            t1_input = gr.Textbox(lines=5, placeholder="Paste text here...", label="Input Text")
            t1_button = gr.Button("Analyze (Model Only)", variant="primary")
            t1_output = gr.Markdown(label="Result")
            t1_button.click(template_model_only, inputs=t1_input, outputs=t1_output)
        with gr.TabItem("Template 2: Full System"):
            gr.Markdown("### 🧠 Deep Analysis")
            t2_input = gr.Textbox(lines=8, placeholder="Paste text here...", label="Input Text")
            t2_button = gr.Button("Analyze (Full System)", variant="primary")
            t2_output = gr.Markdown(label="Detailed Report")
            t2_button.click(template_full_system, inputs=t2_input, outputs=t2_output)

demo.launch()
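
# launch() also accepts options such as share=True (temporary public URL) or
# server_port=7860 if you need a fixed port.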