import gradio as gr
import torch
import torch.nn.functional as F
from transformers import AutoModelForSequenceClassification, AutoTokenizer, GPT2LMHeadModel, GPT2TokenizerFast
from nltk.tokenize import sent_tokenize
import nltk
# --- CONFIGURATION ---
MODEL_NAME = "ShivamVN/My-Ai-Text-Detector"
# --- SETUP ---
# Download NLTK sentence-tokenizer data (required by sent_tokenize)
nltk.download('punkt')
nltk.download('punkt_tab')
print("Initializing App...")
# Detect Hardware
device = "cuda" if torch.cuda.is_available() else "cpu"
# 1. Load RoBERTa
print(f"Loading {MODEL_NAME}...")
try:
    clf_model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME).to(device)
    clf_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
except Exception as e:
    print(f"Error loading RoBERTa: {e}")
# 2. Load GPT-2
print("Loading GPT-2...")
try:
    ppl_model = GPT2LMHeadModel.from_pretrained("gpt2").to(device)
    ppl_tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
except Exception as e:
    print(f"Error loading GPT-2: {e}")
# --- CORE FUNCTIONS ---
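# The detector combines two signals:
#   1. get_roberta_prob - probability from the fine-tuned classifier that a chunk is AI-written
#      (label index 1 is assumed to be the "AI" class, matching how the score is used below).
#   2. get_perplexity   - GPT-2 perplexity of a sentence; lower perplexity (more predictable text)
#      is treated as a hint of machine generation.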
def get_roberta_prob(text):
    if not text.strip(): return 0.0
    inputs = clf_tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512).to(device)
    with torch.no_grad():
        outputs = clf_model(**inputs)
    return F.softmax(outputs.logits, dim=-1).cpu().numpy()[0][1]
def get_perplexity(text):
    if not text.strip(): return 0.0
    # Truncate to GPT-2's context window so very long inputs do not raise an error
    encodings = ppl_tokenizer(text, return_tensors="pt", truncation=True, max_length=1024)
    input_ids = encodings.input_ids.to(device)
    with torch.no_grad():
        outputs = ppl_model(input_ids, labels=input_ids)
    return torch.exp(outputs.loss).item()
# ==========================================
# TEMPLATE 1: ONLY MODEL (Fast Check)
# ==========================================
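# Template 1 scores the whole input in a single classifier pass and reports the
# predicted label together with the model's confidence in that label.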
def template_model_only(text):
    if not text: return "Please enter text."
    ai_prob = get_roberta_prob(text)
    label = "AI-GENERATED" if ai_prob > 0.5 else "HUMAN-WRITTEN"
    emoji = "🔴" if ai_prob > 0.5 else "🟢"
    # Report confidence in whichever label was predicted, not just the raw AI probability
    confidence = ai_prob * 100 if ai_prob > 0.5 else (1 - ai_prob) * 100
    return f"# {emoji} {label}\n**Confidence:** {confidence:.2f}%"
# ==========================================
# TEMPLATE 2: FULL SYSTEM (Advanced Logic)
# ==========================================
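# Template 2 pipeline:
#   1. Slide a 2-sentence window over the text and score each window with the classifier,
#      so every sentence collects the scores of all windows that contain it.
#   2. A sentence is provisionally AI if all of its window scores are above 0.80, HUMAN if all
#      are below 0.20, otherwise UNCERTAIN. GPT-2 perplexity then confirms or breaks ties:
#      AI verdicts are kept only if perplexity is below 100, and UNCERTAIN sentences are
#      flagged AI only if perplexity is below 40.
#   3. Summary: if more than 40% of sentences end up AI, the whole text is flagged as likely AI.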
def template_full_system(text):
    if not text: return "Please enter text."
    sentences = sent_tokenize(text)
    if not sentences: return "No text detected."
    # 1. SLIDING WINDOW
    # Score overlapping 2-sentence chunks so each sentence inherits the scores
    # of every window that contains it.
    window_size = 2
    sentence_raw_scores = {i: [] for i in range(len(sentences))}
    for i in range(len(sentences) - window_size + 1):
        chunk = " ".join(sentences[i : i + window_size])
        prob = get_roberta_prob(chunk)
        for j in range(window_size):
            sentence_raw_scores[i+j].append(prob)
    # Fallback: texts shorter than one window produce no chunks, so score those sentences directly
    for i in range(len(sentences)):
        if not sentence_raw_scores[i]:
            sentence_raw_scores[i].append(get_roberta_prob(sentences[i]))
    # 2. HYBRID LOGIC
    log_output = f"{'SENTENCE':<59} | {'SCORE':<5} | {'PPL':<4} | {'VERDICT'}\n"
    log_output += "-" * 85 + "\n"
    total_ai = 0
    for i in range(len(sentences)):
        sent = sentences[i]
        scores = sentence_raw_scores[i]
        # Calculate stats
        min_s = min(scores)
        max_s = max(scores)
        avg_s = sum(scores) / len(scores)  # average is shown when the verdict is uncertain
        # Determine status: AI only if every window agrees, HUMAN only if every window disagrees
        status = "UNCERTAIN"
        if min_s > 0.80: status = "AI"
        elif max_s < 0.20: status = "HUMAN"
        # Perplexity check
        ppl = get_perplexity(sent)
        # Final decision: perplexity confirms AI verdicts and breaks ties
        final = "HUMAN"
        if status == "UNCERTAIN":
            if ppl < 40: final = "AI"
        elif status == "AI":
            if ppl < 100: final = "AI"
        if final == "AI": total_ai += 1
        # Display logic: for uncertain sentences show the average score instead of the max
        if status == "UNCERTAIN":
            display_score = avg_s
        else:
            display_score = max_s
        # Formatting
        disp_sent = (sent[:57] + "..") if len(sent) > 57 else sent.ljust(59)
        score_val = f"{display_score*100:.0f}%"
        ppl_val = f"{int(ppl)}"
        log_output += f"{disp_sent} | {score_val:<5} | {ppl_val:<4} | {final}\n"
    # 3. SUMMARY
    ai_percent = (total_ai / len(sentences)) * 100
    verdict = "🟢 LIKELY HUMAN"
    if ai_percent > 40: verdict = "🔴 LIKELY AI"
    return f"# {verdict}\n**AI Sentences:** {ai_percent:.1f}%\n\n```text\n{log_output}\n```"
# ==========================================
# USER INTERFACE
# ==========================================
with gr.Blocks() as demo:
    gr.Markdown("# 🕵️‍♂️ AI Text Detector Suite")
    gr.Markdown(f"Current Model: `{MODEL_NAME}`")
    with gr.Tabs():
        with gr.TabItem("Template 1: Only Model"):
            gr.Markdown("### ⚡ Fast Check")
            t1_input = gr.Textbox(lines=5, placeholder="Paste text here...", label="Input Text")
            t1_button = gr.Button("Analyze (Model Only)", variant="primary")
            t1_output = gr.Markdown(label="Result")
            t1_button.click(template_model_only, inputs=t1_input, outputs=t1_output)
        with gr.TabItem("Template 2: Full System"):
            gr.Markdown("### 🧠 Deep Analysis")
            t2_input = gr.Textbox(lines=8, placeholder="Paste text here...", label="Input Text")
            t2_button = gr.Button("Analyze (Full System)", variant="primary")
            t2_output = gr.Markdown(label="Detailed Report")
            t2_button.click(template_full_system, inputs=t2_input, outputs=t2_output)
demo.launch()