import gradio as gr
import torch
import torch.nn.functional as F
from transformers import AutoModelForSequenceClassification, AutoTokenizer, GPT2LMHeadModel, GPT2TokenizerFast
from nltk.tokenize import sent_tokenize
import nltk

# --- CONFIGURATION ---
MODEL_NAME = "ShivamVN/My-Ai-Text-Detector"

# --- SETUP ---
# Fix for the nltk tokenizer-data error
nltk.download('punkt')
nltk.download('punkt_tab')

print("Initializing App...")

# Detect Hardware
device = "cuda" if torch.cuda.is_available() else "cpu"

# 1. Load the RoBERTa classifier
print(f"Loading {MODEL_NAME}...")
try:
    clf_model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME).to(device)
    clf_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
except Exception as e:
    print(f"Error loading RoBERTa: {e}")

# 2. Load GPT-2 (used only to compute perplexity)
print("Loading GPT-2...")
try:
    ppl_model = GPT2LMHeadModel.from_pretrained("gpt2").to(device)
    ppl_tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
except Exception as e:
    print(f"Error loading GPT-2: {e}")

# --- CORE FUNCTIONS ---
def get_roberta_prob(text):
    """Return the classifier's probability that `text` is AI-generated.

    Assumes label index 1 of the classification head is the AI class.
    """
    if not text.strip():
        return 0.0
    inputs = clf_tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512).to(device)
    with torch.no_grad():
        outputs = clf_model(**inputs)
    return F.softmax(outputs.logits, dim=-1).cpu().numpy()[0][1]

def get_perplexity(text):
    """Return GPT-2 perplexity of `text` (lower = more predictable text)."""
    if not text.strip():
        return 0.0
    # Truncate to GPT-2's 1024-token context window so very long input cannot crash the model.
    encodings = ppl_tokenizer(text, return_tensors="pt", truncation=True, max_length=1024)
    input_ids = encodings.input_ids.to(device)
    with torch.no_grad():
        outputs = ppl_model(input_ids, labels=input_ids)
    return torch.exp(outputs.loss).item()

# ==========================================
# TEMPLATE 1: ONLY MODEL (Fast Check)
# ==========================================
def template_model_only(text):
    if not text:
        return "Please enter text."
    ai_prob = get_roberta_prob(text)
    percent = ai_prob * 100
    label = "AI-GENERATED" if ai_prob > 0.5 else "HUMAN-WRITTEN"
    emoji = "🔴" if ai_prob > 0.5 else "🟢"
    return f"# {emoji} {label}\n**Confidence:** {percent:.2f}%"

# ==========================================
# TEMPLATE 2: FULL SYSTEM (Advanced Logic)
# ==========================================
def template_full_system(text):
    if not text:
        return "Please enter text."
    sentences = sent_tokenize(text)
    if not sentences:
        return "No text detected."

    # 1. SLIDING WINDOW: score overlapping 2-sentence chunks so each sentence
    #    collects one classifier score per window it appears in.
    window_size = 2
    sentence_raw_scores = {i: [] for i in range(len(sentences))}
    for i in range(len(sentences) - window_size + 1):
        chunk = " ".join(sentences[i : i + window_size])
        prob = get_roberta_prob(chunk)
        for j in range(window_size):
            sentence_raw_scores[i + j].append(prob)

    # 2. HYBRID LOGIC: combine the window scores with per-sentence perplexity.
    log_output = f"{'SENTENCE':<60} | {'SCORE':<5} | {'PPL':<4} | {'VERDICT'}\n"
    log_output += "-" * 95 + "\n"
    total_ai = 0

    for i in range(len(sentences)):
        sent = sentences[i]
        scores = sentence_raw_scores[i]
        if not scores:
            scores = [0.0]

        # Calculate stats over all windows containing this sentence
        min_s = min(scores)
        max_s = max(scores)
        avg_s = sum(scores) / len(scores)  # average, used for display when the status is UNCERTAIN

        # Determine status from the classifier scores alone
        status = "UNCERTAIN"
        if min_s > 0.80:
            status = "AI"
        elif max_s < 0.20:
            status = "HUMAN"

        # Perplexity check
        ppl = get_perplexity(sent)

        # Final decision logic: perplexity breaks ties and confirms AI calls
        final = "HUMAN"
        if status == "UNCERTAIN":
            if ppl < 40:
                final = "AI"
        elif status == "AI":
            if ppl < 100:
                final = "AI"

        if final == "AI":
            total_ai += 1

        # Display logic: if uncertain, show the average (e.g. 50%) instead of the max (e.g. 99%)
        if status == "UNCERTAIN":
            display_score = avg_s
        else:
            display_score = max_s

        # Formatting
        disp_sent = (sent[:57] + "..") if len(sent) > 57 else sent.ljust(59)
        score_val = f"{display_score * 100:.0f}%"
        ppl_val = f"{int(ppl)}"
        log_output += f"{disp_sent} | {score_val:<5} | {ppl_val:<4} | {final}\n"

    # 3. SUMMARY
    ai_percent = (total_ai / len(sentences)) * 100
    verdict = "🟢 LIKELY HUMAN"
    if ai_percent > 40:
        verdict = "🔴 LIKELY AI"

    return f"# {verdict}\n**AI Sentences:** {ai_percent:.1f}%\n\n```text\n{log_output}\n```"

# ==========================================
# USER INTERFACE
# ==========================================
with gr.Blocks() as demo:
    gr.Markdown("# 🕵️‍♂️ AI Text Detector Suite")
    gr.Markdown(f"Current Model: `{MODEL_NAME}`")

    with gr.Tabs():
        with gr.TabItem("Template 1: Only Model"):
            gr.Markdown("### ⚡ Fast Check")
            t1_input = gr.Textbox(lines=5, placeholder="Paste text here...", label="Input Text")
            t1_button = gr.Button("Analyze (Model Only)", variant="primary")
            t1_output = gr.Markdown(label="Result")
            t1_button.click(template_model_only, inputs=t1_input, outputs=t1_output)

        with gr.TabItem("Template 2: Full System"):
            gr.Markdown("### 🧠 Deep Analysis")
            t2_input = gr.Textbox(lines=8, placeholder="Paste text here...", label="Input Text")
            t2_button = gr.Button("Analyze (Full System)", variant="primary")
            t2_output = gr.Markdown(label="Detailed Report")
            t2_button.click(template_full_system, inputs=t2_input, outputs=t2_output)

demo.launch()
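
# --- OPTIONAL SMOKE TEST (a minimal sketch, not part of the app) ---
# Both pipelines can be exercised without the UI by calling the template
# functions directly, e.g. in a Python shell after loading this file, or by
# temporarily placing these calls above demo.launch() (which blocks).
# The sample text below is arbitrary and only for illustration.
# sample = "The sun rose over the quiet town. Birds sang in the trees."
# print(template_model_only(sample))
# print(template_full_system(sample))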