Update app.py
app.py CHANGED
@@ -3,32 +3,29 @@ import torch
 import torch.nn.functional as F
 from transformers import AutoModelForSequenceClassification, AutoTokenizer, GPT2LMHeadModel, GPT2TokenizerFast
 from nltk.tokenize import sent_tokenize
-# --- SETUP ---
 import nltk
-
-nltk.download('punkt_tab') # <--- ADD THIS LINE
-print("Initializing App...")
+
 # --- CONFIGURATION ---
-# Your Fine-Tuned Model
 MODEL_NAME = "ShivamVN/My-Ai-Text-Detector"
 
 # --- SETUP ---
+# Fix for the nltk error
 nltk.download('punkt')
+nltk.download('punkt_tab')
 print("Initializing App...")
 
 # Detect Hardware
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
-# 1. Load RoBERTa
+# 1. Load RoBERTa
 print(f"Loading {MODEL_NAME}...")
 try:
     clf_model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME).to(device)
     clf_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 except Exception as e:
     print(f"Error loading RoBERTa: {e}")
-    print("Ensure your Model Repo is PUBLIC in Settings!")
 
-# 2. Load GPT-2
+# 2. Load GPT-2
 print("Loading GPT-2...")
 try:
     ppl_model = GPT2LMHeadModel.from_pretrained("gpt2").to(device)
@@ -39,16 +36,13 @@ except Exception as e:
 # --- CORE FUNCTIONS ---
 
 def get_roberta_prob(text):
-    """Returns scalar probability of AI (0.0 to 1.0)"""
     if not text.strip(): return 0.0
     inputs = clf_tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512).to(device)
     with torch.no_grad():
         outputs = clf_model(**inputs)
-    # Label 1 is AI
     return F.softmax(outputs.logits, dim=-1).cpu().numpy()[0][1]
 
 def get_perplexity(text):
-    """Returns scalar Perplexity score"""
     if not text.strip(): return 0.0
     encodings = ppl_tokenizer(text, return_tensors="pt")
     input_ids = encodings.input_ids.to(device)
@@ -62,11 +56,9 @@ def get_perplexity(text):
 def template_model_only(text):
     if not text: return "Please enter text."
 
-    # Just run RoBERTa on the full text
    ai_prob = get_roberta_prob(text)
     percent = ai_prob * 100
 
-    # Simple formatting
     label = "AI-GENERATED" if ai_prob > 0.5 else "HUMAN-WRITTEN"
     emoji = "🔴" if ai_prob > 0.5 else "🟢"
 
@@ -81,7 +73,7 @@ def template_full_system(text):
     sentences = sent_tokenize(text)
     if not sentences: return "No text detected."
 
-    # 1. SLIDING WINDOW
+    # 1. SLIDING WINDOW
     window_size = 2
     sentence_raw_scores = {i: [] for i in range(len(sentences))}
 
@@ -91,7 +83,7 @@ def template_full_system(text):
         for j in range(window_size):
             sentence_raw_scores[i+j].append(prob)
 
-    # 2. HYBRID LOGIC
+    # 2. HYBRID LOGIC
     log_output = f"{'SENTENCE':<60} | {'SCORE':<5} | {'PPL':<4} | {'VERDICT'}\n"
     log_output += "-" * 95 + "\n"
 
@@ -102,9 +94,12 @@ def template_full_system(text):
         scores = sentence_raw_scores[i]
         if not scores: scores = [0.0]
 
-        #
+        # Calculate Stats
         min_s = min(scores)
         max_s = max(scores)
+        avg_s = sum(scores) / len(scores) # <--- NEW: Calculate Average
+
+        # Determine Status
         status = "UNCERTAIN"
         if min_s > 0.80: status = "AI"
         elif max_s < 0.20: status = "HUMAN"
@@ -115,15 +110,22 @@ def template_full_system(text):
         # Final Decision Logic
         final = "HUMAN"
         if status == "UNCERTAIN":
-            if ppl < 40: final = "AI"
+            if ppl < 40: final = "AI"
         elif status == "AI":
-            if ppl < 100: final = "AI"
+            if ppl < 100: final = "AI"
 
         if final == "AI": total_ai += 1
 
-        #
+        # --- DISPLAY LOGIC FIX ---
+        # If Uncertain, show the Average (e.g., 50%) instead of Max (e.g., 99%)
+        if status == "UNCERTAIN":
+            display_score = avg_s
+        else:
+            display_score = max_s
+
+        # Formatting
         disp_sent = (sent[:57] + "..") if len(sent) > 57 else sent.ljust(59)
-        score_val = f"{max_s*100:.0f}%"
+        score_val = f"{display_score*100:.0f}%"
         ppl_val = f"{int(ppl)}"
         log_output += f"{disp_sent} | {score_val:<5} | {ppl_val:<4} | {final}\n"
 
@@ -135,36 +137,25 @@ def template_full_system(text):
     return f"# {verdict}\n**AI Sentence Count:** {ai_percent:.1f}%\n\n```text\n{log_output}\n```"
 
 # ==========================================
-# USER INTERFACE
+# USER INTERFACE
 # ==========================================
-# FIXED: Removed theme argument to prevent errors
 with gr.Blocks() as demo:
     gr.Markdown("# 🕵️♂️ AI Text Detector Suite")
     gr.Markdown(f"Current Model: `{MODEL_NAME}`")
 
     with gr.Tabs():
-
-        # --- TAB 1: MODEL ONLY ---
         with gr.TabItem("Template 1: Only Model"):
             gr.Markdown("### ⚡ Fast Check")
-            gr.Markdown("Uses **only RoBERTa** to scan the text as a single block. Good for quick, rough estimates.")
-
             t1_input = gr.Textbox(lines=5, placeholder="Paste text here...", label="Input Text")
             t1_button = gr.Button("Analyze (Model Only)", variant="primary")
             t1_output = gr.Markdown(label="Result")
-
             t1_button.click(template_model_only, inputs=t1_input, outputs=t1_output)
 
-        # --- TAB 2: FULL SYSTEM ---
         with gr.TabItem("Template 2: Full System"):
             gr.Markdown("### 🧠 Deep Analysis")
-            gr.Markdown("Uses **RoBERTa + GPT-2 + Logic**. Breaks text into sentences, checks context, and analyzes randomness.")
-
             t2_input = gr.Textbox(lines=8, placeholder="Paste text here...", label="Input Text")
             t2_button = gr.Button("Analyze (Full System)", variant="primary")
             t2_output = gr.Markdown(label="Detailed Report")
-
             t2_button.click(template_full_system, inputs=t2_input, outputs=t2_output)
 
-# Launch
 demo.launch()
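
Aside on the display fix in the hunks above (a minimal standalone sketch, not part of the committed app.py; the example scores are made up). Each sentence collects one RoBERTa AI-probability per sliding window that covers it. The report previously printed the window maximum for every sentence, so a mixed sentence with windows like [0.05, 0.99] displayed 99% even while its verdict read UNCERTAIN. The commit keeps the maximum for confident sentences but reports the average for uncertain ones:

# Sketch of the display rule introduced in this commit.
# display_score() is a hypothetical helper; in app.py the same logic
# runs inline inside template_full_system's per-sentence loop.
def display_score(scores, ai_floor=0.80, human_ceil=0.20):
    min_s, max_s = min(scores), max(scores)
    avg_s = sum(scores) / len(scores)
    if min_s > ai_floor:        # every window scored the sentence as AI
        status = "AI"
    elif max_s < human_ceil:    # every window scored it as human
        status = "HUMAN"
    else:                       # the windows disagree
        status = "UNCERTAIN"
    # New behavior: uncertain sentences show the average instead of the
    # max, so one hot window no longer inflates a mixed sentence to 99%.
    return status, avg_s if status == "UNCERTAIN" else max_s

print(display_score([0.05, 0.99]))  # ('UNCERTAIN', 0.52)
print(display_score([0.95, 0.99]))  # ('AI', 0.99)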