Spaces:

VictorM-Coder
/

AIDetector

Running

App Files Community

VictorM-Coder committed 25 days ago

Commit

2bb6fdc

verified ·

1 Parent(s): 2b59ac0

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -35

app.py CHANGED Viewed

@@ -6,17 +6,17 @@ import pandas as pd
 import gradio as gr
 # ----------------------------------------------------
-# LOAD CAUSAL LM (DetectGPT requires a generative LM)
 # ----------------------------------------------------
-MODEL_NAME = "meta-llama/Llama-3.1-8B-Instruct"
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
-    torch_dtype=torch.float16 if device.type == "cuda" else torch.float32,
-    device_map="auto"
-).eval()
 # ----------------------------------------------------
@@ -34,22 +34,19 @@ def sentence_split(text):
 def perplexity(sentence):
     inputs = tokenizer(sentence, return_tensors="pt").to(device)
     with torch.no_grad():
-        outputs = model(**inputs, labels=inputs["input_ids"])
-        loss = outputs.loss
-    return torch.exp(loss).item()
 # ----------------------------------------------------
-# SIMPLE TEXT PERTURBATION (Synonym-like noise)
 # ----------------------------------------------------
 def perturb(text):
     words = text.split()
     if len(words) < 4:
-        return text  # too short to perturb
     idx = np.random.randint(0, len(words))
-    words[idx] = words[idx] + " "  # small noise (DetectGPT paper trick)
     return " ".join(words)
@@ -58,23 +55,23 @@ def perturb(text):
 # ----------------------------------------------------
 def detectgpt_score(sentence, perturbations=5):
     try:
-        orig = perplexity(sentence)
     except:
-        return 0  # fallback
-    perturbed_scores = []
     for _ in range(perturbations):
         p = perturb(sentence)
         try:
-            pp = perplexity(p)
-            perturbed_scores.append(pp)
         except:
-            continue
-    if not perturbed_scores:
         return 0
-    return np.mean(perturbed_scores) - orig  # DetectGPT signal
 # ----------------------------------------------------
@@ -85,43 +82,37 @@ def classify_text(text):
         return "⚠️ Please enter some text.", None, None
     sentences = sentence_split(text)
     results = []
     highlighted = []
-    detectgpt_scores = []
     for s in sentences:
         score = detectgpt_score(s)
-        detectgpt_scores.append(score)
         label = "AI" if score > 0 else "Human"
-        conf = abs(score)
-        results.append([s, label, f"{conf:.4f}"])
         if label == "AI":
             highlighted.append(f"<p style='color:red;font-weight:bold'>{s}</p>")
         else:
             highlighted.append(f"<p style='color:green;font-weight:bold'>{s}</p>")
-    # -------------------------
-    # DOCUMENT-LEVEL SCORE
-    # -------------------------
-    avg_score = np.mean(detectgpt_scores)
-    doc_ai_percent = max(0, min(100, (avg_score + 1) * 50))
     df = pd.DataFrame(results, columns=["Sentence", "Label", "Score"])
     html = "\n".join(highlighted)
-    return f"⚖️ Document AI Likelihood: {doc_ai_percent:.1f}%", html, df
 # ----------------------------------------------------
 # GRADIO UI
 # ----------------------------------------------------
 with gr.Blocks() as demo:
-    gr.Markdown("## 🧠 Writenix DetectGPT (Turnitin-like Detector)")
     text_input = gr.Textbox(
         label="Enter text",

 import gradio as gr
 # ----------------------------------------------------
+# LOAD CAUSAL LM (GPT-J 6B = Best balance)
 # ----------------------------------------------------
+MODEL_NAME = "EleutherAI/gpt-j-6B"
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
+    torch_dtype=torch.float16 if device.type == "cuda" else torch.float32
+).to(device).eval()
 # ----------------------------------------------------
 def perplexity(sentence):
     inputs = tokenizer(sentence, return_tensors="pt").to(device)
     with torch.no_grad():
+        out = model(**inputs, labels=inputs["input_ids"])
+    return torch.exp(out.loss).item()
 # ----------------------------------------------------
+# SIMPLE TEXT PERTURBATION
 # ----------------------------------------------------
 def perturb(text):
     words = text.split()
     if len(words) < 4:
+        return text
     idx = np.random.randint(0, len(words))
+    words[idx] += " "
     return " ".join(words)
 # ----------------------------------------------------
 def detectgpt_score(sentence, perturbations=5):
     try:
+        base = perplexity(sentence)
     except:
+        return 0
+    pert_scores = []
     for _ in range(perturbations):
         p = perturb(sentence)
         try:
+            pert_scores.append(perplexity(p))
         except:
+            pass
+    if not pert_scores:
         return 0
+    # DetectGPT signal
+    return np.mean(pert_scores) - base
 # ----------------------------------------------------
         return "⚠️ Please enter some text.", None, None
     sentences = sentence_split(text)
     results = []
     highlighted = []
+    scores = []
     for s in sentences:
         score = detectgpt_score(s)
+        scores.append(score)
         label = "AI" if score > 0 else "Human"
+        results.append([s, label, f"{abs(score):.4f}"])
         if label == "AI":
             highlighted.append(f"<p style='color:red;font-weight:bold'>{s}</p>")
         else:
             highlighted.append(f"<p style='color:green;font-weight:bold'>{s}</p>")
+    # Document-level score rescaled 0–100%
+    avg = np.mean(scores)
+    doc_ai = max(0, min(100, (avg + 1) * 50))
     df = pd.DataFrame(results, columns=["Sentence", "Label", "Score"])
     html = "\n".join(highlighted)
+    return f"⚖️ Document AI Likelihood: {doc_ai:.1f}%", html, df
 # ----------------------------------------------------
 # GRADIO UI
 # ----------------------------------------------------
 with gr.Blocks() as demo:
+    gr.Markdown("## 🧠 Writenix DetectGPT (GPT-J-6B Version)")
     text_input = gr.Textbox(
         label="Enter text",