Spaces:

VictorM-Coder
/

AIDetector

Running

App Files Community

VictorM-Coder committed on 26 days ago

Commit

27d1d53

verified ·

1 Parent(s): 370410f

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -30

app.py CHANGED Viewed

@@ -15,8 +15,12 @@ dtype = torch.bfloat16 if (device.type=="cuda" and torch.cuda.is_bf16_supported(
 model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, dtype=dtype).to(device).eval()
 # -----------------------------
-# SENTENCE SPLITTER (simple, robust, no lookbehinds)
-# Protect → split → restore
 # -----------------------------
 ABBR = [
     "e.g", "i.e", "mr", "mrs", "ms", "dr", "prof", "vs", "etc", "fig", "al",
@@ -59,15 +63,14 @@ def sentence_split(text: str):
     return [_restore(s).strip() for s in sentences if s.strip()]
 # -----------------------------
-# CLASSIFY SENTENCE-BY-SENTENCE (Fakespot: id2label[1] == "AI")
-# Also compute an overall Likely-AI score (mean AI prob)
 # -----------------------------
-def classify_sentence_by_sentence(text, threshold=0.70, max_len=512):
     sents = sentence_split(text)
     if not sents:
-        return "⚠️ Please paste some text.", "", "", None, None
-    # light cleaning similar to model card guidance
     clean_sents = [re.sub(r"\s+", " ", s).strip() for s in sents]
     inputs = tokenizer(
@@ -79,25 +82,22 @@ def classify_sentence_by_sentence(text, threshold=0.70, max_len=512):
         probs = F.softmax(logits, dim=-1)  # [:,0]=Human, [:,1]=AI
     ai_probs = probs[:, 1].detach().cpu().tolist()
-    overall_ai = sum(ai_probs) / len(ai_probs)  # mean AI probability across sentences
     overall_pct = f"{overall_ai * 100:.1f}%"
-    overall_label = "🤖 Likely AI" if overall_ai >= threshold else "🧒 Likely Human"
-    rows = []
-    highlights = []
     for i, orig in enumerate(sents, start=1):
-        ai_p = float(ai_probs[i-1])  # Fakespot: 1 == AI
-        label = "AI" if ai_p >= threshold else "Human"
         pct = f"{ai_p*100:.1f}%"
-        # choose color
         if ai_p < 0.30:
             color = "#11823b"      # green
         elif ai_p < 0.70:
             color = "#b8860b"      # amber
         else:
             color = "#b80d0d"      # red
         normalized = re.sub(r"\s+", " ", orig)
         highlights.append(
             "<div style='margin:6px 0; padding:6px 8px; border-radius:6px; background:rgba(0,0,0,0.03)'>"
@@ -107,31 +107,23 @@ def classify_sentence_by_sentence(text, threshold=0.70, max_len=512):
     html = "\n".join(highlights)
     df = pd.DataFrame(rows, columns=["#", "Sentence", "AI_Prob", "Label"])
-    status = "Done ✅ (Fakespot detector)"
-    return status, overall_label, overall_pct, html, df
 # -----------------------------
-# GRADIO UI (minimal, with overall score)
 # -----------------------------
 with gr.Blocks() as demo:
-    gr.Markdown("### 🧠 Sentence-by-Sentence AI Check (Fakespot)")
     text_input = gr.Textbox(label="Paste text", lines=14, placeholder="Your content…")
-    threshold = gr.Slider(0.50, 0.95, value=0.70, step=0.01, label="AI threshold")
     btn = gr.Button("Analyze")
-    status = gr.Label(label="Status")
-    overall_label = gr.Label(label="Overall Verdict")
-    overall_score = gr.Label(label="Likely AI-generated Score (avg)")
     highlights = gr.HTML(label="Per-Sentence Highlights")
     table = gr.Dataframe(headers=["#", "Sentence", "AI_Prob", "Label"], wrap=True)
-    btn.click(
-        classify_sentence_by_sentence,
-        inputs=[text_input, threshold],
-        outputs=[status, overall_label, overall_score, highlights, table]
-    )
 if __name__ == "__main__":
     demo.launch()

 model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, dtype=dtype).to(device).eval()
 # -----------------------------
+# INTERNAL THRESHOLD for sentence labels/colors
+# -----------------------------
+THRESHOLD = 0.70  # used only for per-sentence "AI/Human" tags & color
+# -----------------------------
+# SENTENCE SPLITTER (protect → split → restore; no lookbehinds)
 # -----------------------------
 ABBR = [
     "e.g", "i.e", "mr", "mrs", "ms", "dr", "prof", "vs", "etc", "fig", "al",
     return [_restore(s).strip() for s in sentences if s.strip()]
 # -----------------------------
+# CORE: overall AI score + highlights
 # -----------------------------
+def analyze(text, max_len=512):
     sents = sentence_split(text)
     if not sents:
+        return "—", "—", "<em>Paste some text to analyze.</em>", None
+    # light clean (per model card vibe)
     clean_sents = [re.sub(r"\s+", " ", s).strip() for s in sents]
     inputs = tokenizer(
         probs = F.softmax(logits, dim=-1)  # [:,0]=Human, [:,1]=AI
     ai_probs = probs[:, 1].detach().cpu().tolist()
+    overall_ai = sum(ai_probs) / len(ai_probs)
     overall_pct = f"{overall_ai * 100:.1f}%"
+    overall_label = "🤖 Likely AI Written" if overall_ai >= THRESHOLD else "🧒 Likely Human Written"
+    # Per-sentence highlights (use THRESHOLD only for the tag/color)
+    rows, highlights = [], []
     for i, orig in enumerate(sents, start=1):
+        ai_p = float(ai_probs[i-1])
+        label = "AI" if ai_p >= THRESHOLD else "Human"
         pct = f"{ai_p*100:.1f}%"
         if ai_p < 0.30:
             color = "#11823b"      # green
         elif ai_p < 0.70:
             color = "#b8860b"      # amber
         else:
             color = "#b80d0d"      # red
         normalized = re.sub(r"\s+", " ", orig)
         highlights.append(
             "<div style='margin:6px 0; padding:6px 8px; border-radius:6px; background:rgba(0,0,0,0.03)'>"
     html = "\n".join(highlights)
     df = pd.DataFrame(rows, columns=["#", "Sentence", "AI_Prob", "Label"])
+    return overall_label, overall_pct, html, df
 # -----------------------------
+# GRADIO UI (verdict + score, plus highlights)
 # -----------------------------
 with gr.Blocks() as demo:
+    gr.Markdown("### 🕵️ AI Written Text Detector — Fakespot Model")
     text_input = gr.Textbox(label="Paste text", lines=14, placeholder="Your content…")
     btn = gr.Button("Analyze")
+    verdict = gr.Label(label="Verdict (Overall)")
+    score = gr.Label(label="AI Score (Average across sentences)")
     highlights = gr.HTML(label="Per-Sentence Highlights")
     table = gr.Dataframe(headers=["#", "Sentence", "AI_Prob", "Label"], wrap=True)
+    btn.click(analyze, inputs=[text_input], outputs=[verdict, score, highlights, table])
 if __name__ == "__main__":
     demo.launch()