Spaces:

VictorM-Coder
/

AIDetector

Running

App Files Community

VictorM-Coder committed on Dec 3, 2025

Commit

8d27116

verified ·

1 Parent(s): 70fc9f3

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -29

app.py CHANGED Viewed

@@ -43,17 +43,12 @@ def _restore(text):
 # PERFECT PARAGRAPH-PRESERVING SPLITTER
 # -----------------------------
 def split_preserving_structure(text):
-    """
-    Splits text into:
-    - EXACT newline blocks (\n, \n\n, etc.)
-    - Sentences inside non-newline blocks
-    """
     blocks = re.split(r"(\n+)", text)  # keep newline separators
     final_blocks = []
     for block in blocks:
         if block.startswith("\n"):
-            final_blocks.append(block)  # preserve EXACT paragraph spacing
         else:
             protected = _protect(block)
             parts = re.split(r"([.?!])(\s+)", protected)
@@ -72,9 +67,7 @@ def split_preserving_structure(text):
     return final_blocks
 def extract_sentences_only(blocks):
-    """Return only sentence blocks (no whitespace/newlines)."""
     return [
         b for b in blocks
         if b.strip() != "" and not b.startswith("\n") and not b.isspace()
@@ -91,14 +84,12 @@ def group_sentences(sents, size=3):
 # -----------------------------
 def analyze(text, max_len=512):
-    # Structured block split
     blocks = split_preserving_structure(text)
     pure_sentences = extract_sentences_only(blocks)
     if not pure_sentences:
         return "—", "—", "<em>Paste text to analyze.</em>", None
-    # Group into 3-sentence windows (Turnitin style)
     grouped = group_sentences(pure_sentences, 3)
     clean_grouped = [re.sub(r"\s+", " ", g).strip() for g in grouped]
@@ -111,7 +102,7 @@ def analyze(text, max_len=512):
         logits = model(**inputs).logits
         chunk_probs = F.softmax(logits, dim=-1)[:, 1].cpu().tolist()
-    # Expand group scores back to individual sentences
     ai_scores = []
     for idx, prob in enumerate(chunk_probs):
         start = idx * 3
@@ -120,53 +111,54 @@ def analyze(text, max_len=512):
             ai_scores.append(prob)
     # -----------------------------
-    # RECONSTRUCT ORIGINAL TEXT W/ HIGHLIGHTING
     # -----------------------------
     highlighted = ""
     current_sentence = 0
     for block in blocks:
-        # newline block → keep EXACT
         if block.startswith("\n"):
             highlighted += block
             continue
-        # whitespace block → keep
         if block.isspace():
             highlighted += block
             continue
-        # real sentence → highlight
         ai_p = ai_scores[current_sentence]
         current_sentence += 1
         pct = f"{ai_p * 100:.1f}%"
         if ai_p < 0.30:
-            color = "#11823b"
         elif ai_p < 0.70:
-            color = "#b8860b"
         else:
-            color = "#b80d0d"
         highlighted += (
-            f"<span style='background:rgba(0,0,0,0.03); padding:3px 4px; "
-            f"border-radius:4px;'><strong style='color:{color}'>[{pct}]</strong> "
-            f"{block.strip()}</span>"
         )
-        # maintain spacing after sentence
-        highlighted += " "
     # -----------------------------
-    # OVERALL SCORE
     # -----------------------------
     overall = sum(ai_scores) / len(ai_scores)
     overall_pct = f"{overall * 100:.1f}%"
     overall_label = "🤖 Likely AI Written" if overall >= THRESHOLD else "🧒 Likely Human Written"
-    # Table output
     df = pd.DataFrame(
         [[i + 1, s, ai_scores[i]] for i, s in enumerate(pure_sentences)],
         columns=["#", "Sentence", "AI_Prob"]
@@ -175,10 +167,10 @@ def analyze(text, max_len=512):
     return overall_label, overall_pct, highlighted, df
 # -----------------------------
-# UI
 # -----------------------------
 with gr.Blocks() as demo:
-    gr.Markdown("### 🕵️ AI Sentence-Level Detector — Exact Structure Highlighting")
     text_input = gr.Textbox(label="Paste text", lines=14, placeholder="Your text…")
     btn = gr.Button("Analyze")

 # PERFECT PARAGRAPH-PRESERVING SPLITTER
 # -----------------------------
 def split_preserving_structure(text):
     blocks = re.split(r"(\n+)", text)  # keep newline separators
     final_blocks = []
     for block in blocks:
         if block.startswith("\n"):
+            final_blocks.append(block)
         else:
             protected = _protect(block)
             parts = re.split(r"([.?!])(\s+)", protected)
     return final_blocks
 def extract_sentences_only(blocks):
     return [
         b for b in blocks
         if b.strip() != "" and not b.startswith("\n") and not b.isspace()
 # -----------------------------
 def analyze(text, max_len=512):
     blocks = split_preserving_structure(text)
     pure_sentences = extract_sentences_only(blocks)
     if not pure_sentences:
         return "—", "—", "<em>Paste text to analyze.</em>", None
     grouped = group_sentences(pure_sentences, 3)
     clean_grouped = [re.sub(r"\s+", " ", g).strip() for g in grouped]
         logits = model(**inputs).logits
         chunk_probs = F.softmax(logits, dim=-1)[:, 1].cpu().tolist()
+    # Expand grouped probs to each sentence
     ai_scores = []
     for idx, prob in enumerate(chunk_probs):
         start = idx * 3
             ai_scores.append(prob)
     # -----------------------------
+    # COLOR HIGHLIGHTING (FULL SENTENCE BLOCK COLORING)
     # -----------------------------
     highlighted = ""
     current_sentence = 0
     for block in blocks:
+        # newline blocks
         if block.startswith("\n"):
             highlighted += block
             continue
+        # whitespace blocks
         if block.isspace():
             highlighted += block
             continue
+        # real sentence
         ai_p = ai_scores[current_sentence]
         current_sentence += 1
         pct = f"{ai_p * 100:.1f}%"
+        # COLOR LEVELS (background + text)
         if ai_p < 0.30:
+            bg = "rgba(17,130,59,0.18)"      # green
+            color = "#0f5e2e"
         elif ai_p < 0.70:
+            bg = "rgba(184,134,11,0.23)"     # yellow
+            color = "#7a5f00"
         else:
+            bg = "rgba(184,13,13,0.20)"      # red
+            color = "#7a0000"
         highlighted += (
+            f"<span style='background:{bg}; padding:5px 8px; "
+            f"border-radius:6px; display:inline-block; margin-bottom:4px;'>"
+            f"<strong style='color:{color}'>[{pct}]</strong> "
+            f"{block.strip()}</span> "
         )
     # -----------------------------
+    # OVERALL
     # -----------------------------
     overall = sum(ai_scores) / len(ai_scores)
     overall_pct = f"{overall * 100:.1f}%"
     overall_label = "🤖 Likely AI Written" if overall >= THRESHOLD else "🧒 Likely Human Written"
+    # Table
     df = pd.DataFrame(
         [[i + 1, s, ai_scores[i]] for i, s in enumerate(pure_sentences)],
         columns=["#", "Sentence", "AI_Prob"]
     return overall_label, overall_pct, highlighted, df
 # -----------------------------
+# GRADIO UI
 # -----------------------------
 with gr.Blocks() as demo:
+    gr.Markdown("### 🕵️ AI Sentence-Level Detector — Color Highlight Mode")
     text_input = gr.Textbox(label="Paste text", lines=14, placeholder="Your text…")
     btn = gr.Button("Analyze")