Spaces:

VictorM-Coder
/

AIDetector

Running

App Files Community

VictorM-Coder committed on Nov 29, 2025

Commit

23b2adf

verified ·

1 Parent(s): ea83121

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -17

app.py CHANGED Viewed

@@ -15,17 +15,17 @@ dtype = torch.bfloat16 if (device.type=="cuda" and torch.cuda.is_bf16_supported(
 model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, dtype=dtype).to(device).eval()
 # -----------------------------
-# THRESHOLD FOR LABEL COLOR
 # -----------------------------
-THRESHOLD = 0.70
 # -----------------------------
 # SENTENCE SPLITTING UTILITIES
 # -----------------------------
 ABBR = [
     "e.g", "i.e", "mr", "mrs", "ms", "dr", "prof", "vs", "etc", "fig", "al",
-    "jr", "sr", "st", "no", "vol", "pp", "mt", "inc", "ltd", "co", "u.s", "u.k",
-    "a.m", "p.m"
 ]
 ABBR_REGEX = re.compile(r"\b(" + "|".join(map(re.escape, ABBR)) + r")\.", flags=re.IGNORECASE)
@@ -50,6 +50,7 @@ def sentence_split(text: str):
     if not t:
         return []
     parts = re.split(r"([.?!])\s+(?=(?:[\"“”‘’']?\s*[A-Z(])|$)", t)
     sentences, buf = [], ""
     for i, chunk in enumerate(parts):
         if i % 2 == 0:
@@ -58,8 +59,10 @@ def sentence_split(text: str):
             buf += chunk
             sentences.append(buf.strip())
             buf = ""
     if buf.strip():
         sentences.append(buf.strip())
     return [_restore(s).strip() for s in sentences if s.strip()]
 # -----------------------------
@@ -68,8 +71,7 @@ def sentence_split(text: str):
 def group_sentences(sents, size=3):
     grouped = []
     for i in range(0, len(sents), size):
-        chunk = " ".join(sents[i:i+size])
-        grouped.append(chunk)
     return grouped
 # -----------------------------
@@ -80,14 +82,14 @@ def analyze(text, max_len=512):
     if not sents:
         return "—", "—", "<em>Paste some text to analyze.</em>", None
-    # GROUP sentences into 3-sentence chunks
     grouped = group_sentences(sents, size=3)
     clean_grouped = [re.sub(r"\s+", " ", g).strip() for g in grouped]
     # tokenize grouped
     inputs = tokenizer(
-        clean_grouped, return_tensors="pt", padding=True,
-        truncation=True, max_length=max_len
     ).to(device)
     # model inference
@@ -95,7 +97,7 @@ def analyze(text, max_len=512):
         logits = model(**inputs).logits
         chunk_probs = F.softmax(logits, dim=-1)[:, 1].detach().cpu().tolist()
-    # EXPAND chunk-level probabilities back to per-sentence
     ai_probs = []
     for idx, prob in enumerate(chunk_probs):
         start = idx * 3
@@ -106,28 +108,34 @@ def analyze(text, max_len=512):
     # overall AI score
     overall_ai = sum(ai_probs) / len(ai_probs)
     overall_pct = f"{overall_ai * 100:.1f}%"
     overall_label = (
         "🤖 Likely AI Written" if overall_ai >= THRESHOLD else "🧒 Likely Human Written"
     )
     # HIGHLIGHTS + TABLE
     rows, highlights = [], []
     for i, orig in enumerate(sents, start=1):
         ai_p = float(ai_probs[i-1])
         label = "AI" if ai_p >= THRESHOLD else "Human"
-        pct = f"{ai_p*100:.1f}%"
-        # color logic
         if ai_p < 0.30:
-            color = "#11823b"
         elif ai_p < 0.70:
-            color = "#b8860b"
         else:
-            color = "#b80d0d"
         normalized = re.sub(r"\s+", " ", orig)
         highlights.append(
-            "<div style='margin:6px 0; padding:6px 8px; border-radius:6px; "
             "background:rgba(0,0,0,0.03)'>"
             f"<strong style='color:{color}'>[{pct} {label}]</strong> "
             f"{normalized}</div>"
@@ -144,7 +152,7 @@ def analyze(text, max_len=512):
 # GRADIO UI
 # -----------------------------
 with gr.Blocks() as demo:
-    gr.Markdown("### 🕵️ AI Written Text Detector — Fakespot Model (Turnitin-Style)")
     text_input = gr.Textbox(label="Paste text", lines=14, placeholder="Your content…")
     btn = gr.Button("Analyze")

 model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, dtype=dtype).to(device).eval()
 # -----------------------------
+# AI DECISION THRESHOLD (80%)
 # -----------------------------
+THRESHOLD = 0.80  # AI from 80% and above
 # -----------------------------
 # SENTENCE SPLITTING UTILITIES
 # -----------------------------
 ABBR = [
     "e.g", "i.e", "mr", "mrs", "ms", "dr", "prof", "vs", "etc", "fig", "al",
+    "jr", "sr",    "st", "no", "vol", "pp", "mt", "inc", "ltd", "co",
+    "u.s", "u.k", "a.m", "p.m"
 ]
 ABBR_REGEX = re.compile(r"\b(" + "|".join(map(re.escape, ABBR)) + r")\.", flags=re.IGNORECASE)
     if not t:
         return []
     parts = re.split(r"([.?!])\s+(?=(?:[\"“”‘’']?\s*[A-Z(])|$)", t)
     sentences, buf = [], ""
     for i, chunk in enumerate(parts):
         if i % 2 == 0:
             buf += chunk
             sentences.append(buf.strip())
             buf = ""
     if buf.strip():
         sentences.append(buf.strip())
     return [_restore(s).strip() for s in sentences if s.strip()]
 # -----------------------------
 def group_sentences(sents, size=3):
     grouped = []
     for i in range(0, len(sents), size):
+        grouped.append(" ".join(sents[i:i+size]))
     return grouped
 # -----------------------------
     if not sents:
         return "—", "—", "<em>Paste some text to analyze.</em>", None
+    # GROUP sentences (3 at a time)
     grouped = group_sentences(sents, size=3)
     clean_grouped = [re.sub(r"\s+", " ", g).strip() for g in grouped]
     # tokenize grouped
     inputs = tokenizer(
+        clean_grouped, return_tensors="pt",
+        padding=True, truncation=True, max_length=max_len
     ).to(device)
     # model inference
         logits = model(**inputs).logits
         chunk_probs = F.softmax(logits, dim=-1)[:, 1].detach().cpu().tolist()
+    # EXPAND chunk-level probabilities to per-sentence
     ai_probs = []
     for idx, prob in enumerate(chunk_probs):
         start = idx * 3
     # overall AI score
     overall_ai = sum(ai_probs) / len(ai_probs)
     overall_pct = f"{overall_ai * 100:.1f}%"
+    # UPDATED THRESHOLD (80%)
     overall_label = (
         "🤖 Likely AI Written" if overall_ai >= THRESHOLD else "🧒 Likely Human Written"
     )
     # HIGHLIGHTS + TABLE
     rows, highlights = [], []
     for i, orig in enumerate(sents, start=1):
         ai_p = float(ai_probs[i-1])
+        pct = f"{ai_p * 100:.1f}%"
+        # UPDATED → label decided by 80%
         label = "AI" if ai_p >= THRESHOLD else "Human"
+        # color logic (unchanged)
         if ai_p < 0.30:
+            color = "#11823b"   # green
         elif ai_p < 0.70:
+            color = "#b8860b"   # amber
         else:
+            color = "#b80d0d"   # red
         normalized = re.sub(r"\s+", " ", orig)
         highlights.append(
+            "<div style='margin:6px 0; padding:6px 8px; border-radius:6px;"
             "background:rgba(0,0,0,0.03)'>"
             f"<strong style='color:{color}'>[{pct} {label}]</strong> "
             f"{normalized}</div>"
 # GRADIO UI
 # -----------------------------
 with gr.Blocks() as demo:
+    gr.Markdown("### 🕵️ AI Written Text Detector — Fakespot Model (80% Threshold)")
     text_input = gr.Textbox(label="Paste text", lines=14, placeholder="Your content…")
     btn = gr.Button("Analyze")