Update app.py
app.py CHANGED
@@ -17,25 +17,19 @@ def get_model():
    global tokenizer, model
    if model is None:
        print(f"Loading Specialized Model: {MODEL_NAME} on {device}")
-
-        # DeBERTa-v3 requires use_fast=False for stable tokenization.
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=False)
-
-        # We load as Sequence Classification with 1 label (Single Logit).
-        # ignore_mismatched_sizes=True allows us to load the custom Desklib head.
        model = AutoModelForSequenceClassification.from_pretrained(
            MODEL_NAME,
            num_labels=1,
            ignore_mismatched_sizes=True
        ).to(device).eval()
-
    return tokenizer, model

-#
+# Threshold used ONLY for coloring (Red vs Green)
THRESHOLD = 0.81

# -----------------------------
-# UTILITIES
+# UTILITIES
# -----------------------------
ABBR = ["e.g", "i.e", "mr", "mrs", "ms", "dr", "prof", "vs", "etc", "fig", "al", "jr", "sr", "st", "inc", "ltd", "u.s", "u.k"]
ABBR_REGEX = re.compile(r"\b(" + "|".join(map(re.escape, ABBR)) + r")\.", re.IGNORECASE)
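The hunk keeps the abbreviation list and ABBR_REGEX but never shows the sentence splitter that consumes them. As a rough sketch only (the real helper in app.py is not visible in this diff, and the split_sentences name and the <PRD> marker are assumptions), one common pattern is to mask the period after each abbreviation before splitting on end-of-sentence punctuation, then restore it:

import re

ABBR = ["e.g", "i.e", "mr", "dr", "prof", "etc"]
ABBR_REGEX = re.compile(r"\b(" + "|".join(map(re.escape, ABBR)) + r")\.", re.IGNORECASE)

def split_sentences(text):
    # Protect "Dr." / "e.g." so their periods are not treated as sentence boundaries.
    masked = ABBR_REGEX.sub(lambda m: m.group(1) + "<PRD>", text)
    parts = re.split(r"(?<=[.!?])\s+", masked)
    return [p.replace("<PRD>", ".").strip() for p in parts if p.strip()]

print(split_sentences("Dr. Smith arrived late, e.g. by an hour. Nobody noticed."))
# -> ['Dr. Smith arrived late, e.g. by an hour.', 'Nobody noticed.']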
@@ -94,7 +88,6 @@ def analyze(text):
    if not pure_sents:
        return "—", "—", "<em>No sentences detected.</em>", None

-    # Contextual Sliding Window
    windows = []
    for i in range(len(pure_sents)):
        start = max(0, i - 1)
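The hunk above shows only the first line of the window construction (start = max(0, i - 1)); the end index and the join are cut off by the diff. A minimal sketch of such a one-sentence-each-side context window, with the end computation assumed rather than taken from app.py:

pure_sents = ["First sentence.", "Second one.", "Third one."]

windows = []
for i in range(len(pure_sents)):
    start = max(0, i - 1)                  # line shown in the diff
    end = min(len(pure_sents), i + 2)      # assumed: one sentence of right context
    windows.append(" ".join(pure_sents[start:end]))

print(windows)
# ['First sentence. Second one.',
#  'First sentence. Second one. Third one.',
#  'Second one. Third one.']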
@@ -103,15 +96,15 @@ def analyze(text):

    inputs = tok(windows, return_tensors="pt", padding=True, truncation=True, max_length=512).to(device)
    output = mod(**inputs)
-
-    # Since num_labels=1, we use Sigmoid on the single logit per window
    probs = torch.sigmoid(output.logits).cpu().numpy().flatten().tolist()

    lengths = [len(s.split()) for s in pure_sents]
    total_words = sum(lengths)
    weighted_avg = sum(p * l for p, l in zip(probs, lengths)) / total_words if total_words > 0 else 0

-    #
+    # -----------------------------
+    # HTML RECONSTRUCTION (Unfiltered Probabilities)
+    # -----------------------------
    highlighted_html = "<div style='font-family: sans-serif; line-height: 1.8;'>"
    prob_map = {idx: probs[i] for i, idx in enumerate(pure_sents_indices)}
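Because the model is loaded with num_labels=1, each window yields a single logit that sigmoid maps to a 0–1 score; the document score is then a word-count-weighted mean, so long sentences pull harder than short ones. A toy run of the same formula (the numbers are made up for illustration):

probs = [0.95, 0.10]          # per-window sigmoid scores (illustrative values)
lengths = [30, 5]             # words per sentence
total_words = sum(lengths)
weighted_avg = sum(p * l for p, l in zip(probs, lengths)) / total_words if total_words > 0 else 0
print(f"{weighted_avg:.2%}")  # 82.86% — dominated by the 30-word sentence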
@@ -122,6 +115,7 @@ def analyze(text):

        if i in prob_map:
            score = prob_map[i]
+            # Color is still determined by the 81% threshold for visual aid
            if score >= THRESHOLD:
                color, bg = "#b80d0d", "rgba(184, 13, 13, 0.15)" # RED
            else:
@@ -129,37 +123,34 @@ def analyze(text):
            highlighted_html += (
                f"<span style='background:{bg}; padding:2px 4px; border-radius:4px; border-bottom: 2px solid {color};' "
-               f"title='
-               f"<b style='color:{color}; font-size: 0.8em;'>[{score:.
+               f"title='Raw Model Score: {score:.4f}'>"
+               f"<b style='color:{color}; font-size: 0.8em;'>[{score:.1%}]</b> {block}</span>"
            )
        else:
            highlighted_html += block
    highlighted_html += "</div>"

-
-
-
-    else:
-        label = "0 or * AI Content Detected"
-        display_score = "*"
+    # --- RAW RESULTS (No Masking) ---
+    label = f"{weighted_avg:.1%} AI Probability"
+    display_score = f"{weighted_avg:.2%}"

-    df = pd.DataFrame({"Sentence": pure_sents, "AI Confidence": [f"{p:.
+    df = pd.DataFrame({"Sentence": pure_sents, "AI Confidence": [f"{p:.2%}" for p in probs]})
    return label, display_score, highlighted_html, df

# -----------------------------
# GRADIO INTERFACE
# -----------------------------
with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("## 🕵️ AI Detector Pro
-    gr.Markdown(f"
+    gr.Markdown("## 🕵️ AI Detector Pro: Raw Mode")
+    gr.Markdown(f"Direct model output from **{MODEL_NAME}**. Visual highlight still triggers at **{THRESHOLD*100:.0f}%**.")

    with gr.Row():
        with gr.Column(scale=3):
-            text_input = gr.Textbox(label="Paste Text", lines=12, placeholder="
+            text_input = gr.Textbox(label="Paste Text", lines=12, placeholder="Min 300 words...")
            run_btn = gr.Button("Analyze", variant="primary")
        with gr.Column(scale=1):
-            verdict_out = gr.Label(label="Verdict")
-            score_out = gr.Label(label="Weighted
+            verdict_out = gr.Label(label="Model Verdict (Raw)")
+            score_out = gr.Label(label="Exact Weighted Probability")

    with gr.Tabs():
        with gr.TabItem("Visual Heatmap"):
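The diff ends inside the Tabs block, before the heatmap/table components and the event wiring, so the following is only a sketch of how the pieces above are typically hooked together in a Gradio Blocks app; heatmap_out, table_out, and the stub analyze() are assumptions for illustration, not part of this commit:

import gradio as gr

def analyze(text):
    # Stand-in with the same four return values as the real analyze().
    return "12.3% AI Probability", "12.34%", "<div>demo</div>", None

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    text_input = gr.Textbox(label="Paste Text", lines=12)
    run_btn = gr.Button("Analyze", variant="primary")
    verdict_out = gr.Label(label="Model Verdict (Raw)")
    score_out = gr.Label(label="Exact Weighted Probability")
    heatmap_out = gr.HTML()                               # hypothetical name
    table_out = gr.Dataframe(label="Sentence Breakdown")  # hypothetical name
    run_btn.click(fn=analyze, inputs=text_input,
                  outputs=[verdict_out, score_out, heatmap_out, table_out])

if __name__ == "__main__":
    demo.launch()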