Spaces:
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -8,8 +8,8 @@ import gradio as gr
|
|
| 8 |
# -----------------------------
|
| 9 |
# MODEL INITIALIZATION
|
| 10 |
# -----------------------------
|
| 11 |
-
#
|
| 12 |
-
MODEL_NAME = "
|
| 13 |
tokenizer = None
|
| 14 |
model = None
|
| 15 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
|
@@ -18,7 +18,9 @@ def get_model():
|
|
| 18 |
global tokenizer, model
|
| 19 |
if model is None:
|
| 20 |
print(f"Loading High-Performance Model: {MODEL_NAME} on {device}")
|
| 21 |
-
|
|
|
|
|
|
|
| 22 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=False)
|
| 23 |
|
| 24 |
dtype = torch.float32
|
|
@@ -93,7 +95,7 @@ def analyze(text):
|
|
| 93 |
if not pure_sents:
|
| 94 |
return "—", "—", "<em>No sentences detected.</em>", None
|
| 95 |
|
| 96 |
-
# Sliding window inference (Contextual)
|
| 97 |
windows = []
|
| 98 |
for i in range(len(pure_sents)):
|
| 99 |
start = max(0, i - 1)
|
|
@@ -102,6 +104,7 @@ def analyze(text):
|
|
| 102 |
|
| 103 |
inputs = tok(windows, return_tensors="pt", padding=True, truncation=True, max_length=512).to(device)
|
| 104 |
logits = mod(**inputs).logits
|
|
|
|
| 105 |
probs = F.softmax(logits.float(), dim=-1)[:, 1].cpu().numpy().tolist()
|
| 106 |
|
| 107 |
lengths = [len(s.split()) for s in pure_sents]
|
|
@@ -124,9 +127,9 @@ def analyze(text):
|
|
| 124 |
|
| 125 |
# Binary logic: Threshold applied to color
|
| 126 |
if score >= THRESHOLD:
|
| 127 |
-
color, bg = "#b80d0d", "rgba(184, 13, 13, 0.15)" # RED
|
| 128 |
else:
|
| 129 |
-
color, bg = "#11823b", "rgba(17, 130, 59, 0.15)" # GREEN
|
| 130 |
|
| 131 |
highlighted_html += (
|
| 132 |
f"<span style='background:{bg}; padding:2px 4px; border-radius:4px; border-bottom: 2px solid {color};' "
|
|
@@ -152,8 +155,8 @@ def analyze(text):
|
|
| 152 |
# GRADIO INTERFACE
|
| 153 |
# -----------------------------
|
| 154 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
| 155 |
-
gr.Markdown("## 🕵️ AI Detector Pro (
|
| 156 |
-
gr.Markdown(f"
|
| 157 |
|
| 158 |
with gr.Row():
|
| 159 |
with gr.Column(scale=3):
|
|
|
|
| 8 |
# -----------------------------
|
| 9 |
# MODEL INITIALIZATION
|
| 10 |
# -----------------------------
|
| 11 |
+
# desklib/ai-text-detector-v1.01 is highly robust for academic/essay detection.
|
| 12 |
+
MODEL_NAME = "desklib/ai-text-detector-v1.01"
|
| 13 |
tokenizer = None
|
| 14 |
model = None
|
| 15 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
|
|
|
| 18 |
global tokenizer, model
|
| 19 |
if model is None:
|
| 20 |
print(f"Loading High-Performance Model: {MODEL_NAME} on {device}")
|
| 21 |
+
|
| 22 |
+
# DeBERTa-v3 requires use_fast=False for stable SentencePiece tokenization.
|
| 23 |
+
# Ensure 'sentencepiece' is installed (pip install sentencepiece).
|
| 24 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=False)
|
| 25 |
|
| 26 |
dtype = torch.float32
|
|
|
|
| 95 |
if not pure_sents:
|
| 96 |
return "—", "—", "<em>No sentences detected.</em>", None
|
| 97 |
|
| 98 |
+
# Sliding window inference (Contextual for better accuracy)
|
| 99 |
windows = []
|
| 100 |
for i in range(len(pure_sents)):
|
| 101 |
start = max(0, i - 1)
|
|
|
|
| 104 |
|
| 105 |
inputs = tok(windows, return_tensors="pt", padding=True, truncation=True, max_length=512).to(device)
|
| 106 |
logits = mod(**inputs).logits
|
| 107 |
+
# Note: Desklib uses Label 1 for AI-generated and Label 0 for Human.
|
| 108 |
probs = F.softmax(logits.float(), dim=-1)[:, 1].cpu().numpy().tolist()
|
| 109 |
|
| 110 |
lengths = [len(s.split()) for s in pure_sents]
|
|
|
|
| 127 |
|
| 128 |
# Binary logic: Threshold applied to color
|
| 129 |
if score >= THRESHOLD:
|
| 130 |
+
color, bg = "#b80d0d", "rgba(184, 13, 13, 0.15)" # RED (AI)
|
| 131 |
else:
|
| 132 |
+
color, bg = "#11823b", "rgba(17, 130, 59, 0.15)" # GREEN (Human)
|
| 133 |
|
| 134 |
highlighted_html += (
|
| 135 |
f"<span style='background:{bg}; padding:2px 4px; border-radius:4px; border-bottom: 2px solid {color};' "
|
|
|
|
| 155 |
# GRADIO INTERFACE
|
| 156 |
# -----------------------------
|
| 157 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
| 158 |
+
gr.Markdown("## 🕵️ AI Detector Pro (Academic Edition)")
|
| 159 |
+
gr.Markdown(f"Using **{MODEL_NAME}**. Threshold: **{THRESHOLD*100:.0f}%**. Scores below this are marked as Human.")
|
| 160 |
|
| 161 |
with gr.Row():
|
| 162 |
with gr.Column(scale=3):
|