VictorM-Coder committed on
Commit
c059497
·
verified ·
1 Parent(s): 0a84024

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -8
app.py CHANGED
@@ -8,8 +8,8 @@ import gradio as gr
8
  # -----------------------------
9
  # MODEL INITIALIZATION
10
  # -----------------------------
11
- # This is a DeBERTa-v3-Large model fine-tuned on the DAIGT (Student Writing vs AI) dataset.
12
- MODEL_NAME = "Hamidreza/DeBERTa-v3-large-AI-Detector-v2"
13
  tokenizer = None
14
  model = None
15
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -18,7 +18,9 @@ def get_model():
18
  global tokenizer, model
19
  if model is None:
20
  print(f"Loading High-Performance Model: {MODEL_NAME} on {device}")
21
- # DeBERTa-v3 requires use_fast=False for stable SentencePiece tokenization
 
 
22
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=False)
23
 
24
  dtype = torch.float32
@@ -93,7 +95,7 @@ def analyze(text):
93
  if not pure_sents:
94
  return "—", "—", "<em>No sentences detected.</em>", None
95
 
96
- # Sliding window inference (Contextual)
97
  windows = []
98
  for i in range(len(pure_sents)):
99
  start = max(0, i - 1)
@@ -102,6 +104,7 @@ def analyze(text):
102
 
103
  inputs = tok(windows, return_tensors="pt", padding=True, truncation=True, max_length=512).to(device)
104
  logits = mod(**inputs).logits
 
105
  probs = F.softmax(logits.float(), dim=-1)[:, 1].cpu().numpy().tolist()
106
 
107
  lengths = [len(s.split()) for s in pure_sents]
@@ -124,9 +127,9 @@ def analyze(text):
124
 
125
  # Binary logic: Threshold applied to color
126
  if score >= THRESHOLD:
127
- color, bg = "#b80d0d", "rgba(184, 13, 13, 0.15)" # RED
128
  else:
129
- color, bg = "#11823b", "rgba(17, 130, 59, 0.15)" # GREEN
130
 
131
  highlighted_html += (
132
  f"<span style='background:{bg}; padding:2px 4px; border-radius:4px; border-bottom: 2px solid {color};' "
@@ -152,8 +155,8 @@ def analyze(text):
152
  # GRADIO INTERFACE
153
  # -----------------------------
154
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
155
- gr.Markdown("## 🕵️ AI Detector Pro (DeBERTa Edition)")
156
- gr.Markdown(f"Advanced Academic Analysis. Threshold: **{THRESHOLD*100:.0f}%**. Everything below is categorized as Human.")
157
 
158
  with gr.Row():
159
  with gr.Column(scale=3):
 
8
  # -----------------------------
9
  # MODEL INITIALIZATION
10
  # -----------------------------
11
+ # desklib/ai-text-detector-v1.01 is highly robust for academic/essay detection.
12
+ MODEL_NAME = "desklib/ai-text-detector-v1.01"
13
  tokenizer = None
14
  model = None
15
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
18
  global tokenizer, model
19
  if model is None:
20
  print(f"Loading High-Performance Model: {MODEL_NAME} on {device}")
21
+
22
+ # DeBERTa-v3 requires use_fast=False for stable SentencePiece tokenization.
23
+ # Ensure 'sentencepiece' is installed (pip install sentencepiece).
24
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=False)
25
 
26
  dtype = torch.float32
 
95
  if not pure_sents:
96
  return "—", "—", "<em>No sentences detected.</em>", None
97
 
98
+ # Sliding window inference (Contextual for better accuracy)
99
  windows = []
100
  for i in range(len(pure_sents)):
101
  start = max(0, i - 1)
 
104
 
105
  inputs = tok(windows, return_tensors="pt", padding=True, truncation=True, max_length=512).to(device)
106
  logits = mod(**inputs).logits
107
+ # Note: Desklib uses Label 1 for AI-generated and Label 0 for Human.
108
  probs = F.softmax(logits.float(), dim=-1)[:, 1].cpu().numpy().tolist()
109
 
110
  lengths = [len(s.split()) for s in pure_sents]
 
127
 
128
  # Binary logic: Threshold applied to color
129
  if score >= THRESHOLD:
130
+ color, bg = "#b80d0d", "rgba(184, 13, 13, 0.15)" # RED (AI)
131
  else:
132
+ color, bg = "#11823b", "rgba(17, 130, 59, 0.15)" # GREEN (Human)
133
 
134
  highlighted_html += (
135
  f"<span style='background:{bg}; padding:2px 4px; border-radius:4px; border-bottom: 2px solid {color};' "
 
155
  # GRADIO INTERFACE
156
  # -----------------------------
157
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
158
+ gr.Markdown("## 🕵️ AI Detector Pro (Academic Edition)")
159
+ gr.Markdown(f"Using **{MODEL_NAME}**. Threshold: **{THRESHOLD*100:.0f}%**. Scores below this are marked as Human.")
160
 
161
  with gr.Row():
162
  with gr.Column(scale=3):