VictorM-Coder committed on
Commit
f6d6e24
·
verified ·
1 Parent(s): 7eb83ad

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -18
app.py CHANGED
@@ -9,7 +9,6 @@ import gradio as gr
9
  # MODEL INITIALIZATION
10
  # -----------------------------
11
  MODEL_NAME = "fakespot-ai/roberta-base-ai-text-detection-v1"
12
-
13
  tokenizer = None
14
  model = None
15
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -17,21 +16,18 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
17
def get_model():
    """Lazily initialize the tokenizer and classifier, caching them in module globals.

    Returns the ``(tokenizer, model)`` pair; subsequent calls reuse the cached
    objects instead of reloading from the hub.
    """
    global tokenizer, model
    # Guard clause: already loaded — hand back the cached pair immediately.
    if model is not None:
        return tokenizer, model
    print(f"Loading model: {MODEL_NAME} on {device}")
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    # Prefer bfloat16 only on CUDA hardware that supports it; float32 otherwise.
    use_bf16 = device.type == "cuda" and torch.cuda.is_bf16_supported()
    dtype = torch.bfloat16 if use_bf16 else torch.float32
    model = (
        AutoModelForSequenceClassification
        .from_pretrained(MODEL_NAME, torch_dtype=dtype)
        .to(device)
        .eval()
    )
    return tokenizer, model
33
 
34
- THRESHOLD = 0.20
 
35
 
36
  # -----------------------------
37
  # PROTECT STRUCTURE
@@ -82,10 +78,11 @@ def analyze(text):
82
  blocks = split_preserving_structure(text)
83
  pure_sents_indices = [i for i, b in enumerate(blocks) if b.strip() and not b.startswith("\n")]
84
  pure_sents = [blocks[i] for i in pure_sents_indices]
85
-
86
  if not pure_sents:
87
  return "—", "—", "<em>No sentences detected.</em>", None
88
 
 
89
  windows = []
90
  for i in range(len(pure_sents)):
91
  start = max(0, i - 1)
@@ -101,11 +98,11 @@ def analyze(text):
101
  weighted_avg = sum(p * l for p, l in zip(probs, lengths)) / total_words if total_words > 0 else 0
102
 
103
  # -----------------------------
104
- # HTML RECONSTRUCTION
105
  # -----------------------------
106
  highlighted_html = "<div style='font-family: sans-serif; line-height: 1.8;'>"
107
  prob_map = {idx: probs[i] for i, idx in enumerate(pure_sents_indices)}
108
-
109
  for i, block in enumerate(blocks):
110
  if block.startswith("\n") or block.isspace():
111
  highlighted_html += block.replace("\n", "<br>")
@@ -113,9 +110,14 @@ def analyze(text):
113
 
114
  if i in prob_map:
115
  score = prob_map[i]
116
- if score < 0.35: color, bg = "#11823b", "rgba(17, 130, 59, 0.15)"
117
- elif score < 0.70: color, bg = "#b8860b", "rgba(184, 134, 11, 0.15)"
118
- else: color, bg = "#b80d0d", "rgba(184, 13, 13, 0.15)"
 
 
 
 
 
119
 
120
  highlighted_html += (
121
  f"<span style='background:{bg}; padding:2px 4px; border-radius:4px; border-bottom: 2px solid {color};' "
@@ -124,10 +126,9 @@ def analyze(text):
124
  )
125
  else:
126
  highlighted_html += block
127
-
128
  highlighted_html += "</div>"
129
-
130
- # Updated Label Logic per your request
131
  label = "AI Content Detected" if weighted_avg >= THRESHOLD else "0 or * AI Content Detected"
132
 
133
  df = pd.DataFrame({"Sentence": pure_sents, "AI Confidence": [f"{p:.1%}" for p in probs]})
@@ -138,8 +139,8 @@ def analyze(text):
138
  # -----------------------------
139
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
140
  gr.Markdown("## 🕵️ AI Detector Pro")
141
- gr.Markdown("Sentence-level analysis with weighted context windows.")
142
-
143
  with gr.Row():
144
  with gr.Column(scale=3):
145
  text_input = gr.Textbox(label="Paste Text", lines=12)
@@ -147,7 +148,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
147
  with gr.Column(scale=1):
148
  verdict_out = gr.Label(label="Verdict")
149
  score_out = gr.Label(label="Weighted AI Score")
150
-
151
  with gr.Tabs():
152
  with gr.TabItem("Visual Heatmap"):
153
  html_out = gr.HTML()
 
9
  # MODEL INITIALIZATION
10
  # -----------------------------
11
  MODEL_NAME = "fakespot-ai/roberta-base-ai-text-detection-v1"
 
12
  tokenizer = None
13
  model = None
14
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
16
def get_model():
    """Return the cached ``(tokenizer, model)`` pair, loading them on first use.

    The pair is stored in module-level globals so the expensive hub download
    and weight load happen at most once per process.
    """
    global tokenizer, model
    if model is None:
        # First call: fetch everything and report progress to the logs.
        print(f"Loading model: {MODEL_NAME} on {device}")
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        # Pick bfloat16 when the CUDA device supports it, else stay in float32.
        dtype = (
            torch.bfloat16
            if device.type == "cuda" and torch.cuda.is_bf16_supported()
            else torch.float32
        )
        loaded = AutoModelForSequenceClassification.from_pretrained(
            MODEL_NAME, torch_dtype=dtype
        )
        model = loaded.to(device).eval()
    return tokenizer, model
28
 
29
+ # Global threshold for the "Verdict" label
30
+ THRESHOLD = 0.60
31
 
32
  # -----------------------------
33
  # PROTECT STRUCTURE
 
78
  blocks = split_preserving_structure(text)
79
  pure_sents_indices = [i for i, b in enumerate(blocks) if b.strip() and not b.startswith("\n")]
80
  pure_sents = [blocks[i] for i in pure_sents_indices]
81
+
82
  if not pure_sents:
83
  return "—", "—", "<em>No sentences detected.</em>", None
84
 
85
+ # Context window analysis
86
  windows = []
87
  for i in range(len(pure_sents)):
88
  start = max(0, i - 1)
 
98
  weighted_avg = sum(p * l for p, l in zip(probs, lengths)) / total_words if total_words > 0 else 0
99
 
100
  # -----------------------------
101
+ # HTML RECONSTRUCTION (Revised Thresholds)
102
  # -----------------------------
103
  highlighted_html = "<div style='font-family: sans-serif; line-height: 1.8;'>"
104
  prob_map = {idx: probs[i] for i, idx in enumerate(pure_sents_indices)}
105
+
106
  for i, block in enumerate(blocks):
107
  if block.startswith("\n") or block.isspace():
108
  highlighted_html += block.replace("\n", "<br>")
 
110
 
111
  if i in prob_map:
112
  score = prob_map[i]
113
+
114
+ # Revised coloring logic: < 60% is Human (Green)
115
+ if score < 0.60:
116
+ color, bg = "#11823b", "rgba(17, 130, 59, 0.15)" # Green
117
+ elif score < 0.80:
118
+ color, bg = "#b8860b", "rgba(184, 134, 11, 0.15)" # Amber
119
+ else:
120
+ color, bg = "#b80d0d", "rgba(184, 13, 13, 0.15)" # Red
121
 
122
  highlighted_html += (
123
  f"<span style='background:{bg}; padding:2px 4px; border-radius:4px; border-bottom: 2px solid {color};' "
 
126
  )
127
  else:
128
  highlighted_html += block
 
129
  highlighted_html += "</div>"
130
+
131
+ # Final Verdict logic based on the 60% rule
132
  label = "AI Content Detected" if weighted_avg >= THRESHOLD else "0 or * AI Content Detected"
133
 
134
  df = pd.DataFrame({"Sentence": pure_sents, "AI Confidence": [f"{p:.1%}" for p in probs]})
 
139
  # -----------------------------
140
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
141
  gr.Markdown("## 🕵️ AI Detector Pro")
142
+ gr.Markdown("Sentence-level analysis with weighted context windows. **Threshold: < 60% = Human.**")
143
+
144
  with gr.Row():
145
  with gr.Column(scale=3):
146
  text_input = gr.Textbox(label="Paste Text", lines=12)
 
148
  with gr.Column(scale=1):
149
  verdict_out = gr.Label(label="Verdict")
150
  score_out = gr.Label(label="Weighted AI Score")
151
+
152
  with gr.Tabs():
153
  with gr.TabItem("Visual Heatmap"):
154
  html_out = gr.HTML()