VictorM-Coder committed on
Commit
27d1d53
·
verified ·
1 Parent(s): 370410f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -30
app.py CHANGED
@@ -15,8 +15,12 @@ dtype = torch.bfloat16 if (device.type=="cuda" and torch.cuda.is_bf16_supported(
15
  model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, dtype=dtype).to(device).eval()
16
 
17
  # -----------------------------
18
- # SENTENCE SPLITTER (simple, robust, no lookbehinds)
19
- # Protect → split → restore
 
 
 
 
20
  # -----------------------------
21
  ABBR = [
22
  "e.g", "i.e", "mr", "mrs", "ms", "dr", "prof", "vs", "etc", "fig", "al",
@@ -59,15 +63,14 @@ def sentence_split(text: str):
59
  return [_restore(s).strip() for s in sentences if s.strip()]
60
 
61
  # -----------------------------
62
- # CLASSIFY SENTENCE-BY-SENTENCE (Fakespot: id2label[1] == "AI")
63
- # Also compute an overall Likely-AI score (mean AI prob)
64
  # -----------------------------
65
- def classify_sentence_by_sentence(text, threshold=0.70, max_len=512):
66
  sents = sentence_split(text)
67
  if not sents:
68
- return "⚠️ Please paste some text.", "", "", None, None
69
 
70
- # light cleaning similar to model card guidance
71
  clean_sents = [re.sub(r"\s+", " ", s).strip() for s in sents]
72
 
73
  inputs = tokenizer(
@@ -79,25 +82,22 @@ def classify_sentence_by_sentence(text, threshold=0.70, max_len=512):
79
  probs = F.softmax(logits, dim=-1) # [:,0]=Human, [:,1]=AI
80
 
81
  ai_probs = probs[:, 1].detach().cpu().tolist()
82
- overall_ai = sum(ai_probs) / len(ai_probs) # mean AI probability across sentences
83
  overall_pct = f"{overall_ai * 100:.1f}%"
84
- overall_label = "🤖 Likely AI" if overall_ai >= threshold else "🧒 Likely Human"
85
 
86
- rows = []
87
- highlights = []
88
  for i, orig in enumerate(sents, start=1):
89
- ai_p = float(ai_probs[i-1]) # Fakespot: 1 == AI
90
- label = "AI" if ai_p >= threshold else "Human"
91
  pct = f"{ai_p*100:.1f}%"
92
-
93
- # choose color
94
  if ai_p < 0.30:
95
  color = "#11823b" # green
96
  elif ai_p < 0.70:
97
  color = "#b8860b" # amber
98
  else:
99
  color = "#b80d0d" # red
100
-
101
  normalized = re.sub(r"\s+", " ", orig)
102
  highlights.append(
103
  "<div style='margin:6px 0; padding:6px 8px; border-radius:6px; background:rgba(0,0,0,0.03)'>"
@@ -107,31 +107,23 @@ def classify_sentence_by_sentence(text, threshold=0.70, max_len=512):
107
 
108
  html = "\n".join(highlights)
109
  df = pd.DataFrame(rows, columns=["#", "Sentence", "AI_Prob", "Label"])
110
-
111
- status = "Done ✅ (Fakespot detector)"
112
- return status, overall_label, overall_pct, html, df
113
 
114
  # -----------------------------
115
- # GRADIO UI (minimal, with overall score)
116
  # -----------------------------
117
  with gr.Blocks() as demo:
118
- gr.Markdown("### 🧠 Sentence-by-Sentence AI Check (Fakespot)")
119
 
120
  text_input = gr.Textbox(label="Paste text", lines=14, placeholder="Your content…")
121
- threshold = gr.Slider(0.50, 0.95, value=0.70, step=0.01, label="AI threshold")
122
  btn = gr.Button("Analyze")
123
 
124
- status = gr.Label(label="Status")
125
- overall_label = gr.Label(label="Overall Verdict")
126
- overall_score = gr.Label(label="Likely AI-generated Score (avg)")
127
  highlights = gr.HTML(label="Per-Sentence Highlights")
128
  table = gr.Dataframe(headers=["#", "Sentence", "AI_Prob", "Label"], wrap=True)
129
 
130
- btn.click(
131
- classify_sentence_by_sentence,
132
- inputs=[text_input, threshold],
133
- outputs=[status, overall_label, overall_score, highlights, table]
134
- )
135
 
136
  if __name__ == "__main__":
137
  demo.launch()
 
15
  model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, dtype=dtype).to(device).eval()
16
 
17
  # -----------------------------
18
+ # INTERNAL THRESHOLD for the overall verdict and per-sentence AI/Human labels
19
+ # -----------------------------
20
+ THRESHOLD = 0.70 # used only for per-sentence "AI/Human" tags & color
21
+
22
+ # -----------------------------
23
+ # SENTENCE SPLITTER (protect → split → restore; no lookbehinds)
24
  # -----------------------------
25
  ABBR = [
26
  "e.g", "i.e", "mr", "mrs", "ms", "dr", "prof", "vs", "etc", "fig", "al",
 
63
  return [_restore(s).strip() for s in sentences if s.strip()]
64
 
65
  # -----------------------------
66
+ # CORE: overall AI score + highlights
 
67
  # -----------------------------
68
+ def analyze(text, max_len=512):
69
  sents = sentence_split(text)
70
  if not sents:
71
+ return "—", "—", "<em>Paste some text to analyze.</em>", None
72
 
73
+ # light whitespace cleanup (following the model card's guidance)
74
  clean_sents = [re.sub(r"\s+", " ", s).strip() for s in sents]
75
 
76
  inputs = tokenizer(
 
82
  probs = F.softmax(logits, dim=-1) # [:,0]=Human, [:,1]=AI
83
 
84
  ai_probs = probs[:, 1].detach().cpu().tolist()
85
+ overall_ai = sum(ai_probs) / len(ai_probs)
86
  overall_pct = f"{overall_ai * 100:.1f}%"
87
+ overall_label = "🤖 Likely AI Written" if overall_ai >= THRESHOLD else "🧒 Likely Human Written"
88
 
89
+ # Per-sentence highlights (THRESHOLD sets the AI/Human tag; colors use fixed 0.30/0.70 cutoffs)
90
+ rows, highlights = [], []
91
  for i, orig in enumerate(sents, start=1):
92
+ ai_p = float(ai_probs[i-1])
93
+ label = "AI" if ai_p >= THRESHOLD else "Human"
94
  pct = f"{ai_p*100:.1f}%"
 
 
95
  if ai_p < 0.30:
96
  color = "#11823b" # green
97
  elif ai_p < 0.70:
98
  color = "#b8860b" # amber
99
  else:
100
  color = "#b80d0d" # red
 
101
  normalized = re.sub(r"\s+", " ", orig)
102
  highlights.append(
103
  "<div style='margin:6px 0; padding:6px 8px; border-radius:6px; background:rgba(0,0,0,0.03)'>"
 
107
 
108
  html = "\n".join(highlights)
109
  df = pd.DataFrame(rows, columns=["#", "Sentence", "AI_Prob", "Label"])
110
+ return overall_label, overall_pct, html, df
 
 
111
 
112
  # -----------------------------
113
+ # GRADIO UI (verdict + score, plus highlights)
114
  # -----------------------------
115
  with gr.Blocks() as demo:
116
+ gr.Markdown("### 🕵️ AI Written Text Detector — Fakespot Model")
117
 
118
  text_input = gr.Textbox(label="Paste text", lines=14, placeholder="Your content…")
 
119
  btn = gr.Button("Analyze")
120
 
121
+ verdict = gr.Label(label="Verdict (Overall)")
122
+ score = gr.Label(label="AI Score (Average across sentences)")
 
123
  highlights = gr.HTML(label="Per-Sentence Highlights")
124
  table = gr.Dataframe(headers=["#", "Sentence", "AI_Prob", "Label"], wrap=True)
125
 
126
+ btn.click(analyze, inputs=[text_input], outputs=[verdict, score, highlights, table])
 
 
 
 
127
 
128
  if __name__ == "__main__":
129
  demo.launch()