Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -15,8 +15,12 @@ dtype = torch.bfloat16 if (device.type=="cuda" and torch.cuda.is_bf16_supported(
|
|
| 15 |
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, dtype=dtype).to(device).eval()
|
| 16 |
|
| 17 |
# -----------------------------
|
| 18 |
-
#
|
| 19 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
# -----------------------------
|
| 21 |
ABBR = [
|
| 22 |
"e.g", "i.e", "mr", "mrs", "ms", "dr", "prof", "vs", "etc", "fig", "al",
|
|
@@ -59,15 +63,14 @@ def sentence_split(text: str):
|
|
| 59 |
return [_restore(s).strip() for s in sentences if s.strip()]
|
| 60 |
|
| 61 |
# -----------------------------
|
| 62 |
-
#
|
| 63 |
-
# Also compute an overall Likely-AI score (mean AI prob)
|
| 64 |
# -----------------------------
|
| 65 |
-
def
|
| 66 |
sents = sentence_split(text)
|
| 67 |
if not sents:
|
| 68 |
-
return "
|
| 69 |
|
| 70 |
-
# light
|
| 71 |
clean_sents = [re.sub(r"\s+", " ", s).strip() for s in sents]
|
| 72 |
|
| 73 |
inputs = tokenizer(
|
|
@@ -79,25 +82,22 @@ def classify_sentence_by_sentence(text, threshold=0.70, max_len=512):
|
|
| 79 |
probs = F.softmax(logits, dim=-1) # [:,0]=Human, [:,1]=AI
|
| 80 |
|
| 81 |
ai_probs = probs[:, 1].detach().cpu().tolist()
|
| 82 |
-
overall_ai = sum(ai_probs) / len(ai_probs)
|
| 83 |
overall_pct = f"{overall_ai * 100:.1f}%"
|
| 84 |
-
overall_label = "🤖 Likely AI" if overall_ai >=
|
| 85 |
|
| 86 |
-
|
| 87 |
-
highlights = []
|
| 88 |
for i, orig in enumerate(sents, start=1):
|
| 89 |
-
ai_p = float(ai_probs[i-1])
|
| 90 |
-
label = "AI" if ai_p >=
|
| 91 |
pct = f"{ai_p*100:.1f}%"
|
| 92 |
-
|
| 93 |
-
# choose color
|
| 94 |
if ai_p < 0.30:
|
| 95 |
color = "#11823b" # green
|
| 96 |
elif ai_p < 0.70:
|
| 97 |
color = "#b8860b" # amber
|
| 98 |
else:
|
| 99 |
color = "#b80d0d" # red
|
| 100 |
-
|
| 101 |
normalized = re.sub(r"\s+", " ", orig)
|
| 102 |
highlights.append(
|
| 103 |
"<div style='margin:6px 0; padding:6px 8px; border-radius:6px; background:rgba(0,0,0,0.03)'>"
|
|
@@ -107,31 +107,23 @@ def classify_sentence_by_sentence(text, threshold=0.70, max_len=512):
|
|
| 107 |
|
| 108 |
html = "\n".join(highlights)
|
| 109 |
df = pd.DataFrame(rows, columns=["#", "Sentence", "AI_Prob", "Label"])
|
| 110 |
-
|
| 111 |
-
status = "Done ✅ (Fakespot detector)"
|
| 112 |
-
return status, overall_label, overall_pct, html, df
|
| 113 |
|
| 114 |
# -----------------------------
|
| 115 |
-
# GRADIO UI (
|
| 116 |
# -----------------------------
|
| 117 |
with gr.Blocks() as demo:
|
| 118 |
-
gr.Markdown("###
|
| 119 |
|
| 120 |
text_input = gr.Textbox(label="Paste text", lines=14, placeholder="Your content…")
|
| 121 |
-
threshold = gr.Slider(0.50, 0.95, value=0.70, step=0.01, label="AI threshold")
|
| 122 |
btn = gr.Button("Analyze")
|
| 123 |
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
overall_score = gr.Label(label="Likely AI-generated Score (avg)")
|
| 127 |
highlights = gr.HTML(label="Per-Sentence Highlights")
|
| 128 |
table = gr.Dataframe(headers=["#", "Sentence", "AI_Prob", "Label"], wrap=True)
|
| 129 |
|
| 130 |
-
btn.click(
|
| 131 |
-
classify_sentence_by_sentence,
|
| 132 |
-
inputs=[text_input, threshold],
|
| 133 |
-
outputs=[status, overall_label, overall_score, highlights, table]
|
| 134 |
-
)
|
| 135 |
|
| 136 |
if __name__ == "__main__":
|
| 137 |
demo.launch()
|
|
|
|
| 15 |
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, dtype=dtype).to(device).eval()
|
| 16 |
|
| 17 |
# -----------------------------
|
| 18 |
+
# INTERNAL THRESHOLD for the overall verdict and per-sentence AI/Human labels
|
| 19 |
+
# -----------------------------
|
| 20 |
+
THRESHOLD = 0.70 # cutoff for the overall verdict and per-sentence "AI/Human" tags (colors use fixed 0.30/0.70 bands)
|
| 21 |
+
|
| 22 |
+
# -----------------------------
|
| 23 |
+
# SENTENCE SPLITTER (protect → split → restore; no lookbehinds)
|
| 24 |
# -----------------------------
|
| 25 |
ABBR = [
|
| 26 |
"e.g", "i.e", "mr", "mrs", "ms", "dr", "prof", "vs", "etc", "fig", "al",
|
|
|
|
| 63 |
return [_restore(s).strip() for s in sentences if s.strip()]
|
| 64 |
|
| 65 |
# -----------------------------
|
| 66 |
+
# CORE: overall AI score + highlights
|
|
|
|
| 67 |
# -----------------------------
|
| 68 |
+
def analyze(text, max_len=512):
|
| 69 |
sents = sentence_split(text)
|
| 70 |
if not sents:
|
| 71 |
+
return "—", "—", "<em>Paste some text to analyze.</em>", None
|
| 72 |
|
| 73 |
+
# Light cleanup: collapse whitespace runs to single spaces and trim each sentence (loosely following the model card)
|
| 74 |
clean_sents = [re.sub(r"\s+", " ", s).strip() for s in sents]
|
| 75 |
|
| 76 |
inputs = tokenizer(
|
|
|
|
| 82 |
probs = F.softmax(logits, dim=-1) # [:,0]=Human, [:,1]=AI
|
| 83 |
|
| 84 |
ai_probs = probs[:, 1].detach().cpu().tolist()
|
| 85 |
+
overall_ai = sum(ai_probs) / len(ai_probs)
|
| 86 |
overall_pct = f"{overall_ai * 100:.1f}%"
|
| 87 |
+
overall_label = "🤖 Likely AI Written" if overall_ai >= THRESHOLD else "🧒 Likely Human Written"
|
| 88 |
|
| 89 |
+
# Per-sentence highlights (THRESHOLD sets the AI/Human tag; colors use fixed 0.30/0.70 bands)
|
| 90 |
+
rows, highlights = [], []
|
| 91 |
for i, orig in enumerate(sents, start=1):
|
| 92 |
+
ai_p = float(ai_probs[i-1])
|
| 93 |
+
label = "AI" if ai_p >= THRESHOLD else "Human"
|
| 94 |
pct = f"{ai_p*100:.1f}%"
|
|
|
|
|
|
|
| 95 |
if ai_p < 0.30:
|
| 96 |
color = "#11823b" # green
|
| 97 |
elif ai_p < 0.70:
|
| 98 |
color = "#b8860b" # amber
|
| 99 |
else:
|
| 100 |
color = "#b80d0d" # red
|
|
|
|
| 101 |
normalized = re.sub(r"\s+", " ", orig)
|
| 102 |
highlights.append(
|
| 103 |
"<div style='margin:6px 0; padding:6px 8px; border-radius:6px; background:rgba(0,0,0,0.03)'>"
|
|
|
|
| 107 |
|
| 108 |
html = "\n".join(highlights)
|
| 109 |
df = pd.DataFrame(rows, columns=["#", "Sentence", "AI_Prob", "Label"])
|
| 110 |
+
return overall_label, overall_pct, html, df
|
|
|
|
|
|
|
| 111 |
|
| 112 |
# -----------------------------
|
| 113 |
+
# GRADIO UI (verdict + score, plus highlights)
|
| 114 |
# -----------------------------
|
| 115 |
with gr.Blocks() as demo:
|
| 116 |
+
gr.Markdown("### 🕵️ AI Written Text Detector — Fakespot Model")
|
| 117 |
|
| 118 |
text_input = gr.Textbox(label="Paste text", lines=14, placeholder="Your content…")
|
|
|
|
| 119 |
btn = gr.Button("Analyze")
|
| 120 |
|
| 121 |
+
verdict = gr.Label(label="Verdict (Overall)")
|
| 122 |
+
score = gr.Label(label="AI Score (Average across sentences)")
|
|
|
|
| 123 |
highlights = gr.HTML(label="Per-Sentence Highlights")
|
| 124 |
table = gr.Dataframe(headers=["#", "Sentence", "AI_Prob", "Label"], wrap=True)
|
| 125 |
|
| 126 |
+
btn.click(analyze, inputs=[text_input], outputs=[verdict, score, highlights, table])
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
|
| 128 |
if __name__ == "__main__":
|
| 129 |
demo.launch()
|