Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,9 +6,9 @@ import pandas as pd
|
|
| 6 |
import gradio as gr
|
| 7 |
|
| 8 |
# -----------------------------
|
| 9 |
-
# MODEL
|
| 10 |
# -----------------------------
|
| 11 |
-
MODEL_NAME = "
|
| 12 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
|
| 13 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 14 |
dtype = torch.bfloat16 if (device.type=="cuda" and torch.cuda.is_bf16_supported()) else torch.float32
|
|
@@ -59,15 +59,18 @@ def sentence_split(text: str):
|
|
| 59 |
return [_restore(s).strip() for s in sentences if s.strip()]
|
| 60 |
|
| 61 |
# -----------------------------
|
| 62 |
-
# CLASSIFY SENTENCE-BY-SENTENCE
|
| 63 |
# -----------------------------
|
| 64 |
def classify_sentence_by_sentence(text, threshold=0.70, max_len=512):
|
| 65 |
sents = sentence_split(text)
|
| 66 |
if not sents:
|
| 67 |
return "⚠️ Please paste some text.", None, None
|
| 68 |
|
|
|
|
|
|
|
|
|
|
| 69 |
inputs = tokenizer(
|
| 70 |
-
|
| 71 |
).to(device)
|
| 72 |
|
| 73 |
with torch.no_grad():
|
|
@@ -76,8 +79,8 @@ def classify_sentence_by_sentence(text, threshold=0.70, max_len=512):
|
|
| 76 |
|
| 77 |
rows = []
|
| 78 |
highlights = []
|
| 79 |
-
for i,
|
| 80 |
-
ai_p = float(probs[i-1, 1].item())
|
| 81 |
label = "AI" if ai_p >= threshold else "Human"
|
| 82 |
pct = f"{ai_p*100:.1f}%"
|
| 83 |
|
|
@@ -89,24 +92,22 @@ def classify_sentence_by_sentence(text, threshold=0.70, max_len=512):
|
|
| 89 |
else:
|
| 90 |
color = "#b80d0d" # red
|
| 91 |
|
| 92 |
-
|
| 93 |
-
normalized = re.sub(r"\s+", " ", s)
|
| 94 |
-
|
| 95 |
highlights.append(
|
| 96 |
"<div style='margin:6px 0; padding:6px 8px; border-radius:6px; background:rgba(0,0,0,0.03)'>"
|
| 97 |
f"<strong style='color:{color}'>[{pct} {label}]</strong> {normalized}</div>"
|
| 98 |
)
|
| 99 |
-
rows.append([i,
|
| 100 |
|
| 101 |
html = "\n".join(highlights)
|
| 102 |
df = pd.DataFrame(rows, columns=["#", "Sentence", "AI_Prob", "Label"])
|
| 103 |
-
return "Done ✅ (
|
| 104 |
|
| 105 |
# -----------------------------
|
| 106 |
# GRADIO UI (minimal)
|
| 107 |
# -----------------------------
|
| 108 |
with gr.Blocks() as demo:
|
| 109 |
-
gr.Markdown("### 🧠 Sentence-by-Sentence AI Check")
|
| 110 |
|
| 111 |
text_input = gr.Textbox(label="Paste text", lines=14, placeholder="Your content…")
|
| 112 |
threshold = gr.Slider(0.50, 0.95, value=0.70, step=0.01, label="AI threshold")
|
|
|
|
| 6 |
import gradio as gr
|
| 7 |
|
| 8 |
# -----------------------------
|
| 9 |
+
# MODEL (Fakespot 2025)
|
| 10 |
# -----------------------------
|
| 11 |
+
MODEL_NAME = "fakespot-ai/roberta-base-ai-text-detection-v1"
|
| 12 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
|
| 13 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 14 |
dtype = torch.bfloat16 if (device.type=="cuda" and torch.cuda.is_bf16_supported()) else torch.float32
|
|
|
|
| 59 |
return [_restore(s).strip() for s in sentences if s.strip()]
|
| 60 |
|
| 61 |
# -----------------------------
|
| 62 |
+
# CLASSIFY SENTENCE-BY-SENTENCE (Fakespot: id2label[1] == "AI")
|
| 63 |
# -----------------------------
|
| 64 |
def classify_sentence_by_sentence(text, threshold=0.70, max_len=512):
|
| 65 |
sents = sentence_split(text)
|
| 66 |
if not sents:
|
| 67 |
return "⚠️ Please paste some text.", None, None
|
| 68 |
|
| 69 |
+
# Optional light cleaning before tokenization (similar to the model card's guidance): collapse whitespace runs to a single space and trim each sentence
|
| 70 |
+
clean_sents = [re.sub(r"\s+", " ", s).strip() for s in sents]
|
| 71 |
+
|
| 72 |
inputs = tokenizer(
|
| 73 |
+
clean_sents, return_tensors="pt", padding=True, truncation=True, max_length=max_len
|
| 74 |
).to(device)
|
| 75 |
|
| 76 |
with torch.no_grad():
|
|
|
|
| 79 |
|
| 80 |
rows = []
|
| 81 |
highlights = []
|
| 82 |
+
for i, orig in enumerate(sents, start=1):
|
| 83 |
+
ai_p = float(probs[i-1, 1].item()) # Fakespot: 1 == AI
|
| 84 |
label = "AI" if ai_p >= threshold else "Human"
|
| 85 |
pct = f"{ai_p*100:.1f}%"
|
| 86 |
|
|
|
|
| 92 |
else:
|
| 93 |
color = "#b80d0d" # red
|
| 94 |
|
| 95 |
+
normalized = re.sub(r"\s+", " ", orig)
|
|
|
|
|
|
|
| 96 |
highlights.append(
|
| 97 |
"<div style='margin:6px 0; padding:6px 8px; border-radius:6px; background:rgba(0,0,0,0.03)'>"
|
| 98 |
f"<strong style='color:{color}'>[{pct} {label}]</strong> {normalized}</div>"
|
| 99 |
)
|
| 100 |
+
rows.append([i, orig, round(ai_p, 4), label])
|
| 101 |
|
| 102 |
html = "\n".join(highlights)
|
| 103 |
df = pd.DataFrame(rows, columns=["#", "Sentence", "AI_Prob", "Label"])
|
| 104 |
+
return "Done ✅ (Fakespot detector)", html, df
|
| 105 |
|
| 106 |
# -----------------------------
|
| 107 |
# GRADIO UI (minimal)
|
| 108 |
# -----------------------------
|
| 109 |
with gr.Blocks() as demo:
|
| 110 |
+
gr.Markdown("### 🧠 Sentence-by-Sentence AI Check (Fakespot)")
|
| 111 |
|
| 112 |
text_input = gr.Textbox(label="Paste text", lines=14, placeholder="Your content…")
|
| 113 |
threshold = gr.Slider(0.50, 0.95, value=0.70, step=0.01, label="AI threshold")
|