VictorM-Coder committed on
Commit
91f4244
·
verified ·
1 Parent(s): 3535cf7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -12
app.py CHANGED
@@ -6,9 +6,9 @@ import pandas as pd
6
  import gradio as gr
7
 
8
  # -----------------------------
9
- # MODEL
10
  # -----------------------------
11
- MODEL_NAME = "openai-community/roberta-base-openai-detector"
12
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
13
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
14
  dtype = torch.bfloat16 if (device.type=="cuda" and torch.cuda.is_bf16_supported()) else torch.float32
@@ -59,15 +59,18 @@ def sentence_split(text: str):
59
  return [_restore(s).strip() for s in sentences if s.strip()]
60
 
61
  # -----------------------------
62
- # CLASSIFY SENTENCE-BY-SENTENCE
63
  # -----------------------------
64
  def classify_sentence_by_sentence(text, threshold=0.70, max_len=512):
65
  sents = sentence_split(text)
66
  if not sents:
67
  return "⚠️ Please paste some text.", None, None
68
 
 
 
 
69
  inputs = tokenizer(
70
- sents, return_tensors="pt", padding=True, truncation=True, max_length=max_len
71
  ).to(device)
72
 
73
  with torch.no_grad():
@@ -76,8 +79,8 @@ def classify_sentence_by_sentence(text, threshold=0.70, max_len=512):
76
 
77
  rows = []
78
  highlights = []
79
- for i, s in enumerate(sents, start=1):
80
- ai_p = float(probs[i-1, 1].item())
81
  label = "AI" if ai_p >= threshold else "Human"
82
  pct = f"{ai_p*100:.1f}%"
83
 
@@ -89,24 +92,22 @@ def classify_sentence_by_sentence(text, threshold=0.70, max_len=512):
89
  else:
90
  color = "#b80d0d" # red
91
 
92
- # avoid backslashes inside the f-string expression
93
- normalized = re.sub(r"\s+", " ", s)
94
-
95
  highlights.append(
96
  "<div style='margin:6px 0; padding:6px 8px; border-radius:6px; background:rgba(0,0,0,0.03)'>"
97
  f"<strong style='color:{color}'>[{pct} {label}]</strong> {normalized}</div>"
98
  )
99
- rows.append([i, s, round(ai_p, 4), label])
100
 
101
  html = "\n".join(highlights)
102
  df = pd.DataFrame(rows, columns=["#", "Sentence", "AI_Prob", "Label"])
103
- return "Done ✅ (sentence-by-sentence only)", html, df
104
 
105
  # -----------------------------
106
  # GRADIO UI (minimal)
107
  # -----------------------------
108
  with gr.Blocks() as demo:
109
- gr.Markdown("### 🧠 Sentence-by-Sentence AI Check")
110
 
111
  text_input = gr.Textbox(label="Paste text", lines=14, placeholder="Your content…")
112
  threshold = gr.Slider(0.50, 0.95, value=0.70, step=0.01, label="AI threshold")
 
6
  import gradio as gr
7
 
8
  # -----------------------------
9
+ # MODEL (Fakespot 2025)
10
  # -----------------------------
11
+ MODEL_NAME = "fakespot-ai/roberta-base-ai-text-detection-v1"
12
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
13
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
14
  dtype = torch.bfloat16 if (device.type=="cuda" and torch.cuda.is_bf16_supported()) else torch.float32
 
59
  return [_restore(s).strip() for s in sentences if s.strip()]
60
 
61
  # -----------------------------
62
+ # CLASSIFY SENTENCE-BY-SENTENCE (Fakespot: id2label[1] == "AI")
63
  # -----------------------------
64
  def classify_sentence_by_sentence(text, threshold=0.70, max_len=512):
65
  sents = sentence_split(text)
66
  if not sents:
67
  return "⚠️ Please paste some text.", None, None
68
 
69
+ # (Optional) light cleaning similar to model card guidance
70
+ clean_sents = [re.sub(r"\s+", " ", s).strip() for s in sents]
71
+
72
  inputs = tokenizer(
73
+ clean_sents, return_tensors="pt", padding=True, truncation=True, max_length=max_len
74
  ).to(device)
75
 
76
  with torch.no_grad():
 
79
 
80
  rows = []
81
  highlights = []
82
+ for i, orig in enumerate(sents, start=1):
83
+ ai_p = float(probs[i-1, 1].item()) # Fakespot: 1 == AI
84
  label = "AI" if ai_p >= threshold else "Human"
85
  pct = f"{ai_p*100:.1f}%"
86
 
 
92
  else:
93
  color = "#b80d0d" # red
94
 
95
+ normalized = re.sub(r"\s+", " ", orig)
 
 
96
  highlights.append(
97
  "<div style='margin:6px 0; padding:6px 8px; border-radius:6px; background:rgba(0,0,0,0.03)'>"
98
  f"<strong style='color:{color}'>[{pct} {label}]</strong> {normalized}</div>"
99
  )
100
+ rows.append([i, orig, round(ai_p, 4), label])
101
 
102
  html = "\n".join(highlights)
103
  df = pd.DataFrame(rows, columns=["#", "Sentence", "AI_Prob", "Label"])
104
+ return "Done ✅ (Fakespot detector)", html, df
105
 
106
  # -----------------------------
107
  # GRADIO UI (minimal)
108
  # -----------------------------
109
  with gr.Blocks() as demo:
110
+ gr.Markdown("### 🧠 Sentence-by-Sentence AI Check (Fakespot)")
111
 
112
  text_input = gr.Textbox(label="Paste text", lines=14, placeholder="Your content…")
113
  threshold = gr.Slider(0.50, 0.95, value=0.70, step=0.01, label="AI threshold")