Spaces:

VictorM-Coder
/

AIDetector

Running

App Files Community

VictorM-Coder committed 19 days ago

Commit

f2f742a

verified ·

1 Parent(s): eba685c

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -21

app.py CHANGED Viewed

@@ -10,7 +10,6 @@ import gradio as gr
 # -----------------------------
 MODEL_NAME = "openai-community/roberta-base-openai-detector"
 # -----------------------------
 # LOAD MODEL
 # -----------------------------
@@ -22,10 +21,17 @@ model.to(device).eval()
 # -----------------------------
-# LINE SPLITTER  (SAFE, FIXED)
 # -----------------------------
-def line_split(text):
-    return [l.strip() for l in text.split("\n") if l.strip()]
 # -----------------------------
@@ -35,17 +41,17 @@ def classify_text(text):
     if not text.strip():
         return "⚠️ Please enter some text.", None, None
-    lines = line_split(text)
-    if not lines:
         return "⚠️ No content detected.", None, None
-    # Tokenize line by line → SAFE
     inputs = tokenizer(
-        lines,
         return_tensors="pt",
         padding=True,
-        truncation=True,
-        max_length=512  # SAFE for RoBERTa
     ).to(device)
     with torch.no_grad():
@@ -54,31 +60,31 @@ def classify_text(text):
         preds = torch.argmax(probs, dim=-1).cpu()
     results = []
-    highlighted_lines = []
-    for i, line in enumerate(lines):
         pred = preds[i].item()
         conf = probs[i, pred].item()
-        # For this model: 1 = AI, 0 = Human
         label = "AI" if pred == 1 else "Human"
         conf_text = f"{conf:.2f}"
-        results.append([line, label, conf_text])
         if label == "AI":
-            highlighted_lines.append(f"<p style='color:red; font-weight:bold'>{line}</p>")
         else:
-            highlighted_lines.append(f"<p style='color:green; font-weight:bold'>{line}</p>")
     # -----------------------------
     # DOCUMENT AI SCORE
     # -----------------------------
     avg = torch.mean(probs, dim=0)
-    ai_percent = avg[1].item() * 100  # class 1 = AI
-    highlighted_html = "\n".join(highlighted_lines)
-    df = pd.DataFrame(results, columns=["Line", "Classification", "Confidence"])
     return f"⚖️ Document AI Likelihood: {ai_percent:.1f}%", highlighted_html, df
@@ -87,7 +93,7 @@ def classify_text(text):
 # GRADIO UI
 # -----------------------------
 with gr.Blocks() as demo:
-    gr.Markdown("## 🧠 Writenix AI Detector (Line-Level, Stable Version)")
     text_input = gr.Textbox(
         label="Enter text",
@@ -99,7 +105,7 @@ with gr.Blocks() as demo:
     ai_score = gr.Label(label="Overall AI Likelihood")
     highlighted = gr.HTML()
-    table = gr.Dataframe(headers=["Line", "Classification", "Confidence"], wrap=True)
     classify_btn.click(classify_text, inputs=text_input, outputs=[ai_score, highlighted, table])

 # -----------------------------
 MODEL_NAME = "openai-community/roberta-base-openai-detector"
 # -----------------------------
 # LOAD MODEL
 # -----------------------------
 # -----------------------------
+# SENTENCE SPLITTER (SAFE)
 # -----------------------------
def sentence_split(text):
    """Split *text* into a list of trimmed, non-empty sentences.

    A sentence ends at ``.``, ``!`` or ``?`` followed by whitespace, or at a
    line break.  The terminating punctuation stays attached to its sentence.

    A line that is followed by a line break but does not already end in
    ``.``, ``!`` or ``?`` gets a ``.`` appended, so a hard-wrapped line is
    still treated as a closed sentence.  Lines that already end in terminal
    punctuation are left untouched, and blank lines are skipped, so the
    result never contains doubled periods (``"Hello.."``) or stand-alone
    ``"."`` entries.

    Args:
        text: Raw user input; may contain ``\\n`` or ``\\r\\n`` line breaks.

    Returns:
        The sentences in their original order.  Empty or whitespace-only
        input yields an empty list.
    """
    # Imported locally so this helper does not rely on a module-level
    # ``import re``, which the visible part of the file does not show.
    import re

    terminators = (".", "!", "?")
    lines = text.split("\n")
    last_index = len(lines) - 1

    sentences = []
    for index, raw_line in enumerate(lines):
        # strip() also removes the "\r" left behind by Windows line endings.
        line = raw_line.strip()
        if not line:
            continue
        # Only a line break closes an unpunctuated line; the final line of
        # input without a trailing newline is kept exactly as typed.
        if index < last_index and not line.endswith(terminators):
            line += "."
        # Split after . ! ? while keeping the punctuation on the sentence.
        sentences.extend(re.split(r'(?<=[.!?])\s+', line))
    return sentences
 # -----------------------------
     if not text.strip():
         return "⚠️ Please enter some text.", None, None
+    sentences = sentence_split(text)
+    if not sentences:
         return "⚠️ No content detected.", None, None
+    # Tokenize per sentence
     inputs = tokenizer(
+        sentences,
         return_tensors="pt",
         padding=True,
+        truncation=True,
+        max_length=512
     ).to(device)
     with torch.no_grad():
         preds = torch.argmax(probs, dim=-1).cpu()
     results = []
+    highlighted_sentences = []
+    for i, sentence in enumerate(sentences):
         pred = preds[i].item()
         conf = probs[i, pred].item()
+        # Model: 1 = AI, 0 = Human
         label = "AI" if pred == 1 else "Human"
         conf_text = f"{conf:.2f}"
+        results.append([sentence, label, conf_text])
         if label == "AI":
+            highlighted_sentences.append(f"<p style='color:red; font-weight:bold'>{sentence}</p>")
         else:
+            highlighted_sentences.append(f"<p style='color:green; font-weight:bold'>{sentence}</p>")
     # -----------------------------
     # DOCUMENT AI SCORE
     # -----------------------------
     avg = torch.mean(probs, dim=0)
+    ai_percent = avg[1].item() * 100
+    highlighted_html = "\n".join(highlighted_sentences)
+    df = pd.DataFrame(results, columns=["Sentence", "Classification", "Confidence"])
     return f"⚖️ Document AI Likelihood: {ai_percent:.1f}%", highlighted_html, df
 # GRADIO UI
 # -----------------------------
 with gr.Blocks() as demo:
+    gr.Markdown("## 🧠 Writenix AI Detector (Sentence-Level, Stable Version)")
     text_input = gr.Textbox(
         label="Enter text",
     ai_score = gr.Label(label="Overall AI Likelihood")
     highlighted = gr.HTML()
+    table = gr.Dataframe(headers=["Sentence", "Classification", "Confidence"], wrap=True)
     classify_btn.click(classify_text, inputs=text_input, outputs=[ai_score, highlighted, table])