Spaces:
Running
Running
File size: 3,674 Bytes
958e345 ceeca7d 57bb1ed 6c56aca ceeca7d 49d2f3f ceeca7d 9267b26 1feb8eb 9267b26 1feb8eb 9267b26 49d2f3f ceeca7d 9267b26 1feb8eb 9267b26 1feb8eb 9267b26 49d2f3f ceeca7d 49d2f3f 9267b26 49d2f3f 9267b26 49d2f3f 9267b26 49d2f3f 9267b26 49d2f3f 9267b26 49d2f3f 9267b26 1feb8eb 9267b26 49d2f3f 9267b26 49d2f3f 9267b26 49d2f3f 9267b26 49d2f3f 9267b26 49d2f3f 1feb8eb 9267b26 49d2f3f 9267b26 49d2f3f 9267b26 1feb8eb 9267b26 49d2f3f 9267b26 49d2f3f 9267b26 49d2f3f 9267b26 49d2f3f 9267b26 49d2f3f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 |
# Standard library
import html
import re

# Third-party
import gradio as gr
import pandas as pd
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# -----------------------------
# STRONGEST AVAILABLE PUBLIC MODEL (WORKING)
# -----------------------------
# Binary sequence classifier; per the detector's label convention used below:
# label 0 = Real (Human), label 1 = Fake (AI).
MODEL_NAME = "openai-community/roberta-base-openai-detector"
# -----------------------------
# LOAD MODEL
# -----------------------------
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Prefer GPU when available; use bfloat16 only on CUDA devices that support it,
# otherwise fall back to full float32.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
dtype = torch.bfloat16 if (device.type=="cuda" and torch.cuda.is_bf16_supported()) else torch.float32
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, torch_dtype=dtype)
# Inference only: move to the chosen device and disable dropout / training behavior.
model.to(device).eval()
# -----------------------------
# PARAGRAPH SPLITTER
# -----------------------------
def paragraph_split(text):
    """Return the non-empty paragraphs of *text*.

    Each newline-separated line is treated as one paragraph; surrounding
    whitespace is trimmed and blank lines are dropped.
    """
    trimmed = (line.strip() for line in text.split("\n"))
    return [line for line in trimmed if line]
# -----------------------------
# AI DETECTION FUNCTION
# -----------------------------
def classify_text(text):
    """Classify each paragraph of *text* as AI- or human-written.

    Returns a 3-tuple for the Gradio outputs:
      - summary string with the document-level AI likelihood (percent),
      - HTML with each paragraph color-coded (red = AI, green = Human),
      - a pandas DataFrame of [Paragraph, Classification, Confidence] rows.
    On empty input, returns a warning string and (None, None).
    """
    if not text.strip():
        return "⚠️ Please enter some text.", None, None
    paragraphs = paragraph_split(text)
    if not paragraphs:
        return "⚠️ No paragraphs detected.", None, None
    # Tokenize all paragraphs as a single padded batch.
    # BUGFIX: use tokenizer.model_max_length (512) rather than
    # model.config.max_position_embeddings (514 for RoBERTa): RoBERTa reserves
    # two position slots for the padding offset, so 514-token inputs overflow
    # the position-embedding table and crash.
    inputs = tokenizer(
        paragraphs,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=tokenizer.model_max_length,
    ).to(device)
    # Predict
    with torch.no_grad():
        logits = model(**inputs).logits
    # Cast to float32 before softmax so bf16 inference keeps full precision;
    # move to CPU once (preds is derived from the already-CPU tensor).
    probs = F.softmax(logits.float(), dim=-1).cpu()
    preds = torch.argmax(probs, dim=-1)
    # -----------------------------
    # BUILD RESULTS
    # -----------------------------
    results = []
    highlighted_paragraphs = []
    for i, p in enumerate(paragraphs):
        pred_label = preds[i].item()
        confidence = probs[i, pred_label].item()
        # NOTE: roberta-base-openai-detector → label 1 = Fake (AI), 0 = Real (Human)
        label = "AI" if pred_label == 1 else "Human"
        results.append([p, label, f"{confidence:.2f}"])
        # BUGFIX: escape user-supplied text before embedding it in HTML so
        # markup or script in the input cannot break or inject into the page.
        color = "red" if label == "AI" else "green"
        highlighted_paragraphs.append(
            f"<p style='color:{color}; font-weight:bold; margin-bottom:10px'>{html.escape(p)}</p>"
        )
    # -----------------------------
    # DOCUMENT LEVEL SCORE
    # -----------------------------
    # Mean AI-class probability across paragraphs, as a percentage.
    avg = torch.mean(probs, dim=0)
    ai_likelihood = avg[1].item() * 100  # class 1 = AI
    highlighted_html = "\n".join(highlighted_paragraphs)
    df = pd.DataFrame(results, columns=["Paragraph", "Classification", "Confidence"])
    return f"⚖️ Document AI Likelihood: {ai_likelihood:.1f}%", highlighted_html, df
# -----------------------------
# GRADIO UI
# -----------------------------
# -----------------------------
# GRADIO UI
# -----------------------------
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 Writenix Advanced AI Detection (Paragraph-Level)")

    # Input area: a single large textbox for the document to analyze.
    with gr.Row():
        essay_box = gr.Textbox(
            label="Enter text",
            placeholder="Paste your essay, article, or content here…",
            lines=14,
        )

    detect_button = gr.Button("🚀 Detect AI")

    # Output widgets: overall score, color-coded paragraphs, per-paragraph table.
    overall_score = gr.Label(label="Overall AI Likelihood")
    paragraph_view = gr.HTML()
    results_table = gr.Dataframe(
        headers=["Paragraph", "Classification", "Confidence"],
        wrap=True,
    )

    detect_button.click(
        classify_text,
        inputs=essay_box,
        outputs=[overall_score, paragraph_view, results_table],
    )

if __name__ == "__main__":
    demo.launch()
|