Spaces:

profplate
/

ai-detection-tool

Paused

File size: 11,636 Bytes

"""
AI Writing Analyzer — sentence-level heat map for human vs. AI-generated text.

Built for classroom use. Loads a RoBERTa-based ChatGPT detector from
Hugging Face and runs it on each sentence independently, then renders the
input text with per-sentence color coding indicating the probability that
the sentence was AI-generated.

Runs comfortably on the free CPU tier.
"""

import re
import html
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# ---------------------------------------------------------------------------
# Model
# ---------------------------------------------------------------------------
# Hello-SimpleAI's RoBERTa detector — small, CPU-friendly, widely used.
MODEL_NAME = "Hello-SimpleAI/chatgpt-detector-roberta"

# The tokenizer and model are loaded once, at import time, and then shared by
# every call to score_sentence() as module-level globals.
print(f"Loading model: {MODEL_NAME}")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
# Inference only: put the module in evaluation mode (no training in this file).
model.eval()

# The model's label order: index 0 = Human, index 1 = ChatGPT/AI.
# (Confirmed from the model card's id2label.)
# score_sentence() reads the softmax output at this index.
AI_INDEX = 1


# ---------------------------------------------------------------------------
# Sentence splitting
# ---------------------------------------------------------------------------
# A sentence boundary is a run of whitespace that
#   (a) follows terminal punctuation (. ! ?), optionally wrapped by ONE closing
#       quote or bracket, and
#   (b) precedes a capital letter or an opening quote/bracket.
# The two look-behinds are written as alternatives because Python's `re`
# only accepts fixed-width look-behind patterns.
_SENT_SPLIT_RE = re.compile(
    r"(?:(?<=[.!?])|(?<=[.!?][\"'\)\]\u201d\u2019]))"
    r"\s+"
    r"(?=[A-Z\"'\(\[\u201c\u2018])"
)


def split_sentences(text: str) -> list[str]:
    """Lightweight sentence splitter — no NLTK download needed on free CPU.

    The text is first cut on blank lines (paragraph breaks) and each paragraph
    is then cut on `_SENT_SPLIT_RE`. Every returned piece is stripped of
    surrounding whitespace and empty pieces are dropped.

    Unlike a plain "punctuation + space" rule, a closing quote or bracket
    right after the punctuation still ends the sentence, so
    `She said "Stop!" Then she left.` yields two sentences.

    Known limits of the heuristic: abbreviations followed by a capital
    ("Dr. Smith") are split, and a sentence that starts with a lowercase
    letter or a digit is not separated from the previous one.

    Args:
        text: Raw passage, possibly containing several paragraphs.

    Returns:
        The sentences in document order; `[]` for empty or blank input.
    """
    text = text.strip()
    if not text:
        return []

    sentences = []
    # Paragraphs first, so a boundary is always placed at a blank line even
    # when the paragraph does not end in terminal punctuation.
    for para in re.split(r"\n\s*\n", text):
        sentences.extend(
            piece.strip()
            for piece in _SENT_SPLIT_RE.split(para.strip())
            if piece.strip()
        )
    return sentences


# ---------------------------------------------------------------------------
# Scoring
# ---------------------------------------------------------------------------
def score_sentence(sentence: str) -> float:
    """Return probability that `sentence` is AI-generated (0.0 – 1.0).

    The sentence is tokenized on its own (truncated to at most 512 tokens),
    passed through the module-level `model`, and the softmax probability at
    `AI_INDEX` is returned as a plain Python float.
    """
    # Gradients are never needed here; skip tracking them for the forward pass.
    with torch.no_grad():
        encoded = tokenizer(
            sentence,
            truncation=True,
            max_length=512,
            return_tensors="pt",
        )
        class_probs = torch.softmax(model(**encoded).logits, dim=-1)

    # A single sentence was encoded, so row 0 holds its class probabilities.
    ai_prob = class_probs[0][AI_INDEX]
    return float(ai_prob.item())


# ---------------------------------------------------------------------------
# Rendering
# ---------------------------------------------------------------------------
# Gradient stops (R, G, B) shared by the highlight fill and its underline:
# cool teal at p = 0.0, amber at p = 0.5, warm red at p = 1.0.
_HEAT_STOPS = ((56, 189, 248), (245, 191, 66), (248, 80, 80))


def _heat_rgb(p: float) -> tuple:
    """Return the integer (r, g, b) for probability `p` on the heat ramp.

    Interpolates linearly in RGB between the two `_HEAT_STOPS` that bracket
    `p`. Values outside 0..1 are clamped to the nearest end so that every
    channel stays a valid CSS value in 0..255.
    """
    if p < 0.0:
        p = 0.0
    elif p > 1.0:
        p = 1.0

    if p < 0.5:
        start, end = _HEAT_STOPS[0], _HEAT_STOPS[1]
        t = p / 0.5
    else:
        start, end = _HEAT_STOPS[1], _HEAT_STOPS[2]
        t = (p - 0.5) / 0.5
    # int() truncates toward zero, matching how each channel is rounded.
    return tuple(int(lo + (hi - lo) * t) for lo, hi in zip(start, end))


def prob_to_color(p: float) -> str:
    """
    Map probability 0..1 to a background color.
    Low  (human)  -> cool teal
    Mid           -> amber
    High (AI)     -> warm red

    Returns a CSS `rgba(...)` string with alpha 0.42.
    """
    r, g, b = _heat_rgb(p)
    # Higher opacity for a vivid highlight; text is forced light on top.
    return f"rgba({r}, {g}, {b}, 0.42)"


def border_color(p: float) -> str:
    """Return the same ramp color as `prob_to_color`, nearly opaque.

    Used for the underline of a highlighted sentence; the CSS `rgba(...)`
    string carries alpha 0.95.
    """
    r, g, b = _heat_rgb(p)
    return f"rgba({r}, {g}, {b}, 0.95)"


def render_heatmap(sentences, scores) -> str:
    """Build the HTML report: summary bar, highlighted passage, and legend.

    `sentences` and `scores` are walked in parallel; each score is the AI
    probability (0.0 – 1.0) of the sentence at the same position. Sentence
    text is HTML-escaped before being embedded. When `sentences` is empty a
    short placeholder prompt is returned instead of a report.
    """
    if not sentences:
        # Nothing analysed yet — show the call-to-action placeholder.
        return (
            "<div style='color:#94a3b8; font-style:italic; padding:1rem;'>"
            "Paste some writing above and click <b>Analyze</b> to see a "
            "sentence-by-sentence breakdown.</div>"
        )

    # --- Highlighted passage: one tinted <span> per sentence ---------------
    highlighted = []
    for sentence, prob in zip(sentences, scores):
        percent = int(round(prob * 100))
        span_style = (
            f"background:{prob_to_color(prob)} !important; "
            f"border-bottom:2px solid {border_color(prob)} !important; "
            "color:#f8fafc !important; "
            "text-shadow:0 1px 2px rgba(0,0,0,0.65); "
            "padding:3px 6px; margin:2px 1px; border-radius:5px; "
            "box-decoration-break:clone; -webkit-box-decoration-break:clone; "
            "line-height:2.3;"
        )
        # Small superscript badge repeating the percentage after the text.
        badge = (
            "<span style='font-size:0.72em; color:#f1f5f9 !important; "
            "font-weight:600; vertical-align:super; "
            f"text-shadow:0 1px 2px rgba(0,0,0,0.7);'>{percent}%</span>"
        )
        highlighted.append(
            f"<span class='awa-sent' title='AI likelihood: {percent}%' "
            f"style='{span_style}'>{html.escape(sentence)} {badge}</span>"
        )
    passage = " ".join(highlighted)

    # --- Summary bar: overall verdict plus mean score and sentence count ---
    mean_score = sum(scores) / len(scores)
    label, label_color = classify_overall(mean_score)
    mean_percent = int(round(mean_score * 100))
    summary_bar = "".join([
        "<div style='display:flex; align-items:center; gap:1rem; ",
        "margin-bottom:1.25rem; padding:1rem 1.25rem; ",
        "background:#0f172a; border:1px solid #1e293b; border-radius:12px;'>",
        "<div style='font-size:0.78rem; letter-spacing:0.12em; ",
        "text-transform:uppercase; color:#94a3b8;'>Overall assessment</div>",
        f"<div style='font-size:1.15rem; font-weight:600; color:{label_color};'>",
        f"{label}</div>",
        "<div style='margin-left:auto; color:#cbd5e1; font-variant-numeric:tabular-nums;'>",
        f"Avg. AI likelihood: <b style='color:#f1f5f9;'>{mean_percent}%</b> ",
        f"&nbsp;·&nbsp; Sentences: <b style='color:#f1f5f9;'>{len(sentences)}</b></div>",
        "</div>",
    ])

    # --- Legend: three fixed swatches, one per gradient stop ---------------
    swatches = "".join(
        f"<span style='background:rgba({rgb},0.28); padding:2px 10px; "
        f"border-radius:4px; border-bottom:2px solid rgba({rgb},0.95);'>{caption}</span>"
        for rgb, caption in (
            ("56,189,248", "Likely human"),
            ("245,191,66", "Uncertain"),
            ("248,80,80", "Likely AI"),
        )
    )
    legend = (
        "<div style='display:flex; gap:0.75rem; align-items:center; "
        "margin-top:1.25rem; font-size:0.82rem; color:#94a3b8;'>"
        "<span>Legend:</span>"
        f"{swatches}"
        "</div>"
    )

    # --- Outer wrapper: summary, passage card, legend ----------------------
    return "".join([
        "<div style='font-family: -apple-system, BlinkMacSystemFont, ",
        "\"Segoe UI\", Inter, sans-serif; color:#e2e8f0;'>",
        summary_bar,
        "<div style='padding:1.25rem 1.5rem; background:#0b1220; ",
        "border:1px solid #1e293b; border-radius:12px; font-size:1rem; ",
        f"line-height:2.1;'>{passage}</div>",
        legend,
        "</div>",
    ])


def classify_overall(avg: float):
    """Translate the mean AI probability into a `(verdict, hex_color)` pair.

    Bands are half-open: below 0.25, below 0.5, below 0.75, and everything
    else. The first band whose upper bound exceeds `avg` wins.
    """
    bands = (
        (0.25, "Likely human-written", "#38bdf8"),
        (0.5, "Leaning human", "#7dd3fc"),
        (0.75, "Leaning AI", "#fbbf24"),
    )
    for upper_bound, verdict, color in bands:
        if avg < upper_bound:
            return verdict, color
    # No band matched: the average is 0.75 or higher.
    return "Likely AI-generated", "#f87171"


# ---------------------------------------------------------------------------
# Main analyze function
# ---------------------------------------------------------------------------
def analyze(text: str):
    """Split `text` into sentences, score each one, and return the HTML report.

    Missing, empty, or whitespace-only input (and input that yields no
    sentences) produces the placeholder markup from `render_heatmap([], [])`.
    """
    # Only hand real content to the splitter; anything else means "no sentences".
    has_content = bool(text) and bool(text.strip())
    sentences = split_sentences(text) if has_content else []
    if not sentences:
        return render_heatmap([], [])

    scores = list(map(score_sentence, sentences))
    return render_heatmap(sentences, scores)


# ---------------------------------------------------------------------------
# UI
# ---------------------------------------------------------------------------
# Dark-theme stylesheet handed to gr.Blocks(css=...). It covers the page
# background and fonts, the #app-header banner, textareas, labels and the
# primary/secondary buttons, hides the footer, and finally forces light text
# inside gr.HTML output and on the `.awa-sent` spans that render_heatmap() emits.
CUSTOM_CSS = """
:root, .gradio-container, body {
    background: #060912 !important;
    color: #e2e8f0 !important;
    font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Inter, system-ui, sans-serif !important;
}
.gradio-container {
    max-width: 960px !important;
    margin: 0 auto !important;
    padding-top: 2.5rem !important;
}
#app-header {
    text-align: left;
    margin-bottom: 1.75rem;
    padding: 1.75rem 2rem;
    background: linear-gradient(135deg, #0f172a 0%, #111827 100%);
    border: 1px solid #1e293b;
    border-radius: 16px;
}
#app-header h1 {
    margin: 0 0 0.5rem 0;
    font-size: 1.9rem;
    font-weight: 700;
    letter-spacing: -0.02em;
    background: linear-gradient(90deg, #38bdf8 0%, #a78bfa 100%);
    -webkit-background-clip: text;
    background-clip: text;
    color: transparent;
}
#app-header p {
    margin: 0;
    color: #94a3b8;
    font-size: 0.98rem;
    line-height: 1.55;
    max-width: 62ch;
}
textarea {
    background: #0b1220 !important;
    border: 1px solid #1e293b !important;
    color: #e2e8f0 !important;
    border-radius: 12px !important;
    font-size: 0.98rem !important;
    line-height: 1.6 !important;
}
textarea:focus {
    border-color: #38bdf8 !important;
    box-shadow: 0 0 0 3px rgba(56,189,248,0.15) !important;
}
label span {
    color: #cbd5e1 !important;
    font-weight: 500 !important;
}
button.primary, .primary button {
    background: linear-gradient(135deg, #38bdf8 0%, #6366f1 100%) !important;
    border: none !important;
    color: #0b1220 !important;
    font-weight: 600 !important;
    border-radius: 10px !important;
}
button.secondary, .secondary button {
    background: #1e293b !important;
    border: 1px solid #334155 !important;
    color: #e2e8f0 !important;
    border-radius: 10px !important;
}
footer { display: none !important; }

/* Force light text inside our custom HTML output — Gradio 6's prose styles
   otherwise darken anything rendered inside gr.HTML. */
.gradio-container .prose,
.gradio-container .prose * ,
.gradio-container .html-container,
.gradio-container .html-container * {
    color: #e2e8f0 !important;
}
.gradio-container .awa-sent,
.gradio-container .awa-sent * {
    color: #f8fafc !important;
}
"""

# Static banner shown at the top of the page through gr.HTML; its look comes
# from the `#app-header` rules in CUSTOM_CSS.
HEADER_HTML = """
<div id="app-header">
  <h1>AI Writing Analyzer</h1>
  <p>A classroom tool for examining student writing sentence by sentence. Paste a
  passage below and this tool will highlight each sentence with a color-coded
  heat map showing how likely it is to have been generated by an AI model.
  Use it as a starting point for conversation — not as a verdict.</p>
</div>
"""

# Sample passage pre-filled into the input textbox so the tool can be tried
# immediately. Adjacent string literals are concatenated into one paragraph.
EXAMPLE_TEXT = (
    "The old lighthouse had stood on that cliff for nearly two centuries, "
    "its white paint worn thin by salt and wind. Every evening, Marta climbed "
    "the spiral stairs with a cup of tea balanced in one hand. "
    "In conclusion, lighthouses serve as vital navigational aids that have "
    "played a crucial role in maritime safety throughout history. "
    "Furthermore, they represent an important cultural and architectural heritage "
    "that must be preserved for future generations."
)

# Page layout, top to bottom: header banner, input textbox, button row, report.
# NOTE(review): the CSS comment in CUSTOM_CSS targets Gradio 6, whose migration
# notes reportedly move `theme`/`css` from gr.Blocks() to launch() — confirm
# against the Gradio version this app is pinned to before changing anything.
with gr.Blocks(css=CUSTOM_CSS, title="AI Writing Analyzer", theme=gr.themes.Base()) as demo:
    gr.HTML(HEADER_HTML)

    with gr.Row():
        # Starts out pre-filled with EXAMPLE_TEXT.
        input_box = gr.Textbox(
            label="Student writing",
            placeholder="Paste a passage of writing here…",
            lines=10,
            value=EXAMPLE_TEXT,
        )

    with gr.Row():
        analyze_btn = gr.Button("Analyze", variant="primary")
        clear_btn = gr.Button("Clear", variant="secondary")

    # Initial content is the placeholder that render_heatmap returns for no input.
    output = gr.HTML(value=render_heatmap([], []))

    # Analyze: textbox contents -> analyze() -> HTML report.
    analyze_btn.click(fn=analyze, inputs=input_box, outputs=output)
    # Clear: empty the textbox and put the placeholder back in the report area.
    clear_btn.click(
        fn=lambda: ("", render_heatmap([], [])),
        inputs=None,
        outputs=[input_box, output],
    )

# Start the Gradio server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()