File size: 3,579 Bytes
958e345
ceeca7d
57bb1ed
6c56aca
ceeca7d
49d2f3f
ceeca7d
9267b26
 
 
 
 
 
 
 
49d2f3f
 
 
 
 
ceeca7d
 
9267b26
 
 
 
 
 
 
 
 
 
 
49d2f3f
ceeca7d
49d2f3f
 
9267b26
 
 
49d2f3f
9267b26
49d2f3f
9267b26
49d2f3f
 
 
 
 
 
9267b26
49d2f3f
 
 
 
 
9267b26
 
 
49d2f3f
9267b26
 
 
 
 
 
 
 
49d2f3f
9267b26
 
 
49d2f3f
9267b26
 
 
49d2f3f
9267b26
 
 
49d2f3f
9267b26
 
 
49d2f3f
9267b26
 
 
 
49d2f3f
9267b26
49d2f3f
9267b26
 
 
 
49d2f3f
9267b26
49d2f3f
 
9267b26
 
 
 
 
49d2f3f
9267b26
49d2f3f
 
 
9267b26
49d2f3f
 
 
9267b26
49d2f3f
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import html
import re

import gradio as gr
import pandas as pd
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# -----------------------------
# STRONGEST MODEL
# -----------------------------
MODEL_NAME = "Hello-SimpleAI/HC3-Plus-OpenAI-Detector"

# -----------------------------
# LOAD MODEL
# -----------------------------
# Pick the fastest available device; use bfloat16 only on GPUs that
# support it, otherwise fall back to full float32 precision.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda" and torch.cuda.is_bf16_supported():
    dtype = torch.bfloat16
else:
    dtype = torch.float32
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, torch_dtype=dtype)
model.to(device).eval()


# -----------------------------
# PARAGRAPH TOKENIZER
# -----------------------------
def paragraph_split(text):
    """Split *text* on newlines into stripped, non-empty paragraphs."""
    return [chunk.strip() for chunk in text.split("\n") if chunk.strip()]


# -----------------------------
# INFERENCE FUNCTION
# -----------------------------
def classify_text(text):
    """Classify each paragraph of *text* as AI- or human-written.

    Args:
        text: Raw user input; split into paragraphs on newlines.

    Returns:
        Tuple of (summary string, highlighted HTML string, DataFrame with
        Paragraph/Classification/Confidence columns). On empty or
        paragraph-free input, the last two elements are None.
    """
    if not text.strip():
        return "⚠️ Please enter some text.", None, None

    paragraphs = paragraph_split(text)
    if not paragraphs:
        return "⚠️ No paragraphs detected.", None, None

    # Tokenize all paragraphs as one padded batch. Cap at the tokenizer's
    # declared limit: for RoBERTa-style models, max_position_embeddings
    # (514) overshoots the 512 usable tokens and can cause position-
    # embedding index errors, so tokenizer.model_max_length is the safe cap.
    inputs = tokenizer(
        paragraphs,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=tokenizer.model_max_length
    ).to(device)

    # Predict. Cast logits to float32 so softmax is numerically stable
    # even when the model itself runs in bfloat16 on GPU.
    with torch.no_grad():
        logits = model(**inputs).logits.float()
        probs = F.softmax(logits, dim=-1).cpu()
        preds = torch.argmax(probs, dim=-1)

    # -----------------------------
    # BUILD RESULTS
    # -----------------------------
    results = []
    highlighted_paragraphs = []

    for i, p in enumerate(paragraphs):
        pred_label = preds[i].item()
        confidence = probs[i, pred_label].item()

        # NOTE(review): assumes class index 0 == "AI" for this checkpoint —
        # confirm against the model card's id2label mapping.
        label = "AI" if pred_label == 0 else "Human"
        conf_text = f"{confidence:.2f}"

        results.append([p, label, conf_text])

        # Escape user text before embedding it in HTML so pasted markup
        # cannot inject tags/scripts into the rendered output.
        safe_p = html.escape(p)
        color = "red" if label == "AI" else "green"
        highlighted_paragraphs.append(
            f"<p style='color:{color}; font-weight:bold; margin-bottom:10px'>{safe_p}</p>"
        )

    # -----------------------------
    # DOCUMENT LEVEL SCORE
    # -----------------------------
    # Document score: mean probability of the assumed-AI class (index 0)
    # averaged over all paragraphs, scaled to a percentage.
    avg = torch.mean(probs, dim=0)
    ai_likelihood = avg[0].item() * 100  # class 0 assumed = AI (see note above)

    highlighted_html = "\n".join(highlighted_paragraphs)
    df = pd.DataFrame(results, columns=["Paragraph", "Classification", "Confidence"])

    return f"⚖️ Document AI Likelihood: {ai_likelihood:.1f}%", highlighted_html, df


# -----------------------------
# GRADIO INTERFACE
# -----------------------------
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 Writenix Advanced AI Detection (Paragraph-Level)")

    with gr.Row():
        text_input = gr.Textbox(
            label="Enter text",
            lines=14,
            placeholder="Paste your essay, article, or content here…"
        )

    classify_btn = gr.Button("🚀 Detect AI")

    ai_score = gr.Label(label="Overall AI Likelihood")
    highlighted = gr.HTML()
    table = gr.Dataframe(headers=["Paragraph", "Classification", "Confidence"], wrap=True)

    classify_btn.click(classify_text, inputs=text_input, outputs=[ai_score, highlighted, table])


if __name__ == "__main__":
    demo.launch()