profplate committed on
Commit
a5bf95a
·
verified ·
1 Parent(s): 0fcf38c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +318 -0
app.py ADDED
@@ -0,0 +1,318 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ AI Writing Analyzer — sentence-level heat map for human vs. AI-generated text.
3
+
4
+ Built for classroom use. Loads a RoBERTa-based ChatGPT detector from
5
+ Hugging Face and runs it on each sentence independently, then renders the
6
+ input text with per-sentence color coding indicating the probability that
7
+ the sentence was AI-generated.
8
+
9
+ Runs comfortably on the free CPU tier.
10
+ """
11
+
12
+ import re
13
+ import html
14
+ import gradio as gr
15
+ import torch
16
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
17
+
18
+ # ---------------------------------------------------------------------------
19
+ # Model
20
+ # ---------------------------------------------------------------------------
21
# Hello-SimpleAI's RoBERTa detector — small, CPU-friendly, widely used.
MODEL_NAME = "Hello-SimpleAI/chatgpt-detector-roberta"

# Loaded once at import time; every request reuses the same tokenizer/model.
print(f"Loading model: {MODEL_NAME}")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# eval() switches the module to inference mode and returns the module itself,
# so it can be chained directly onto the load.
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME).eval()

# Position of the ChatGPT/AI class in the classifier output
# (index 0 = Human, index 1 = ChatGPT/AI, per the model card's id2label).
AI_INDEX = 1
32
+
33
+
34
+ # ---------------------------------------------------------------------------
35
+ # Sentence splitting
36
+ # ---------------------------------------------------------------------------
37
# A sentence boundary is a run of whitespace that
#   * follows terminal punctuation, optionally wrapped by ONE closing quote or
#     bracket (so `"Stop." He left.` splits after the closing quote), and
#   * precedes something that looks like a sentence opener: an ASCII capital,
#     a straight or curly opening quote, or an opening bracket.
# Each look-behind is fixed-width on its own, which is what `re` requires; the
# two widths are combined with an alternation of separate look-behinds.
# No capturing groups are used, so `split()` returns only the text pieces.
_SENT_SPLIT_RE = re.compile(
    r"(?:(?<=[.!?])|(?<=[.!?][\"'\u201d\u2019)\]]))"
    r"\s+"
    r"(?=[A-Z\"'\u201c\u2018(\[])"
)

# One or more blank lines (possibly containing stray whitespace) end a paragraph.
_PARA_SPLIT_RE = re.compile(r"\n\s*\n")


def split_sentences(text: str):
    """Split `text` into a flat list of sentences.

    Lightweight, regex-only splitter — no NLTK download needed on free CPU.
    The text is first cut on blank lines so a sentence never spans two
    paragraphs, then each paragraph is cut on sentence boundaries.

    Args:
        text: Raw user input. Empty, whitespace-only, or None input is allowed.

    Returns:
        The sentences in document order, each stripped of surrounding
        whitespace, with empty pieces dropped. Returns [] when there is
        nothing to split.
    """
    if not text:
        return []

    sentences = []
    for para in _PARA_SPLIT_RE.split(text.strip()):
        pieces = map(str.strip, _SENT_SPLIT_RE.split(para))
        sentences.extend(piece for piece in pieces if piece)
    return sentences
54
+
55
+
56
+ # ---------------------------------------------------------------------------
57
+ # Scoring
58
+ # ---------------------------------------------------------------------------
59
@torch.no_grad()
def score_sentence(sentence: str) -> float:
    """Return the model's probability (0.0 – 1.0) that `sentence` is AI-generated.

    The sentence is tokenized on its own (truncated to at most 512 tokens),
    passed through the classifier with gradient tracking disabled, and the
    softmax probability at `AI_INDEX` is returned as a plain Python float.
    """
    encoded = tokenizer(
        sentence,
        truncation=True,
        max_length=512,
        return_tensors="pt",
    )
    # A single sentence goes in, so row 0 is the only row of the batch.
    class_probs = model(**encoded).logits.softmax(dim=-1)[0]
    ai_probability = class_probs[AI_INDEX]
    return float(ai_probability.item())
71
+
72
+
73
+ # ---------------------------------------------------------------------------
74
+ # Rendering
75
+ # ---------------------------------------------------------------------------
76
# RGB stops of the heat-map gradient, in order of increasing AI likelihood:
# cool teal (human) -> amber (uncertain) -> warm red (AI).
_HEAT_STOPS = (
    (56, 189, 248),
    (245, 191, 66),
    (248, 80, 80),
)


def _heat_rgb(p: float):
    """Return the (r, g, b) gradient colour for probability `p`.

    `p` is clamped to [0, 1] first so an out-of-range value can never yield a
    channel outside 0..255. Below 0.5 the colour is interpolated between the
    first two stops; from 0.5 upward between the last two. Channels are
    truncated with int(), not rounded.
    """
    p = min(max(p, 0.0), 1.0)
    if p < 0.5:
        start, end, t = _HEAT_STOPS[0], _HEAT_STOPS[1], p / 0.5
    else:
        start, end, t = _HEAT_STOPS[1], _HEAT_STOPS[2], (p - 0.5) / 0.5
    return tuple(int(lo + (hi - lo) * t) for lo, hi in zip(start, end))


def prob_to_color(p: float) -> str:
    """
    Map probability 0..1 to a background color.
    Low (human) -> cool teal
    Mid -> amber
    High (AI) -> warm red

    Returns a CSS `rgba(...)` string. Values outside 0..1 are clamped.
    """
    r, g, b = _heat_rgb(p)
    # Low-opacity fill so text stays readable on dark background.
    return f"rgba({r}, {g}, {b}, 0.28)"


def border_color(p: float) -> str:
    """
    Map probability 0..1 to the underline color for a sentence.

    Uses the same teal -> amber -> red gradient as `prob_to_color`, but nearly
    opaque so the underline stands out against the translucent fill.
    Returns a CSS `rgba(...)` string. Values outside 0..1 are clamped.
    """
    r, g, b = _heat_rgb(p)
    return f"rgba({r}, {g}, {b}, 0.95)"
110
+
111
+
112
def render_heatmap(sentences, scores) -> str:
    """Build the HTML for the analysis panel.

    With no sentences, returns a short italic prompt asking the user to paste
    text. Otherwise returns one wrapper <div> containing, in order: a summary
    bar (overall verdict, average AI likelihood, sentence count), the text
    with every sentence wrapped in a colour-coded <span>, and a legend.

    `sentences` and `scores` are paired positionally; sentence text is
    HTML-escaped before being embedded.
    """
    if not sentences:
        placeholder = (
            "<div style='color:#94a3b8; font-style:italic; padding:1rem;'>"
            "Paste some writing above and click <b>Analyze</b> to see a "
            "sentence-by-sentence breakdown.</div>"
        )
        return placeholder

    def _sentence_span(sentence, prob):
        # One highlighted sentence plus its superscript percentage badge.
        fill = prob_to_color(prob)
        underline = border_color(prob)
        percent = int(round(prob * 100))
        escaped = html.escape(sentence)
        return (
            f"<span title='AI likelihood: {percent}%' "
            f"style='background:{fill}; border-bottom:2px solid {underline}; "
            f"padding:2px 4px; margin:1px 2px; border-radius:4px; "
            f"line-height:2.1;'>{escaped} "
            f"<span style='font-size:0.72em; color:#cbd5e1; "
            f"vertical-align:super;'>{percent}%</span></span>"
        )

    body = " ".join(
        _sentence_span(sentence, prob) for sentence, prob in zip(sentences, scores)
    )

    # The overall verdict is driven by the plain mean of the per-sentence scores.
    mean_score = sum(scores) / len(scores)
    verdict, verdict_color = classify_overall(mean_score)
    mean_percent = int(round(mean_score * 100))
    sentence_count = len(sentences)

    summary = (
        f"<div style='display:flex; align-items:center; gap:1rem; "
        f"margin-bottom:1.25rem; padding:1rem 1.25rem; "
        f"background:#0f172a; border:1px solid #1e293b; border-radius:12px;'>"
        f"<div style='font-size:0.78rem; letter-spacing:0.12em; "
        f"text-transform:uppercase; color:#94a3b8;'>Overall assessment</div>"
        f"<div style='font-size:1.15rem; font-weight:600; color:{verdict_color};'>"
        f"{verdict}</div>"
        f"<div style='margin-left:auto; color:#cbd5e1; font-variant-numeric:tabular-nums;'>"
        f"Avg. AI likelihood: <b style='color:#f1f5f9;'>{mean_percent}%</b> "
        f"&nbsp;·&nbsp; Sentences: <b style='color:#f1f5f9;'>{sentence_count}</b></div>"
        f"</div>"
    )

    # Static legend; its three swatches use the gradient's end and mid colours.
    legend = (
        "<div style='display:flex; gap:0.75rem; align-items:center; "
        "margin-top:1.25rem; font-size:0.82rem; color:#94a3b8;'>"
        "<span>Legend:</span>"
        "<span style='background:rgba(56,189,248,0.28); padding:2px 10px; "
        "border-radius:4px; border-bottom:2px solid rgba(56,189,248,0.95);'>Likely human</span>"
        "<span style='background:rgba(245,191,66,0.28); padding:2px 10px; "
        "border-radius:4px; border-bottom:2px solid rgba(245,191,66,0.95);'>Uncertain</span>"
        "<span style='background:rgba(248,80,80,0.28); padding:2px 10px; "
        "border-radius:4px; border-bottom:2px solid rgba(248,80,80,0.95);'>Likely AI</span>"
        "</div>"
    )

    wrapper_open = (
        f"<div style='font-family: -apple-system, BlinkMacSystemFont, "
        f"\"Segoe UI\", Inter, sans-serif; color:#e2e8f0;'>"
    )
    text_panel = (
        f"<div style='padding:1.25rem 1.5rem; background:#0b1220; "
        f"border:1px solid #1e293b; border-radius:12px; font-size:1rem; "
        f"line-height:2.1;'>{body}</div>"
    )
    return "".join((wrapper_open, summary, text_panel, legend, "</div>"))
177
+
178
+
179
def classify_overall(avg: float):
    """Turn an average AI likelihood into a (verdict label, hex colour) pair.

    Bands are checked from lowest to highest with a strict `<` on the upper
    bound; anything not below 0.75 falls through to the final "Likely
    AI-generated" verdict.
    """
    bands = (
        (0.25, "Likely human-written", "#38bdf8"),
        (0.5, "Leaning human", "#7dd3fc"),
        (0.75, "Leaning AI", "#fbbf24"),
    )
    for upper_bound, label, colour in bands:
        if avg < upper_bound:
            return label, colour
    return "Likely AI-generated", "#f87171"
187
+
188
+
189
+ # ---------------------------------------------------------------------------
190
+ # Main analyze function
191
+ # ---------------------------------------------------------------------------
192
def analyze(text: str):
    """Score every sentence of `text` and return the rendered heat-map HTML.

    Missing, empty, or whitespace-only input — or input that yields no
    sentences — produces the empty-state panel from `render_heatmap([], [])`.
    """
    has_content = bool(text and text.strip())
    found = split_sentences(text) if has_content else []
    # With nothing to score this stays empty, and the renderer shows its prompt.
    likelihoods = [score_sentence(sentence) for sentence in found]
    return render_heatmap(found, likelihoods)
200
+
201
+
202
+ # ---------------------------------------------------------------------------
203
+ # UI
204
+ # ---------------------------------------------------------------------------
205
# Dark-theme stylesheet handed to gr.Blocks(css=...). It restyles the page
# background, the header card, textareas, labels and the primary/secondary
# buttons, and hides the footer element.
CUSTOM_CSS = """
:root, .gradio-container, body {
    background: #060912 !important;
    color: #e2e8f0 !important;
    font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Inter, system-ui, sans-serif !important;
}
.gradio-container {
    max-width: 960px !important;
    margin: 0 auto !important;
    padding-top: 2.5rem !important;
}
#app-header {
    text-align: left;
    margin-bottom: 1.75rem;
    padding: 1.75rem 2rem;
    background: linear-gradient(135deg, #0f172a 0%, #111827 100%);
    border: 1px solid #1e293b;
    border-radius: 16px;
}
#app-header h1 {
    margin: 0 0 0.5rem 0;
    font-size: 1.9rem;
    font-weight: 700;
    letter-spacing: -0.02em;
    background: linear-gradient(90deg, #38bdf8 0%, #a78bfa 100%);
    -webkit-background-clip: text;
    background-clip: text;
    color: transparent;
}
#app-header p {
    margin: 0;
    color: #94a3b8;
    font-size: 0.98rem;
    line-height: 1.55;
    max-width: 62ch;
}
textarea {
    background: #0b1220 !important;
    border: 1px solid #1e293b !important;
    color: #e2e8f0 !important;
    border-radius: 12px !important;
    font-size: 0.98rem !important;
    line-height: 1.6 !important;
}
textarea:focus {
    border-color: #38bdf8 !important;
    box-shadow: 0 0 0 3px rgba(56,189,248,0.15) !important;
}
label span {
    color: #cbd5e1 !important;
    font-weight: 500 !important;
}
button.primary, .primary button {
    background: linear-gradient(135deg, #38bdf8 0%, #6366f1 100%) !important;
    border: none !important;
    color: #0b1220 !important;
    font-weight: 600 !important;
    border-radius: 10px !important;
}
button.secondary, .secondary button {
    background: #1e293b !important;
    border: 1px solid #334155 !important;
    color: #e2e8f0 !important;
    border-radius: 10px !important;
}
footer { display: none !important; }
"""

# Header card shown at the top of the page; styled by the #app-header rules
# in CUSTOM_CSS.
HEADER_HTML = """
<div id="app-header">
  <h1>AI Writing Analyzer</h1>
  <p>A classroom tool for examining student writing sentence by sentence. Paste a
  passage below and this tool will highlight each sentence with a color-coded
  heat map showing how likely it is to have been generated by an AI model.
  Use it as a starting point for conversation — not as a verdict.</p>
</div>
"""

# Sample passage pre-filled into the input box: four sentences, listed one
# per entry and joined with single spaces.
EXAMPLE_TEXT = " ".join(
    (
        "The old lighthouse had stood on that cliff for nearly two centuries, "
        "its white paint worn thin by salt and wind.",
        "Every evening, Marta climbed "
        "the spiral stairs with a cup of tea balanced in one hand.",
        "In conclusion, lighthouses serve as vital navigational aids that have "
        "played a crucial role in maritime safety throughout history.",
        "Furthermore, they represent an important cultural and architectural heritage "
        "that must be preserved for future generations.",
    )
)
292
+
293
# Page assembly: header card, input textbox, Analyze/Clear buttons, and the
# HTML panel that receives the rendered heat map.
with gr.Blocks(title="AI Writing Analyzer", css=CUSTOM_CSS, theme=gr.themes.Base()) as demo:
    gr.HTML(HEADER_HTML)

    with gr.Row():
        input_box = gr.Textbox(
            value=EXAMPLE_TEXT,
            lines=10,
            label="Student writing",
            placeholder="Paste a passage of writing here…",
        )

    with gr.Row():
        analyze_btn = gr.Button("Analyze", variant="primary")
        clear_btn = gr.Button("Clear", variant="secondary")

    # Starts out showing the empty-state prompt produced by render_heatmap.
    output = gr.HTML(value=render_heatmap([], []))

    # Analyze: textbox contents in, rendered HTML out.
    analyze_btn.click(analyze, input_box, output)
    # Clear: takes no inputs; empties the textbox and restores the empty-state
    # panel in a single step.
    clear_btn.click(
        fn=lambda: ("", render_heatmap([], [])),
        inputs=None,
        outputs=[input_box, output],
    )

if __name__ == "__main__":
    demo.launch()