VictorM-Coder committed on
Commit
8d27116
·
verified ·
1 Parent(s): 70fc9f3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -29
app.py CHANGED
@@ -43,17 +43,12 @@ def _restore(text):
43
  # PERFECT PARAGRAPH-PRESERVING SPLITTER
44
  # -----------------------------
45
  def split_preserving_structure(text):
46
- """
47
- Splits text into:
48
- - EXACT newline blocks (\n, \n\n, etc.)
49
- - Sentences inside non-newline blocks
50
- """
51
  blocks = re.split(r"(\n+)", text) # keep newline separators
52
  final_blocks = []
53
 
54
  for block in blocks:
55
  if block.startswith("\n"):
56
- final_blocks.append(block) # preserve EXACT paragraph spacing
57
  else:
58
  protected = _protect(block)
59
  parts = re.split(r"([.?!])(\s+)", protected)
@@ -72,9 +67,7 @@ def split_preserving_structure(text):
72
 
73
  return final_blocks
74
 
75
-
76
  def extract_sentences_only(blocks):
77
- """Return only sentence blocks (no whitespace/newlines)."""
78
  return [
79
  b for b in blocks
80
  if b.strip() != "" and not b.startswith("\n") and not b.isspace()
@@ -91,14 +84,12 @@ def group_sentences(sents, size=3):
91
  # -----------------------------
92
  def analyze(text, max_len=512):
93
 
94
- # Structured block split
95
  blocks = split_preserving_structure(text)
96
  pure_sentences = extract_sentences_only(blocks)
97
 
98
  if not pure_sentences:
99
  return "β€”", "β€”", "<em>Paste text to analyze.</em>", None
100
 
101
- # Group into 3-sentence windows (Turnitin style)
102
  grouped = group_sentences(pure_sentences, 3)
103
  clean_grouped = [re.sub(r"\s+", " ", g).strip() for g in grouped]
104
 
@@ -111,7 +102,7 @@ def analyze(text, max_len=512):
111
  logits = model(**inputs).logits
112
  chunk_probs = F.softmax(logits, dim=-1)[:, 1].cpu().tolist()
113
 
114
- # Expand group scores back to individual sentences
115
  ai_scores = []
116
  for idx, prob in enumerate(chunk_probs):
117
  start = idx * 3
@@ -120,53 +111,54 @@ def analyze(text, max_len=512):
120
  ai_scores.append(prob)
121
 
122
  # -----------------------------
123
- # RECONSTRUCT ORIGINAL TEXT W/ HIGHLIGHTING
124
  # -----------------------------
125
  highlighted = ""
126
  current_sentence = 0
127
 
128
  for block in blocks:
129
 
130
- # newline block → keep EXACT
131
  if block.startswith("\n"):
132
  highlighted += block
133
  continue
134
 
135
- # whitespace block → keep
136
  if block.isspace():
137
  highlighted += block
138
  continue
139
 
140
- # real sentence → highlight
141
  ai_p = ai_scores[current_sentence]
142
  current_sentence += 1
143
-
144
  pct = f"{ai_p * 100:.1f}%"
145
 
 
146
  if ai_p < 0.30:
147
- color = "#11823b"
 
148
  elif ai_p < 0.70:
149
- color = "#b8860b"
 
150
  else:
151
- color = "#b80d0d"
 
152
 
153
  highlighted += (
154
- f"<span style='background:rgba(0,0,0,0.03); padding:3px 4px; "
155
- f"border-radius:4px;'><strong style='color:{color}'>[{pct}]</strong> "
156
- f"{block.strip()}</span>"
 
157
  )
158
 
159
- # maintain spacing after sentence
160
- highlighted += " "
161
-
162
  # -----------------------------
163
- # OVERALL SCORE
164
  # -----------------------------
165
  overall = sum(ai_scores) / len(ai_scores)
166
  overall_pct = f"{overall * 100:.1f}%"
167
  overall_label = "πŸ€– Likely AI Written" if overall >= THRESHOLD else "πŸ§’ Likely Human Written"
168
 
169
- # Table output
170
  df = pd.DataFrame(
171
  [[i + 1, s, ai_scores[i]] for i, s in enumerate(pure_sentences)],
172
  columns=["#", "Sentence", "AI_Prob"]
@@ -175,10 +167,10 @@ def analyze(text, max_len=512):
175
  return overall_label, overall_pct, highlighted, df
176
 
177
  # -----------------------------
178
- # UI
179
  # -----------------------------
180
  with gr.Blocks() as demo:
181
- gr.Markdown("### πŸ•΅οΈ AI Sentence-Level Detector β€” Exact Structure Highlighting")
182
 
183
  text_input = gr.Textbox(label="Paste text", lines=14, placeholder="Your text…")
184
  btn = gr.Button("Analyze")
 
43
  # PERFECT PARAGRAPH-PRESERVING SPLITTER
44
  # -----------------------------
45
  def split_preserving_structure(text):
 
 
 
 
 
46
  blocks = re.split(r"(\n+)", text) # keep newline separators
47
  final_blocks = []
48
 
49
  for block in blocks:
50
  if block.startswith("\n"):
51
+ final_blocks.append(block)
52
  else:
53
  protected = _protect(block)
54
  parts = re.split(r"([.?!])(\s+)", protected)
 
67
 
68
  return final_blocks
69
 
 
70
  def extract_sentences_only(blocks):
 
71
  return [
72
  b for b in blocks
73
  if b.strip() != "" and not b.startswith("\n") and not b.isspace()
 
84
  # -----------------------------
85
  def analyze(text, max_len=512):
86
 
 
87
  blocks = split_preserving_structure(text)
88
  pure_sentences = extract_sentences_only(blocks)
89
 
90
  if not pure_sentences:
91
  return "β€”", "β€”", "<em>Paste text to analyze.</em>", None
92
 
 
93
  grouped = group_sentences(pure_sentences, 3)
94
  clean_grouped = [re.sub(r"\s+", " ", g).strip() for g in grouped]
95
 
 
102
  logits = model(**inputs).logits
103
  chunk_probs = F.softmax(logits, dim=-1)[:, 1].cpu().tolist()
104
 
105
+ # Expand grouped probs to each sentence
106
  ai_scores = []
107
  for idx, prob in enumerate(chunk_probs):
108
  start = idx * 3
 
111
  ai_scores.append(prob)
112
 
113
  # -----------------------------
114
+ # COLOR HIGHLIGHTING (FULL SENTENCE BLOCK COLORING)
115
  # -----------------------------
116
  highlighted = ""
117
  current_sentence = 0
118
 
119
  for block in blocks:
120
 
121
+ # newline blocks
122
  if block.startswith("\n"):
123
  highlighted += block
124
  continue
125
 
126
+ # whitespace blocks
127
  if block.isspace():
128
  highlighted += block
129
  continue
130
 
131
+ # real sentence
132
  ai_p = ai_scores[current_sentence]
133
  current_sentence += 1
 
134
  pct = f"{ai_p * 100:.1f}%"
135
 
136
+ # COLOR LEVELS (background + text)
137
  if ai_p < 0.30:
138
+ bg = "rgba(17,130,59,0.18)" # green
139
+ color = "#0f5e2e"
140
  elif ai_p < 0.70:
141
+ bg = "rgba(184,134,11,0.23)" # yellow
142
+ color = "#7a5f00"
143
  else:
144
+ bg = "rgba(184,13,13,0.20)" # red
145
+ color = "#7a0000"
146
 
147
  highlighted += (
148
+ f"<span style='background:{bg}; padding:5px 8px; "
149
+ f"border-radius:6px; display:inline-block; margin-bottom:4px;'>"
150
+ f"<strong style='color:{color}'>[{pct}]</strong> "
151
+ f"{block.strip()}</span> "
152
  )
153
 
 
 
 
154
  # -----------------------------
155
+ # OVERALL
156
  # -----------------------------
157
  overall = sum(ai_scores) / len(ai_scores)
158
  overall_pct = f"{overall * 100:.1f}%"
159
  overall_label = "πŸ€– Likely AI Written" if overall >= THRESHOLD else "πŸ§’ Likely Human Written"
160
 
161
+ # Table
162
  df = pd.DataFrame(
163
  [[i + 1, s, ai_scores[i]] for i, s in enumerate(pure_sentences)],
164
  columns=["#", "Sentence", "AI_Prob"]
 
167
  return overall_label, overall_pct, highlighted, df
168
 
169
  # -----------------------------
170
+ # GRADIO UI
171
  # -----------------------------
172
  with gr.Blocks() as demo:
173
+ gr.Markdown("### πŸ•΅οΈ AI Sentence-Level Detector β€” Color Highlight Mode")
174
 
175
  text_input = gr.Textbox(label="Paste text", lines=14, placeholder="Your text…")
176
  btn = gr.Button("Analyze")