VictorM-Coder committed on
Commit
23b2adf
·
verified ·
1 Parent(s): ea83121

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -17
app.py CHANGED
@@ -15,17 +15,17 @@ dtype = torch.bfloat16 if (device.type=="cuda" and torch.cuda.is_bf16_supported(
15
  model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, dtype=dtype).to(device).eval()
16
 
17
  # -----------------------------
18
- # THRESHOLD FOR LABEL COLOR
19
  # -----------------------------
20
- THRESHOLD = 0.70
21
 
22
  # -----------------------------
23
  # SENTENCE SPLITTING UTILITIES
24
  # -----------------------------
25
  ABBR = [
26
  "e.g", "i.e", "mr", "mrs", "ms", "dr", "prof", "vs", "etc", "fig", "al",
27
- "jr", "sr", "st", "no", "vol", "pp", "mt", "inc", "ltd", "co", "u.s", "u.k",
28
- "a.m", "p.m"
29
  ]
30
  ABBR_REGEX = re.compile(r"\b(" + "|".join(map(re.escape, ABBR)) + r")\.", flags=re.IGNORECASE)
31
 
@@ -50,6 +50,7 @@ def sentence_split(text: str):
50
  if not t:
51
  return []
52
  parts = re.split(r"([.?!])\s+(?=(?:[\"“”‘’']?\s*[A-Z(])|$)", t)
 
53
  sentences, buf = [], ""
54
  for i, chunk in enumerate(parts):
55
  if i % 2 == 0:
@@ -58,8 +59,10 @@ def sentence_split(text: str):
58
  buf += chunk
59
  sentences.append(buf.strip())
60
  buf = ""
 
61
  if buf.strip():
62
  sentences.append(buf.strip())
 
63
  return [_restore(s).strip() for s in sentences if s.strip()]
64
 
65
  # -----------------------------
@@ -68,8 +71,7 @@ def sentence_split(text: str):
68
  def group_sentences(sents, size=3):
69
  grouped = []
70
  for i in range(0, len(sents), size):
71
- chunk = " ".join(sents[i:i+size])
72
- grouped.append(chunk)
73
  return grouped
74
 
75
  # -----------------------------
@@ -80,14 +82,14 @@ def analyze(text, max_len=512):
80
  if not sents:
81
  return "—", "—", "<em>Paste some text to analyze.</em>", None
82
 
83
- # GROUP sentences into 3-sentence chunks
84
  grouped = group_sentences(sents, size=3)
85
  clean_grouped = [re.sub(r"\s+", " ", g).strip() for g in grouped]
86
 
87
  # tokenize grouped
88
  inputs = tokenizer(
89
- clean_grouped, return_tensors="pt", padding=True,
90
- truncation=True, max_length=max_len
91
  ).to(device)
92
 
93
  # model inference
@@ -95,7 +97,7 @@ def analyze(text, max_len=512):
95
  logits = model(**inputs).logits
96
  chunk_probs = F.softmax(logits, dim=-1)[:, 1].detach().cpu().tolist()
97
 
98
- # EXPAND chunk-level probabilities back to per-sentence
99
  ai_probs = []
100
  for idx, prob in enumerate(chunk_probs):
101
  start = idx * 3
@@ -106,28 +108,34 @@ def analyze(text, max_len=512):
106
  # overall AI score
107
  overall_ai = sum(ai_probs) / len(ai_probs)
108
  overall_pct = f"{overall_ai * 100:.1f}%"
 
 
109
  overall_label = (
110
  "🤖 Likely AI Written" if overall_ai >= THRESHOLD else "🧒 Likely Human Written"
111
  )
112
 
113
  # HIGHLIGHTS + TABLE
114
  rows, highlights = [], []
 
115
  for i, orig in enumerate(sents, start=1):
116
  ai_p = float(ai_probs[i-1])
 
 
 
117
  label = "AI" if ai_p >= THRESHOLD else "Human"
118
- pct = f"{ai_p*100:.1f}%"
119
 
120
- # color logic
121
  if ai_p < 0.30:
122
- color = "#11823b"
123
  elif ai_p < 0.70:
124
- color = "#b8860b"
125
  else:
126
- color = "#b80d0d"
127
 
128
  normalized = re.sub(r"\s+", " ", orig)
 
129
  highlights.append(
130
- "<div style='margin:6px 0; padding:6px 8px; border-radius:6px; "
131
  "background:rgba(0,0,0,0.03)'>"
132
  f"<strong style='color:{color}'>[{pct} {label}]</strong> "
133
  f"{normalized}</div>"
@@ -144,7 +152,7 @@ def analyze(text, max_len=512):
144
  # GRADIO UI
145
  # -----------------------------
146
  with gr.Blocks() as demo:
147
- gr.Markdown("### 🕵️ AI Written Text Detector — Fakespot Model (Turnitin-Style)")
148
 
149
  text_input = gr.Textbox(label="Paste text", lines=14, placeholder="Your content…")
150
  btn = gr.Button("Analyze")
 
15
  model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, dtype=dtype).to(device).eval()
16
 
17
  # -----------------------------
18
+ # AI DECISION THRESHOLD (80%)
19
  # -----------------------------
20
+ THRESHOLD = 0.80 # AI from 80% and above
21
 
22
  # -----------------------------
23
  # SENTENCE SPLITTING UTILITIES
24
  # -----------------------------
25
  ABBR = [
26
  "e.g", "i.e", "mr", "mrs", "ms", "dr", "prof", "vs", "etc", "fig", "al",
27
+ "jr", "sr", "st", "no", "vol", "pp", "mt", "inc", "ltd", "co",
28
+ "u.s", "u.k", "a.m", "p.m"
29
  ]
30
  ABBR_REGEX = re.compile(r"\b(" + "|".join(map(re.escape, ABBR)) + r")\.", flags=re.IGNORECASE)
31
 
 
50
  if not t:
51
  return []
52
  parts = re.split(r"([.?!])\s+(?=(?:[\"“”‘’']?\s*[A-Z(])|$)", t)
53
+
54
  sentences, buf = [], ""
55
  for i, chunk in enumerate(parts):
56
  if i % 2 == 0:
 
59
  buf += chunk
60
  sentences.append(buf.strip())
61
  buf = ""
62
+
63
  if buf.strip():
64
  sentences.append(buf.strip())
65
+
66
  return [_restore(s).strip() for s in sentences if s.strip()]
67
 
68
  # -----------------------------
 
71
  def group_sentences(sents, size=3):
72
  grouped = []
73
  for i in range(0, len(sents), size):
74
+ grouped.append(" ".join(sents[i:i+size]))
 
75
  return grouped
76
 
77
  # -----------------------------
 
82
  if not sents:
83
  return "—", "—", "<em>Paste some text to analyze.</em>", None
84
 
85
+ # GROUP sentences (3 at a time)
86
  grouped = group_sentences(sents, size=3)
87
  clean_grouped = [re.sub(r"\s+", " ", g).strip() for g in grouped]
88
 
89
  # tokenize grouped
90
  inputs = tokenizer(
91
+ clean_grouped, return_tensors="pt",
92
+ padding=True, truncation=True, max_length=max_len
93
  ).to(device)
94
 
95
  # model inference
 
97
  logits = model(**inputs).logits
98
  chunk_probs = F.softmax(logits, dim=-1)[:, 1].detach().cpu().tolist()
99
 
100
+ # EXPAND chunk-level probabilities to per-sentence
101
  ai_probs = []
102
  for idx, prob in enumerate(chunk_probs):
103
  start = idx * 3
 
108
  # overall AI score
109
  overall_ai = sum(ai_probs) / len(ai_probs)
110
  overall_pct = f"{overall_ai * 100:.1f}%"
111
+
112
+ # UPDATED THRESHOLD (80%)
113
  overall_label = (
114
  "🤖 Likely AI Written" if overall_ai >= THRESHOLD else "🧒 Likely Human Written"
115
  )
116
 
117
  # HIGHLIGHTS + TABLE
118
  rows, highlights = [], []
119
+
120
  for i, orig in enumerate(sents, start=1):
121
  ai_p = float(ai_probs[i-1])
122
+ pct = f"{ai_p * 100:.1f}%"
123
+
124
+ # UPDATED → label decided by 80%
125
  label = "AI" if ai_p >= THRESHOLD else "Human"
 
126
 
127
+ # color logic (unchanged)
128
  if ai_p < 0.30:
129
+ color = "#11823b" # green
130
  elif ai_p < 0.70:
131
+ color = "#b8860b" # amber
132
  else:
133
+ color = "#b80d0d" # red
134
 
135
  normalized = re.sub(r"\s+", " ", orig)
136
+
137
  highlights.append(
138
+ "<div style='margin:6px 0; padding:6px 8px; border-radius:6px;"
139
  "background:rgba(0,0,0,0.03)'>"
140
  f"<strong style='color:{color}'>[{pct} {label}]</strong> "
141
  f"{normalized}</div>"
 
152
  # GRADIO UI
153
  # -----------------------------
154
  with gr.Blocks() as demo:
155
+ gr.Markdown("### 🕵️ AI Written Text Detector — Fakespot Model (80% Threshold)")
156
 
157
  text_input = gr.Textbox(label="Paste text", lines=14, placeholder="Your content…")
158
  btn = gr.Button("Analyze")