dusan-presswhizz committed on
Commit
3d8425d
·
verified ·
1 Parent(s): 3840a00

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -213
app.py CHANGED
@@ -13,8 +13,9 @@ UA = {"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML,
13
 
14
  # --- OpenAI settings ---
15
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") # add in HF Spaces: Settings → Variables & secrets
16
- PREFERRED_OPENAI_MODEL = os.getenv("OPENAI_MODEL", "gpt-5o") # preferred model
17
- FALLBACK_OPENAI_MODEL = "gpt-4o-mini" # automatic fallback
 
18
  OPENAI_CHAT_URL = "https://api.openai.com/v1/chat/completions"
19
 
20
  # =========================
@@ -66,53 +67,28 @@ def embed(texts):
66
  out = enc(**batch)
67
  return mean_pool(out.last_hidden_state, batch["attention_mask"])
68
 
69
- # ---------- Fallback: integrate anchor mid-sentence (no em-dash, no clichés, neutral nouns)
70
  def inject_anchor_into_sentence(sentence, anchor_text, target_url):
71
- """
72
- Wrap anchor if present; otherwise integrate mid-sentence with a neutral preposition.
73
- No em-dash. Avoid CTA clichés. Do not assert target content type.
74
- Prefer 'Related resource' add-after if sentence begins with 'This guide' etc.
75
- """
76
  def norm(x): return re.sub(r'[^a-z0-9 ]','',x.lower())
77
  n_sent, n_anchor = norm(sentence), norm(anchor_text)
78
 
79
- # If sentence clearly has its own subject ("This guide", "Our platform", "Base Casino"), prefer add-after
80
- if n_sent.startswith("this guide") or n_sent.startswith("our platform") or n_sent.startswith("base casino"):
81
- html = sentence
82
- add_after = f' Related resource: <a href="{target_url}">{anchor_text}</a>.'
83
- return html + add_after, False
84
-
85
- # 1) If anchor words already present, wrap them
86
  if n_anchor and n_anchor in n_sent:
87
  pattern = re.compile(re.escape(anchor_text), re.IGNORECASE)
88
  return pattern.sub(f'<a href="{target_url}">{anchor_text}</a>', sentence), True
89
 
90
- # 2) Otherwise, insert "at/on/from <a>anchor</a>" near a suitable noun
91
- insert_html = f'<a href="{target_url}">{anchor_text}</a>'
92
-
93
- m = re.search(r'\b(games?|content|options?|features?|benefits?)\b', sentence, flags=re.I)
94
- if m:
95
- idx = m.start()
96
- return (sentence[:idx] + f' at {insert_html} ' + sentence[idx:]).strip(), False
97
-
98
- # after first comma
99
- m2 = re.search(r',\s*', sentence)
100
- if m2:
101
- idx = m2.end()
102
- return (sentence[:idx] + f' at {insert_html} ' + sentence[idx:]).strip(), False
103
-
104
- # around "to"
105
- m3 = re.search(r'\bto\b', sentence, flags=re.I)
106
- if m3:
107
- idx = m3.start()
108
- return (sentence[:idx] + f' at {insert_html} ' + sentence[idx:]).strip(), False
109
-
110
- # last resort: short neutral phrase
111
  if sentence.endswith(('.', '!', '?')):
112
  base, punct = sentence[:-1], sentence[-1]
113
  else:
114
  base, punct = sentence, '.'
115
- rewritten = f'{base} with additional context available at {insert_html}{punct}'
 
 
 
 
 
 
 
116
  return rewritten, False
117
 
118
  def suggest_insertions(source_url, target_url, anchor_text, top_k=1):
@@ -120,21 +96,17 @@ def suggest_insertions(source_url, target_url, anchor_text, top_k=1):
120
  if not blocks:
121
  return [{"error":"No text blocks found on the page."}]
122
 
123
- # -------- target context (title + meta desc)
124
  try:
125
  tgt_html = requests.get(target_url, timeout=20, headers=UA).text
126
- soup_tgt = BeautifulSoup(tgt_html, "html.parser")
127
- tt = soup_tgt.title.get_text().strip() if soup_tgt.title else ""
128
- md = soup_tgt.find("meta", attrs={"name": "description"})
129
- tgt_desc = (md.get("content") or "").strip() if md else ""
130
- tgt_title = tt
131
  except Exception:
132
- tgt_title, tgt_desc = "", ""
133
 
134
  ext = tldextract.extract(target_url)
135
  tgt_domain = ".".join([p for p in [ext.domain, ext.suffix] if p])
136
 
137
- # NOTE: internal query string only (not shown to users)
138
  query = f"{anchor_text} — relevant to: {tgt_title} ({tgt_domain})"
139
  q_emb = embed([query])[0]
140
 
@@ -144,7 +116,7 @@ def suggest_insertions(source_url, target_url, anchor_text, top_k=1):
144
 
145
  results = []
146
  for idx in top_idx:
147
- blk = blocks[idx] # full paragraph
148
  sents = re.split(r'(?<=[.!?])\s+', blk)
149
  s_embs = embed(sents)
150
  s_sims = F.cosine_similarity(s_embs, q_emb.repeat(len(sents),1))
@@ -154,87 +126,12 @@ def suggest_insertions(source_url, target_url, anchor_text, top_k=1):
154
  results.append({
155
  "anchor_was_present": exact_found,
156
  "best_sentence_original": best_sent,
157
- "best_sentence_with_anchor": rewritten_sent,
158
- "best_paragraph": blk,
159
- "tgt_title": tgt_title,
160
- "tgt_desc": tgt_desc
161
  })
162
  return results
163
 
164
- # ---------- Plain-text helper (preserve spacing between tags)
165
- def to_plain_text(html_or_text):
166
- return BeautifulSoup(html_or_text, "html.parser").get_text(separator=" ", strip=True)
167
-
168
- # ---------- Distortion / safety helpers
169
- def detect_primary_brand(paragraph: str) -> str:
170
- """
171
- Heuristic: catch brand phrases like 'Base Casino', 'Acme Platform', 'Something App'.
172
- Returns lowercased brand phrase or ''.
173
- """
174
- p = paragraph.strip()
175
- m = re.search(r'\b([A-Z][a-z]+(?:\s+[A-Z][a-z]+){0,2})\s+(Casino|Platform|Site|Service|App)\b', p)
176
- if m:
177
- return (m.group(0)).lower()
178
- m2 = re.search(r'\b([A-Z][a-z]+(?:\s+[A-Z][a-z]+){1,2})\b', p)
179
- return m2.group(0).lower() if m2 else ""
180
-
181
- def rewrite_would_distort_meaning(original_text: str, rewritten_html: str, anchor_text: str, paragraph_text: str = "") -> bool:
182
- """
183
- True if the rewrite likely misattributes the subject or positions the anchor as the mechanism.
184
- Also flags if the anchor appears before the paragraph's main brand or too early overall,
185
- or if it introduces content-type nouns that weren't present in the original.
186
- """
187
- plain_rewrite = BeautifulSoup(rewritten_html, "html.parser").get_text(" ").strip().lower()
188
- plain_orig = original_text.strip().lower()
189
- a = anchor_text.strip().lower()
190
-
191
- brand = detect_primary_brand(paragraph_text)
192
- if brand and a in plain_rewrite:
193
- pos_a = plain_rewrite.find(a)
194
- pos_b = plain_rewrite.find(brand)
195
- if pos_b != -1 and pos_a != -1 and pos_a < pos_b:
196
- return True # anchor introduced before the paragraph’s brand
197
-
198
- # Anchor appears very early -> often implies subject shift
199
- if a in plain_rewrite:
200
- pos = plain_rewrite.find(a)
201
- if pos != -1 and pos <= max(4, int(0.20 * len(plain_rewrite))):
202
- return True
203
-
204
- # Anchor as the mechanism or double "at"
205
- mechanism_patterns = [
206
- rf'\bthrough\s+{re.escape(a)}\b',
207
- rf'\bvia\s+{re.escape(a)}\b',
208
- rf'\bat\s+{re.escape(a)}\s+at\b',
209
- rf'\bon\s+{re.escape(a)}\s+at\b',
210
- ]
211
- for pat in mechanism_patterns:
212
- if re.search(pat, plain_rewrite):
213
- return True
214
-
215
- # Re-attribute authorship/hosting to anchor
216
- bad_hosting = [
217
- rf'(this|the)\s+guide\s+(at|on|from)\s+{re.escape(a)}\b',
218
- rf'\b{re.escape(a)}\b\s+(explains|shows|details|covers)\b',
219
- r'\b(guide|article|post|review)\s+(at|on|from)\s+',
220
- ]
221
- for pat in bad_hosting:
222
- if re.search(pat, plain_rewrite):
223
- return True
224
-
225
- # Introducing content-type nouns when not present in original
226
- content_nouns = ["guide", "article", "post", "review", "platform", "site", "resource"]
227
- if any(n in plain_rewrite for n in content_nouns) and not any(n in plain_orig for n in content_nouns):
228
- return True
229
-
230
- return False
231
-
232
- def build_related_resource_line(target_url: str, anchor_text: str, plain_text=False) -> str:
233
- html = f'Related resource: <a href="{target_url}">{anchor_text}</a>.'
234
- return to_plain_text(html) if plain_text else html
235
-
236
  # =========================
237
- # GPT rewrite (editorial with paragraph context; can choose inline vs add-after)
238
  # =========================
239
  def _openai_chat(model_name: str, system: str, user_json: dict):
240
  headers = {"Authorization": f"Bearer {OPENAI_API_KEY}", "Content-Type": "application/json"}
@@ -253,78 +150,61 @@ def _openai_chat(model_name: str, system: str, user_json: dict):
253
  txt = r.json()["choices"][0]["message"]["content"]
254
  return json.loads(txt)
255
 
256
- def gpt_decide_and_rewrite(paragraph_text, chosen_sentence, anchor_text, target_url, tgt_title, tgt_desc):
257
  """
258
- Sends FULL PARAGRAPH + CHOSEN SENTENCE + TARGET METADATA to GPT.
259
- GPT must return:
260
- - mode: "inline" or "add_after"
261
- - sentence_html (required if mode=inline)
262
- - add_after_html (required if mode=add_after)
263
- Enforces: no em-dash, no CTA clichés, neutral attribution unless metadata allows.
264
  """
265
  if not OPENAI_API_KEY:
266
- print("[GPT] No OPENAI_API_KEY found → using fallback inline.")
267
- return {"mode": "inline", "sentence_html": chosen_sentence}
268
-
269
- # Determine which content-type nouns are allowed based on metadata
270
- meta = f"{tgt_title} {tgt_desc}".lower()
271
- allowed_nouns = [w for w in ["guide","article","blog","review","platform","site","resource"] if w in meta]
272
 
273
  system = (
274
- "You are a professional content editor.\n"
275
- "You receive the full paragraph, the chosen sentence, the anchor text, the target URL, and target metadata.\n"
276
- "Decide the safest strategy:\n"
277
- "A) inline produce ONE rewritten version of the chosen sentence with the anchor integrated mid-sentence, "
278
- "not at the end, preserving the paragraph’s subject/scope. Use clear, publication-quality English.\n"
279
- "B) add_after if an inline rewrite would distort the meaning or re-attribute authorship/hosting to the anchor site, "
280
- "leave the sentence unchanged and instead output a short neutral line to add after the paragraph.\n\n"
281
- "HARD RULES:\n"
282
- "1) If inline: include an <a href> with the EXACT anchor text; keep length close; no em-dash; avoid 'for details', "
283
- "'click here', 'learn more', 'visit', 'read more', 'via', 'through'. Do NOT present the anchor as the mechanism "
284
- "for the action (never 'through ANCHOR', 'via ANCHOR'). Prefer neutral adjuncts like 'also at', 'with context at', "
285
- "'additional information at', or 'resources at' before the anchor. Place the anchor within the first 70% of the sentence "
286
- "but after the paragraph’s brand/subject.\n"
287
- "2) If add_after: return a single short line like 'Related resource: <a href=\"URL\">ANCHOR</a>.' "
288
- "(12–14 words max, neutral tone).\n\n"
289
- "OUTPUT JSON ONLY with keys: mode ('inline'|'add_after'), sentence_html (if inline), add_after_html (if add_after)."
290
  )
291
-
292
  user = {
293
- "paragraph_text": paragraph_text,
294
- "chosen_sentence": chosen_sentence,
295
  "anchor_text": anchor_text,
296
  "target_url": target_url,
297
- "target_metadata": {"title": tgt_title, "description": tgt_desc},
298
- "allowed_nouns_from_metadata": allowed_nouns,
299
  "constraints": {
300
- "avoid": [
301
- "for details", "click here", "learn more", "visit", "read more",
302
- "via", "through", "—", "--", " - "
303
- ],
304
- "preferred_connectors": ["at", "on", "from", "in"],
305
- "place_anchor": "inside_first_70_percent"
306
  }
307
  }
308
 
 
309
  try:
 
310
  obj = _openai_chat(PREFERRED_OPENAI_MODEL, system, user)
311
  except Exception as e:
312
  print(f"[GPT] Preferred model failed: {e}. Falling back to {FALLBACK_OPENAI_MODEL}.")
313
  try:
314
  obj = _openai_chat(FALLBACK_OPENAI_MODEL, system, user)
315
  except Exception as e2:
316
- print(f"[GPT] Fallback failed: {e2}. Using inline fallback.")
317
- return {"mode": "inline", "sentence_html": chosen_sentence}
318
-
319
- # Normalize output
320
- mode = obj.get("mode", "inline")
321
- if mode not in ("inline", "add_after"):
322
- mode = "inline"
323
- return {
324
- "mode": mode,
325
- "sentence_html": obj.get("sentence_html", ""),
326
- "add_after_html": obj.get("add_after_html", "")
327
- }
 
328
 
329
  # =========================
330
  # Gradio UI
@@ -345,47 +225,25 @@ def run_tool(source_url, target_url, anchor_text, smart_rewrite, plain_text):
345
  if "error" in res:
346
  return f"❌ {res['error']}"
347
 
348
- draft_html = res["best_sentence_with_anchor"]
349
- orig_sentence = res["best_sentence_original"]
350
- paragraph = res["best_paragraph"]
351
- tgt_title = res.get("tgt_title", "")
352
- tgt_desc = res.get("tgt_desc", "")
353
-
354
- # Optional conservative rule: force add-after for "This guide ..."
355
- # if orig_sentence.strip().lower().startswith("this guide"):
356
- # add_after = build_related_resource_line(target_url, anchor_text, plain_text)
357
- # return warn + "Add this mini-line after the paragraph:\n\n" + add_after
358
 
 
359
  if smart_rewrite:
360
- # Ask GPT to decide: inline vs add-after (with full paragraph context)
361
- decision = gpt_decide_and_rewrite(paragraph, orig_sentence, anchor_text, target_url, tgt_title, tgt_desc)
362
- mode = decision.get("mode", "inline")
363
-
364
- if mode == "inline":
365
- final_html = decision.get("sentence_html", "") or draft_html
366
- # Safety gate: reject if it would distort meaning
367
- if rewrite_would_distort_meaning(orig_sentence, final_html, anchor_text, paragraph):
368
- add_after = build_related_resource_line(target_url, anchor_text, plain_text)
369
- return warn + "Add this mini-line after the paragraph (to avoid changing its meaning):\n\n" + add_after
370
-
371
- final_output = to_plain_text(final_html) if plain_text else final_html
372
- # We propose a replacement to ensure the exact integrated version is used
373
- return warn + f"Change this sentence:\n\n{orig_sentence}\n\nWith this one:\n\n{final_output}"
374
-
375
- else: # add_after
376
- add_line = decision.get("add_after_html") or build_related_resource_line(target_url, anchor_text, False)
377
- add_line_out = to_plain_text(add_line) if plain_text else add_line
378
- return warn + "Add this mini-line after the paragraph:\n\n" + add_line_out
379
 
 
 
 
 
 
380
  else:
381
- # No GPT: use heuristic inline fallback already injected in draft_html
382
- final_output = to_plain_text(draft_html) if plain_text else draft_html
383
- if res.get("anchor_was_present", False):
384
- return warn + f"✅ Add link here:\n\n{final_output}"
385
- else:
386
- return warn + f"Change this sentence:\n\n{orig_sentence}\n\nWith this one:\n\n{final_output}"
387
-
388
- # Show GPT status / model in the header
389
  gpt_status = "ON" if OPENAI_API_KEY else "OFF"
390
  title_model = PREFERRED_OPENAI_MODEL if OPENAI_API_KEY else "OFF"
391
 
@@ -400,7 +258,7 @@ demo = gr.Interface(
400
  ],
401
  outputs=gr.Textbox(label="Result", lines=12),
402
  title=f"Link Insertion Helper • GPT: {gpt_status} • Model: {title_model}",
403
- description="Chooses safe inline rewrite vs neutral add-after using full paragraph context. Toggle GPT and Plain text (no URL) as needed."
404
  )
405
 
406
  if __name__ == "__main__":
 
13
 
14
  # --- OpenAI settings ---
15
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") # add in HF Spaces: Settings → Variables & secrets
16
+ # Preferred model (you asked for “the new 5”): try it first, fallback to a widely-available fast model
17
+ PREFERRED_OPENAI_MODEL = os.getenv("OPENAI_MODEL", "gpt-5o") # change here if you like
18
+ FALLBACK_OPENAI_MODEL = "gpt-4o-mini" # automatic fallback
19
  OPENAI_CHAT_URL = "https://api.openai.com/v1/chat/completions"
20
 
21
  # =========================
 
67
  out = enc(**batch)
68
  return mean_pool(out.last_hidden_state, batch["attention_mask"])
69
 
 
70
  def inject_anchor_into_sentence(sentence, anchor_text, target_url):
71
+ """Wrap anchor if present; otherwise integrate link smoothly (no em-dash, no clichés)."""
 
 
 
 
72
  def norm(x): return re.sub(r'[^a-z0-9 ]','',x.lower())
73
  n_sent, n_anchor = norm(sentence), norm(anchor_text)
74
 
 
 
 
 
 
 
 
75
  if n_anchor and n_anchor in n_sent:
76
  pattern = re.compile(re.escape(anchor_text), re.IGNORECASE)
77
  return pattern.sub(f'<a href="{target_url}">{anchor_text}</a>', sentence), True
78
 
79
+ # Build a natural, short integration clause (no em-dash)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  if sentence.endswith(('.', '!', '?')):
81
  base, punct = sentence[:-1], sentence[-1]
82
  else:
83
  base, punct = sentence, '.'
84
+
85
+ clause_options = [
86
+ f' with insights from <a href="{target_url}">{anchor_text}</a>',
87
+ f' through <a href="{target_url}">{anchor_text}</a>',
88
+ f' via <a href="{target_url}">{anchor_text}</a>',
89
+ ]
90
+ clause = clause_options[0]
91
+ rewritten = f'{base}{clause}{punct}'
92
  return rewritten, False
93
 
94
  def suggest_insertions(source_url, target_url, anchor_text, top_k=1):
 
96
  if not blocks:
97
  return [{"error":"No text blocks found on the page."}]
98
 
99
+ # target context
100
  try:
101
  tgt_html = requests.get(target_url, timeout=20, headers=UA).text
102
+ tt = BeautifulSoup(tgt_html, "html.parser").title
103
+ tgt_title = tt.get_text().strip() if tt else ""
 
 
 
104
  except Exception:
105
+ tgt_title = ""
106
 
107
  ext = tldextract.extract(target_url)
108
  tgt_domain = ".".join([p for p in [ext.domain, ext.suffix] if p])
109
 
 
110
  query = f"{anchor_text} — relevant to: {tgt_title} ({tgt_domain})"
111
  q_emb = embed([query])[0]
112
 
 
116
 
117
  results = []
118
  for idx in top_idx:
119
+ blk = blocks[idx]
120
  sents = re.split(r'(?<=[.!?])\s+', blk)
121
  s_embs = embed(sents)
122
  s_sims = F.cosine_similarity(s_embs, q_emb.repeat(len(sents),1))
 
126
  results.append({
127
  "anchor_was_present": exact_found,
128
  "best_sentence_original": best_sent,
129
+ "best_sentence_with_anchor": rewritten_sent
 
 
 
130
  })
131
  return results
132
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  # =========================
134
+ # GPT rewrite (editorial, no em-dash, no clichés)
135
  # =========================
136
  def _openai_chat(model_name: str, system: str, user_json: dict):
137
  headers = {"Authorization": f"Bearer {OPENAI_API_KEY}", "Content-Type": "application/json"}
 
150
  txt = r.json()["choices"][0]["message"]["content"]
151
  return json.loads(txt)
152
 
153
def gpt_rewrite(sentence_html, anchor_text, target_url, style="neutral"):
    """Ask the chat model for a smoother sentence that still carries the link.

    The preferred model is tried first and the fallback model second.  The
    draft *sentence_html* is returned unchanged whenever no API key is
    configured, both calls fail, or the reply does not pass validation.

    Args:
        sentence_html: Draft sentence (HTML) that already contains the link.
        anchor_text: Exact anchor text that must remain linked.
        target_url: URL the anchor must point to.
        style: Style label forwarded to the model in the request payload.

    Returns:
        ``{"sentence_html": <final html>}``.
    """
    fallback = {"sentence_html": sentence_html}

    if not OPENAI_API_KEY:
        print("[GPT] No OPENAI_API_KEY found → using fallback.")
        return fallback

    system = (
        "You are a skilled content editor. Improve fluency and integrate the given anchor naturally "
        "into ONE sentence of similar length. Use clear, publication-quality English. "
        "STRICT RULES: (1) Include an <a href> tag that uses the EXACT anchor text. "
        "(2) Do NOT use an em dash or any dash. "
        '(3) Avoid phrases like "for details", "click here", "learn more", "visit", "read more". '
        "Prefer integrating the anchor as part of the sentence (subject, object, or prepositional phrase), "
        "e.g., “with insights from <a ...>ANCHOR</a>”, “through <a ...>ANCHOR</a>”, or “via <a ...>ANCHOR</a>”. "
        "Return a compact JSON object with key sentence_html only. No extra keys, no markdown."
    )
    user = {
        "task": "rewrite_for_link_insertion",
        "sentence_html": sentence_html,
        "anchor_text": anchor_text,
        "target_url": target_url,
        "style": style,
        "constraints": {
            "max_extra_words": 20,
            "avoid": ["for details", "click here", "learn more", "visit", "read more", "—", "--", " - "],
        },
    }

    # Try preferred model first, then fallback if needed.  Catching Exception
    # is deliberate: any failure here must degrade to the draft sentence
    # rather than break the tool.
    try:
        print("[GPT] Calling OpenAI Chat Completions with preferred model…")
        obj = _openai_chat(PREFERRED_OPENAI_MODEL, system, user)
    except Exception as e:
        print(f"[GPT] Preferred model failed: {e}. Falling back to {FALLBACK_OPENAI_MODEL}.")
        try:
            obj = _openai_chat(FALLBACK_OPENAI_MODEL, system, user)
        except Exception as e2:
            print(f"[GPT] Fallback failed: {e2}. Using fallback sentence.")
            return fallback

    # The reply is parsed JSON of unknown shape: accept only a dict holding a
    # non-empty string, otherwise keep the draft.
    out = obj.get("sentence_html") if isinstance(obj, dict) else None
    if not isinstance(out, str) or not out.strip():
        return fallback

    # Safety: the model must not drop the link.  Require an <a> element whose
    # text contains the anchor and whose href is still the requested URL;
    # checking plain text alone would accept a sentence with the tag removed.
    wanted_text = anchor_text.lower()
    wanted_href = target_url.strip()
    links = BeautifulSoup(out, "html.parser").find_all("a", href=True)
    link_kept = any(
        wanted_text in a.get_text().lower() and a["href"].strip() == wanted_href
        for a in links
    )
    if not link_kept:
        return fallback
    return {"sentence_html": out}
204
+
205
+ # ---------- Plain-text helper (preserve spacing between tags)
206
def to_plain_text(html_or_text):
    """Return the text content of *html_or_text* with all tags removed.

    Text fragments are stripped and joined with a single space so that words
    on either side of a tag do not run together.
    """
    parsed = BeautifulSoup(html_or_text, "html.parser")
    return parsed.get_text(separator=" ", strip=True)
208
 
209
  # =========================
210
  # Gradio UI
 
225
  if "error" in res:
226
  return f"❌ {res['error']}"
227
 
228
+ draft_html = res["best_sentence_with_anchor"]
 
 
 
 
 
 
 
 
 
229
 
230
+ # Optionally pass through GPT for a cleaner sentence
231
  if smart_rewrite:
232
+ g = gpt_rewrite(draft_html, anchor_text, target_url, style="neutral")
233
+ final_html = g["sentence_html"]
234
+ else:
235
+ final_html = draft_html
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
236
 
237
+ # Optionally convert to plain text (no <a>, no tags)
238
+ final_output = to_plain_text(final_html) if plain_text else final_html
239
+
240
+ if res.get("anchor_was_present", False):
241
+ return warn + f"✅ Add link here:\n\n{final_output}"
242
  else:
243
+ original_sentence = res['best_sentence_original']
244
+ return warn + f"Change this sentence:\n\n{original_sentence}\n\nWith this one:\n\n{final_output}"
245
+
246
+ # Show GPT status in the header
 
 
 
 
247
  gpt_status = "ON" if OPENAI_API_KEY else "OFF"
248
  title_model = PREFERRED_OPENAI_MODEL if OPENAI_API_KEY else "OFF"
249
 
 
258
  ],
259
  outputs=gr.Textbox(label="Result", lines=12),
260
  title=f"Link Insertion Helper • GPT: {gpt_status} • Model: {title_model}",
261
+ description="Suggests the best place to add your link and returns one clean instruction. Toggle GPT and Plain text (no URL) as needed."
262
  )
263
 
264
  if __name__ == "__main__":