dusan-presswhizz committed on
Commit
1cee888
·
verified ·
1 Parent(s): 890b1ad

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -42
app.py CHANGED
@@ -11,9 +11,10 @@ import gradio as gr
11
  MODEL = "michiyasunaga/LinkBERT-base"
12
  UA = {"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0 Safari/537.36"}
13
 
14
- OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") # set in HF Spaces: Settings → Variables & secrets
15
- OPENAI_URL = "https://api.openai.com/v1/responses" # OpenAI Responses API
16
- OPENAI_MODEL = "gpt-4o"
 
17
 
18
  # =========================
19
  # Load LinkBERT
@@ -57,8 +58,9 @@ def inject_anchor_into_sentence(sentence, anchor_text, target_url):
57
  pattern = re.compile(re.escape(anchor_text), re.IGNORECASE)
58
  return pattern.sub(f'<a href="{target_url}">{anchor_text}</a>', sentence), True
59
 
60
- # add short clause non-spammy
61
  base, punct = (sentence[:-1], sentence[-1]) if sentence[-1:] in ".!?" else (sentence, ".")
 
62
  rewritten = f'{base} — see <a href="{target_url}">{anchor_text}</a> for details{punct}'
63
  return rewritten, False
64
 
@@ -101,20 +103,19 @@ def suggest_insertions(source_url, target_url, anchor_text, top_k=1):
101
  })
102
  return results
103
 
104
- # ---------- GPT rewrite (optional)
105
  def gpt_rewrite(sentence_html, anchor_text, target_url, style="neutral"):
106
  """Improve HTML sentence via OpenAI; fail-soft to the provided sentence."""
107
  if not OPENAI_API_KEY:
108
- return {
109
- "sentence_html": sentence_html,
110
- "micro_paragraph_html": f'For more detail, see <a href="{target_url}">{anchor_text}</a>.'
111
- }
112
 
 
 
113
  system = (
114
  "You are a precise editor. Rewrite minimally for clarity and flow. "
115
- "Output valid HTML only (no markdown). "
116
- "Must include an <a href> with the exact anchor text provided. "
117
- "Return a compact JSON object with keys sentence_html and micro_paragraph_html."
118
  )
119
  user = {
120
  "task": "rewrite_for_link_insertion",
@@ -124,34 +125,30 @@ def gpt_rewrite(sentence_html, anchor_text, target_url, style="neutral"):
124
  "style": style,
125
  "constraints": {"max_extra_words": 20, "avoid": ["spammy phrasing", "overclaiming"]}
126
  }
127
- headers = {"Authorization": f"Bearer {OPENAI_API_KEY}", "Content-Type": "application/json"}
128
  body = {
129
  "model": OPENAI_MODEL,
130
- "input": [
131
- {"role":"system", "content": system},
132
- {"role":"user", "content": json.dumps(user)}
 
133
  ],
134
- "response_format": {"type":"json_object"}
135
  }
136
  try:
137
- r = requests.post(OPENAI_URL, headers=headers, data=json.dumps(body), timeout=60)
 
138
  r.raise_for_status()
139
- text = r.json().get("output", "")
140
- obj = json.loads(text)
141
- return {
142
- "sentence_html": obj.get("sentence_html", sentence_html),
143
- "micro_paragraph_html": obj.get("micro_paragraph_html", f'For more detail, see <a href="{target_url}">{anchor_text}</a>.')
144
- }
145
- except Exception:
146
- return {
147
- "sentence_html": sentence_html,
148
- "micro_paragraph_html": f'For more detail, see <a href="{target_url}">{anchor_text}</a>.'
149
- }
150
-
151
- # ---------- Plain-text helper (strip HTML tags)
152
  def to_plain_text(html_or_text):
153
- # Converts HTML to plain text (keeps inner anchor text, removes <a ...>)
154
- return BeautifulSoup(html_or_text, "html.parser").get_text(separator="", strip=True)
155
 
156
  # =========================
157
  # Gradio UI
@@ -164,28 +161,27 @@ def run_tool(source_url, target_url, anchor_text, smart_rewrite, plain_text):
164
  if "error" in res:
165
  return f"❌ {res['error']}"
166
 
167
- # Choose sentence to polish (already has <a> if anchor existed)
168
  draft_html = res["best_sentence_with_anchor"]
169
 
 
170
  if smart_rewrite:
171
  g = gpt_rewrite(draft_html, anchor_text, target_url, style="neutral")
172
  final_html = g["sentence_html"]
173
  else:
174
  final_html = draft_html
175
 
176
- # Optionally return plain text (no <a>, no tags)
177
  final_output = to_plain_text(final_html) if plain_text else final_html
178
 
179
  if res.get("anchor_was_present", False):
180
  return f"✅ Add link here:\n\n{final_output}"
181
  else:
182
- # show original (plain text, since it never had a link)
183
  original_sentence = res['best_sentence_original']
184
- if plain_text:
185
- # ensure the replacement is also plain
186
- return f"Change this sentence:\n\n{original_sentence}\n\nWith this one:\n\n{final_output}"
187
- else:
188
- return f"Change this sentence:\n\n{original_sentence}\n\nWith this one:\n\n{final_output}"
189
 
190
  demo = gr.Interface(
191
  fn=run_tool,
@@ -197,7 +193,7 @@ demo = gr.Interface(
197
  gr.Checkbox(label="Plain text (no URL)", value=False)
198
  ],
199
  outputs=gr.Textbox(label="Result", lines=12),
200
- title="Link Insertion Helper",
201
  description="Suggests the best place to add your link and returns one clean instruction. Toggle GPT and Plain text (no URL) as needed."
202
  )
203
 
 
11
  MODEL = "michiyasunaga/LinkBERT-base"
12
  UA = {"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0 Safari/537.36"}
13
 
14
+ # OpenAI (Chat Completions)
15
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") # add in HF Spaces: Settings → Variables & secrets
16
+ OPENAI_MODEL = "gpt-4o-mini" # fast & good; you can switch to "gpt-4o"
17
+ OPENAI_CHAT_URL = "https://api.openai.com/v1/chat/completions"
18
 
19
  # =========================
20
  # Load LinkBERT
 
58
  pattern = re.compile(re.escape(anchor_text), re.IGNORECASE)
59
  return pattern.sub(f'<a href="{target_url}">{anchor_text}</a>', sentence), True
60
 
61
+ # add short clause (non-spammy)
62
  base, punct = (sentence[:-1], sentence[-1]) if sentence[-1:] in ".!?" else (sentence, ".")
63
+ # NOTE: spaces around <a> so plain-text stripping preserves spacing
64
  rewritten = f'{base} — see <a href="{target_url}">{anchor_text}</a> for details{punct}'
65
  return rewritten, False
66
 
 
103
  })
104
  return results
105
 
106
+ # ---------- GPT rewrite (Chat Completions)
107
  def gpt_rewrite(sentence_html, anchor_text, target_url, style="neutral"):
108
  """Improve HTML sentence via OpenAI; fail-soft to the provided sentence."""
109
  if not OPENAI_API_KEY:
110
+ print("[GPT] No OPENAI_API_KEY found → using fallback.")
111
+ return {"sentence_html": sentence_html}
 
 
112
 
113
+ print("[GPT] Calling OpenAI Chat Completions...")
114
+ headers = {"Authorization": f"Bearer {OPENAI_API_KEY}", "Content-Type": "application/json"}
115
  system = (
116
  "You are a precise editor. Rewrite minimally for clarity and flow. "
117
+ "Output valid HTML only (no markdown). Must include an <a href> tag using the exact anchor text. "
118
+ "Return a compact JSON object with key sentence_html only."
 
119
  )
120
  user = {
121
  "task": "rewrite_for_link_insertion",
 
125
  "style": style,
126
  "constraints": {"max_extra_words": 20, "avoid": ["spammy phrasing", "overclaiming"]}
127
  }
 
128
  body = {
129
  "model": OPENAI_MODEL,
130
+ "response_format": {"type": "json_object"},
131
+ "messages": [
132
+ {"role": "system", "content": system},
133
+ {"role": "user", "content": json.dumps(user)}
134
  ],
135
+ "temperature": 0.2
136
  }
137
  try:
138
+ r = requests.post(OPENAI_CHAT_URL, headers=headers, json=body, timeout=60)
139
+ print(f"[GPT] HTTP {r.status_code}")
140
  r.raise_for_status()
141
+ txt = r.json()["choices"][0]["message"]["content"]
142
+ obj = json.loads(txt)
143
+ return {"sentence_html": obj.get("sentence_html", sentence_html)}
144
+ except Exception as e:
145
+ print(f"[GPT] Error: {e}")
146
+ return {"sentence_html": sentence_html}
147
+
148
+ # ---------- Plain-text helper (fixes spacing)
 
 
 
 
 
149
  def to_plain_text(html_or_text):
150
+ # IMPORTANT: separator=" " inserts spaces between tag boundaries -> avoids "seeUFABETfor"
151
+ return BeautifulSoup(html_or_text, "html.parser").get_text(separator=" ", strip=True)
152
 
153
  # =========================
154
  # Gradio UI
 
161
  if "error" in res:
162
  return f"❌ {res['error']}"
163
 
 
164
  draft_html = res["best_sentence_with_anchor"]
165
 
166
+ # Optionally pass through GPT for a cleaner sentence
167
  if smart_rewrite:
168
  g = gpt_rewrite(draft_html, anchor_text, target_url, style="neutral")
169
  final_html = g["sentence_html"]
170
  else:
171
  final_html = draft_html
172
 
173
+ # Optionally convert to plain text (no <a>, no tags), with proper spacing
174
  final_output = to_plain_text(final_html) if plain_text else final_html
175
 
176
  if res.get("anchor_was_present", False):
177
  return f"✅ Add link here:\n\n{final_output}"
178
  else:
 
179
  original_sentence = res['best_sentence_original']
180
+ # show original (never had link) + final replacement (HTML or plain, per toggle)
181
+ return f"Change this sentence:\n\n{original_sentence}\n\nWith this one:\n\n{final_output}"
182
+
183
+ # Show GPT status in the header
184
+ gpt_status = "ON" if OPENAI_API_KEY else "OFF"
185
 
186
  demo = gr.Interface(
187
  fn=run_tool,
 
193
  gr.Checkbox(label="Plain text (no URL)", value=False)
194
  ],
195
  outputs=gr.Textbox(label="Result", lines=12),
196
+ title=f"Link Insertion Helper • GPT: {gpt_status}",
197
  description="Suggests the best place to add your link and returns one clean instruction. Toggle GPT and Plain text (no URL) as needed."
198
  )
199