Update app.py
Browse files
app.py
CHANGED
|
@@ -261,163 +261,136 @@ def inject_anchor_into_sentence(sentence, anchor_text, target_url):
|
|
| 261 |
rewritten = f'{base}{clause}{punct}'
|
| 262 |
return rewritten, False
|
| 263 |
|
| 264 |
-
def find_alternative_anchor(blocks, target_url, original_anchor
|
| 265 |
"""Find a better anchor text from the article that relates to the target URL."""
|
| 266 |
try:
|
| 267 |
-
#
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
full_text = " ".join(blocks)
|
| 296 |
-
sentences = re.split(r'[.!?]', full_text)
|
| 297 |
|
| 298 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 299 |
|
|
|
|
|
|
|
| 300 |
for sentence in sentences:
|
| 301 |
-
if not sentence
|
| 302 |
continue
|
| 303 |
-
|
| 304 |
-
sentence_lower = sentence.lower()
|
| 305 |
words = sentence.split()
|
| 306 |
|
| 307 |
-
#
|
| 308 |
-
# Single important words
|
| 309 |
-
for word in words:
|
| 310 |
-
clean_word = word.strip('.,!?;:"\'()[]{}')
|
| 311 |
-
if (len(clean_word) > 4 and clean_word.isalpha()):
|
| 312 |
-
# Calculate relevance even for loose matches
|
| 313 |
-
try:
|
| 314 |
-
word_emb = embed([clean_word])[0]
|
| 315 |
-
target_emb = embed([target_context.get("summary", "")[:500]])[0]
|
| 316 |
-
semantic_score = F.cosine_similarity(
|
| 317 |
-
word_emb.unsqueeze(0),
|
| 318 |
-
target_emb.unsqueeze(0)
|
| 319 |
-
).item()
|
| 320 |
-
|
| 321 |
-
# Lower threshold for considering candidates
|
| 322 |
-
if semantic_score > 0.15: # Much lower threshold
|
| 323 |
-
needs_bridge = semantic_score < 0.3 # Mark if needs bridge content
|
| 324 |
-
if clean_word not in candidate_anchors or candidate_anchors[clean_word][1] < semantic_score:
|
| 325 |
-
candidate_anchors[clean_word] = (sentence.strip(), semantic_score, needs_bridge)
|
| 326 |
-
except:
|
| 327 |
-
continue
|
| 328 |
-
|
| 329 |
-
# Look for 2-4 word phrases
|
| 330 |
for length in range(2, min(5, len(words) + 1)):
|
| 331 |
for i in range(len(words) - length + 1):
|
| 332 |
-
if i < 0 or i
|
| 333 |
-
continue
|
| 334 |
-
|
| 335 |
-
phrase_words = words[i:i+length]
|
| 336 |
-
phrase = ' '.join(phrase_words)
|
| 337 |
-
phrase_clean = phrase.strip('.,!?;:"\'()')
|
| 338 |
-
|
| 339 |
-
# More lenient filtering
|
| 340 |
-
skip_words = {'the', 'a', 'an', 'and', 'or', 'but', 'if', 'then', 'than'}
|
| 341 |
-
|
| 342 |
-
first_word = phrase_words[0].lower().strip('.,!?;:"\'')
|
| 343 |
-
last_word = phrase_words[-1].lower().strip('.,!?;:"\'')
|
| 344 |
-
|
| 345 |
-
# Allow more phrases through
|
| 346 |
-
if (len(phrase_clean) < 5 or
|
| 347 |
-
len(phrase_clean) > 50 or
|
| 348 |
-
not phrase_clean[0].isalpha()):
|
| 349 |
continue
|
|
|
|
|
|
|
| 350 |
|
| 351 |
-
#
|
| 352 |
-
if
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
# Calculate relevance score
|
| 356 |
-
try:
|
| 357 |
-
phrase_emb = embed([phrase_clean])[0]
|
| 358 |
-
target_emb = embed([target_context.get("summary", "")[:500]])[0]
|
| 359 |
-
semantic_score = F.cosine_similarity(
|
| 360 |
-
phrase_emb.unsqueeze(0),
|
| 361 |
-
target_emb.unsqueeze(0)
|
| 362 |
-
).item()
|
| 363 |
|
| 364 |
-
#
|
| 365 |
-
if
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
total_score = semantic_score + bonus
|
| 379 |
-
|
| 380 |
-
if phrase_clean not in candidate_anchors or candidate_anchors[phrase_clean][1] < total_score:
|
| 381 |
-
candidate_anchors[phrase_clean] = (sentence.strip(), total_score, needs_bridge)
|
| 382 |
-
if total_score > 0.2: # Only print decent candidates
|
| 383 |
-
print(f" Candidate: '{phrase_clean}' (score: {total_score:.3f}, needs_bridge: {needs_bridge})")
|
| 384 |
-
except:
|
| 385 |
-
continue
|
| 386 |
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
print("\nNo semantic matches found, looking for any noun phrases...")
|
| 390 |
-
for sentence in sentences[:10]: # Check first 10 sentences
|
| 391 |
-
words = sentence.split()
|
| 392 |
-
for word in words:
|
| 393 |
-
clean_word = word.strip('.,!?;:"\'()[]{}')
|
| 394 |
-
# Any proper noun or long word
|
| 395 |
-
if clean_word and len(clean_word) > 5 and clean_word[0].isupper():
|
| 396 |
-
candidate_anchors[clean_word] = (sentence.strip(), 0.1, True) # Low score, needs bridge
|
| 397 |
-
break
|
| 398 |
-
if candidate_anchors:
|
| 399 |
-
break
|
| 400 |
|
| 401 |
-
#
|
| 402 |
-
|
| 403 |
-
print("\n✗ No alternative anchor found at all")
|
| 404 |
-
return None, None, False
|
| 405 |
|
| 406 |
-
#
|
| 407 |
-
|
| 408 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 409 |
|
| 410 |
-
|
| 411 |
-
|
| 412 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 413 |
|
| 414 |
-
return best_anchor, best_sentence
|
| 415 |
|
| 416 |
except Exception as e:
|
| 417 |
print(f"Critical error in find_alternative_anchor: {e}")
|
| 418 |
-
|
| 419 |
-
traceback.print_exc()
|
| 420 |
-
return None, None, False
|
| 421 |
|
| 422 |
except Exception as e:
|
| 423 |
print(f"Critical error in find_alternative_anchor: {e}")
|
|
@@ -669,8 +642,7 @@ def suggest_insertions(source_url, target_url, anchor_text, top_k=1, suggest_alt
|
|
| 669 |
if suggest_alternative and not keyword_present:
|
| 670 |
try:
|
| 671 |
# Find a completely different anchor and sentence
|
| 672 |
-
|
| 673 |
-
alt_anchor, alt_sentence, needs_bridge = find_alternative_anchor(blocks, target_url, anchor_text, target_context)
|
| 674 |
|
| 675 |
if alt_anchor and alt_sentence:
|
| 676 |
# Create the sentence with the alternative anchor
|
|
@@ -679,7 +651,6 @@ def suggest_insertions(source_url, target_url, anchor_text, top_k=1, suggest_alt
|
|
| 679 |
result["alternative_sentence_original"] = alt_sentence
|
| 680 |
result["alternative_sentence"] = alt_rewritten
|
| 681 |
result["alternative_exact_match"] = alt_exact
|
| 682 |
-
result["needs_bridge_paragraph"] = needs_bridge
|
| 683 |
except Exception as e:
|
| 684 |
print(f"Error finding alternative anchor: {e}")
|
| 685 |
# Continue without alternative
|
|
@@ -798,51 +769,89 @@ def gpt_rewrite(sentence_html, anchor_text, target_url, style="neutral", languag
|
|
| 798 |
# Don't check for exact anchor text match as it might have special chars
|
| 799 |
return {"sentence_html": out}
|
| 800 |
|
| 801 |
-
def
|
| 802 |
-
"""
|
| 803 |
if not OPENAI_API_KEY:
|
| 804 |
-
return
|
| 805 |
|
| 806 |
# Create cache key
|
| 807 |
-
cache_key = hashlib.md5(f"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 808 |
|
| 809 |
-
|
| 810 |
-
|
|
|
|
| 811 |
|
| 812 |
system = (
|
| 813 |
-
|
| 814 |
-
|
| 815 |
-
"
|
| 816 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 817 |
"RULES: "
|
| 818 |
-
"
|
| 819 |
-
"
|
| 820 |
-
"
|
| 821 |
-
"
|
| 822 |
-
"
|
| 823 |
-
"
|
|
|
|
|
|
|
| 824 |
)
|
| 825 |
|
| 826 |
user = {
|
| 827 |
-
"
|
| 828 |
-
"
|
| 829 |
-
"anchor_text": anchor_text,
|
| 830 |
"target_url": target_url,
|
| 831 |
-
"target_title": target_title,
|
| 832 |
-
"target_topic": target_topic,
|
| 833 |
"language": language,
|
| 834 |
-
"
|
| 835 |
}
|
| 836 |
|
| 837 |
try:
|
| 838 |
obj = _openai_chat_cached(cache_key, PREFERRED_OPENAI_MODEL, system, user)
|
|
|
|
| 839 |
return obj
|
| 840 |
-
except:
|
| 841 |
-
|
| 842 |
-
|
| 843 |
-
return obj
|
| 844 |
-
except:
|
| 845 |
-
return {"paragraph": sentence}
|
| 846 |
"""
|
| 847 |
Final QA pass with language support.
|
| 848 |
"""
|
|
|
|
| 261 |
rewritten = f'{base}{clause}{punct}'
|
| 262 |
return rewritten, False
|
| 263 |
|
| 264 |
+
def _fetch_target_context(target_url, original_anchor):
    """Fetch the target page and return (title, meta_description, lead_content).

    Degrades gracefully: if the page cannot be fetched, returns empty
    title/meta and falls back to the original anchor as the only context.
    """
    try:
        tgt_html = requests.get(target_url, timeout=20, headers=UA).text
        soup = BeautifulSoup(tgt_html, "html.parser")

        # Title and meta description carry the strongest topical signal.
        title = soup.title.get_text().strip() if soup.title else ""
        meta_desc = ""
        meta_tag = soup.find("meta", attrs={"name": "description"})
        if meta_tag:
            meta_desc = meta_tag.get("content", "")

        # Lead paragraphs only; snippets under 50 chars are likely boilerplate.
        target_paragraphs = []
        for p in soup.find_all("p")[:5]:
            text = p.get_text().strip()
            if len(text) > 50:
                target_paragraphs.append(text)
        target_content = " ".join(target_paragraphs[:3])
        return title, meta_desc, target_content
    except Exception as e:
        print(f"Error fetching target URL: {e}")
        return "", "", original_anchor


def _extract_candidate_phrases(full_text):
    """Collect candidate anchor phrases from the article text.

    Returns a set containing 2-4 word spans that do not start or end on a
    stopword, plus notable single words (long words, or capitalized
    non-stopwords, i.e. likely proper nouns).
    """
    # Common words to exclude (English + Croatian/Serbian stopwords).
    stopwords = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for',
                 'of', 'with', 'by', 'from', 'as', 'is', 'was', 'are', 'were', 'be',
                 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could',
                 'should', 'je', 'i', 'u', 'na', 'se', 'da', 'su', 'za', 'od', 'sa',
                 'po', 'iz', 'će', 'bi', 'ako', 'ali', 'jer', 'kada', 'gdje', 'što'}

    all_phrases = set()
    for sentence in re.split(r'[.!?]', full_text):
        if not sentence:
            continue
        words = sentence.split()

        # Phrases of 2-4 words. The range() bounds already guarantee the
        # slice is in-range, so no extra bounds check is needed here (the
        # previous `if i < 0 or i+length > len(words)` was dead code).
        for length in range(2, min(5, len(words) + 1)):
            for i in range(len(words) - length + 1):
                phrase_clean = ' '.join(words[i:i + length]).strip('.,!?;:"\'')
                first_word = words[i].lower().strip('.,!?;:')
                last_word = words[i + length - 1].lower().strip('.,!?;:')

                # Skip if starts/ends with stopwords, or too short/long to
                # work as an anchor.
                if (first_word not in stopwords and
                        last_word not in stopwords and
                        5 < len(phrase_clean) < 50):
                    all_phrases.add(phrase_clean)

        # Also extract single important words (proper nouns, long words).
        for word in words:
            clean_word = word.strip('.,!?;:"\'')
            if clean_word and (len(clean_word) > 6 or
                               (clean_word[0].isupper() and clean_word.lower() not in stopwords)):
                all_phrases.add(clean_word)
    return all_phrases


def find_alternative_anchor(blocks, target_url, original_anchor):
    """Find a better anchor text from the article that relates to the target URL.

    Args:
        blocks: list of text blocks (paragraphs) from the source article.
        target_url: URL of the page the new anchor should link to.
        original_anchor: the anchor text we are trying to replace; exact
            (case-insensitive) matches of it are skipped as candidates.

    Returns:
        (best_anchor, best_sentence): the highest-scoring candidate phrase
        and the sentence it occurs in, or (None, None) when no candidate
        is found or embedding the target context fails.
    """
    try:
        # Get target page context.
        title, meta_desc, target_content = _fetch_target_context(target_url, original_anchor)

        # Extract all potential anchor phrases from the source article.
        full_text = " ".join(blocks)
        all_phrases = _extract_candidate_phrases(full_text)
        if not all_phrases:
            return None, None

        # Create context query from target URL info.
        target_context = f"{title} {meta_desc} {target_content}"[:500]

        # Score each phrase based on relevance to target.
        try:
            target_emb = embed([target_context])[0]
        except Exception:
            # Embedding backend unavailable — nothing sensible to score with.
            return None, None

        best_anchor = None
        best_score = -1
        best_sentence = None

        # Hoist invariants out of the candidate loop.
        full_text_lower = full_text.lower()
        # The same sentence is reached via many candidate phrases; embedding
        # it once and caching its score avoids O(phrases x sentences) calls.
        sentence_score_cache = {}

        # Evaluate each potential anchor (cap at 50 to bound embedding work).
        for phrase in list(all_phrases)[:50]:
            # Skip if identical to original anchor (we want something different).
            if phrase.lower() == original_anchor.lower():
                continue

            try:
                phrase_lower = phrase.lower()
                # Score this phrase against target context.
                phrase_emb = embed([phrase])[0]
                relevance_score = F.cosine_similarity(phrase_emb.unsqueeze(0), target_emb.unsqueeze(0)).item()

                # Only consider phrases that actually appear in the article.
                if phrase_lower not in full_text_lower:
                    continue
                # Find sentences containing this phrase.
                for block in blocks:
                    if phrase_lower not in block.lower():
                        continue
                    for sent in re.split(r'(?<=[.!?])\s+', block):
                        if not sent or phrase_lower not in sent.lower():
                            continue
                        try:
                            if sent not in sentence_score_cache:
                                sent_emb = embed([sent])[0]
                                sentence_score_cache[sent] = F.cosine_similarity(
                                    sent_emb.unsqueeze(0), target_emb.unsqueeze(0)
                                ).item()
                            context_score = sentence_score_cache[sent]
                            # Weight the phrase itself above its surrounding sentence.
                            combined_score = (relevance_score * 0.6) + (context_score * 0.4)

                            if combined_score > best_score:
                                best_score = combined_score
                                best_anchor = phrase
                                best_sentence = sent
                        except Exception:
                            continue
            except Exception as e:
                print(f"Error evaluating phrase '{phrase}': {e}")
                continue

        return best_anchor, best_sentence

    except Exception as e:
        print(f"Critical error in find_alternative_anchor: {e}")
        return None, None
|
|
|
|
|
|
|
| 394 |
|
| 395 |
except Exception as e:
|
| 396 |
print(f"Critical error in find_alternative_anchor: {e}")
|
|
|
|
| 642 |
if suggest_alternative and not keyword_present:
|
| 643 |
try:
|
| 644 |
# Find a completely different anchor and sentence
|
| 645 |
+
alt_anchor, alt_sentence = find_alternative_anchor(blocks, target_url, anchor_text)
|
|
|
|
| 646 |
|
| 647 |
if alt_anchor and alt_sentence:
|
| 648 |
# Create the sentence with the alternative anchor
|
|
|
|
| 651 |
result["alternative_sentence_original"] = alt_sentence
|
| 652 |
result["alternative_sentence"] = alt_rewritten
|
| 653 |
result["alternative_exact_match"] = alt_exact
|
|
|
|
| 654 |
except Exception as e:
|
| 655 |
print(f"Error finding alternative anchor: {e}")
|
| 656 |
# Continue without alternative
|
|
|
|
| 769 |
# Don't check for exact anchor text match as it might have special chars
|
| 770 |
return {"sentence_html": out}
|
| 771 |
|
| 772 |
+
def gpt_get_target_keywords(target_url, target_context, language="English"):
    """Ask GPT to suggest 5-10 relevant search keywords users would use to find this page."""
    # No API key configured: nothing we can ask.
    if not OPENAI_API_KEY:
        return []

    # Cache key is derived from the URL + language pair.
    cache_key = hashlib.md5(f"keywords_{target_url}{language}".encode()).hexdigest()

    cached = API_RESPONSE_CACHE.get(cache_key)
    if cached is not None:
        print(f"[GPT] Using cached keywords for {target_url[:30]}...")
        return cached.get("keywords", [])

    # Pull the page signals GPT will reason over (content capped at 500 chars).
    page_title = target_context.get("title", "")
    page_meta = target_context.get("meta_description", "")
    page_content = target_context.get("main_content", "")[:500]

    system = (
        "You are an SEO expert. Based on the page content provided, suggest 5-10 search keywords or phrases "
        "that users would likely type into Google to find this page. "
        "Include both short keywords (1-2 words) and long-tail keywords (3-5 words). "
        "Make them realistic search terms, not just words from the page. "
        f"Consider the {language} language and local search patterns. "
        "Return JSON with a 'keywords' array."
    )

    user = {
        "url": target_url,
        "title": page_title,
        "meta_description": page_meta,
        "content_preview": page_content,
        "task": "Generate search keywords users would use to find this page"
    }

    try:
        response = _openai_chat_cached(cache_key, PREFERRED_OPENAI_MODEL, system, user)
        suggested = response.get("keywords", [])
        print(f"\n[GPT] Target page keywords: {suggested}")
        return suggested
    except Exception as e:
        print(f"[GPT] Error getting keywords: {e}")
        return []
|
| 813 |
+
|
| 814 |
+
def gpt_add_keyword_to_content(blocks, keywords, target_url, language="English"):
    """Ask GPT to naturally add one of the keywords to the content with proper context."""
    # Bail out early when there is no API key or nothing to integrate.
    if not OPENAI_API_KEY or not keywords:
        return None

    # Cache key mixes an article preview with the keyword set and target URL.
    blocks_preview = " ".join(blocks[:3])[:500]
    key_material = f"add_kw_{blocks_preview}{str(keywords)}{target_url}"
    cache_key = hashlib.md5(key_material.encode()).hexdigest()

    cached = API_RESPONSE_CACHE.get(cache_key)
    if cached is not None:
        return cached

    system = (
        f"You are a skilled content editor writing in {language}. "
        "Your task is to naturally integrate ONE of the provided keywords into the article content. "
        "RULES: "
        "1. Choose the keyword that fits most naturally with the existing content "
        "2. Add 2-3 sentences or a short paragraph that includes the keyword "
        "3. Make it flow naturally - it should feel like it belongs there "
        "4. Include an HTML link using the keyword as anchor text "
        "5. Specify WHERE to add it (e.g., 'after the second paragraph', 'before the conclusion') "
        "6. The addition should provide value, not just keyword stuffing "
        f"7. Write in {language} and preserve special characters "
        "Return JSON with: 'keyword_used', 'content_to_add', 'placement_instruction'"
    )

    user = {
        "article_preview": " ".join(blocks[:5]),
        "available_keywords": keywords,
        "target_url": target_url,
        "language": language,
        "task": "Add one keyword naturally to the content"
    }

    try:
        response = _openai_chat_cached(cache_key, PREFERRED_OPENAI_MODEL, system, user)
        # Remember the result so repeat calls skip the API round-trip.
        API_RESPONSE_CACHE[cache_key] = response
        return response
    except Exception as e:
        print(f"[GPT] Error adding keyword: {e}")
        return None
|
|
|
|
|
|
|
|
|
|
| 855 |
"""
|
| 856 |
Final QA pass with language support.
|
| 857 |
"""
|