Update app.py
Browse files
app.py
CHANGED
|
@@ -262,134 +262,63 @@ def inject_anchor_into_sentence(sentence, anchor_text, target_url):
|
|
| 262 |
return rewritten, False
|
| 263 |
|
| 264 |
def find_alternative_anchor(blocks, target_url, original_anchor):
|
| 265 |
-
"""
|
|
|
|
|
|
|
| 266 |
try:
|
| 267 |
-
|
| 268 |
-
try:
|
| 269 |
-
tgt_html = requests.get(target_url, timeout=20, headers=UA).text
|
| 270 |
-
soup = BeautifulSoup(tgt_html, "html.parser")
|
| 271 |
-
|
| 272 |
-
# Extract target page title and meta description
|
| 273 |
-
title = soup.title.get_text().strip() if soup.title else ""
|
| 274 |
-
meta_desc = ""
|
| 275 |
-
meta_tag = soup.find("meta", attrs={"name": "description"})
|
| 276 |
-
if meta_tag:
|
| 277 |
-
meta_desc = meta_tag.get("content", "")
|
| 278 |
-
|
| 279 |
-
# Extract key terms from target page (first few paragraphs)
|
| 280 |
-
target_paragraphs = []
|
| 281 |
-
for p in soup.find_all("p")[:5]:
|
| 282 |
-
text = p.get_text().strip()
|
| 283 |
-
if len(text) > 50:
|
| 284 |
-
target_paragraphs.append(text)
|
| 285 |
-
target_content = " ".join(target_paragraphs[:3])
|
| 286 |
-
|
| 287 |
-
except Exception as e:
|
| 288 |
-
print(f"Error fetching target URL: {e}")
|
| 289 |
-
title = ""
|
| 290 |
-
meta_desc = ""
|
| 291 |
-
target_content = original_anchor
|
| 292 |
-
|
| 293 |
-
# Extract all potential anchor phrases from the source article
|
| 294 |
-
all_phrases = set()
|
| 295 |
-
full_text = " ".join(blocks)
|
| 296 |
|
| 297 |
-
#
|
| 298 |
-
|
| 299 |
-
'of', 'with', 'by', 'from', 'as', 'is', 'was', 'are', 'were', 'be',
|
| 300 |
-
'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could',
|
| 301 |
-
'should', 'je', 'i', 'u', 'na', 'se', 'da', 'su', 'za', 'od', 'sa',
|
| 302 |
-
'po', 'iz', 'će', 'bi', 'ako', 'ali', 'jer', 'kada', 'gdje', 'što'}
|
| 303 |
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
for i in range(len(words) - length + 1):
|
| 314 |
-
if i < 0 or i+length > len(words):
|
| 315 |
-
continue
|
| 316 |
-
phrase = ' '.join(words[i:i+length])
|
| 317 |
-
phrase_clean = phrase.strip('.,!?;:"\' ')
|
| 318 |
-
|
| 319 |
-
# Check if phrase is meaningful
|
| 320 |
-
if i < len(words) and i+length-1 < len(words):
|
| 321 |
-
first_word = words[i].lower().strip('.,!?;:')
|
| 322 |
-
last_word = words[i+length-1].lower().strip('.,!?;:')
|
| 323 |
-
|
| 324 |
-
# Skip if starts/ends with stopwords or is too short
|
| 325 |
-
if (first_word not in stopwords and
|
| 326 |
-
last_word not in stopwords and
|
| 327 |
-
len(phrase_clean) > 5 and
|
| 328 |
-
len(phrase_clean) < 50):
|
| 329 |
-
all_phrases.add(phrase_clean)
|
| 330 |
-
|
| 331 |
-
# Also extract single important words (proper nouns, long words)
|
| 332 |
-
for word in words:
|
| 333 |
-
clean_word = word.strip('.,!?;:"\' ')
|
| 334 |
-
if clean_word and (len(clean_word) > 6 or
|
| 335 |
-
(len(clean_word) > 0 and clean_word[0].isupper() and clean_word.lower() not in stopwords)):
|
| 336 |
-
all_phrases.add(clean_word)
|
| 337 |
|
| 338 |
-
if not
|
|
|
|
| 339 |
return None, None
|
| 340 |
|
| 341 |
-
#
|
| 342 |
-
|
|
|
|
|
|
|
|
|
|
| 343 |
|
| 344 |
-
#
|
| 345 |
-
|
| 346 |
-
|
| 347 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 348 |
return None, None
|
| 349 |
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
|
|
|
|
|
|
|
| 353 |
|
| 354 |
-
#
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
if phrase.lower() == original_anchor.lower():
|
| 358 |
-
continue
|
| 359 |
-
|
| 360 |
-
try:
|
| 361 |
-
# Score this phrase against target context
|
| 362 |
-
phrase_emb = embed([phrase])[0]
|
| 363 |
-
relevance_score = F.cosine_similarity(phrase_emb.unsqueeze(0), target_emb.unsqueeze(0)).item()
|
| 364 |
-
|
| 365 |
-
# Check if this phrase appears in article and find its best context
|
| 366 |
-
if phrase.lower() in full_text.lower():
|
| 367 |
-
# Find sentences containing this phrase
|
| 368 |
-
for block in blocks:
|
| 369 |
-
if phrase.lower() in block.lower():
|
| 370 |
-
sents = re.split(r'(?<=[.!?])\s+', block)
|
| 371 |
-
for sent in sents:
|
| 372 |
-
if sent and phrase.lower() in sent.lower():
|
| 373 |
-
# Score this sentence-phrase combination
|
| 374 |
-
try:
|
| 375 |
-
sent_emb = embed([sent])[0]
|
| 376 |
-
context_score = F.cosine_similarity(sent_emb.unsqueeze(0), target_emb.unsqueeze(0)).item()
|
| 377 |
-
combined_score = (relevance_score * 0.6) + (context_score * 0.4)
|
| 378 |
-
|
| 379 |
-
if combined_score > best_score:
|
| 380 |
-
best_score = combined_score
|
| 381 |
-
best_anchor = phrase
|
| 382 |
-
best_sentence = sent
|
| 383 |
-
except:
|
| 384 |
-
continue
|
| 385 |
-
except Exception as e:
|
| 386 |
-
print(f"Error evaluating phrase '{phrase}': {e}")
|
| 387 |
-
continue
|
| 388 |
|
| 389 |
-
return
|
| 390 |
|
| 391 |
except Exception as e:
|
| 392 |
-
print(f"Critical error
|
|
|
|
|
|
|
| 393 |
return None, None
|
| 394 |
|
| 395 |
def suggest_insertions(source_url, target_url, anchor_text, top_k=1, suggest_alternative=False):
|
|
@@ -514,18 +443,16 @@ def suggest_insertions(source_url, target_url, anchor_text, top_k=1, suggest_alt
|
|
| 514 |
# If anchor not present in article and alternative suggestion requested
|
| 515 |
if suggest_alternative and not keyword_present:
|
| 516 |
try:
|
| 517 |
-
#
|
| 518 |
-
alt_anchor,
|
| 519 |
|
| 520 |
-
if alt_anchor and
|
| 521 |
-
# Create the sentence with the alternative anchor
|
| 522 |
-
alt_rewritten, alt_exact = inject_anchor_into_sentence(alt_sentence, alt_anchor, target_url)
|
| 523 |
result["alternative_anchor"] = alt_anchor
|
| 524 |
-
result["alternative_sentence_original"] =
|
| 525 |
-
result["alternative_sentence"] =
|
| 526 |
-
result["alternative_exact_match"] =
|
| 527 |
except Exception as e:
|
| 528 |
-
print(f"Error
|
| 529 |
# Continue without alternative
|
| 530 |
|
| 531 |
results.append(result)
|
|
@@ -694,6 +621,97 @@ def gpt_validate_and_polish(sentence_html, anchor_text, target_url, language="En
|
|
| 694 |
|
| 695 |
return {"sentence_html": out}
|
| 696 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 697 |
def to_plain_text(html_or_text):
|
| 698 |
"""Convert HTML to plain text, properly handling special characters."""
|
| 699 |
text = BeautifulSoup(html_or_text, "html.parser").get_text(separator=" ", strip=True)
|
|
@@ -746,7 +764,7 @@ def run_tool(source_url, target_url, anchor_text, smart_rewrite, plain_text, sug
|
|
| 746 |
# Anchor is in the suggested sentence - just show where to add the link
|
| 747 |
final_output = to_plain_text(draft_html) if plain_text else draft_html
|
| 748 |
result = warn + f"✅ **Anchor text '{anchor_text}' found in article!**\n\n"
|
| 749 |
-
result += f"
|
| 750 |
result += f"{final_output}"
|
| 751 |
else:
|
| 752 |
# Anchor is in article but not in this sentence
|
|
@@ -761,7 +779,7 @@ def run_tool(source_url, target_url, anchor_text, smart_rewrite, plain_text, sug
|
|
| 761 |
final_output = to_plain_text(final_html) if plain_text else final_html
|
| 762 |
|
| 763 |
result = warn + f"✅ **Anchor text '{anchor_text}' found in article!**\n\n"
|
| 764 |
-
result += f"
|
| 765 |
result += f"{final_output}"
|
| 766 |
else:
|
| 767 |
# Anchor doesn't exist in article at all - need to add it
|
|
@@ -776,41 +794,35 @@ def run_tool(source_url, target_url, anchor_text, smart_rewrite, plain_text, sug
|
|
| 776 |
final_output = to_plain_text(final_html) if plain_text else final_html
|
| 777 |
|
| 778 |
result = warn + f"⚠️ **Anchor text '{anchor_text}' not found in article**\n\n"
|
| 779 |
-
result += f"
|
| 780 |
result += f"Original: {original_sentence}\n\n"
|
| 781 |
result += f"Suggested: {final_output}"
|
| 782 |
|
| 783 |
# Show alternative if requested and available
|
| 784 |
if suggest_alternative_anchor and res.get("alternative_anchor"):
|
| 785 |
alt_anchor = res["alternative_anchor"]
|
| 786 |
-
|
| 787 |
-
alt_sentence = res.get("alternative_sentence", "")
|
| 788 |
|
| 789 |
-
|
| 790 |
-
|
| 791 |
-
|
| 792 |
-
|
| 793 |
-
|
| 794 |
-
|
| 795 |
-
if smart_rewrite and alt_sentence:
|
| 796 |
-
alt_g = gpt_rewrite(alt_sentence, alt_anchor, target_url, style="neutral", language=alt_language_name)
|
| 797 |
-
alt_final = alt_g["sentence_html"]
|
| 798 |
else:
|
| 799 |
-
|
| 800 |
-
|
| 801 |
-
# Polish if needed
|
| 802 |
-
if not res.get("alternative_exact_match", False):
|
| 803 |
-
alt_polished = gpt_validate_and_polish(alt_final, alt_anchor, target_url, language=alt_language_name)
|
| 804 |
-
alt_final = alt_polished.get("sentence_html", alt_final)
|
| 805 |
|
| 806 |
-
|
|
|
|
| 807 |
|
| 808 |
# Add alternative as Result 2
|
| 809 |
result += f"\n\n{'='*50}\n\n"
|
| 810 |
-
result += f"
|
| 811 |
-
result += f"π‘
|
| 812 |
-
|
| 813 |
-
|
|
|
|
| 814 |
|
| 815 |
return result
|
| 816 |
|
|
|
|
| 262 |
return rewritten, False
|
| 263 |
|
| 264 |
def find_alternative_anchor(blocks, target_url, original_anchor):
    """
    Generate new, linkable content built around a keyword from the target page.

    The flow is: extract text blocks from ``target_url``, ask GPT for search
    keywords describing that page, detect the language of the source article,
    then ask GPT to write a short addition that links one keyword to the
    target URL.

    Args:
        blocks: Paragraph strings of the source article.
        target_url: URL of the page the generated content should link to.
        original_anchor: The anchor text originally requested; used only as a
            last-resort keyword if neither GPT nor the keyword list supply one.

    Returns:
        ``(chosen_keyword, content)`` on success, where ``content`` is
        ``"[Insert after paragraph N]: <reasoning>\\n\\n<new content>"``
        (N is 1-based). ``(None, None)`` if any step fails or produces
        nothing usable. Never raises: unexpected errors are logged and
        reported as ``(None, None)``.
    """
    try:
        print(f"[Alternative] Extracting target page content from {target_url}")

        # Step 1: Extract target page content (via the shared block extractor)
        target_blocks = get_text_blocks(target_url, max_paragraphs=5)

        if not target_blocks:
            print("[Alternative] No content extracted from target page")
            return None, None

        print(f"[Alternative] Extracted {len(target_blocks)} blocks from target")

        # Step 2: Get search keywords from target content
        keywords = gpt_get_search_keywords(target_blocks, target_url)
        print(f"[Alternative] Keywords identified: {keywords}")

        if not keywords or not isinstance(keywords, list):
            print("[Alternative] No valid keywords returned")
            return None, None

        # Step 3: Detect language from source article
        source_text = " ".join(blocks[:2])  # Use first 2 paragraphs for detection
        detected_lang = detect_language(source_text)
        language_name = get_language_name(detected_lang)
        print(f"[Alternative] Detected language: {language_name}")

        # Step 4: Generate new content with keyword
        result = gpt_generate_content_with_keyword(
            source_blocks=blocks,
            keywords=keywords,
            target_url=target_url,
            language=language_name,
        )

        if not result or not isinstance(result, dict):
            print("[Alternative] Content generation failed")
            return None, None

        # Without actual content there is nothing to suggest. Returning only
        # the position header would look like a valid (truthy) suggestion to
        # the caller and render as an empty "Result 2".
        new_content = str(result.get("new_content") or "").strip()
        if not new_content:
            print("[Alternative] Content generation failed")
            return None, None

        chosen_keyword = result.get("chosen_keyword") or keywords[0] or original_anchor

        # The model may return the index as a string, float or null; a bad
        # index should not throw away otherwise good content.
        try:
            insert_after = max(0, int(result.get("insert_after_paragraph") or 0))
        except (TypeError, ValueError):
            insert_after = 0

        # Collapse whitespace so the reasoning can never contain a blank line:
        # the display code separates header from content on the first "\n\n".
        reasoning = " ".join(str(result.get("reasoning") or "").split())

        # Format the response for compatibility
        # Return: (anchor_text, formatted_content_with_position)
        position_text = f"[Insert after paragraph {insert_after + 1}]: {reasoning}"

        return chosen_keyword, f"{position_text}\n\n{new_content}"

    except Exception as e:
        # Best-effort feature: log with traceback and let the caller continue
        # without an alternative suggestion.
        print(f"[Alternative] Critical error: {e}")
        import traceback
        traceback.print_exc()
        return None, None
|
| 323 |
|
| 324 |
def suggest_insertions(source_url, target_url, anchor_text, top_k=1, suggest_alternative=False):
|
|
|
|
| 443 |
# If anchor not present in article and alternative suggestion requested
|
| 444 |
if suggest_alternative and not keyword_present:
|
| 445 |
try:
|
| 446 |
+
# Generate new content with keywords from target page
|
| 447 |
+
alt_anchor, alt_content = find_alternative_anchor(blocks, target_url, anchor_text)
|
| 448 |
|
| 449 |
+
if alt_anchor and alt_content:
|
|
|
|
|
|
|
| 450 |
result["alternative_anchor"] = alt_anchor
|
| 451 |
+
result["alternative_sentence_original"] = "" # No original since it's new content
|
| 452 |
+
result["alternative_sentence"] = alt_content
|
| 453 |
+
result["alternative_exact_match"] = True # It's generated with the link
|
| 454 |
except Exception as e:
|
| 455 |
+
print(f"Error generating alternative content: {e}")
|
| 456 |
# Continue without alternative
|
| 457 |
|
| 458 |
results.append(result)
|
|
|
|
| 621 |
|
| 622 |
return {"sentence_html": out}
|
| 623 |
|
| 624 |
+
def gpt_get_search_keywords(target_content, target_url):
    """
    Analyze target page content and get search keywords people would use.

    Args:
        target_content: Either a list of text blocks or a single string with
            the target page's text.
        target_url: URL of the target page (sent to the model and used in the
            cache key).

    Returns:
        A list of keyword strings. Generic placeholder keywords are returned
        when no OpenAI API key is configured or the request fails;
        ``["related content"]`` is returned when the response has no usable
        ``keywords`` field. The list may be empty if the model returned only
        blank or non-string entries.
    """
    if not OPENAI_API_KEY:
        return ["related content", "learn more", "additional information"]

    # Limit content to avoid token limits. Both input shapes are reduced to a
    # single string and capped, so a handful of very long blocks cannot blow
    # past the intended size.
    if isinstance(target_content, list):
        content_preview = " ".join(str(block) for block in target_content[:5])
    else:
        content_preview = str(target_content or "")
    content_preview = content_preview[:3000]

    # Create cache key from the first 500 *characters* of the text actually
    # sent (slicing the raw list would take 500 blocks, not 500 characters).
    cache_key = hashlib.md5(
        f"keywords_{target_url}_{content_preview[:500]}".encode()
    ).hexdigest()

    system = (
        "You are an SEO expert. Analyze the provided web page content and identify "
        "5-10 search keywords or phrases that people would typically use to find this page. "
        "Focus on practical, real search terms that users would type into Google. "
        "Return a JSON object with a 'keywords' array containing 5-10 keyword phrases."
    )

    user = {
        "task": "identify_search_keywords",
        "page_content": content_preview,
        "url": target_url,
        "requirements": {
            "count": "5-10 keywords",
            "type": "practical search terms",
            "focus": "what users would actually search for"
        }
    }

    try:
        obj = _openai_chat_cached(cache_key, PREFERRED_OPENAI_MODEL, system, user)
    except Exception as e:
        print(f"[GPT] Keywords extraction failed: {e}")
        return ["related content", "learn more", "additional information"]

    # The model's JSON is untrusted: tolerate a non-dict reply, a missing key,
    # a bare string instead of an array, and blank/non-string entries.
    raw_keywords = obj.get("keywords") if isinstance(obj, dict) else None
    if isinstance(raw_keywords, str):
        raw_keywords = [raw_keywords]
    if not isinstance(raw_keywords, list):
        return ["related content"]

    return [kw.strip() for kw in raw_keywords if isinstance(kw, str) and kw.strip()]
|
| 662 |
+
|
| 663 |
+
def gpt_generate_content_with_keyword(source_blocks, keywords, target_url, language="English"):
    """
    Generate new content with the best keyword and specify where to insert it.

    The preferred model is tried first; if that call raises or returns
    something other than a JSON object, the fallback model is tried once.

    Args:
        source_blocks: Paragraph strings of the source article. Only the first
            7 are sent to the model as context.
        keywords: Candidate keyword phrases related to the target page.
        target_url: URL the chosen keyword should link to.
        language: Human-readable language name the content must be written in.

    Returns:
        The model's response dict (requested keys: ``chosen_keyword``,
        ``new_content``, ``insert_after_paragraph``, ``reasoning``), or
        ``None`` when no API key is configured, ``keywords`` is empty, or both
        models fail.
    """
    if not OPENAI_API_KEY or not keywords:
        return None

    # Create cache key
    source_preview = " ".join(source_blocks[:3])[:500]
    cache_key = hashlib.md5(
        f"generate_{source_preview}_{str(keywords)}_{target_url}_{language}".encode()
    ).hexdigest()

    system = (
        f"You are a skilled content writer writing in {language}. "
        "Given an article and a list of keywords related to a target page, "
        "create a NATURAL addition to the article that incorporates the most suitable keyword. "
        "The addition should flow seamlessly with the existing content. "
        "\n\nYOUR TASK:\n"
        "1. Choose the ONE keyword that fits most naturally with the article's context\n"
        "2. Create new content (1-3 sentences OR a paragraph if needed) that naturally includes this keyword\n"
        "3. Specify AFTER which paragraph number (0-based) to insert this content\n"
        "4. The keyword should be wrapped in an HTML link to the target URL\n"
        f"5. Write in {language} and preserve special characters\n"
        "\n\nReturn JSON with keys:\n"
        "- 'chosen_keyword': the keyword you selected\n"
        "- 'new_content': the HTML content with <a href> link\n"
        "- 'insert_after_paragraph': paragraph number (0-based) after which to insert\n"
        "- 'reasoning': brief explanation of placement choice"
    )

    user = {
        "article_paragraphs": source_blocks[:7],  # First 7 paragraphs for context
        "available_keywords": keywords,
        "target_url": target_url,
        "language": language,
        "requirements": {
            "natural_flow": True,
            "include_link": True,
            "preserve_tone": True
        }
    }

    # Separate cache keys keep a fallback answer from being served later as if
    # it had come from the preferred model.
    attempts = (
        (cache_key, PREFERRED_OPENAI_MODEL),
        (cache_key + "_fallback", FALLBACK_OPENAI_MODEL),
    )
    for attempt_key, model in attempts:
        try:
            obj = _openai_chat_cached(attempt_key, model, system, user)
        except Exception as e:
            # Catch Exception, not everything: Ctrl-C / SystemExit must still
            # propagate. Log every failure, including the fallback's.
            print(f"[GPT] Content generation failed: {e}")
            continue
        if isinstance(obj, dict):
            return obj
        # Callers use .get() on the result, so anything but a dict is unusable.
        print(f"[GPT] Content generation failed: unexpected response type {type(obj).__name__}")

    return None
|
| 714 |
+
|
| 715 |
def to_plain_text(html_or_text):
|
| 716 |
"""Convert HTML to plain text, properly handling special characters."""
|
| 717 |
text = BeautifulSoup(html_or_text, "html.parser").get_text(separator=" ", strip=True)
|
|
|
|
| 764 |
# Anchor is in the suggested sentence - just show where to add the link
|
| 765 |
final_output = to_plain_text(draft_html) if plain_text else draft_html
|
| 766 |
result = warn + f"✅ **Anchor text '{anchor_text}' found in article!**\n\n"
|
| 767 |
+
result += f"π Add link here:\n\n"
|
| 768 |
result += f"{final_output}"
|
| 769 |
else:
|
| 770 |
# Anchor is in article but not in this sentence
|
|
|
|
| 779 |
final_output = to_plain_text(final_html) if plain_text else final_html
|
| 780 |
|
| 781 |
result = warn + f"✅ **Anchor text '{anchor_text}' found in article!**\n\n"
|
| 782 |
+
result += f"π Add link here:\n\n"
|
| 783 |
result += f"{final_output}"
|
| 784 |
else:
|
| 785 |
# Anchor doesn't exist in article at all - need to add it
|
|
|
|
| 794 |
final_output = to_plain_text(final_html) if plain_text else final_html
|
| 795 |
|
| 796 |
result = warn + f"⚠️ **Anchor text '{anchor_text}' not found in article**\n\n"
|
| 797 |
+
result += f"π Result 1 - Suggested placement:\n\n"
|
| 798 |
result += f"Original: {original_sentence}\n\n"
|
| 799 |
result += f"Suggested: {final_output}"
|
| 800 |
|
| 801 |
# Show alternative if requested and available
|
| 802 |
if suggest_alternative_anchor and res.get("alternative_anchor"):
|
| 803 |
alt_anchor = res["alternative_anchor"]
|
| 804 |
+
alt_content = res.get("alternative_sentence", "") # This now contains position info + content
|
|
|
|
| 805 |
|
| 806 |
+
if alt_content:
|
| 807 |
+
# Parse if there's position information
|
| 808 |
+
if "[Insert after paragraph" in alt_content:
|
| 809 |
+
parts = alt_content.split("\n\n", 1)
|
| 810 |
+
position_info = parts[0] if len(parts) > 0 else ""
|
| 811 |
+
actual_content = parts[1] if len(parts) > 1 else alt_content
|
|
|
|
|
|
|
|
|
|
| 812 |
else:
|
| 813 |
+
position_info = ""
|
| 814 |
+
actual_content = alt_content
|
|
|
|
|
|
|
|
|
|
|
|
|
| 815 |
|
| 816 |
+
# The content already has the link included from GPT
|
| 817 |
+
alt_output = to_plain_text(actual_content) if plain_text else actual_content
|
| 818 |
|
| 819 |
# Add alternative as Result 2
|
| 820 |
result += f"\n\n{'='*50}\n\n"
|
| 821 |
+
result += f"π Result 2 - Suggested new content to add:\n"
|
| 822 |
+
result += f"💡 Using keyword: '{alt_anchor}'\n"
|
| 823 |
+
if position_info:
|
| 824 |
+
result += f"π {position_info}\n"
|
| 825 |
+
result += f"\n{alt_output}"
|
| 826 |
|
| 827 |
return result
|
| 828 |
|