Update app.py
Browse files
app.py
CHANGED
|
@@ -286,52 +286,48 @@ def find_alternative_anchor(blocks, target_url, original_anchor, target_context=
|
|
| 286 |
clean = word.strip('.,!?;:"\'()[]{}')
|
| 287 |
if len(clean) > 3 and clean.isalpha():
|
| 288 |
important_words.append(clean)
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
target_text_combined = f"{target_title} {target_meta} {target_headings}".lower()
|
| 292 |
-
|
| 293 |
-
# Look for domain-specific terms (cleaning, hotel, service, luxury, etc.)
|
| 294 |
-
domain_indicators = ['hotel', 'cleaning', 'service', 'luxury', 'housekeeping',
|
| 295 |
-
'maintenance', 'staff', 'room', 'suite', 'amenities',
|
| 296 |
-
'hospitality', 'facility', 'hygiene', 'sanitation',
|
| 297 |
-
'laundry', 'janitorial', 'professional', 'quality']
|
| 298 |
-
|
| 299 |
-
for indicator in domain_indicators:
|
| 300 |
-
if indicator in target_text_combined:
|
| 301 |
-
target_keywords.add(indicator)
|
| 302 |
|
| 303 |
print(f"\nTarget page keywords detected: {list(target_keywords)[:10]}")
|
| 304 |
|
| 305 |
-
# Now search for
|
| 306 |
full_text = " ".join(blocks)
|
| 307 |
sentences = re.split(r'[.!?]', full_text)
|
| 308 |
|
| 309 |
-
candidate_anchors = {} # phrase -> (sentence, score)
|
| 310 |
|
| 311 |
for sentence in sentences:
|
| 312 |
if not sentence or len(sentence.strip()) < 20:
|
| 313 |
continue
|
| 314 |
|
| 315 |
sentence_lower = sentence.lower()
|
| 316 |
-
|
| 317 |
-
# Look for meaningful phrases (not random fragments)
|
| 318 |
words = sentence.split()
|
| 319 |
|
| 320 |
-
#
|
|
|
|
| 321 |
for word in words:
|
| 322 |
clean_word = word.strip('.,!?;:"\'()[]{}')
|
| 323 |
-
if (len(clean_word) > 4 and
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 332 |
|
| 333 |
-
# Look for 2-4 word
|
| 334 |
-
for length in range(2, 5):
|
| 335 |
for i in range(len(words) - length + 1):
|
| 336 |
if i < 0 or i + length > len(words):
|
| 337 |
continue
|
|
@@ -340,87 +336,88 @@ def find_alternative_anchor(blocks, target_url, original_anchor, target_context=
|
|
| 340 |
phrase = ' '.join(phrase_words)
|
| 341 |
phrase_clean = phrase.strip('.,!?;:"\'()')
|
| 342 |
|
| 343 |
-
#
|
| 344 |
-
skip_words = {'the', 'a', 'an', 'and', 'or', 'but', '
|
| 345 |
-
'for', 'of', 'with', 'by', 'from', 'as', 'is', 'was', 'are',
|
| 346 |
-
'were', 'be', 'have', 'has', 'had', 'do', 'does', 'did',
|
| 347 |
-
'will', 'would', 'could', 'should', 'may', 'might', 'must',
|
| 348 |
-
'shall', 'can', 'need', 'ought', 'used', 'if', 'then', 'than'}
|
| 349 |
|
| 350 |
first_word = phrase_words[0].lower().strip('.,!?;:"\'')
|
| 351 |
last_word = phrase_words[-1].lower().strip('.,!?;:"\'')
|
| 352 |
|
| 353 |
-
#
|
| 354 |
-
if (
|
| 355 |
-
last_word in skip_words or
|
| 356 |
-
len(phrase_clean) < 8 or
|
| 357 |
len(phrase_clean) > 50 or
|
| 358 |
-
not phrase_clean[0].isalpha()
|
| 359 |
-
phrase_clean.endswith("'s")): # Skip possessives
|
| 360 |
continue
|
| 361 |
|
| 362 |
-
#
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
# Score based on relevance to target page
|
| 366 |
-
relevance_score = 0
|
| 367 |
-
|
| 368 |
-
# Direct keyword matches
|
| 369 |
-
for kw in target_keywords:
|
| 370 |
-
if kw in phrase_lower:
|
| 371 |
-
relevance_score += 2
|
| 372 |
-
|
| 373 |
-
# Semantic relevance to hotel/cleaning/service domain
|
| 374 |
-
for indicator in ['hotel', 'luxury', 'service', 'room', 'suite', 'clean',
|
| 375 |
-
'staff', 'guest', 'resort', 'boutique', 'accommodation']:
|
| 376 |
-
if indicator in phrase_lower:
|
| 377 |
-
relevance_score += 1
|
| 378 |
|
| 379 |
-
#
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 389 |
|
| 390 |
-
|
| 391 |
-
total_score = (relevance_score * 0.4) + (semantic_score * 0.6)
|
| 392 |
|
| 393 |
-
|
| 394 |
-
|
| 395 |
-
|
| 396 |
-
|
| 397 |
-
|
| 398 |
-
|
| 399 |
-
|
| 400 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 401 |
|
| 402 |
# Select the best anchor from candidates
|
| 403 |
if not candidate_anchors:
|
| 404 |
-
print("\n✗ No
|
| 405 |
-
return None, None
|
| 406 |
|
| 407 |
# Sort by score and get the best one
|
| 408 |
sorted_candidates = sorted(candidate_anchors.items(), key=lambda x: x[1][1], reverse=True)
|
| 409 |
-
best_anchor, (best_sentence, best_score) = sorted_candidates[0]
|
| 410 |
-
|
| 411 |
-
# Final validation - make sure it's actually good
|
| 412 |
-
if best_score < 0.35 or len(best_anchor) < 5:
|
| 413 |
-
print(f"\n✗ Best candidate '{best_anchor}' not good enough (score: {best_score:.3f})")
|
| 414 |
-
return None, None
|
| 415 |
|
| 416 |
print(f"\n✓ Best alternative anchor: '{best_anchor}' (relevance: {best_score:.3f})")
|
| 417 |
-
|
|
|
|
|
|
|
|
|
|
| 418 |
|
| 419 |
except Exception as e:
|
| 420 |
print(f"Critical error in find_alternative_anchor: {e}")
|
| 421 |
import traceback
|
| 422 |
traceback.print_exc()
|
| 423 |
-
return None, None
|
| 424 |
|
| 425 |
except Exception as e:
|
| 426 |
print(f"Critical error in find_alternative_anchor: {e}")
|
|
@@ -673,7 +670,7 @@ def suggest_insertions(source_url, target_url, anchor_text, top_k=1, suggest_alt
|
|
| 673 |
try:
|
| 674 |
# Find a completely different anchor and sentence
|
| 675 |
# Pass the target_context we already analyzed
|
| 676 |
-
alt_anchor, alt_sentence = find_alternative_anchor(blocks, target_url, anchor_text, target_context)
|
| 677 |
|
| 678 |
if alt_anchor and alt_sentence:
|
| 679 |
# Create the sentence with the alternative anchor
|
|
@@ -682,6 +679,7 @@ def suggest_insertions(source_url, target_url, anchor_text, top_k=1, suggest_alt
|
|
| 682 |
result["alternative_sentence_original"] = alt_sentence
|
| 683 |
result["alternative_sentence"] = alt_rewritten
|
| 684 |
result["alternative_exact_match"] = alt_exact
|
|
|
|
| 685 |
except Exception as e:
|
| 686 |
print(f"Error finding alternative anchor: {e}")
|
| 687 |
# Continue without alternative
|
|
@@ -800,7 +798,51 @@ def gpt_rewrite(sentence_html, anchor_text, target_url, style="neutral", languag
|
|
| 800 |
# Don't check for exact anchor text match as it might have special chars
|
| 801 |
return {"sentence_html": out}
|
| 802 |
|
| 803 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 804 |
"""
|
| 805 |
Final QA pass with language support.
|
| 806 |
"""
|
|
@@ -957,30 +999,51 @@ def run_tool(source_url, target_url, anchor_text, smart_rewrite, plain_text, sug
|
|
| 957 |
alt_anchor = res["alternative_anchor"]
|
| 958 |
alt_sentence_original = res.get("alternative_sentence_original", "")
|
| 959 |
alt_sentence = res.get("alternative_sentence", "")
|
|
|
|
| 960 |
|
| 961 |
# Detect language for alternative sentence
|
| 962 |
if alt_sentence_original:
|
| 963 |
alt_detected_lang = detect_language(alt_sentence_original)
|
| 964 |
alt_language_name = get_language_name(alt_detected_lang)
|
| 965 |
|
| 966 |
-
#
|
| 967 |
-
if
|
| 968 |
-
|
| 969 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 970 |
else:
|
| 971 |
-
|
| 972 |
-
|
| 973 |
-
|
| 974 |
-
|
| 975 |
-
|
| 976 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 977 |
|
| 978 |
alt_output = to_plain_text(alt_final) if plain_text else alt_final
|
| 979 |
|
| 980 |
# Add alternative as Result 2
|
| 981 |
result += f"\n\n{'='*50}\n\n"
|
| 982 |
result += f"🔗 Result 2 - Alternative from article:\n"
|
| 983 |
-
result += f"💡 Alternative anchor: '{alt_anchor}'\n
|
|
|
|
|
|
|
|
|
|
|
|
|
| 984 |
result += f"Original: {alt_sentence_original}\n\n"
|
| 985 |
result += f"Suggested: {alt_output}"
|
| 986 |
|
|
|
|
| 286 |
clean = word.strip('.,!?;:"\'()[]{}')
|
| 287 |
if len(clean) > 3 and clean.isalpha():
|
| 288 |
important_words.append(clean)
|
| 289 |
+
if len(clean) > 4: # Add to keywords
|
| 290 |
+
target_keywords.add(clean)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 291 |
|
| 292 |
print(f"\nTarget page keywords detected: {list(target_keywords)[:10]}")
|
| 293 |
|
| 294 |
+
# Now search for phrases in source article
|
| 295 |
full_text = " ".join(blocks)
|
| 296 |
sentences = re.split(r'[.!?]', full_text)
|
| 297 |
|
| 298 |
+
candidate_anchors = {} # phrase -> (sentence, score, needs_bridge)
|
| 299 |
|
| 300 |
for sentence in sentences:
|
| 301 |
if not sentence or len(sentence.strip()) < 20:
|
| 302 |
continue
|
| 303 |
|
| 304 |
sentence_lower = sentence.lower()
|
|
|
|
|
|
|
| 305 |
words = sentence.split()
|
| 306 |
|
| 307 |
+
# Look for ALL potential phrases, even loosely related ones
|
| 308 |
+
# Single important words
|
| 309 |
for word in words:
|
| 310 |
clean_word = word.strip('.,!?;:"\'()[]{}')
|
| 311 |
+
if (len(clean_word) > 4 and clean_word.isalpha()):
|
| 312 |
+
# Calculate relevance even for loose matches
|
| 313 |
+
try:
|
| 314 |
+
word_emb = embed([clean_word])[0]
|
| 315 |
+
target_emb = embed([target_context.get("summary", "")[:500]])[0]
|
| 316 |
+
semantic_score = F.cosine_similarity(
|
| 317 |
+
word_emb.unsqueeze(0),
|
| 318 |
+
target_emb.unsqueeze(0)
|
| 319 |
+
).item()
|
| 320 |
+
|
| 321 |
+
# Lower threshold for considering candidates
|
| 322 |
+
if semantic_score > 0.15: # Much lower threshold
|
| 323 |
+
needs_bridge = semantic_score < 0.3 # Mark if needs bridge content
|
| 324 |
+
if clean_word not in candidate_anchors or candidate_anchors[clean_word][1] < semantic_score:
|
| 325 |
+
candidate_anchors[clean_word] = (sentence.strip(), semantic_score, needs_bridge)
|
| 326 |
+
except:
|
| 327 |
+
continue
|
| 328 |
|
| 329 |
+
# Look for 2-4 word phrases
|
| 330 |
+
for length in range(2, min(5, len(words) + 1)):
|
| 331 |
for i in range(len(words) - length + 1):
|
| 332 |
if i < 0 or i + length > len(words):
|
| 333 |
continue
|
|
|
|
| 336 |
phrase = ' '.join(phrase_words)
|
| 337 |
phrase_clean = phrase.strip('.,!?;:"\'()')
|
| 338 |
|
| 339 |
+
# More lenient filtering
|
| 340 |
+
skip_words = {'the', 'a', 'an', 'and', 'or', 'but', 'if', 'then', 'than'}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 341 |
|
| 342 |
first_word = phrase_words[0].lower().strip('.,!?;:"\'')
|
| 343 |
last_word = phrase_words[-1].lower().strip('.,!?;:"\'')
|
| 344 |
|
| 345 |
+
# Allow more phrases through
|
| 346 |
+
if (len(phrase_clean) < 5 or
|
|
|
|
|
|
|
| 347 |
len(phrase_clean) > 50 or
|
| 348 |
+
not phrase_clean[0].isalpha()):
|
|
|
|
| 349 |
continue
|
| 350 |
|
| 351 |
+
# Skip only the worst fragments
|
| 352 |
+
if first_word in skip_words and last_word in skip_words:
|
| 353 |
+
continue
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 354 |
|
| 355 |
+
# Calculate relevance score
|
| 356 |
+
try:
|
| 357 |
+
phrase_emb = embed([phrase_clean])[0]
|
| 358 |
+
target_emb = embed([target_context.get("summary", "")[:500]])[0]
|
| 359 |
+
semantic_score = F.cosine_similarity(
|
| 360 |
+
phrase_emb.unsqueeze(0),
|
| 361 |
+
target_emb.unsqueeze(0)
|
| 362 |
+
).item()
|
| 363 |
+
|
| 364 |
+
# Accept even loosely related phrases
|
| 365 |
+
if semantic_score > 0.15: # Much lower threshold
|
| 366 |
+
needs_bridge = semantic_score < 0.3 # Mark if needs bridge
|
| 367 |
+
|
| 368 |
+
# Check for topic-related words (beauty, skincare, nail, etc.)
|
| 369 |
+
bonus = 0
|
| 370 |
+
general_beauty_terms = ['beauty', 'skincare', 'cosmetic', 'product', 'treatment',
|
| 371 |
+
'care', 'skin', 'nail', 'makeup', 'store', 'shop',
|
| 372 |
+
'korean', 'k-beauty', 'routine', 'regimen']
|
| 373 |
+
for term in general_beauty_terms:
|
| 374 |
+
if term in phrase_clean.lower():
|
| 375 |
+
bonus = 0.1
|
| 376 |
+
break
|
| 377 |
|
| 378 |
+
total_score = semantic_score + bonus
|
|
|
|
| 379 |
|
| 380 |
+
if phrase_clean not in candidate_anchors or candidate_anchors[phrase_clean][1] < total_score:
|
| 381 |
+
candidate_anchors[phrase_clean] = (sentence.strip(), total_score, needs_bridge)
|
| 382 |
+
if total_score > 0.2: # Only print decent candidates
|
| 383 |
+
print(f" Candidate: '{phrase_clean}' (score: {total_score:.3f}, needs_bridge: {needs_bridge})")
|
| 384 |
+
except:
|
| 385 |
+
continue
|
| 386 |
+
|
| 387 |
+
# If no candidates at all, try to find ANY noun phrase in the article
|
| 388 |
+
if not candidate_anchors:
|
| 389 |
+
print("\nNo semantic matches found, looking for any noun phrases...")
|
| 390 |
+
for sentence in sentences[:10]: # Check first 10 sentences
|
| 391 |
+
words = sentence.split()
|
| 392 |
+
for word in words:
|
| 393 |
+
clean_word = word.strip('.,!?;:"\'()[]{}')
|
| 394 |
+
# Any proper noun or long word
|
| 395 |
+
if clean_word and len(clean_word) > 5 and clean_word[0].isupper():
|
| 396 |
+
candidate_anchors[clean_word] = (sentence.strip(), 0.1, True) # Low score, needs bridge
|
| 397 |
+
break
|
| 398 |
+
if candidate_anchors:
|
| 399 |
+
break
|
| 400 |
|
| 401 |
# Select the best anchor from candidates
|
| 402 |
if not candidate_anchors:
|
| 403 |
+
print("\n✗ No alternative anchor found at all")
|
| 404 |
+
return None, None, False
|
| 405 |
|
| 406 |
# Sort by score and get the best one
|
| 407 |
sorted_candidates = sorted(candidate_anchors.items(), key=lambda x: x[1][1], reverse=True)
|
| 408 |
+
best_anchor, (best_sentence, best_score, needs_bridge) = sorted_candidates[0]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 409 |
|
| 410 |
print(f"\n✓ Best alternative anchor: '{best_anchor}' (relevance: {best_score:.3f})")
|
| 411 |
+
if needs_bridge:
|
| 412 |
+
print(f" → Will need bridge paragraph to connect to target topic")
|
| 413 |
+
|
| 414 |
+
return best_anchor, best_sentence, needs_bridge
|
| 415 |
|
| 416 |
except Exception as e:
|
| 417 |
print(f"Critical error in find_alternative_anchor: {e}")
|
| 418 |
import traceback
|
| 419 |
traceback.print_exc()
|
| 420 |
+
return None, None, False
|
| 421 |
|
| 422 |
except Exception as e:
|
| 423 |
print(f"Critical error in find_alternative_anchor: {e}")
|
|
|
|
| 670 |
try:
|
| 671 |
# Find a completely different anchor and sentence
|
| 672 |
# Pass the target_context we already analyzed
|
| 673 |
+
alt_anchor, alt_sentence, needs_bridge = find_alternative_anchor(blocks, target_url, anchor_text, target_context)
|
| 674 |
|
| 675 |
if alt_anchor and alt_sentence:
|
| 676 |
# Create the sentence with the alternative anchor
|
|
|
|
| 679 |
result["alternative_sentence_original"] = alt_sentence
|
| 680 |
result["alternative_sentence"] = alt_rewritten
|
| 681 |
result["alternative_exact_match"] = alt_exact
|
| 682 |
+
result["needs_bridge_paragraph"] = needs_bridge
|
| 683 |
except Exception as e:
|
| 684 |
print(f"Error finding alternative anchor: {e}")
|
| 685 |
# Continue without alternative
|
|
|
|
| 798 |
# Don't check for exact anchor text match as it might have special chars
|
| 799 |
return {"sentence_html": out}
|
| 800 |
|
| 801 |
+
def gpt_create_bridge_paragraph(anchor_text, sentence, target_url, target_context, language="English"):
    """Create a bridge paragraph that naturally connects loosely related topics.

    Used when the best anchor found in the source article is only loosely
    related to the target page, so a direct link insertion would feel forced.
    Asks the OpenAI chat model for a short (2-3 sentence) paragraph that
    transitions from the source sentence to the target topic and embeds the
    anchor link.

    Args:
        anchor_text: The anchor phrase that must carry the link.
        sentence: The source-article sentence providing context.
        target_url: URL the anchor should point to.
        target_context: Dict describing the target page; "title" and
            "meta_description" keys are read if present.
        language: Name of the output language (default "English").

    Returns:
        Dict with key "paragraph" containing the HTML paragraph with the
        link included. Falls back to {"paragraph": sentence} when no API key
        is configured or when both model calls fail.
    """
    if not OPENAI_API_KEY:
        return {"paragraph": sentence}

    # Cache key is stable per (anchor, sentence, target, language) so repeated
    # runs reuse the cached completion instead of re-billing the API.
    cache_key = hashlib.md5(f"bridge_{anchor_text}{sentence}{target_url}{language}".encode()).hexdigest()

    target_title = target_context.get("title", "")
    target_topic = target_context.get("meta_description", "")

    system = (
        f"You are a skilled content writer writing in {language}. "
        f"IMPORTANT: Preserve all special characters and diacritics from the {language} language. "
        "Your task is to create a natural bridge paragraph that connects two loosely related topics. "
        "The paragraph should flow naturally from the source topic to the target topic. "
        "RULES: "
        "(1) Start with the context from the source article "
        "(2) Create a natural transition to the target topic "
        "(3) Include the anchor link naturally "
        "(4) Make it 2-3 sentences that feel organic, not forced "
        "(5) Avoid obvious transitions like 'Speaking of...' or 'On a related note...' "
        "Return JSON with key 'paragraph' containing the HTML with the link included."
    )

    user = {
        "task": "create_bridge_paragraph",
        "source_context": sentence,
        "anchor_text": anchor_text,
        "target_url": target_url,
        "target_title": target_title,
        "target_topic": target_topic,
        "language": language,
        "instructions": "Create a smooth, natural paragraph that connects these topics",
    }

    try:
        # Preferred model first; on any failure fall back to the cheaper model.
        return _openai_chat_cached(cache_key, PREFERRED_OPENAI_MODEL, system, user)
    except Exception:  # was bare `except:` — don't swallow SystemExit/KeyboardInterrupt
        try:
            return _openai_chat_cached(cache_key + "_fallback", FALLBACK_OPENAI_MODEL, system, user)
        except Exception:
            # Last resort: return the original sentence unchanged.
            return {"paragraph": sentence}
|
| 846 |
"""
|
| 847 |
Final QA pass with language support.
|
| 848 |
"""
|
|
|
|
| 999 |
alt_anchor = res["alternative_anchor"]
|
| 1000 |
alt_sentence_original = res.get("alternative_sentence_original", "")
|
| 1001 |
alt_sentence = res.get("alternative_sentence", "")
|
| 1002 |
+
needs_bridge = res.get("needs_bridge_paragraph", False)
|
| 1003 |
|
| 1004 |
# Detect language for alternative sentence
|
| 1005 |
if alt_sentence_original:
|
| 1006 |
alt_detected_lang = detect_language(alt_sentence_original)
|
| 1007 |
alt_language_name = get_language_name(alt_detected_lang)
|
| 1008 |
|
| 1009 |
+
# If needs bridge paragraph, create one
|
| 1010 |
+
if needs_bridge and smart_rewrite:
|
| 1011 |
+
# Get target context for bridge creation
|
| 1012 |
+
target_info = {
|
| 1013 |
+
"title": res.get("target_title", ""),
|
| 1014 |
+
"meta_description": res.get("target_topic", "")
|
| 1015 |
+
}
|
| 1016 |
+
bridge_result = gpt_create_bridge_paragraph(
|
| 1017 |
+
alt_anchor,
|
| 1018 |
+
alt_sentence_original,
|
| 1019 |
+
target_url,
|
| 1020 |
+
target_info,
|
| 1021 |
+
alt_language_name
|
| 1022 |
+
)
|
| 1023 |
+
alt_final = bridge_result.get("paragraph", alt_sentence)
|
| 1024 |
else:
|
| 1025 |
+
# Apply normal GPT rewriting
|
| 1026 |
+
if smart_rewrite and alt_sentence:
|
| 1027 |
+
alt_g = gpt_rewrite(alt_sentence, alt_anchor, target_url, style="neutral", language=alt_language_name)
|
| 1028 |
+
alt_final = alt_g["sentence_html"]
|
| 1029 |
+
else:
|
| 1030 |
+
alt_final = alt_sentence
|
| 1031 |
+
|
| 1032 |
+
# Polish if needed
|
| 1033 |
+
if not res.get("alternative_exact_match", False) and smart_rewrite:
|
| 1034 |
+
alt_polished = gpt_validate_and_polish(alt_final, alt_anchor, target_url, language=alt_language_name)
|
| 1035 |
+
alt_final = alt_polished.get("sentence_html", alt_final)
|
| 1036 |
|
| 1037 |
alt_output = to_plain_text(alt_final) if plain_text else alt_final
|
| 1038 |
|
| 1039 |
# Add alternative as Result 2
|
| 1040 |
result += f"\n\n{'='*50}\n\n"
|
| 1041 |
result += f"🔗 Result 2 - Alternative from article:\n"
|
| 1042 |
+
result += f"💡 Alternative anchor: '{alt_anchor}'\n"
|
| 1043 |
+
if needs_bridge:
|
| 1044 |
+
result += f"🌉 Bridge paragraph created (topics were loosely related)\n\n"
|
| 1045 |
+
else:
|
| 1046 |
+
result += f"\n"
|
| 1047 |
result += f"Original: {alt_sentence_original}\n\n"
|
| 1048 |
result += f"Suggested: {alt_output}"
|
| 1049 |
|