|
|
import requests
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
|
|
def get_synonyms_from_daum(word: str) -> list[str]:
    """Look up *word* on the Daum dictionary search page and return related terms.

    Sends a GET request to ``https://dic.daum.net/search.do`` with *word* as
    the ``q`` query parameter and hands the response body to
    ``extract_synonyms_from_html``.

    Args:
        word: The term to search for.

    Returns:
        Whatever ``extract_synonyms_from_html`` returns for the response
        body, or an empty list when the request fails (connection error,
        timeout, or a non-2xx status).
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
    }
    params = {
        'q': word,
    }

    try:
        # Without a timeout, requests waits indefinitely on a stalled server.
        response = requests.get(
            "https://dic.daum.net/search.do",
            params=params,
            headers=headers,
            timeout=10,
        )
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Error fetching from Daum: {e}")
        # Honour the declared return type instead of implicitly returning None.
        return []

    return extract_synonyms_from_html(response.text)
|
|
|
|
|
|
def extract_synonyms_from_html(html: str) -> list[str]:
    """Collect the unique text of every ``.link_relate`` element in *html*.

    Args:
        html: Markup to parse with BeautifulSoup's ``html.parser`` backend.

    Returns:
        The whitespace-stripped, non-empty texts of the matched elements,
        de-duplicated and kept in first-seen order. An empty list is
        returned if anything raises along the way.
    """
    try:
        document = BeautifulSoup(html, 'html.parser')
        labels = (
            node.get_text(strip=True)
            for node in document.select('.link_relate')
        )
        # dict keys keep insertion order, so this drops repeats while
        # preserving the position of each first occurrence.
        synonyms = list(dict.fromkeys(label for label in labels if label))

        print(f"Extracted synonyms: {synonyms}")
        return synonyms
    except Exception as e:
        print(f"Error parsing HTML: {e}")
        return []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def mask_by_position(sentence: str, start: int, end: int) -> str:
    """Return *sentence* with a span replaced by the literal ``[MASK]`` token.

    Args:
        sentence: The text to mask.
        start: Position of the span start; the text kept before the mask is
            ``sentence[:start - 1]``.
        end: Position where the kept text resumes; the text kept after the
            mask is ``sentence[end - 1:]``.

    Returns:
        The prefix, ``[MASK]``, and the suffix joined together.
    """
    # NOTE(review): the -1 offsets suggest 1-based positions — confirm with callers.
    prefix = sentence[:start - 1]
    suffix = sentence[end - 1:]
    return f"{prefix}[MASK]{suffix}"
|
|
|
|
|
|
|
|
|
|
|
|
|