| |
|
| |
|
| | from keybert import KeyBERT
|
| |
|
| |
|
# Shared KeyBERT instance, created once at import time so that every call to
# extract_keywords() reuses the same model object instead of building a new one.
kw_model = KeyBERT(model='all-MiniLM-L6-v2')
|
| |
|
def extract_keywords(articles, num_keywords=10):
    """
    Extract the top keywords/phrases from a collection of articles.

    The title and content of every non-empty article are merged into one
    text, which is handed to the module-level ``kw_model`` in a single call
    (1- and 2-word phrases, English stop words removed).

    articles: list of dicts, each with 'title' and 'content'. Falsy entries
        are skipped; missing or ``None`` fields count as empty strings.
        ``None`` is treated like an empty list.
    num_keywords: maximum number of keywords to return (passed as ``top_n``).
    returns: list of unique keywords/phrases (strings), in the order the
        model ranked them. ``[]`` when there is no text to analyse or when
        ``num_keywords`` is not positive.
    """
    # Asking for zero (or fewer) keywords needs no model call at all.
    if num_keywords <= 0:
        return []

    # `or ""` (rather than a .get default) also covers keys that are present
    # but explicitly set to None, which would otherwise break the "+" below.
    all_text = " ".join(
        (art.get("title") or "") + " " + (art.get("content") or "")
        for art in (articles or [])
        if art
    )

    # Nothing but whitespace: skip the model entirely.
    if not all_text.strip():
        return []

    # NOTE(review): all articles are embedded as ONE document. If the
    # embedding model truncates long inputs, later articles may have little
    # influence on the ranking -- confirm this is acceptable.
    scored = kw_model.extract_keywords(
        all_text,
        keyphrase_ngram_range=(1, 2),
        stop_words='english',
        top_n=num_keywords,
    )

    # Drop the scores; dict.fromkeys enforces the documented uniqueness while
    # keeping the model's ranking order.
    return list(dict.fromkeys(phrase for phrase, _score in scored))
|
| |
|
| |
|
| |
|
| |
|
| |
|