Military_Topical_Sentiment_Analysis / keyword_extractor.py
Sami2000's picture
Upload 7 files
12d8780 verified
raw
history blame contribute delete
849 Bytes
# keyword_extractor.py (using KeyBERT, no OpenAI required)
from keybert import KeyBERT
# Initialize KeyBERT with a small, efficient model.
# This runs once, when the module is imported; the resulting instance is the
# one extract_keywords() below uses for every call.
kw_model = KeyBERT('all-MiniLM-L6-v2')
def extract_keywords(articles, num_keywords=10):
    """Extract the top keywords/phrases from a collection of articles.

    The title and content of every article are concatenated into a single
    document, which is then passed to the module-level ``kw_model``.

    Args:
        articles: Iterable of dicts, each optionally carrying ``'title'`` and
            ``'content'``. Falsy entries (``None``, ``{}``) are skipped, and a
            missing or ``None`` field is treated as an empty string. ``None``
            instead of an iterable is treated as "no articles".
        num_keywords: Maximum number of keywords/phrases to return.

    Returns:
        A list of unique keyword/phrase strings, in the order the model
        returned them. Empty when there is no text to analyse or when
        ``num_keywords`` is not positive.
    """
    # Nothing was asked for, so do not hand a non-positive top_n to the model.
    if num_keywords <= 0:
        return []

    pieces = []
    for art in articles or []:
        if not art:
            continue
        # ``.get(key, "")`` only covers a *missing* key; a key that is present
        # with value None would break string concatenation, so normalise here.
        title = art.get("title") or ""
        content = art.get("content") or ""
        pieces.append(f"{title} {content}")
    all_text = " ".join(pieces)

    # No usable text: skip the (comparatively expensive) model call entirely.
    if not all_text.strip():
        return []

    # Extract top keywords and phrases (unigrams and bigrams, English
    # stop words removed).
    scored = kw_model.extract_keywords(
        all_text,
        keyphrase_ngram_range=(1, 2),
        stop_words='english',
        top_n=num_keywords,
    )

    # ``scored`` is a list of tuples: [(keyword, score), ...]. Keep only the
    # keyword, and drop any repeats while preserving the ranking order so the
    # "unique" promise in the docstring always holds.
    return list(dict.fromkeys(kw for kw, _score in scored))