# keyword_extractor.py (using KeyBERT, no OpenAI required)
from keybert import KeyBERT

# Initialize KeyBERT once at import time with a small, efficient model so
# every call to extract_keywords() reuses the same loaded model.
kw_model = KeyBERT('all-MiniLM-L6-v2')


def extract_keywords(articles, num_keywords=10):
    """Extract the top keywords/phrases from a collection of articles.

    The title and content of every article are concatenated into a single
    text, which is passed to ``kw_model.extract_keywords`` with 1- to 2-word
    phrases and English stop words removed.

    Args:
        articles: Iterable of dicts, each optionally holding 'title' and
            'content' strings. Falsy entries (``None``, ``{}``) are skipped,
            and ``None`` for the whole argument is treated as empty.
        num_keywords: Maximum number of keywords to request from the model
            (passed through as ``top_n``).

    Returns:
        List of unique keyword/phrase strings in the order the model
        returned them. Empty if there is no text to analyse.
    """
    # ``or ""`` also covers keys that are present but explicitly None,
    # which ``dict.get(key, "")`` alone would pass through and break ``+``.
    all_text = " ".join(
        (art.get("title") or "") + " " + (art.get("content") or "")
        for art in (articles or ())
        if art
    )

    # Nothing to analyse: skip the model call entirely.
    if not all_text.strip():
        return []

    # Extract top keywords and phrases.
    scored_keywords = kw_model.extract_keywords(
        all_text,
        keyphrase_ngram_range=(1, 2),
        stop_words='english',
        top_n=num_keywords,
    )

    # scored_keywords is a list of tuples: [(keyword, score), ...].
    # dict.fromkeys drops any duplicates while preserving the model's order,
    # enforcing the "unique" guarantee stated in the docstring.
    return list(dict.fromkeys(kw for kw, _score in scored_keywords))