summarizer_sentiments / fallback.py
ANMOL1140W's picture
Initial backend API
e817ebe
Raw
History Blame Contribute Delete
867 Bytes
from sklearn.feature_extraction.text import TfidfVectorizer
def extract_top_keywords(text: str, num_keywords: int = 7) -> str:
    """
    Extract the top N keywords from a text using TF-IDF.

    The text is treated as a one-document corpus. English stop words are
    ignored and the vocabulary is capped at ``num_keywords`` terms. The
    remaining terms are returned most relevant first (highest TF-IDF weight),
    with ties broken alphabetically so the output is deterministic.

    Args:
        text: The raw text to analyse.
        num_keywords: Maximum number of keywords to return.

    Returns:
        The keywords as a comma-separated string, or the message
        "Error: Could not process text for keywords." if the vectorizer
        raises (e.g. when no usable terms remain after stop-word removal).
    """
    try:
        # The vectorizer will ignore common English "stop words" (like 'the', 'a', 'is')
        vectorizer = TfidfVectorizer(stop_words='english', max_features=num_keywords)
        # We pass the text inside a list because the vectorizer expects an iterable
        tfidf_matrix = vectorizer.fit_transform([text])
        # Feature names come back in vocabulary (column) order, not by relevance
        feature_names = vectorizer.get_feature_names_out()
        # Row 0 holds the weights of our single document, aligned with feature_names
        scores = tfidf_matrix.toarray()[0]
        # Highest weight first; alphabetical tie-break keeps the result stable
        ranked = sorted(zip(feature_names, scores), key=lambda pair: (-pair[1], pair[0]))
        return ", ".join(str(term) for term, _ in ranked)
    except Exception as e:
        # Deliberate best-effort fallback: report the failure and return a
        # sentinel message instead of propagating the exception to the caller.
        print(f"Keyword extraction failed: {e}")
        return "Error: Could not process text for keywords."