Spaces:
Sleeping
Sleeping
File size: 1,702 Bytes
540b123 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 | from keybert import KeyBERT
from transformers import logging
# Runs at import time: lower the transformers logger to error level so that
# informational and warning output from the library is not emitted.
logging.set_verbosity_error()
class KeywordExtractionError(Exception):
    """Signal that keyword extraction failed with no fallback available.

    Carries no extra state; the message passed to the constructor is the
    only payload.
    """
class KeywordExtractor:
    """Wrapper around a KeyBERT model that returns plain keyword strings.

    The model is created once in the constructor, stored on ``self.kw_model``
    and reused by every call to :meth:`extract`.
    """

    def __init__(self, model_name: str = "all-MiniLM-L6-v2"):
        """Create the underlying KeyBERT model.

        Args:
            model_name: Model identifier passed to ``KeyBERT(model=...)``.

        Raises:
            KeywordExtractionError: If constructing the KeyBERT model fails
                for any reason. The original exception is chained as the
                cause.
        """
        try:
            self.kw_model = KeyBERT(model=model_name)
        except Exception as e:
            raise KeywordExtractionError(
                f"Failed to load KeyBERT model '{model_name}': {e}"
            ) from e

    def extract(
        self,
        text: str,
        num_keywords: int = 3,
        ngram_range: tuple[int, int] = (1, 2),
    ) -> list[str]:
        """Extract keywords from *text*.

        Args:
            text: Document to analyse. Must be a non-blank string.
            num_keywords: Number of keywords requested from the model
                (forwarded as ``top_n``). Must be a positive integer.
            ngram_range: Forwarded to the model as ``keyphrase_ngram_range``.

        Returns:
            The keyword strings, in the order the model returned them, with
            their scores dropped.

        Raises:
            ValueError: If *text* is not a non-blank string, or if
                *num_keywords* is not a positive integer.
            KeywordExtractionError: If the model call fails or yields no
                keywords.
        """
        if not isinstance(text, str) or not text.strip():
            raise ValueError("Input text must be a non-empty string.")
        # Reject a non-positive count up front instead of forwarding it as
        # ``top_n``. NOTE(review): the backend's handling of top_n <= 0 is not
        # visible here and may silently return unexpected results - confirm.
        if not isinstance(num_keywords, int) or num_keywords < 1:
            raise ValueError("num_keywords must be a positive integer.")

        try:
            scored = self.kw_model.extract_keywords(
                text,
                keyphrase_ngram_range=ngram_range,
                stop_words="english",
                top_n=num_keywords,
            )
            # extract_keywords returns a list of (keyword, score) tuples;
            # keep only the keyword and skip any falsy entries.
            result = [pair[0] for pair in scored if pair]
        except Exception as e:
            raise KeywordExtractionError(
                f"Unexpected error during keyword extraction: {e}"
            ) from e

        # Checked outside the try block so this error is raised directly
        # rather than being caught and re-raised by the handler above.
        if not result:
            raise KeywordExtractionError(
                "Model returned no keywords for the given text."
            )
        return result
|