File size: 1,702 Bytes
540b123
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
from keybert import KeyBERT
from transformers import logging

# Set the transformers library's logging verbosity to the error level at
# import time. NOTE(review): presumably this is to hide the warnings emitted
# while models load -- confirm that silencing them globally is intended.
logging.set_verbosity_error()


class KeywordExtractionError(Exception):
    """Signal that keywords could not be produced and no fallback exists.

    Used both when the keyword model cannot be loaded and when an
    extraction call fails or yields nothing.
    """


class KeywordExtractor:
    """Wrapper around a KeyBERT model that returns plain keyword strings.

    Failures coming from the underlying model are re-raised as
    ``KeywordExtractionError``; invalid arguments are reported as
    ``ValueError`` before the model is called.
    """

    def __init__(self, model_name: str = "all-MiniLM-L6-v2"):
        """Load the KeyBERT model.

        Args:
            model_name: Model identifier passed to ``KeyBERT(model=...)``.

        Raises:
            KeywordExtractionError: If constructing the KeyBERT model fails
                for any reason; the original exception is chained as the
                cause.
        """
        try:
            self.kw_model = KeyBERT(model=model_name)
        except Exception as e:
            raise KeywordExtractionError(
                f"Failed to load KeyBERT model '{model_name}': {e}"
            ) from e

    def extract(
        self,
        text: str,
        num_keywords: int = 3,
        ngram_range: tuple[int, int] = (1, 2),
    ) -> list[str]:
        """Extract keywords from text.

        Args:
            text: Document to analyse; must be a non-blank string.
            num_keywords: Number of keywords requested from the model
                (forwarded as ``top_n``); must be at least 1.
            ngram_range: Forwarded to the model as ``keyphrase_ngram_range``.

        Returns:
            The keyword strings, in the order the model returned them
            (scores are dropped).

        Raises:
            ValueError: If ``text`` is not a non-blank string or
                ``num_keywords`` is not a positive number.
            KeywordExtractionError: If the model call fails or produces no
                keywords.
        """
        if not isinstance(text, str) or not text.strip():
            raise ValueError("Input text must be a non-empty string.")

        # A non-positive (or non-numeric) count has no sensible meaning for a
        # "top N" selection; reject it here instead of relying on whatever
        # the backend happens to do with such a value.
        try:
            count_is_invalid = num_keywords < 1
        except TypeError:
            count_is_invalid = True
        if count_is_invalid:
            raise ValueError("num_keywords must be a positive integer.")

        try:
            scored = self.kw_model.extract_keywords(
                text,
                keyphrase_ngram_range=ngram_range,
                stop_words="english",
                top_n=num_keywords,
            )
            # extract_keywords returns a list of (keyword, score) tuples;
            # keep only the keyword and skip any empty entries.
            result = [entry[0] for entry in scored if entry]
        except Exception as e:
            raise KeywordExtractionError(
                f"Unexpected error during keyword extraction: {e}"
            ) from e

        # Checked outside the try block so this error is raised directly
        # rather than being caught and re-wrapped by the handler above.
        if not result:
            raise KeywordExtractionError(
                "Model returned no keywords for the given text."
            )

        return result