Spaces:
Sleeping
Sleeping
| from typing import List, Tuple | |
| from keybert import KeyBERT | |
| class KeywordExtractor: | |
| def __init__(self): | |
| """ | |
| Inisialisasi Keyword Extractor menggunakan KeyBERT. | |
| Secara default menggunakan model multilingual yang ringan. | |
| """ | |
| # "paraphrase-multilingual-MiniLM-L12-v2" is good for Indonesian | |
| model_name = "paraphrase-multilingual-MiniLM-L12-v2" | |
| print(f"Loading KeyBERT model: {model_name}...") | |
| try: | |
| self.kw_model = KeyBERT(model=model_name) | |
| except Exception as e: | |
| print(f"Error loading KeyBERT: {e}") | |
| self.kw_model = None | |
| def extract_keywords(self, text: str, top_n: int = 5) -> List[str]: | |
| """ | |
| Mengekstrak kata kunci dari teks menggunakan KeyBERT. | |
| Mengembalikan daftar string (kata kunci). | |
| """ | |
| if not text or not self.kw_model: | |
| return [] | |
| try: | |
| # We can extract n-grams (1 to 2 words) | |
| keywords_with_scores = self.kw_model.extract_keywords( | |
| text, | |
| keyphrase_ngram_range=(1, 2), | |
| stop_words=None, | |
| top_n=top_n | |
| ) | |
| # KeyBERT returns a list of tuples: (keyword, score) | |
| # We just want the keywords sorted by score (descending) | |
| keywords = [kw[0] for kw in keywords_with_scores] | |
| return keywords | |
| except Exception as e: | |
| print(f"Keyword extraction error: {e}") | |
| return [] | |
| def extract_with_scores(self, text: str, top_n: int = 5) -> List[Tuple[str, float]]: | |
| """ | |
| Mengekstrak kata kunci beserta skor kepentingannya. | |
| """ | |
| if not text or not self.kw_model: | |
| return [] | |
| return self.kw_model.extract_keywords( | |
| text, | |
| keyphrase_ngram_range=(1, 2), | |
| stop_words=None, | |
| top_n=top_n | |
| ) | |
| # Example usage: | |
| # extractor = KeywordExtractor() | |
| # keywords = extractor.extract_keywords("Pemerintah mengalokasikan dana pendidikan sebesar 20 triliun rupiah.") | |
| # print(keywords) | |