Update app.py
Browse files
app.py
CHANGED
|
@@ -6,14 +6,13 @@ import re
|
|
| 6 |
import time
|
| 7 |
warnings.filterwarnings('ignore')
|
| 8 |
|
| 9 |
-
#
|
| 10 |
-
|
| 11 |
-
'
|
| 12 |
-
'
|
| 13 |
-
'
|
| 14 |
-
'
|
| 15 |
-
'
|
| 16 |
-
'kw_pke_positionrank': 'PositionRank - Incorporates word positions'
|
| 17 |
}
|
| 18 |
|
| 19 |
# Color palette for keywords based on scores
|
|
@@ -32,78 +31,103 @@ KEYWORD_COLORS = [
|
|
| 32 |
|
| 33 |
class KeywordExtractionManager:
|
| 34 |
def __init__(self):
|
| 35 |
-
self.
|
| 36 |
-
self.spacy_model = None
|
| 37 |
|
| 38 |
-
def
|
| 39 |
-
"""Load
|
| 40 |
-
if self.
|
| 41 |
try:
|
| 42 |
-
import
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
print("spaCy model not found. Please install with: python -m spacy download en_core_web_sm")
|
| 48 |
-
return None
|
| 49 |
except Exception as e:
|
| 50 |
-
print(f"Error loading
|
| 51 |
return None
|
| 52 |
-
return self.
|
| 53 |
|
| 54 |
def extract_keywords(self, text, model_name, num_keywords=10, ngram_range=(1, 3), progress=None):
|
| 55 |
-
"""Extract keywords using the specified
|
| 56 |
try:
|
| 57 |
-
import pke
|
| 58 |
-
|
| 59 |
if progress:
|
| 60 |
progress(0.3, desc="Loading model...")
|
| 61 |
|
| 62 |
-
#
|
| 63 |
-
if '
|
| 64 |
-
|
| 65 |
-
elif '
|
| 66 |
-
|
| 67 |
-
elif '
|
| 68 |
-
|
| 69 |
-
elif 'topicrank' in model_name:
|
| 70 |
-
extractor = pke.unsupervised.TopicRank()
|
| 71 |
-
elif 'textrank' in model_name:
|
| 72 |
-
extractor = pke.unsupervised.TextRank()
|
| 73 |
-
elif 'positionrank' in model_name:
|
| 74 |
-
extractor = pke.unsupervised.PositionRank()
|
| 75 |
else:
|
| 76 |
raise ValueError(f"Unknown model: {model_name}")
|
| 77 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
if progress:
|
| 79 |
-
progress(0.5, desc="Processing
|
| 80 |
-
|
| 81 |
-
#
|
| 82 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
|
| 84 |
-
# Select candidates based on model
|
| 85 |
-
if 'multipartiterank' in model_name:
|
| 86 |
-
extractor.candidate_selection(pos={'NOUN', 'PROPN', 'ADJ'})
|
| 87 |
-
extractor.candidate_weighting(alpha=1.1, threshold=0.75, method='average')
|
| 88 |
-
elif 'topicrank' in model_name:
|
| 89 |
-
extractor.candidate_selection(pos={'NOUN', 'PROPN', 'ADJ'})
|
| 90 |
-
extractor.candidate_weighting(threshold=0.74, method='average')
|
| 91 |
-
elif 'positionrank' in model_name:
|
| 92 |
-
extractor.candidate_selection(maximum_word_number=3)
|
| 93 |
-
extractor.candidate_weighting(window=10)
|
| 94 |
-
elif 'tfidf' in model_name:
|
| 95 |
-
extractor.candidate_selection(n=ngram_range[1], stoplist=['en'])
|
| 96 |
-
extractor.candidate_weighting()
|
| 97 |
-
else:
|
| 98 |
-
# SingleRank and TextRank
|
| 99 |
-
extractor.candidate_selection(pos={'NOUN', 'PROPN', 'ADJ'})
|
| 100 |
-
extractor.candidate_weighting(window=10)
|
| 101 |
-
|
| 102 |
if progress:
|
| 103 |
progress(0.7, desc="Extracting keywords...")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
|
| 105 |
-
|
| 106 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
|
| 108 |
# Format results
|
| 109 |
results = []
|
|
@@ -111,16 +135,60 @@ class KeywordExtractionManager:
|
|
| 111 |
results.append({
|
| 112 |
'keyword': keyword,
|
| 113 |
'score': score,
|
| 114 |
-
'model': model_name.replace('
|
| 115 |
})
|
| 116 |
|
| 117 |
return results
|
| 118 |
|
| 119 |
except ImportError:
|
| 120 |
-
print("
|
| 121 |
return self.fallback_keyword_extraction(text, num_keywords)
|
| 122 |
-
|
| 123 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
return self.fallback_keyword_extraction(text, num_keywords)
|
| 125 |
|
| 126 |
def fallback_keyword_extraction(self, text, num_keywords=10):
|
|
@@ -322,7 +390,7 @@ def process_text(text, selected_model, num_keywords, ngram_min, ngram_max, progr
|
|
| 322 |
summary = f"""
|
| 323 |
## 📊 Analysis Summary
|
| 324 |
- **Keywords extracted:** {len(keywords)}
|
| 325 |
-
- **Model used:** {selected_model.replace('
|
| 326 |
- **Average relevance score:** {avg_score:.4f}
|
| 327 |
- **N-gram range:** {ngram_min}-{ngram_max} words
|
| 328 |
"""
|
|
@@ -337,7 +405,7 @@ def create_interface():
|
|
| 337 |
gr.Markdown("""
|
| 338 |
# Keyword Extraction Explorer Tool
|
| 339 |
|
| 340 |
-
Extract the most important keywords and phrases from your text using various algorithms! This tool uses
|
| 341 |
|
| 342 |
### How to use:
|
| 343 |
1. **📝 Enter your text** in the text area below
|
|
@@ -365,8 +433,8 @@ def create_interface():
|
|
| 365 |
with gr.Column(scale=1):
|
| 366 |
# Model selector
|
| 367 |
model_dropdown = gr.Dropdown(
|
| 368 |
-
choices=list(
|
| 369 |
-
value='
|
| 370 |
label="🎯 Select Keyword Extraction Model"
|
| 371 |
)
|
| 372 |
|
|
@@ -394,6 +462,13 @@ def create_interface():
|
|
| 394 |
step=1,
|
| 395 |
label="Max N-gram"
|
| 396 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 397 |
|
| 398 |
# Add model descriptions
|
| 399 |
gr.HTML("""
|
|
@@ -404,28 +479,24 @@ def create_interface():
|
|
| 404 |
<div style="margin-top: 10px; padding: 10px;">
|
| 405 |
<dl style="margin: 0; font-size: 14px;">
|
| 406 |
<div style="margin-bottom: 8px;">
|
| 407 |
-
<dt style="font-weight: bold; display: inline; color: #
|
| 408 |
-
<dd style="display: inline; margin-left: 5px;">
|
| 409 |
</div>
|
| 410 |
<div style="margin-bottom: 8px;">
|
| 411 |
-
<dt style="font-weight: bold; display: inline; color: #
|
| 412 |
-
<dd style="display: inline; margin-left: 5px;">
|
| 413 |
</div>
|
| 414 |
<div style="margin-bottom: 8px;">
|
| 415 |
-
<dt style="font-weight: bold; display: inline; color: #
|
| 416 |
-
<dd style="display: inline; margin-left: 5px;">
|
| 417 |
</div>
|
| 418 |
<div style="margin-bottom: 8px;">
|
| 419 |
-
<dt style="font-weight: bold; display: inline; color: #
|
| 420 |
-
<dd style="display: inline; margin-left: 5px;">
|
| 421 |
</div>
|
| 422 |
<div style="margin-bottom: 8px;">
|
| 423 |
-
<dt style="font-weight: bold; display: inline; color: #
|
| 424 |
-
<dd style="display: inline; margin-left: 5px;">
|
| 425 |
-
</div>
|
| 426 |
-
<div style="margin-bottom: 8px;">
|
| 427 |
-
<dt style="font-weight: bold; display: inline; color: #E17055;">PositionRank:</dt>
|
| 428 |
-
<dd style="display: inline; margin-left: 5px;">Incorporates word positions - good for structured documents</dd>
|
| 429 |
</div>
|
| 430 |
</dl>
|
| 431 |
</div>
|
|
@@ -464,21 +535,21 @@ def create_interface():
|
|
| 464 |
examples=[
|
| 465 |
[
|
| 466 |
"On June 6, 1944, Allied forces launched Operation Overlord, the invasion of Normandy. General Dwight D. Eisenhower commanded the operation, while Field Marshal Bernard Montgomery led ground forces. The BBC broadcast coded messages to the French Resistance, including the famous line 'The long sobs of autumn violins.'",
|
| 467 |
-
"
|
| 468 |
10,
|
| 469 |
1,
|
| 470 |
3
|
| 471 |
],
|
| 472 |
[
|
| 473 |
"In Jane Austen's 'Pride and Prejudice', Elizabeth Bennet first meets Mr. Darcy at the Meryton assembly. The novel, published in 1813, explores themes of marriage and social class in Regency England. Austen wrote to her sister Cassandra about the manuscript while staying at Chawton Cottage.",
|
| 474 |
-
"
|
| 475 |
10,
|
| 476 |
1,
|
| 477 |
3
|
| 478 |
],
|
| 479 |
[
|
| 480 |
"Charles Darwin arrived at the Galápagos Islands aboard HMS Beagle in September 1835. During his five-week visit, Darwin collected specimens of finches, tortoises, and mockingbirds. His observations of these species' variations across different islands later contributed to his theory of evolution by natural selection, published in 'On the Origin of Species' in 1859.",
|
| 481 |
-
"
|
| 482 |
10,
|
| 483 |
1,
|
| 484 |
3
|
|
@@ -500,14 +571,24 @@ def create_interface():
|
|
| 500 |
<h4 style="margin-top: 0;">📚 Model Information & Documentation</h4>
|
| 501 |
<p style="font-size: 14px; margin-bottom: 15px;">Learn more about the algorithms used in this tool:</p>
|
| 502 |
<ul style="font-size: 14px; line-height: 1.8;">
|
| 503 |
-
<li><strong>
|
| 504 |
-
<a href="https://github.com/
|
| 505 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 506 |
</a>
|
| 507 |
</li>
|
| 508 |
-
<li><strong>
|
| 509 |
-
<a href="https://
|
| 510 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 511 |
</a>
|
| 512 |
</li>
|
| 513 |
</ul>
|
|
@@ -531,6 +612,4 @@ def create_interface():
|
|
| 531 |
|
| 532 |
if __name__ == "__main__":
|
| 533 |
demo = create_interface()
|
| 534 |
-
demo.launch()
|
| 535 |
-
|
| 536 |
-
|
|
|
|
| 6 |
import time
|
| 7 |
warnings.filterwarnings('ignore')
|
| 8 |
|
| 9 |
+
# Registry of selectable extractors (PKE-based models were removed for
# compatibility). Keys are "<backend>_<model>" identifiers; the prefix before
# the first underscore is what extract_keywords() dispatches on. Values are
# human-readable descriptions.
KEYWORD_MODELS = {
    'yake_yake': 'YAKE - Yet Another Keyword Extractor (statistical)',
    'keybert_all-mpnet-base-v2': 'KeyBERT MPNet - BERT-based semantic similarity',
    'keybert_all-MiniLM-L6-v2': 'KeyBERT MiniLM - Lightweight BERT-based extraction',
    'keybert_paraphrase-mpnet-base-v2': 'KeyBERT Paraphrase - Optimized for paraphrase detection',
    # RaKUn is not RAKE ("Rapid Automatic Keyword Extraction"); the name comes
    # from "Rank-based Keyword extraction via Unsupervised learning".
    'rakun_rakun': 'RaKUn - Rank-based Keyword extraction via Unsupervised learning (graph-based)',
}
|
| 17 |
|
| 18 |
# Color palette for keywords based on scores
|
|
|
|
| 31 |
|
| 32 |
class KeywordExtractionManager:
|
| 33 |
def __init__(self):
|
| 34 |
+
self.keybert_models = {}
|
|
|
|
| 35 |
|
| 36 |
+
def load_keybert_model(self, model_name):
    """Return the KeyBERT instance for ``model_name``, creating it on first use.

    Instances are cached in ``self.keybert_models`` under the full
    identifier, so each model is only constructed once per manager.

    Args:
        model_name: Identifier carrying the ``keybert_`` prefix, e.g.
            ``'keybert_all-MiniLM-L6-v2'``. The text after the prefix is
            passed to ``KeyBERT(model=...)``.

    Returns:
        The cached or newly created KeyBERT object, or ``None`` if anything
        went wrong while importing or constructing it (the error is printed
        rather than raised, and nothing is cached).
    """
    if model_name in self.keybert_models:
        return self.keybert_models[model_name]

    try:
        from keybert import KeyBERT

        # Strip only the leading marker: str.replace() would also delete
        # any later "keybert_" inside the model name itself.
        prefix = 'keybert_'
        if model_name.startswith(prefix):
            actual_model = model_name[len(prefix):]
        else:
            actual_model = model_name

        self.keybert_models[model_name] = KeyBERT(model=actual_model)
        print(f"✓ KeyBERT model {actual_model} loaded successfully")
    except Exception as e:
        print(f"Error loading KeyBERT model {model_name}: {str(e)}")
        return None

    return self.keybert_models[model_name]
|
| 49 |
|
| 50 |
def extract_keywords(self, text, model_name, num_keywords=10, ngram_range=(1, 3), progress=None):
    """Extract keywords from ``text`` with the backend selected by ``model_name``.

    Args:
        text: Document to analyse. Empty or whitespace-only input yields
            an empty list without touching any backend.
        model_name: Identifier whose prefix picks the backend:
            ``yake_``, ``keybert_`` or ``rakun_``.
        num_keywords: Maximum number of keywords requested.
        ngram_range: ``(min, max)`` phrase length in words. The two bounds
            are coerced to ``int`` and put in ascending order before being
            forwarded, so a reversed pair does not break the backends.
        progress: Optional callable invoked as ``progress(fraction, desc=...)``.

    Returns:
        Whatever the selected ``extract_*_keywords`` method returns. On any
        exception (including an unknown ``model_name``) the error is printed
        and ``self.fallback_keyword_extraction(text, num_keywords)`` is
        returned instead.
    """
    # Nothing to extract; avoid loading a model for empty input.
    if not text or not text.strip():
        return []

    try:
        if progress:
            progress(0.3, desc="Loading model...")

        # Normalise the range once so every backend sees (low, high) ints.
        low, high = sorted(int(bound) for bound in ngram_range)
        ngram_range = (low, high)

        # Handle different model types
        if model_name.startswith('yake_'):
            return self.extract_yake_keywords(text, num_keywords, ngram_range, progress)
        elif model_name.startswith('keybert_'):
            return self.extract_keybert_keywords(text, model_name, num_keywords, ngram_range, progress)
        elif model_name.startswith('rakun_'):
            return self.extract_rakun_keywords(text, num_keywords, progress)
        else:
            raise ValueError(f"Unknown model: {model_name}")

    except Exception as e:
        # Best-effort by design: any backend failure degrades to the
        # dependency-free fallback extractor instead of surfacing an error.
        print(f"Error with {model_name}: {str(e)}")
        return self.fallback_keyword_extraction(text, num_keywords)
|
| 69 |
+
|
| 70 |
+
def extract_yake_keywords(self, text, num_keywords, ngram_range, progress):
    """Extract keywords using YAKE.

    Args:
        text: Document to analyse.
        num_keywords: Maximum number of keywords to return.
        ngram_range: ``(min, max)`` phrase length in words. YAKE itself only
            accepts a maximum (``n``), so the minimum is enforced here by
            dropping shorter candidates.
        progress: Optional callable invoked as ``progress(fraction, desc=...)``.

    Returns:
        A list of ``{'keyword', 'score', 'model'}`` dicts in the order YAKE
        ranked them, where ``score`` is ``1 / (1 + yake_score)`` so that
        higher means better. If the ``yake`` package cannot be imported,
        the result of ``self.fallback_keyword_extraction`` is returned.
    """
    try:
        import yake

        if progress:
            progress(0.5, desc="Processing with YAKE...")

        min_words, max_words = ngram_range[0], ngram_range[1]

        # With a minimum above one word some candidates will be discarded,
        # so ask YAKE for extra ones to still be able to fill the list.
        # The factor is a heuristic; with min_words <= 1 nothing changes.
        candidates_wanted = num_keywords if min_words <= 1 else num_keywords * 3

        # Configure YAKE
        kw_extractor = yake.KeywordExtractor(
            lan="en",
            n=max_words,
            dedupLim=0.7,
            top=candidates_wanted
        )

        if progress:
            progress(0.7, desc="Extracting keywords...")

        keywords = kw_extractor.extract_keywords(text)

        # Format results (YAKE returns lower scores for better keywords)
        results = []
        for keyword, score in keywords:
            if len(results) >= num_keywords:
                break
            # Enforce the lower n-gram bound that YAKE has no option for.
            if len(keyword.split()) < min_words:
                continue
            # Invert score for consistency (higher = better)
            inverted_score = 1.0 / (1.0 + score)
            results.append({
                'keyword': keyword,
                'score': inverted_score,
                'model': 'YAKE'
            })

        return results

    except ImportError:
        print("YAKE library not found. Using fallback keyword extraction...")
        return self.fallback_keyword_extraction(text, num_keywords)
|
| 107 |
+
|
| 108 |
+
def extract_keybert_keywords(self, text, model_name, num_keywords, ngram_range, progress):
|
| 109 |
+
"""Extract keywords using KeyBERT"""
|
| 110 |
+
try:
|
| 111 |
+
if progress:
|
| 112 |
+
progress(0.4, desc="Loading KeyBERT model...")
|
| 113 |
+
|
| 114 |
+
kw_model = self.load_keybert_model(model_name)
|
| 115 |
+
if kw_model is None:
|
| 116 |
+
return self.fallback_keyword_extraction(text, num_keywords)
|
| 117 |
+
|
| 118 |
+
if progress:
|
| 119 |
+
progress(0.6, desc="Processing with KeyBERT...")
|
| 120 |
+
|
| 121 |
+
# Extract keywords
|
| 122 |
+
keywords = kw_model.extract_keywords(
|
| 123 |
+
text,
|
| 124 |
+
keyphrase_ngram_range=ngram_range,
|
| 125 |
+
stop_words='english',
|
| 126 |
+
top_k=num_keywords
|
| 127 |
+
)
|
| 128 |
+
|
| 129 |
+
if progress:
|
| 130 |
+
progress(0.8, desc="Formatting results...")
|
| 131 |
|
| 132 |
# Format results
|
| 133 |
results = []
|
|
|
|
| 135 |
results.append({
|
| 136 |
'keyword': keyword,
|
| 137 |
'score': score,
|
| 138 |
+
'model': f"KeyBERT-{model_name.replace('keybert_', '')}"
|
| 139 |
})
|
| 140 |
|
| 141 |
return results
|
| 142 |
|
| 143 |
except ImportError:
|
| 144 |
+
print("KeyBERT library not found. Using fallback keyword extraction...")
|
| 145 |
return self.fallback_keyword_extraction(text, num_keywords)
|
| 146 |
+
|
| 147 |
+
def extract_rakun_keywords(self, text, num_keywords, progress):
    """Extract keywords using RaKUn.

    Args:
        text: Document to analyse.
        num_keywords: Maximum number of keywords to return; also passed to
            the detector as the ``num_keywords`` hyperparameter.
        progress: Optional callable invoked as ``progress(fraction, desc=...)``.

    Returns:
        A list of ``{'keyword', 'score', 'model'}`` dicts, at most
        ``num_keywords`` long. Entries the detector returns as
        ``(keyword, score)`` pairs keep their score; bare entries get
        ``1 / rank`` (rank starting at 1) based on their position. If the
        ``rakun`` package cannot be imported, the result of
        ``self.fallback_keyword_extraction`` is returned.
    """
    try:
        # NOTE(review): confirm this import path, the hyperparameter format
        # and the find_keywords() arguments against the installed RaKUn
        # release - they could not be verified from this file.
        from rakun import RakunDetector

        if progress:
            progress(0.5, desc="Processing with RaKUn...")

        # Initialize RaKUn
        hyperparameters = {
            "distance_threshold": 3,
            "num_keywords": num_keywords,
            "pair_diff_length": 2,
            "stopwords": "english",
            "bigram_count_threshold": 2,
            "num_tokens": [1, 2, 3]
        }

        keyword_detector = RakunDetector(hyperparameters)

        if progress:
            progress(0.7, desc="Extracting keywords...")

        keywords = keyword_detector.find_keywords(text)

        # Format results
        results = []
        for rank, keyword_data in enumerate(keywords[:num_keywords], start=1):
            if isinstance(keyword_data, (tuple, list)):
                keyword, score = keyword_data
            else:
                # No score available: derive one from the position. Using
                # the loop rank (rather than list.index) keeps repeated
                # keywords from all receiving the first occurrence's score.
                keyword = keyword_data
                score = 1.0 / rank

            results.append({
                'keyword': keyword,
                'score': score,
                'model': 'RaKUn'
            })

        return results

    except ImportError:
        print("RaKUn library not found. Using fallback keyword extraction...")
        return self.fallback_keyword_extraction(text, num_keywords)
|
| 193 |
|
| 194 |
def fallback_keyword_extraction(self, text, num_keywords=10):
|
|
|
|
| 390 |
summary = f"""
|
| 391 |
## 📊 Analysis Summary
|
| 392 |
- **Keywords extracted:** {len(keywords)}
|
| 393 |
+
- **Model used:** {selected_model.replace('yake_', '').replace('keybert_', 'KeyBERT-').replace('rakun_', '').title()}
|
| 394 |
- **Average relevance score:** {avg_score:.4f}
|
| 395 |
- **N-gram range:** {ngram_min}-{ngram_max} words
|
| 396 |
"""
|
|
|
|
| 405 |
gr.Markdown("""
|
| 406 |
# Keyword Extraction Explorer Tool
|
| 407 |
|
| 408 |
+
Extract the most important keywords and phrases from your text using various algorithms! This tool uses modern keyword extraction methods including YAKE, KeyBERT, and RaKUn for comprehensive analysis.
|
| 409 |
|
| 410 |
### How to use:
|
| 411 |
1. **📝 Enter your text** in the text area below
|
|
|
|
| 433 |
with gr.Column(scale=1):
|
| 434 |
# Model selector
|
| 435 |
model_dropdown = gr.Dropdown(
|
| 436 |
+
choices=list(KEYWORD_MODELS.keys()),
|
| 437 |
+
value='yake_yake',
|
| 438 |
label="🎯 Select Keyword Extraction Model"
|
| 439 |
)
|
| 440 |
|
|
|
|
| 462 |
step=1,
|
| 463 |
label="Max N-gram"
|
| 464 |
)
|
| 465 |
+
|
| 466 |
+
# Add N-gram tip box
|
| 467 |
+
gr.HTML("""
|
| 468 |
+
<div style="background-color: #e3f2fd; border: 1px solid #90caf9; border-radius: 8px; padding: 10px; margin: 10px 0;">
|
| 469 |
+
<strong style="color: #1565c0;">💡 N-gram Guide:</strong> N-grams are sequences of words. Set Min=1, Max=3 to extract single words, phrases of 2 words, and phrases of 3 words. Higher values capture longer phrases but may reduce precision.
|
| 470 |
+
</div>
|
| 471 |
+
""")
|
| 472 |
|
| 473 |
# Add model descriptions
|
| 474 |
gr.HTML("""
|
|
|
|
| 479 |
<div style="margin-top: 10px; padding: 10px;">
|
| 480 |
<dl style="margin: 0; font-size: 14px;">
|
| 481 |
<div style="margin-bottom: 8px;">
|
| 482 |
+
<dt style="font-weight: bold; display: inline; color: #FF6B6B;">YAKE:</dt>
|
| 483 |
+
<dd style="display: inline; margin-left: 5px;">Statistical approach requiring no training - works well on short texts and multilingual content</dd>
|
| 484 |
</div>
|
| 485 |
<div style="margin-bottom: 8px;">
|
| 486 |
+
<dt style="font-weight: bold; display: inline; color: #9C27B0;">KeyBERT MPNet:</dt>
|
| 487 |
+
<dd style="display: inline; margin-left: 5px;">BERT-based semantic similarity - excellent for contextual understanding</dd>
|
| 488 |
</div>
|
| 489 |
<div style="margin-bottom: 8px;">
|
| 490 |
+
<dt style="font-weight: bold; display: inline; color: #795548;">KeyBERT MiniLM:</dt>
|
| 491 |
+
<dd style="display: inline; margin-left: 5px;">Lightweight BERT model - faster processing with good results</dd>
|
| 492 |
</div>
|
| 493 |
<div style="margin-bottom: 8px;">
|
| 494 |
+
<dt style="font-weight: bold; display: inline; color: #607D8B;">KeyBERT Paraphrase:</dt>
|
| 495 |
+
<dd style="display: inline; margin-left: 5px;">Optimized for paraphrase detection - great for similar concept extraction</dd>
|
| 496 |
</div>
|
| 497 |
<div style="margin-bottom: 8px;">
|
| 498 |
+
<dt style="font-weight: bold; display: inline; color: #FF5722;">RaKUn:</dt>
|
| 499 |
+
<dd style="display: inline; margin-left: 5px;">Graph-based rapid extraction - efficient for large texts</dd>
|
|
|
|
|
|
|
|
|
|
|
|
|
| 500 |
</div>
|
| 501 |
</dl>
|
| 502 |
</div>
|
|
|
|
| 535 |
examples=[
|
| 536 |
[
|
| 537 |
"On June 6, 1944, Allied forces launched Operation Overlord, the invasion of Normandy. General Dwight D. Eisenhower commanded the operation, while Field Marshal Bernard Montgomery led ground forces. The BBC broadcast coded messages to the French Resistance, including the famous line 'The long sobs of autumn violins.'",
|
| 538 |
+
"yake_yake",
|
| 539 |
10,
|
| 540 |
1,
|
| 541 |
3
|
| 542 |
],
|
| 543 |
[
|
| 544 |
"In Jane Austen's 'Pride and Prejudice', Elizabeth Bennet first meets Mr. Darcy at the Meryton assembly. The novel, published in 1813, explores themes of marriage and social class in Regency England. Austen wrote to her sister Cassandra about the manuscript while staying at Chawton Cottage.",
|
| 545 |
+
"keybert_all-mpnet-base-v2",
|
| 546 |
10,
|
| 547 |
1,
|
| 548 |
3
|
| 549 |
],
|
| 550 |
[
|
| 551 |
"Charles Darwin arrived at the Galápagos Islands aboard HMS Beagle in September 1835. During his five-week visit, Darwin collected specimens of finches, tortoises, and mockingbirds. His observations of these species' variations across different islands later contributed to his theory of evolution by natural selection, published in 'On the Origin of Species' in 1859.",
|
| 552 |
+
"keybert_all-MiniLM-L6-v2",
|
| 553 |
10,
|
| 554 |
1,
|
| 555 |
3
|
|
|
|
| 571 |
<h4 style="margin-top: 0;">📚 Model Information & Documentation</h4>
|
| 572 |
<p style="font-size: 14px; margin-bottom: 15px;">Learn more about the algorithms used in this tool:</p>
|
| 573 |
<ul style="font-size: 14px; line-height: 1.8;">
|
| 574 |
+
<li><strong>YAKE:</strong>
|
| 575 |
+
<a href="https://github.com/LIAAD/yake" target="_blank" style="color: #1976d2;">
|
| 576 |
+
Yet Another Keyword Extractor ↗
|
| 577 |
+
</a>
|
| 578 |
+
</li>
|
| 579 |
+
<li><strong>KeyBERT:</strong>
|
| 580 |
+
<a href="https://github.com/MaartenGr/KeyBERT" target="_blank" style="color: #1976d2;">
|
| 581 |
+
Minimal keyword extraction with BERT ↗
|
| 582 |
</a>
|
| 583 |
</li>
|
| 584 |
+
<li><strong>RaKUn:</strong>
|
| 585 |
+
<a href="https://github.com/SkBlaz/rakun" target="_blank" style="color: #1976d2;">
|
| 586 |
+
Rapid Automatic Keyword Extraction ↗
|
| 587 |
+
</a>
|
| 588 |
+
</li>
|
| 589 |
+
<li><strong>Sentence Transformers:</strong>
|
| 590 |
+
<a href="https://www.sbert.net/" target="_blank" style="color: #1976d2;">
|
| 591 |
+
BERT-based models for semantic similarity ↗
|
| 592 |
</a>
|
| 593 |
</li>
|
| 594 |
</ul>
|
|
|
|
| 612 |
|
| 613 |
if __name__ == "__main__":
|
| 614 |
demo = create_interface()
|
| 615 |
+
demo.launch()
|
|
|
|
|
|