Update app.py
Browse files
app.py
CHANGED
|
@@ -4,17 +4,27 @@ import warnings
|
|
| 4 |
import random
|
| 5 |
import re
|
| 6 |
import time
|
|
|
|
|
|
|
| 7 |
warnings.filterwarnings('ignore')
|
| 8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
# Reliable model names and descriptions
|
| 10 |
KEYWORD_MODELS = {
|
| 11 |
'yake_yake': 'YAKE - Yet Another Keyword Extractor (statistical)',
|
| 12 |
-
'keybert_all-mpnet-base-v2': 'KeyBERT MPNet - BERT-based semantic similarity',
|
| 13 |
'keybert_all-MiniLM-L6-v2': 'KeyBERT MiniLM - Lightweight BERT-based extraction',
|
| 14 |
-
'keybert_paraphrase-mpnet-base-v2': 'KeyBERT Paraphrase - Optimized for paraphrase detection',
|
| 15 |
'rake_nltk': 'RAKE-NLTK - Rapid Automatic Keyword Extraction'
|
| 16 |
}
|
| 17 |
|
|
|
|
|
|
|
|
|
|
| 18 |
# Color palette for keywords based on scores
|
| 19 |
SCORE_COLORS = {
|
| 20 |
'high': '#00B894', # Green - High relevance
|
|
@@ -33,41 +43,84 @@ class KeywordExtractionManager:
|
|
| 33 |
def __init__(self):
|
| 34 |
self.keybert_models = {}
|
| 35 |
self.rake_extractor = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
def load_keybert_model(self, model_name):
|
| 38 |
-
"""Load KeyBERT model"""
|
| 39 |
if model_name not in self.keybert_models:
|
| 40 |
try:
|
| 41 |
from keybert import KeyBERT
|
|
|
|
|
|
|
| 42 |
# Extract the actual model name from the identifier
|
| 43 |
actual_model = model_name.replace('keybert_', '')
|
| 44 |
-
|
| 45 |
-
print(f"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
except Exception as e:
|
| 47 |
print(f"Error loading KeyBERT model {model_name}: {str(e)}")
|
|
|
|
| 48 |
return None
|
| 49 |
-
return self.keybert_models
|
| 50 |
|
| 51 |
def load_rake_extractor(self):
|
| 52 |
-
"""Load RAKE extractor"""
|
| 53 |
if self.rake_extractor is None:
|
| 54 |
try:
|
| 55 |
from rake_nltk import Rake
|
| 56 |
-
import nltk
|
| 57 |
-
# Download required NLTK data
|
| 58 |
-
try:
|
| 59 |
-
nltk.data.find('corpora/stopwords')
|
| 60 |
-
except LookupError:
|
| 61 |
-
nltk.download('stopwords', quiet=True)
|
| 62 |
-
try:
|
| 63 |
-
nltk.data.find('tokenizers/punkt')
|
| 64 |
-
except LookupError:
|
| 65 |
-
nltk.download('punkt', quiet=True)
|
| 66 |
|
|
|
|
| 67 |
self.rake_extractor = Rake()
|
| 68 |
print("✓ RAKE extractor loaded successfully")
|
| 69 |
except Exception as e:
|
| 70 |
print(f"Error loading RAKE extractor: {str(e)}")
|
|
|
|
| 71 |
return None
|
| 72 |
return self.rake_extractor
|
| 73 |
|
|
@@ -77,6 +130,8 @@ class KeywordExtractionManager:
|
|
| 77 |
if progress:
|
| 78 |
progress(0.3, desc="Loading model...")
|
| 79 |
|
|
|
|
|
|
|
| 80 |
# Handle different model types
|
| 81 |
if model_name.startswith('yake_'):
|
| 82 |
return self.extract_yake_keywords(text, num_keywords, ngram_range, progress)
|
|
@@ -89,6 +144,7 @@ class KeywordExtractionManager:
|
|
| 89 |
|
| 90 |
except Exception as e:
|
| 91 |
print(f"Error with {model_name}: {str(e)}")
|
|
|
|
| 92 |
return self.fallback_keyword_extraction(text, num_keywords)
|
| 93 |
|
| 94 |
def extract_yake_keywords(self, text, num_keywords, ngram_range, progress):
|
|
@@ -123,10 +179,11 @@ class KeywordExtractionManager:
|
|
| 123 |
'model': 'YAKE'
|
| 124 |
})
|
| 125 |
|
|
|
|
| 126 |
return results
|
| 127 |
|
| 128 |
-
except
|
| 129 |
-
print("YAKE
|
| 130 |
return self.fallback_keyword_extraction(text, num_keywords)
|
| 131 |
|
| 132 |
def extract_keybert_keywords(self, text, model_name, num_keywords, ngram_range, progress):
|
|
@@ -137,18 +194,28 @@ class KeywordExtractionManager:
|
|
| 137 |
|
| 138 |
kw_model = self.load_keybert_model(model_name)
|
| 139 |
if kw_model is None:
|
|
|
|
| 140 |
return self.fallback_keyword_extraction(text, num_keywords)
|
| 141 |
|
| 142 |
if progress:
|
| 143 |
progress(0.6, desc="Processing with KeyBERT...")
|
| 144 |
|
| 145 |
-
# Extract keywords
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
|
| 153 |
if progress:
|
| 154 |
progress(0.8, desc="Formatting results...")
|
|
@@ -162,10 +229,11 @@ class KeywordExtractionManager:
|
|
| 162 |
'model': f"KeyBERT-{model_name.replace('keybert_', '')}"
|
| 163 |
})
|
| 164 |
|
|
|
|
| 165 |
return results
|
| 166 |
|
| 167 |
-
except
|
| 168 |
-
print("KeyBERT
|
| 169 |
return self.fallback_keyword_extraction(text, num_keywords)
|
| 170 |
|
| 171 |
def extract_rake_keywords(self, text, num_keywords, progress):
|
|
@@ -176,6 +244,7 @@ class KeywordExtractionManager:
|
|
| 176 |
|
| 177 |
rake_extractor = self.load_rake_extractor()
|
| 178 |
if rake_extractor is None:
|
|
|
|
| 179 |
return self.fallback_keyword_extraction(text, num_keywords)
|
| 180 |
|
| 181 |
if progress:
|
|
@@ -185,23 +254,33 @@ class KeywordExtractionManager:
|
|
| 185 |
rake_extractor.extract_keywords_from_text(text)
|
| 186 |
keywords_with_scores = rake_extractor.get_ranked_phrases_with_scores()
|
| 187 |
|
| 188 |
-
#
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
results.append({
|
| 192 |
-
'keyword': keyword,
|
| 193 |
-
'score': score,
|
| 194 |
-
'model': 'RAKE-NLTK'
|
| 195 |
-
})
|
| 196 |
|
| 197 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 198 |
|
| 199 |
-
except
|
| 200 |
-
print("RAKE
|
| 201 |
return self.fallback_keyword_extraction(text, num_keywords)
|
| 202 |
|
| 203 |
def fallback_keyword_extraction(self, text, num_keywords=10):
|
| 204 |
"""Simple fallback keyword extraction using basic statistics"""
|
|
|
|
| 205 |
import re
|
| 206 |
from collections import Counter
|
| 207 |
|
|
@@ -363,6 +442,7 @@ def create_legend_html():
|
|
| 363 |
return html
|
| 364 |
|
| 365 |
# Initialize the keyword extraction manager
|
|
|
|
| 366 |
keyword_manager = KeywordExtractionManager()
|
| 367 |
|
| 368 |
def process_text(text, selected_model, num_keywords, ngram_min, ngram_max, progress=gr.Progress()):
|
|
@@ -436,8 +516,8 @@ def create_interface():
|
|
| 436 |
text_input = gr.Textbox(
|
| 437 |
label="📝 Text to Analyse",
|
| 438 |
placeholder="Enter your text here...",
|
| 439 |
-
lines=
|
| 440 |
-
max_lines=
|
| 441 |
)
|
| 442 |
|
| 443 |
with gr.Column(scale=1):
|
|
@@ -492,18 +572,10 @@ def create_interface():
|
|
| 492 |
<dt style="font-weight: bold; display: inline; color: #FF6B6B;">YAKE:</dt>
|
| 493 |
<dd style="display: inline; margin-left: 5px;">Statistical approach requiring no training - works well on short texts and multilingual content</dd>
|
| 494 |
</div>
|
| 495 |
-
<div style="margin-bottom: 8px;">
|
| 496 |
-
<dt style="font-weight: bold; display: inline; color: #9C27B0;">KeyBERT MPNet:</dt>
|
| 497 |
-
<dd style="display: inline; margin-left: 5px;">BERT-based semantic similarity - excellent for contextual understanding</dd>
|
| 498 |
-
</div>
|
| 499 |
<div style="margin-bottom: 8px;">
|
| 500 |
<dt style="font-weight: bold; display: inline; color: #795548;">KeyBERT MiniLM:</dt>
|
| 501 |
<dd style="display: inline; margin-left: 5px;">Lightweight BERT model - faster processing with good results</dd>
|
| 502 |
</div>
|
| 503 |
-
<div style="margin-bottom: 8px;">
|
| 504 |
-
<dt style="font-weight: bold; display: inline; color: #607D8B;">KeyBERT Paraphrase:</dt>
|
| 505 |
-
<dd style="display: inline; margin-left: 5px;">Optimized for paraphrase detection - great for similar concept extraction</dd>
|
| 506 |
-
</div>
|
| 507 |
<div style="margin-bottom: 8px;">
|
| 508 |
<dt style="font-weight: bold; display: inline; color: #FF5722;">RAKE-NLTK:</dt>
|
| 509 |
<dd style="display: inline; margin-left: 5px;">Classic keyword extraction algorithm - fast and reliable for phrase extraction</dd>
|
|
@@ -552,7 +624,7 @@ def create_interface():
|
|
| 552 |
],
|
| 553 |
[
|
| 554 |
"In Jane Austen's 'Pride and Prejudice', Elizabeth Bennet first meets Mr. Darcy at the Meryton assembly. The novel, published in 1813, explores themes of marriage and social class in Regency England. Austen wrote to her sister Cassandra about the manuscript while staying at Chawton Cottage.",
|
| 555 |
-
"keybert_all-
|
| 556 |
10,
|
| 557 |
1,
|
| 558 |
3
|
|
|
|
| 4 |
import random
|
| 5 |
import re
|
| 6 |
import time
|
| 7 |
+
import os
|
| 8 |
+
import sys
|
| 9 |
warnings.filterwarnings('ignore')
|
| 10 |
|
| 11 |
+
# Pre-download NLTK data at startup so the resources are already present when
# the RAKE extractor is created later on.
import nltk

print("Downloading NLTK data...")
# nltk.download() reports failure through its boolean return value instead of
# raising, so collect the failures rather than announcing success regardless.
# NOTE(review): recent NLTK releases may additionally require the 'punkt_tab'
# resource for sentence tokenization - confirm against the pinned NLTK version.
_nltk_failed = []
for _nltk_package in ('stopwords', 'punkt'):
    if not nltk.download(_nltk_package, quiet=True):
        _nltk_failed.append(_nltk_package)

if _nltk_failed:
    print(f"NLTK data download failed for: {', '.join(_nltk_failed)}")
else:
    print("NLTK data downloaded.")
|
| 17 |
+
|
| 18 |
# Reliable model names and descriptions.
# Maps the internal model identifier to its human-readable description; the
# identifier prefix ('yake_', 'keybert_', 'rake_') names the extractor family.
# The list is deliberately reduced for better compatibility: models that
# might be too large for Spaces were removed.
KEYWORD_MODELS = {
    # Statistical extractor
    'yake_yake':
        'YAKE - Yet Another Keyword Extractor (statistical)',
    # Embedding-based extractor (the only KeyBERT variant kept)
    'keybert_all-MiniLM-L6-v2':
        'KeyBERT MiniLM - Lightweight BERT-based extraction',
    # Phrase-based extractor
    'rake_nltk':
        'RAKE-NLTK - Rapid Automatic Keyword Extraction',
}
|
| 27 |
+
|
| 28 |
# Color palette for keywords based on scores
|
| 29 |
SCORE_COLORS = {
|
| 30 |
'high': '#00B894', # Green - High relevance
|
|
|
|
| 43 |
def __init__(self):
    """Set up empty model caches and run the start-up availability probe."""
    # Flag starts out False; initialize_models() is what flips it.
    self.models_initialized = False
    # RAKE extractor instance, created lazily by load_rake_extractor().
    self.rake_extractor = None
    # KeyBERT instances keyed by model identifier, filled by load_keybert_model().
    self.keybert_models = {}
    self.initialize_models()
|
| 48 |
+
|
| 49 |
+
def initialize_models(self):
    """Probe the keyword-extraction backends and print their availability.

    YAKE, KeyBERT (with sentence-transformers) and RAKE-NLTK are each
    imported inside their own ``try`` block, so one missing or broken
    package never prevents the remaining probes from running and never
    aborts construction of the manager. Results are only printed.

    When the MiniLM sentence-transformer loads successfully, the resulting
    KeyBERT instance is stored in ``self.keybert_models`` under
    ``'keybert_all-MiniLM-L6-v2'`` so the first extraction request does not
    have to load the same model a second time.

    ``self.models_initialized`` is set to ``True`` once every probe has run,
    whatever the individual outcomes were.
    """
    print("Initializing models...")

    # Test YAKE. A broken install can raise more than ImportError (for
    # example OSError from a native dependency), so catch broadly: this is
    # a best-effort probe, not a hard requirement.
    try:
        import yake  # noqa: F401 - imported only to check availability
        print("✓ YAKE available")
    except Exception as e:
        print(f"✗ YAKE not available: {e}")

    # Test KeyBERT
    try:
        from keybert import KeyBERT
        from sentence_transformers import SentenceTransformer
    except Exception as e:
        print(f"✗ KeyBERT not available: {e}")
    else:
        print("✓ KeyBERT library available")

        # Try to load a small model, and keep it instead of discarding it.
        try:
            sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
            self.keybert_models['keybert_all-MiniLM-L6-v2'] = KeyBERT(model=sentence_model)
            print("✓ Sentence transformers working")
        except Exception as e:
            print(f"✗ Sentence transformer model failed: {e}")

    # Test RAKE
    try:
        from rake_nltk import Rake  # noqa: F401 - imported only to check availability
        print("✓ RAKE-NLTK available")
    except Exception as e:
        print(f"✗ RAKE-NLTK not available: {e}")

    self.models_initialized = True
|
| 83 |
|
| 84 |
def load_keybert_model(self, model_name):
    """Return the KeyBERT instance for ``model_name``, loading it on first use.

    Args:
        model_name: Model identifier such as ``'keybert_all-MiniLM-L6-v2'``.
            The leading ``'keybert_'`` prefix is stripped to obtain the
            sentence-transformers model name.

    Returns:
        The cached or newly created KeyBERT instance, or ``None`` when the
        libraries or the model cannot be loaded. Errors are printed, never
        raised, and a failed load is not cached, so a later call retries.
    """
    if model_name in self.keybert_models:
        return self.keybert_models[model_name]

    # Extract the actual model name from the identifier. Only the leading
    # prefix is removed; str.replace() would also delete any later
    # occurrence of 'keybert_' inside the model name itself.
    prefix = 'keybert_'
    if model_name.startswith(prefix):
        actual_model = model_name[len(prefix):]
    else:
        actual_model = model_name

    try:
        from keybert import KeyBERT
        from sentence_transformers import SentenceTransformer

        print(f"Loading KeyBERT with {actual_model}...")

        # Try to load the sentence transformer first
        try:
            sentence_model = SentenceTransformer(actual_model)
            self.keybert_models[model_name] = KeyBERT(model=sentence_model)
        except Exception as e:
            print(f"Failed to load sentence transformer {actual_model}: {e}")
            # Try with just the model name and let KeyBERT resolve it
            self.keybert_models[model_name] = KeyBERT(model=actual_model)

        # Reported for both the primary and the fallback path.
        print(f"✓ KeyBERT model {actual_model} loaded successfully")
    except Exception as e:
        print(f"Error loading KeyBERT model {model_name}: {str(e)}")
        print(f"Full error: {type(e).__name__}: {str(e)}")
        return None

    return self.keybert_models.get(model_name)
|
| 111 |
|
| 112 |
def load_rake_extractor(self):
    """Return the shared RAKE extractor, creating it on the first call.

    Returns:
        The ``Rake`` instance stored on ``self.rake_extractor``, or ``None``
        when it cannot be created. The error is printed, not raised, and
        the attribute stays ``None`` so a later call tries again.
    """
    # Fast path: an extractor was already built by an earlier call.
    if self.rake_extractor is not None:
        return self.rake_extractor

    try:
        from rake_nltk import Rake

        # Create RAKE instance
        extractor = Rake()
        self.rake_extractor = extractor
        print("✓ RAKE extractor loaded successfully")
    except Exception as exc:
        print(f"Error loading RAKE extractor: {str(exc)}")
        print(f"Full error: {type(exc).__name__}: {str(exc)}")
        return None

    return self.rake_extractor
|
| 126 |
|
|
|
|
| 130 |
if progress:
|
| 131 |
progress(0.3, desc="Loading model...")
|
| 132 |
|
| 133 |
+
print(f"Attempting to extract keywords with {model_name}")
|
| 134 |
+
|
| 135 |
# Handle different model types
|
| 136 |
if model_name.startswith('yake_'):
|
| 137 |
return self.extract_yake_keywords(text, num_keywords, ngram_range, progress)
|
|
|
|
| 144 |
|
| 145 |
except Exception as e:
|
| 146 |
print(f"Error with {model_name}: {str(e)}")
|
| 147 |
+
print(f"Full error: {type(e).__name__}: {str(e)}")
|
| 148 |
return self.fallback_keyword_extraction(text, num_keywords)
|
| 149 |
|
| 150 |
def extract_yake_keywords(self, text, num_keywords, ngram_range, progress):
|
|
|
|
| 179 |
'model': 'YAKE'
|
| 180 |
})
|
| 181 |
|
| 182 |
+
print(f"YAKE extracted {len(results)} keywords")
|
| 183 |
return results
|
| 184 |
|
| 185 |
+
except Exception as e:
|
| 186 |
+
print(f"YAKE extraction failed: {type(e).__name__}: {str(e)}")
|
| 187 |
return self.fallback_keyword_extraction(text, num_keywords)
|
| 188 |
|
| 189 |
def extract_keybert_keywords(self, text, model_name, num_keywords, ngram_range, progress):
|
|
|
|
| 194 |
|
| 195 |
kw_model = self.load_keybert_model(model_name)
|
| 196 |
if kw_model is None:
|
| 197 |
+
print(f"KeyBERT model {model_name} could not be loaded")
|
| 198 |
return self.fallback_keyword_extraction(text, num_keywords)
|
| 199 |
|
| 200 |
if progress:
|
| 201 |
progress(0.6, desc="Processing with KeyBERT...")
|
| 202 |
|
| 203 |
+
# Extract keywords with error handling
|
| 204 |
+
try:
|
| 205 |
+
keywords = kw_model.extract_keywords(
|
| 206 |
+
text,
|
| 207 |
+
keyphrase_ngram_range=ngram_range,
|
| 208 |
+
stop_words='english',
|
| 209 |
+
top_n=num_keywords
|
| 210 |
+
)
|
| 211 |
+
except Exception as e:
|
| 212 |
+
print(f"KeyBERT extraction error: {e}")
|
| 213 |
+
# Try without stop words
|
| 214 |
+
keywords = kw_model.extract_keywords(
|
| 215 |
+
text,
|
| 216 |
+
keyphrase_ngram_range=ngram_range,
|
| 217 |
+
top_n=num_keywords
|
| 218 |
+
)
|
| 219 |
|
| 220 |
if progress:
|
| 221 |
progress(0.8, desc="Formatting results...")
|
|
|
|
| 229 |
'model': f"KeyBERT-{model_name.replace('keybert_', '')}"
|
| 230 |
})
|
| 231 |
|
| 232 |
+
print(f"KeyBERT extracted {len(results)} keywords")
|
| 233 |
return results
|
| 234 |
|
| 235 |
+
except Exception as e:
|
| 236 |
+
print(f"KeyBERT extraction failed: {type(e).__name__}: {str(e)}")
|
| 237 |
return self.fallback_keyword_extraction(text, num_keywords)
|
| 238 |
|
| 239 |
def extract_rake_keywords(self, text, num_keywords, progress):
|
|
|
|
| 244 |
|
| 245 |
rake_extractor = self.load_rake_extractor()
|
| 246 |
if rake_extractor is None:
|
| 247 |
+
print("RAKE extractor could not be loaded")
|
| 248 |
return self.fallback_keyword_extraction(text, num_keywords)
|
| 249 |
|
| 250 |
if progress:
|
|
|
|
| 254 |
rake_extractor.extract_keywords_from_text(text)
|
| 255 |
keywords_with_scores = rake_extractor.get_ranked_phrases_with_scores()
|
| 256 |
|
| 257 |
+
# Normalize scores
|
| 258 |
+
if keywords_with_scores:
|
| 259 |
+
max_score = max(score for score, _ in keywords_with_scores)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 260 |
|
| 261 |
+
# Format results
|
| 262 |
+
results = []
|
| 263 |
+
for score, keyword in keywords_with_scores[:num_keywords]:
|
| 264 |
+
normalized_score = score / max_score if max_score > 0 else 0
|
| 265 |
+
results.append({
|
| 266 |
+
'keyword': keyword,
|
| 267 |
+
'score': normalized_score,
|
| 268 |
+
'model': 'RAKE-NLTK'
|
| 269 |
+
})
|
| 270 |
+
|
| 271 |
+
print(f"RAKE extracted {len(results)} keywords")
|
| 272 |
+
return results
|
| 273 |
+
else:
|
| 274 |
+
print("RAKE returned no keywords")
|
| 275 |
+
return self.fallback_keyword_extraction(text, num_keywords)
|
| 276 |
|
| 277 |
+
except Exception as e:
|
| 278 |
+
print(f"RAKE extraction failed: {type(e).__name__}: {str(e)}")
|
| 279 |
return self.fallback_keyword_extraction(text, num_keywords)
|
| 280 |
|
| 281 |
def fallback_keyword_extraction(self, text, num_keywords=10):
|
| 282 |
"""Simple fallback keyword extraction using basic statistics"""
|
| 283 |
+
print("Using fallback keyword extraction")
|
| 284 |
import re
|
| 285 |
from collections import Counter
|
| 286 |
|
|
|
|
| 442 |
return html
|
| 443 |
|
| 444 |
# Initialize the keyword extraction manager
|
| 445 |
+
print("Initializing keyword extraction manager...")
|
| 446 |
keyword_manager = KeywordExtractionManager()
|
| 447 |
|
| 448 |
def process_text(text, selected_model, num_keywords, ngram_min, ngram_max, progress=gr.Progress()):
|
|
|
|
| 516 |
text_input = gr.Textbox(
|
| 517 |
label="📝 Text to Analyse",
|
| 518 |
placeholder="Enter your text here...",
|
| 519 |
+
lines=20,
|
| 520 |
+
max_lines=23
|
| 521 |
)
|
| 522 |
|
| 523 |
with gr.Column(scale=1):
|
|
|
|
| 572 |
<dt style="font-weight: bold; display: inline; color: #FF6B6B;">YAKE:</dt>
|
| 573 |
<dd style="display: inline; margin-left: 5px;">Statistical approach requiring no training - works well on short texts and multilingual content</dd>
|
| 574 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
| 575 |
<div style="margin-bottom: 8px;">
|
| 576 |
<dt style="font-weight: bold; display: inline; color: #795548;">KeyBERT MiniLM:</dt>
|
| 577 |
<dd style="display: inline; margin-left: 5px;">Lightweight BERT model - faster processing with good results</dd>
|
| 578 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
| 579 |
<div style="margin-bottom: 8px;">
|
| 580 |
<dt style="font-weight: bold; display: inline; color: #FF5722;">RAKE-NLTK:</dt>
|
| 581 |
<dd style="display: inline; margin-left: 5px;">Classic keyword extraction algorithm - fast and reliable for phrase extraction</dd>
|
|
|
|
| 624 |
],
|
| 625 |
[
|
| 626 |
"In Jane Austen's 'Pride and Prejudice', Elizabeth Bennet first meets Mr. Darcy at the Meryton assembly. The novel, published in 1813, explores themes of marriage and social class in Regency England. Austen wrote to her sister Cassandra about the manuscript while staying at Chawton Cottage.",
|
| 627 |
+
"keybert_all-MiniLM-L6-v2",
|
| 628 |
10,
|
| 629 |
1,
|
| 630 |
3
|