Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -119,77 +119,90 @@ class ImageScraper:
|
|
| 119 |
|
| 120 |
|
| 121 |
def extract_key_topics(self, script: str) -> List[str]:
|
| 122 |
-
"""Extract key topics from
|
| 123 |
try:
|
| 124 |
# Define relevant categories for VaultGenix
|
| 125 |
categories = {
|
| 126 |
-
'
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
'
|
| 131 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 132 |
}
|
| 133 |
|
| 134 |
# Process text
|
| 135 |
text = script.lower()
|
| 136 |
found_topics = set()
|
| 137 |
|
| 138 |
-
# Extract
|
| 139 |
-
words = text.split()
|
| 140 |
for category, terms in categories.items():
|
| 141 |
for term in terms:
|
| 142 |
if term in text:
|
|
|
|
| 143 |
found_topics.add(term)
|
| 144 |
-
|
|
|
|
|
|
|
| 145 |
|
| 146 |
-
# Extract
|
| 147 |
important_phrases = [
|
| 148 |
-
'digital legacy',
|
| 149 |
-
'
|
| 150 |
-
'digital
|
| 151 |
-
'
|
| 152 |
-
'
|
| 153 |
-
'digital
|
| 154 |
-
'
|
| 155 |
-
'
|
| 156 |
-
'
|
| 157 |
-
'
|
| 158 |
]
|
| 159 |
|
| 160 |
for phrase in important_phrases:
|
| 161 |
-
if phrase in text:
|
| 162 |
found_topics.add(phrase)
|
| 163 |
|
| 164 |
-
#
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
# Prioritize topics
|
| 178 |
-
priority_topics = [
|
| 179 |
-
topic for topic in found_topics
|
| 180 |
-
if any(key in topic for key in ['digital', 'security', 'legacy', 'AI'])
|
| 181 |
-
]
|
| 182 |
-
|
| 183 |
-
# Ensure we have enough topics
|
| 184 |
-
if len(priority_topics) < 3:
|
| 185 |
-
priority_topics.extend(['digital security', 'legacy management', 'data protection'][:3 - len(priority_topics)])
|
| 186 |
|
| 187 |
-
|
|
|
|
| 188 |
|
| 189 |
except Exception as e:
|
| 190 |
-
|
| 191 |
-
return [
|
| 192 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
def get_images_for_keyword(self, keyword: str) -> List[Dict[str, str]]:
|
| 194 |
"""Get images for a specific keyword with improved relevance"""
|
| 195 |
try:
|
|
|
|
| 119 |
|
| 120 |
|
| 121 |
def extract_key_topics(self, script: str) -> List[str]:
    """Extract key topics from script with improved VaultGenix-specific processing.

    Scans the script for domain vocabulary grouped into categories
    (legacy, security, technology, management, identity), derives
    compound topics from the matches, then ranks everything by
    VaultGenix relevance.

    Args:
        script: Free-form script/narration text to mine for topics.

    Returns:
        Up to 8 topic strings ordered by relevance. On any processing
        error, a fixed fallback list of core VaultGenix topics is
        returned instead (never raises to the caller).
    """
    try:
        # Define relevant categories for VaultGenix
        categories = {
            'legacy': [
                'digital legacy', 'legacy management', 'digital estate',
                'posthumous', 'inheritance', 'heir', 'custodian'
            ],
            'security': [
                'encryption', 'security', 'protection', 'privacy', 'AES-256',
                'data security', 'secure', 'authentication'
            ],
            'technology': [
                'AI', 'artificial intelligence', 'platform', 'digital',
                'automation', 'analytics'
            ],
            'management': [
                'asset management', 'directive', 'planning', 'preservation',
                'customization', 'optimization'
            ],
            'identity': [
                'digital identity', 'presence', 'account', 'profile',
                'digital footprint'
            ]
        }

        # Lower-case once so all matching below is case-insensitive.
        text = script.lower()
        found_topics = set()

        # Extract category-based matches.
        # NOTE(review): plain substring matching is deliberately fuzzy
        # (e.g. 'ai' also matches inside longer words); acceptable for
        # keyword harvesting, but worth confirming with the owner.
        for category, terms in categories.items():
            for term in terms:
                # BUG FIX: terms such as 'AI' and 'AES-256' contain
                # upper-case characters and could never match the
                # lower-cased text; compare both sides in lower case.
                if term.lower() in text:
                    # Add both the term and its category combination
                    found_topics.add(term)
                    if category in ['legacy', 'security', 'technology']:
                        found_topics.add(f"digital {term}")
                        found_topics.add(f"{category} management")

        # Extract key compound phrases
        important_phrases = [
            'digital legacy management',
            'AI-driven platform',
            'digital estate planning',
            'legacy preservation',
            'secure inheritance',
            'digital asset protection',
            'intelligent legacy system',
            'automated legacy management',
            'digital identity preservation',
            'secure legacy platform'
        ]

        for phrase in important_phrases:
            if phrase.lower() in text:
                found_topics.add(phrase)

        # Prioritize topics based on VaultGenix focus: digital-legacy
        # terms first, then security, then AI, then management terms,
        # and finally prefer longer compound phrases within each tier.
        priority_topics = sorted(
            found_topics,
            key=lambda x: (
                'digital legacy' in x,
                'security' in x or 'secure' in x,
                # BUG FIX: 'AI' in x.lower() is always False because
                # x.lower() contains no upper-case letters; test the
                # lower-cased token instead.
                'ai' in x.lower() or 'intelligence' in x.lower(),
                'management' in x,
                len(x.split())  # Prefer compound terms
            ),
            reverse=True
        )

        # Return top unique topics. sorted() output already has no
        # duplicates (input is a set), but dict.fromkeys de-duplicates
        # defensively while preserving order.
        return list(dict.fromkeys(priority_topics))[:8]

    except Exception as e:
        # Best-effort fallback: log and return core VaultGenix topics
        # so downstream image lookup always has something to work with.
        self.logger.error(f"Topic extraction error: {e}")
        return [
            'digital legacy management',
            'secure inheritance',
            'AI-driven platform',
            'digital asset protection',
            'legacy preservation'
        ]
|
| 206 |
def get_images_for_keyword(self, keyword: str) -> List[Dict[str, str]]:
|
| 207 |
"""Get images for a specific keyword with improved relevance"""
|
| 208 |
try:
|