shaheerawan3 committed on
Commit
dc4fbff
·
verified ·
1 Parent(s): 1315880

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -48
app.py CHANGED
@@ -119,77 +119,90 @@ class ImageScraper:
119
 
120
 
121
  def extract_key_topics(self, script: str) -> List[str]:
122
- """Extract key topics from a long text prompt with improved accuracy"""
123
  try:
124
  # Define relevant categories for VaultGenix
125
  categories = {
126
- 'security': ['security', 'encryption', 'protection', 'privacy', 'safe', 'secure'],
127
- 'digital': ['digital', 'online', 'virtual', 'cyber', 'electronic'],
128
- 'legacy': ['legacy', 'inheritance', 'heir', 'posthumous', 'estate'],
129
- 'management': ['management', 'planning', 'organization', 'control', 'administration'],
130
- 'technology': ['AI', 'artificial intelligence', 'technology', 'platform', 'system'],
131
- 'family': ['family', 'heir', 'custodian', 'relative', 'loved ones']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
  }
133
 
134
  # Process text
135
  text = script.lower()
136
  found_topics = set()
137
 
138
- # Extract single-word matches
139
- words = text.split()
140
  for category, terms in categories.items():
141
  for term in terms:
142
  if term in text:
 
143
  found_topics.add(term)
144
- found_topics.add(category)
 
 
145
 
146
- # Extract meaningful phrases
147
  important_phrases = [
148
- 'digital legacy',
149
- 'legacy management',
150
- 'digital security',
151
- 'data protection',
152
- 'artificial intelligence',
153
- 'digital estate',
154
- 'digital identity',
155
- 'secure platform',
156
- 'family protection',
157
- 'digital inheritance'
158
  ]
159
 
160
  for phrase in important_phrases:
161
- if phrase in text:
162
  found_topics.add(phrase)
163
 
164
- # Combine related topics
165
- combined_topics = []
166
- for topic in found_topics:
167
- # Create meaningful combinations
168
- if topic in ['digital', 'secure', 'smart', 'AI']:
169
- related = ['legacy', 'security', 'protection', 'management']
170
- for rel in related:
171
- if rel in found_topics:
172
- combined_topics.append(f"{topic} {rel}")
173
-
174
- # Add combined topics to results
175
- found_topics.update(combined_topics)
176
-
177
- # Prioritize topics
178
- priority_topics = [
179
- topic for topic in found_topics
180
- if any(key in topic for key in ['digital', 'security', 'legacy', 'AI'])
181
- ]
182
-
183
- # Ensure we have enough topics
184
- if len(priority_topics) < 3:
185
- priority_topics.extend(['digital security', 'legacy management', 'data protection'][:3 - len(priority_topics)])
186
 
187
- return list(set(priority_topics))[:5] # Return top 5 unique topics
 
188
 
189
  except Exception as e:
190
- print(f"Topic extraction error: {e}")
191
- return ['digital security', 'legacy management', 'data protection']
192
-
 
 
 
 
 
 
193
  def get_images_for_keyword(self, keyword: str) -> List[Dict[str, str]]:
194
  """Get images for a specific keyword with improved relevance"""
195
  try:
 
119
 
120
 
121
  def extract_key_topics(self, script: str) -> List[str]:
122
+ """Extract key topics from script with improved VaultGenix-specific processing"""
123
  try:
124
  # Define relevant categories for VaultGenix
125
  categories = {
126
+ 'legacy': [
127
+ 'digital legacy', 'legacy management', 'digital estate',
128
+ 'posthumous', 'inheritance', 'heir', 'custodian'
129
+ ],
130
+ 'security': [
131
+ 'encryption', 'security', 'protection', 'privacy', 'AES-256',
132
+ 'data security', 'secure', 'authentication'
133
+ ],
134
+ 'technology': [
135
+ 'AI', 'artificial intelligence', 'platform', 'digital',
136
+ 'automation', 'analytics'
137
+ ],
138
+ 'management': [
139
+ 'asset management', 'directive', 'planning', 'preservation',
140
+ 'customization', 'optimization'
141
+ ],
142
+ 'identity': [
143
+ 'digital identity', 'presence', 'account', 'profile',
144
+ 'digital footprint'
145
+ ]
146
  }
147
 
148
  # Process text
149
  text = script.lower()
150
  found_topics = set()
151
 
152
+ # Extract category-based matches
 
153
  for category, terms in categories.items():
154
  for term in terms:
155
  if term in text:
156
+ # Add both the term and its category combination
157
  found_topics.add(term)
158
+ if category in ['legacy', 'security', 'technology']:
159
+ found_topics.add(f"digital {term}")
160
+ found_topics.add(f"{category} management")
161
 
162
+ # Extract key compound phrases
163
  important_phrases = [
164
+ 'digital legacy management',
165
+ 'AI-driven platform',
166
+ 'digital estate planning',
167
+ 'legacy preservation',
168
+ 'secure inheritance',
169
+ 'digital asset protection',
170
+ 'intelligent legacy system',
171
+ 'automated legacy management',
172
+ 'digital identity preservation',
173
+ 'secure legacy platform'
174
  ]
175
 
176
  for phrase in important_phrases:
177
+ if phrase.lower() in text:
178
  found_topics.add(phrase)
179
 
180
+ # Prioritize topics based on VaultGenix focus
181
+ priority_topics = sorted(
182
+ found_topics,
183
+ key=lambda x: (
184
+ 'digital legacy' in x,
185
+ 'security' in x or 'secure' in x,
186
+ 'AI' in x.lower() or 'intelligence' in x.lower(),
187
+ 'management' in x,
188
+ len(x.split()) # Prefer compound terms
189
+ ),
190
+ reverse=True
191
+ )
 
 
 
 
 
 
 
 
 
 
192
 
193
+ # Return top unique topics
194
+ return list(dict.fromkeys(priority_topics))[:8]
195
 
196
  except Exception as e:
197
+ self.logger.error(f"Topic extraction error: {e}")
198
+ return [
199
+ 'digital legacy management',
200
+ 'secure inheritance',
201
+ 'AI-driven platform',
202
+ 'digital asset protection',
203
+ 'legacy preservation'
204
+ ]
205
+
206
  def get_images_for_keyword(self, keyword: str) -> List[Dict[str, str]]:
207
  """Get images for a specific keyword with improved relevance"""
208
  try: