Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -290,53 +290,43 @@ class ImageScraper:
|
|
| 290 |
]
|
| 291 |
|
| 292 |
def get_images(self, query: str, num_images: int = 15) -> Dict[str, List[Dict[str, str]]]:
|
| 293 |
-
"""Get images with AI-driven selection and ranking"""
|
| 294 |
try:
|
| 295 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 296 |
result = {
|
| 297 |
'primary': [],
|
| 298 |
'secondary': [],
|
| 299 |
'general': []
|
| 300 |
}
|
| 301 |
|
| 302 |
-
#
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
# Score and rank keywords based on relevance to query
|
| 307 |
-
keyword_scores = self.score_keywords(query, keywords)
|
| 308 |
-
ranked_keywords = sorted(keyword_scores.items(), key=lambda x: x[1], reverse=True)
|
| 309 |
-
|
| 310 |
-
# Fetch and analyze images for each keyword
|
| 311 |
-
all_images = []
|
| 312 |
-
for keyword, score in ranked_keywords:
|
| 313 |
-
images = self.get_images_for_keyword(keyword)
|
| 314 |
for img in images:
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
'url': url,
|
| 335 |
-
'keyword': 'technology',
|
| 336 |
-
'relevance': 'Fallback',
|
| 337 |
-
'tags': 'technology',
|
| 338 |
-
'relevance_score': 0.5
|
| 339 |
-
} for url in stock_images[:num_images]]
|
| 340 |
|
| 341 |
return result
|
| 342 |
|
|
@@ -344,6 +334,37 @@ class ImageScraper:
|
|
| 344 |
print(f"Error in get_images: {str(e)}")
|
| 345 |
return self.get_fallback_images(num_images)
|
| 346 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 347 |
def score_keywords(self, query: str, keywords: List[str]) -> Dict[str, float]:
|
| 348 |
"""Score keywords based on relevance to query"""
|
| 349 |
scores = {}
|
|
|
|
| 290 |
]
|
| 291 |
|
| 292 |
def get_images(self, query: str, num_images: int = 15) -> Dict[str, List[Dict[str, str]]]:
|
| 293 |
+
"""Get images with enhanced AI-driven selection and ranking"""
|
| 294 |
try:
|
| 295 |
+
# Extract key topics and their importance
|
| 296 |
+
topics = self.extract_key_topics(query)
|
| 297 |
+
topic_scores = {topic: score for score, topic in
|
| 298 |
+
zip(np.linspace(1.0, 0.6, len(topics)), topics)}
|
| 299 |
+
|
| 300 |
+
# Initialize categories
|
| 301 |
result = {
|
| 302 |
'primary': [],
|
| 303 |
'secondary': [],
|
| 304 |
'general': []
|
| 305 |
}
|
| 306 |
|
| 307 |
+
# Fetch and analyze images for each topic
|
| 308 |
+
for topic, base_score in topic_scores.items():
|
| 309 |
+
images = self.get_images_for_keyword(topic)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 310 |
for img in images:
|
| 311 |
+
# Enhanced relevance scoring
|
| 312 |
+
relevance_score = self.calculate_relevance_score(img, topic, base_score, query)
|
| 313 |
+
img['relevance_score'] = relevance_score
|
| 314 |
+
|
| 315 |
+
# Categorize based on relevance score
|
| 316 |
+
if relevance_score > 0.8:
|
| 317 |
+
result['primary'].append(img)
|
| 318 |
+
elif relevance_score > 0.6:
|
| 319 |
+
result['secondary'].append(img)
|
| 320 |
+
else:
|
| 321 |
+
result['general'].append(img)
|
| 322 |
+
|
| 323 |
+
# Sort each category by relevance score
|
| 324 |
+
for category in result:
|
| 325 |
+
result[category] = sorted(
|
| 326 |
+
result[category],
|
| 327 |
+
key=lambda x: x['relevance_score'],
|
| 328 |
+
reverse=True
|
| 329 |
+
)[:num_images // 3] # Limit images per category
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 330 |
|
| 331 |
return result
|
| 332 |
|
|
|
|
| 334 |
print(f"Error in get_images: {str(e)}")
|
| 335 |
return self.get_fallback_images(num_images)
|
| 336 |
|
| 337 |
+
def calculate_relevance_score(self, image: Dict[str, str], topic: str, base_score: float, query: str) -> float:
    """Calculate an enhanced relevance score for an image.

    The score starts at ``base_score`` and is increased by:

    * 0.1 for every image tag that is also a word of ``query``,
    * 0.2 when ``topic`` itself appears among the image tags,
    * a fixed per-term bonus for each context term found in the tags.

    Args:
        image: Image record; its ``'tags'`` entry is read as a
            comma-separated string. A missing or empty entry is
            treated as "no tags".
        topic: Topic the image was fetched for.
        base_score: Starting score for the topic.
        query: Original search query; split on whitespace into words.

    Returns:
        The accumulated score, capped at 1.0. No lower bound is applied.
    """
    # Tag strings may be separated by ", " rather than a bare ",".
    # Strip each tag so that "digital, security" yields "security" and
    # not " security", which would never match anything below.
    raw_tags = image.get('tags') or ''
    tags = {tag.strip() for tag in raw_tags.lower().split(',') if tag.strip()}
    query_words = set(query.lower().split())

    score = base_score

    # Direct matches with query
    score += len(tags & query_words) * 0.1

    # Topic relevance
    if topic.lower() in tags:
        score += 0.2

    # Context relevance: hard-coded bonus per term present in the tags
    relevant_terms = {
        'digital': 0.15,
        'security': 0.15,
        'technology': 0.1,
        'professional': 0.1,
        'modern': 0.05,
    }
    for term, weight in relevant_terms.items():
        if term in tags:
            score += weight

    # Cap at 1.0 so accumulated bonuses cannot exceed the top of the scale
    return min(score, 1.0)
|
| 367 |
+
|
| 368 |
def score_keywords(self, query: str, keywords: List[str]) -> Dict[str, float]:
|
| 369 |
"""Score keywords based on relevance to query"""
|
| 370 |
scores = {}
|