shaheerawan3 committed on
Commit
83700ae
·
verified ·
1 Parent(s): e1c4ef8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -39
app.py CHANGED
@@ -290,53 +290,43 @@ class ImageScraper:
290
  ]
291
 
292
  def get_images(self, query: str, num_images: int = 15) -> Dict[str, List[Dict[str, str]]]:
293
- """Get images with AI-driven selection and ranking"""
294
  try:
295
- # Initialize result structure
 
 
 
 
 
296
  result = {
297
  'primary': [],
298
  'secondary': [],
299
  'general': []
300
  }
301
 
302
- # Extract and analyze keywords using AI
303
- keywords = self.extract_key_topics(query)
304
- print(f"AI extracted keywords: {keywords}")
305
-
306
- # Score and rank keywords based on relevance to query
307
- keyword_scores = self.score_keywords(query, keywords)
308
- ranked_keywords = sorted(keyword_scores.items(), key=lambda x: x[1], reverse=True)
309
-
310
- # Fetch and analyze images for each keyword
311
- all_images = []
312
- for keyword, score in ranked_keywords:
313
- images = self.get_images_for_keyword(keyword)
314
  for img in images:
315
- img['relevance_score'] = score * self.analyze_image_relevance(img, query)
316
- all_images.append(img)
317
-
318
- # Sort images by relevance score
319
- sorted_images = sorted(all_images, key=lambda x: x['relevance_score'], reverse=True)
320
-
321
- # Distribute images across categories
322
- total_images = min(len(sorted_images), num_images)
323
- primary_count = total_images // 2
324
- secondary_count = total_images // 3
325
-
326
- result['primary'] = sorted_images[:primary_count]
327
- result['secondary'] = sorted_images[primary_count:primary_count + secondary_count]
328
- result['general'] = sorted_images[primary_count + secondary_count:total_images]
329
-
330
- # If no images found, use stock images
331
- if not any(result.values()):
332
- stock_images = self.get_stock_images()
333
- result['general'] = [{
334
- 'url': url,
335
- 'keyword': 'technology',
336
- 'relevance': 'Fallback',
337
- 'tags': 'technology',
338
- 'relevance_score': 0.5
339
- } for url in stock_images[:num_images]]
340
 
341
  return result
342
 
@@ -344,6 +334,37 @@ class ImageScraper:
344
  print(f"Error in get_images: {str(e)}")
345
  return self.get_fallback_images(num_images)
346
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
347
  def score_keywords(self, query: str, keywords: List[str]) -> Dict[str, float]:
348
  """Score keywords based on relevance to query"""
349
  scores = {}
 
290
  ]
291
 
292
  def get_images(self, query: str, num_images: int = 15) -> Dict[str, List[Dict[str, str]]]:
293
+ """Get images with enhanced AI-driven selection and ranking"""
294
  try:
295
+ # Extract key topics and their importance
296
+ topics = self.extract_key_topics(query)
297
+ topic_scores = {topic: score for score, topic in
298
+ zip(np.linspace(1.0, 0.6, len(topics)), topics)}
299
+
300
+ # Initialize categories
301
  result = {
302
  'primary': [],
303
  'secondary': [],
304
  'general': []
305
  }
306
 
307
+ # Fetch and analyze images for each topic
308
+ for topic, base_score in topic_scores.items():
309
+ images = self.get_images_for_keyword(topic)
 
 
 
 
 
 
 
 
 
310
  for img in images:
311
+ # Enhanced relevance scoring
312
+ relevance_score = self.calculate_relevance_score(img, topic, base_score, query)
313
+ img['relevance_score'] = relevance_score
314
+
315
+ # Categorize based on relevance score
316
+ if relevance_score > 0.8:
317
+ result['primary'].append(img)
318
+ elif relevance_score > 0.6:
319
+ result['secondary'].append(img)
320
+ else:
321
+ result['general'].append(img)
322
+
323
+ # Sort each category by relevance score
324
+ for category in result:
325
+ result[category] = sorted(
326
+ result[category],
327
+ key=lambda x: x['relevance_score'],
328
+ reverse=True
329
+ )[:num_images // 3] # Limit images per category
 
 
 
 
 
 
330
 
331
  return result
332
 
 
334
  print(f"Error in get_images: {str(e)}")
335
  return self.get_fallback_images(num_images)
336
 
337
def calculate_relevance_score(self, image: Dict[str, str], topic: str, base_score: float, query: str) -> float:
    """Calculate an enhanced relevance score for an image.

    The score starts at ``base_score`` and is increased by:

    * 0.1 for every image tag that is also a word of ``query``,
    * 0.2 when ``topic`` is itself one of the image tags,
    * a fixed weight for each context term ('digital', 'security',
      'technology', 'professional', 'modern') present in the tags.

    Args:
        image: Image record; its ``'tags'`` entry is read as a single
            comma-separated string. A missing or empty entry is treated
            as "no tags" instead of raising ``KeyError``.
        topic: Topic the image was fetched for.
        base_score: Starting score for the topic.
        query: Original user query; split on whitespace into words.

    Returns:
        The accumulated score, capped at 1.0. Only the upper bound is
        enforced; a ``base_score`` below zero is not raised to zero.
    """
    # Tags arrive as one comma-separated string. Strip every piece so that
    # "digital, security" yields "security" rather than " security", which
    # would otherwise never match a query word, the topic, or a context term.
    raw_tags = image.get('tags') or ''
    tags = {tag.strip() for tag in raw_tags.lower().split(',') if tag.strip()}
    query_words = set(query.lower().split())

    score = base_score

    # Direct matches with query
    score += len(tags & query_words) * 0.1

    # Topic relevance
    if topic.strip().lower() in tags:
        score += 0.2

    # Context relevance
    relevant_terms = {
        'digital': 0.15,
        'security': 0.15,
        'technology': 0.1,
        'professional': 0.1,
        'modern': 0.05,
    }
    for term, weight in relevant_terms.items():
        if term in tags:
            score += weight

    return min(score, 1.0)  # Cap at 1.0
367
+
368
  def score_keywords(self, query: str, keywords: List[str]) -> Dict[str, float]:
369
  """Score keywords based on relevance to query"""
370
  scores = {}