File size: 1,712 Bytes
c30b4ba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
from urllib.parse import quote
from typing import List
from .base_scraper import BaseScraper
import logging

logger = logging.getLogger(__name__)

class WikimediaScraper(BaseScraper):
    """Find food images through the Wikimedia Commons search API.

    The class uses ``self.session``, ``self.get_headers()`` and
    ``self.verify_image_url()``; none of them are defined here, so they are
    presumably provided by ``BaseScraper`` (confirm against that class).
    """

    _API_URL = "https://commons.wikimedia.org/w/api.php"
    _FILE_PATH_URL = "https://commons.wikimedia.org/wiki/Special:FilePath/"
    _FILE_PREFIX = "File:"

    async def search_images(self, recipe_name: str, num_images: int) -> List[str]:
        """Return up to ``num_images`` verified image URLs for a recipe.

        Searches the File namespace of Wikimedia Commons for
        ``"<recipe_name> food"``, turns each matching ``File:`` title into a
        ``Special:FilePath`` URL and keeps the ones accepted by
        ``self.verify_image_url``.

        Args:
            recipe_name: Plain-text recipe name (not URL-encoded).
            num_images: Maximum number of URLs to return; also sent as the
                search result limit.

        Returns:
            Verified image URLs in the order the search API returned them.
            An empty list when ``num_images`` is not positive, when the API
            answers with a non-200 status, or when any error occurs.
        """
        if num_images <= 0:
            # Nothing was asked for; avoid sending an invalid ``srlimit``.
            return []

        params = {
            "action": "query",
            "format": "json",
            "list": "search",
            # Pass the raw text: the HTTP client percent-encodes query
            # parameters itself, so quoting here would double-encode and
            # search for a literal "%20".
            "srsearch": f"{recipe_name} food",
            "srnamespace": "6",  # File namespace
            "srlimit": num_images,
        }

        try:
            async with self.session.get(
                self._API_URL, params=params, headers=await self.get_headers()
            ) as response:
                if response.status != 200:
                    logger.warning(
                        "Wikimedia search returned HTTP %s", response.status
                    )
                    return []
                data = await response.json()

            # The search response is released before the verification
            # requests start, so its connection is not held while waiting.
            # A dict is used as an ordered set: duplicates are dropped while
            # the relevance order of the search results is preserved.
            candidates = {}
            for item in data.get("query", {}).get("search", []):
                title = item.get("title", "")
                if title.startswith(self._FILE_PREFIX):
                    file_name = title[len(self._FILE_PREFIX):]
                    candidates[self._FILE_PATH_URL + quote(file_name)] = None

            valid_images: List[str] = []
            for img_url in candidates:
                if len(valid_images) >= num_images:
                    break
                if await self.verify_image_url(img_url):
                    valid_images.append(img_url)

            return valid_images
        except Exception as e:
            # Best-effort scraper boundary: report the failure and fall back
            # to "no images" instead of propagating to the caller.
            logger.error("Wikimedia scraping error: %s", e)
            return []