File size: 7,288 Bytes
2ae3f7c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
import aiohttp
import asyncio
from bs4 import BeautifulSoup
import json
from typing import Dict, List, Optional
import re
from urllib.parse import quote_plus

class Scraper:
    """Async scraper that collects product listings from Indian e-commerce
    platforms (Amazon, Blinkit, Zepto, Swiggy Instamart) and extracts
    ingredient / nutrition metadata from product description text.

    Must be used as an async context manager so the shared aiohttp session
    is opened and closed exactly once::

        async with Scraper() as scraper:
            prices = await scraper.get_all_prices("peanut butter")
    """

    # Markers that introduce an ingredient list in description text.
    # Compiled once at class-definition time instead of on every call.
    # Fixes vs. the original: `composition?` had a stray `?` that made the
    # trailing "n" optional, and `[:|\s]` contained a literal pipe (inside a
    # character class `|` is not alternation).
    _INGREDIENT_MARKERS = [
        re.compile(r"ingredients?[:\s]+(.*?)(?=\.|$)", re.IGNORECASE),
        re.compile(r"contains?[:\s]+(.*?)(?=\.|$)", re.IGNORECASE),
        re.compile(r"composition[:\s]+(.*?)(?=\.|$)", re.IGNORECASE),
    ]

    # Separators between individual ingredients: comma, semicolon, " and ".
    _INGREDIENT_SEPARATOR = re.compile(r"[,;]|\sand\s")

    # Per-nutrient extraction patterns; group(1) is the numeric amount
    # (kcal for calories, grams for the rest).
    _NUTRITION_PATTERNS = {
        'calories': re.compile(r'(\d+)\s*(?:kcal|calories)', re.IGNORECASE),
        'protein': re.compile(r'protein\s*(?:\w+\s+)?(\d+(?:\.\d+)?)\s*g', re.IGNORECASE),
        'carbohydrates': re.compile(r'carbohydrates?\s*(?:\w+\s+)?(\d+(?:\.\d+)?)\s*g', re.IGNORECASE),
        'fat': re.compile(r'fat\s*(?:\w+\s+)?(\d+(?:\.\d+)?)\s*g', re.IGNORECASE),
        'sugar': re.compile(r'sugar\s*(?:\w+\s+)?(\d+(?:\.\d+)?)\s*g', re.IGNORECASE),
        'fiber': re.compile(r'fiber\s*(?:\w+\s+)?(\d+(?:\.\d+)?)\s*g', re.IGNORECASE),
    }

    def __init__(self):
        # Desktop-browser UA: some of these endpoints reject the default
        # aiohttp user agent.
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        # Created lazily in __aenter__; None until then.
        self.session = None

    async def __aenter__(self):
        """Open the shared HTTP session."""
        self.session = aiohttp.ClientSession(headers=self.headers)
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Close the shared HTTP session, if one was opened."""
        if self.session:
            await self.session.close()
            self.session = None  # don't leave a closed session behind

    def _require_session(self) -> "aiohttp.ClientSession":
        """Return the open session, failing fast with a clear error.

        Raises:
            RuntimeError: if the scraper was not entered via ``async with``
                (the original code crashed with an opaque AttributeError).
        """
        if self.session is None:
            raise RuntimeError("Scraper must be used as an async context manager")
        return self.session

    async def _fetch_json(self, url: str) -> Optional[Dict]:
        """GET *url* and return the parsed JSON body, or None on failure.

        None is returned for non-200 responses and for bodies that are not
        valid JSON (these endpoints occasionally serve HTML error pages), so
        callers can treat any failure as "no results".
        """
        session = self._require_session()
        async with session.get(url) as response:
            if response.status != 200:
                return None
            try:
                return await response.json()
            except Exception:
                # Best-effort: malformed body / wrong content type.
                return None

    async def search_amazon(self, query: str) -> List[Dict]:
        """Search Amazon India for products.

        Args:
            query: free-text search term.

        Returns:
            Up to 5 dicts with keys ``platform``, ``title``, ``price``
            (float, INR) and ``url``; [] on a non-200 response.
        """
        url = f"https://www.amazon.in/s?k={quote_plus(query)}"
        session = self._require_session()
        async with session.get(url) as response:
            if response.status != 200:
                return []
            html = await response.text()
            soup = BeautifulSoup(html, 'html.parser')
            products = []
            for item in soup.select('.s-result-item[data-asin]'):
                title = item.select_one('.a-text-normal')
                price = item.select_one('.a-price-whole')
                # NOTE: named `link` — the original shadowed `url` here.
                link = item.select_one('a.a-link-normal')
                if not (title and price and link):
                    continue
                try:
                    products.append({
                        'platform': 'amazon',
                        'title': title.text.strip(),
                        'price': float(price.text.replace(',', '')),
                        'url': 'https://www.amazon.in' + link['href'],
                    })
                except (ValueError, KeyError):
                    # Non-numeric price text or anchor without an href;
                    # skip the malformed result card.
                    continue
            return products[:5]  # Return top 5 results

    async def search_blinkit(self, query: str) -> List[Dict]:
        """Search Blinkit for products (same result shape as search_amazon)."""
        data = await self._fetch_json(f"https://blinkit.com/v2/search?q={quote_plus(query)}")
        if not data:
            return []
        try:
            return [
                {
                    'platform': 'blinkit',
                    'title': item.get('name', ''),
                    'price': float(item.get('price', 0)),
                    'url': f"https://blinkit.com/products/{item.get('slug', '')}",
                }
                for item in data.get('products', [])[:5]
            ]
        except (TypeError, ValueError, AttributeError):
            # Unexpected payload shape or non-numeric price.
            return []

    async def search_zepto(self, query: str) -> List[Dict]:
        """Search Zepto for products (same result shape as search_amazon)."""
        data = await self._fetch_json(f"https://www.zeptonow.com/api/search?q={quote_plus(query)}")
        if not data:
            return []
        try:
            return [
                {
                    'platform': 'zepto',
                    'title': item.get('name', ''),
                    # Zepto exposes the list price as "mrp".
                    'price': float(item.get('mrp', 0)),
                    'url': f"https://www.zeptonow.com/product/{item.get('slug', '')}",
                }
                for item in data.get('products', [])[:5]
            ]
        except (TypeError, ValueError, AttributeError):
            return []

    async def search_swiggy_instamart(self, query: str) -> List[Dict]:
        """Search Swiggy Instamart for products (same result shape as search_amazon)."""
        data = await self._fetch_json(f"https://www.swiggy.com/api/instamart/search?q={quote_plus(query)}")
        if not data:
            return []
        try:
            return [
                {
                    'platform': 'swiggy_instamart',
                    'title': item.get('name', ''),
                    'price': float(item.get('price', 0)),
                    'url': f"https://www.swiggy.com/instamart/product/{item.get('id', '')}",
                }
                # Swiggy nests the product list one level deeper.
                for item in data.get('data', {}).get('products', [])[:5]
            ]
        except (TypeError, ValueError, AttributeError):
            return []

    def extract_ingredients(self, text: str) -> List[str]:
        """Extract an ingredient list from free-form description text.

        Looks for an "ingredients:", "contains:" or "composition:" marker
        and splits what follows (up to the first period or end of text) on
        commas, semicolons and the word "and".

        Returns:
            Stripped, non-empty ingredient strings; [] if no marker matches.
        """
        for marker in self._INGREDIENT_MARKERS:
            match = marker.search(text)
            if match:
                parts = self._INGREDIENT_SEPARATOR.split(match.group(1))
                return [part.strip() for part in parts if part.strip()]
        return []

    def extract_nutrition_info(self, text: str) -> Dict:
        """Extract nutrient amounts from product description text.

        Returns:
            Dict mapping any of 'calories', 'protein', 'carbohydrates',
            'fat', 'sugar', 'fiber' to a float amount — only nutrients
            actually found in *text* are present.
        """
        nutrition_info = {}
        for nutrient, pattern in self._NUTRITION_PATTERNS.items():
            match = pattern.search(text)
            if match:
                try:
                    nutrition_info[nutrient] = float(match.group(1))
                except ValueError:
                    continue
        return nutrition_info

    async def get_all_prices(self, query: str) -> List[Dict]:
        """Query all supported platforms concurrently and merge the results.

        Per-platform failures are isolated (``return_exceptions=True``) so
        one broken site cannot sink the others; failed platforms simply
        contribute no entries.
        """
        results = await asyncio.gather(
            self.search_amazon(query),
            self.search_blinkit(query),
            self.search_zepto(query),
            self.search_swiggy_instamart(query),
            return_exceptions=True,
        )
        return [
            product
            for result in results
            if isinstance(result, list)
            for product in result
        ]