Franko Fišter commited on
Commit
028bcd8
·
1 Parent(s): 56cdc39

Working script that fetches products by brand

Browse files
Files changed (4) hide show
  1. api/cijene_routes.py +426 -0
  2. api/main.py +2 -0
  3. db/cijene_repository.py +169 -0
  4. requirements.txt +3 -1
api/cijene_routes.py ADDED
@@ -0,0 +1,426 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, HTTPException
2
+ from fastapi.responses import JSONResponse
3
+ import httpx
4
+ import asyncio
5
+ import logging
6
+ import sys
7
+ from typing import Dict, List
8
+ from datetime import datetime
9
+ from db.cijene_repository import CijeneRepository
10
+
11
+ # Configure logger with proper UTF-8 encoding
12
+ class UTFFormatter(logging.Formatter):
13
+ def format(self, record):
14
+ # Ensure the message is properly encoded
15
+ if hasattr(record, 'msg') and isinstance(record.msg, str):
16
+ try:
17
+ record.msg = record.msg.encode('utf-8', errors='replace').decode('utf-8')
18
+ except:
19
+ record.msg = record.msg.encode('ascii', errors='replace').decode('ascii')
20
+ return super().format(record)
21
+
22
+ # Configure logging with UTF-8 support
23
+ logging.basicConfig(
24
+ level=logging.INFO,
25
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
26
+ handlers=[
27
+ logging.StreamHandler(sys.stdout), # Use stdout instead of stderr
28
+ logging.FileHandler('cijene_processing.log', encoding='utf-8') # Specify UTF-8 encoding
29
+ ]
30
+ )
31
+
32
+ # Apply custom formatter to handle Croatian characters
33
+ for handler in logging.getLogger().handlers:
34
+ handler.setFormatter(UTFFormatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
35
+
36
+ logger = logging.getLogger(__name__)
37
+
38
+ router = APIRouter(prefix="/cijene", tags=["Cijene API"])
39
+
40
+ @router.post("/populate-database")
41
+ async def populate_database_from_cijene():
42
+ """
43
+ Populate database with products and prices from Cijene API
44
+ Goes through all brands and fetches their products
45
+ """
46
+ logger.info("=== Starting database population from Cijene API ===")
47
+ repository = CijeneRepository()
48
+
49
+ try:
50
+ # Get all brands and store chains
51
+ logger.info("Initializing data fetching...")
52
+ brands = repository.get_all_brands()
53
+ store_chains = repository.get_all_store_chains()
54
+
55
+ if not brands:
56
+ logger.error("No brands found in database")
57
+ raise HTTPException(status_code=404, detail="No brands found in database")
58
+
59
+ logger.info(f"Found {len(brands)} brands and {len(store_chains)} store chains")
60
+
61
+ results = {
62
+ "total_brands_processed": 0,
63
+ "total_products_found": 0,
64
+ "total_products_inserted": 0,
65
+ "total_products_updated": 0,
66
+ "total_store_products_created": 0,
67
+ "total_price_records_inserted": 0,
68
+ "errors": []
69
+ }
70
+
71
+ # API configuration
72
+ base_url = "https://api.cijene.dev/v1/products/"
73
+ headers = {
74
+ "Authorization": "Bearer aibeeboh2cuataChi2doMa2Aacah0eli",
75
+ "Content-Type": "application/json"
76
+ }
77
+
78
+ logger.info("Starting brand processing...")
79
+
80
+ async with httpx.AsyncClient(timeout=30.0) as client:
81
+ for index, brand in enumerate(brands, 1):
82
+ brand_name = brand['brand_name']
83
+ brand_id = brand['brand_id']
84
+
85
+ safe_brand_name = repository.safe_log_string(brand_name)
86
+ logger.info(f"Processing brand {index}/{len(brands)}: '{safe_brand_name}' (ID: {brand_id})")
87
+
88
+ try:
89
+ # Make API request for this brand
90
+ url = f"{base_url}?q={brand_name}"
91
+ logger.info(f"Making API request to: {url}")
92
+
93
+ response = await client.get(url, headers=headers)
94
+
95
+ if response.status_code != 200:
96
+ error_msg = f"API error for brand {safe_brand_name}: HTTP {response.status_code}"
97
+ logger.error(error_msg)
98
+ results["errors"].append(error_msg)
99
+ continue
100
+
101
+ logger.info(f"API request successful for brand '{safe_brand_name}'")
102
+ data = response.json()
103
+ products = data.get("products", [])
104
+
105
+ results["total_brands_processed"] += 1
106
+ results["total_products_found"] += len(products)
107
+
108
+ logger.info(f"Found {len(products)} products for brand '{safe_brand_name}'")
109
+
110
+ # Process each product
111
+ for product_index, product in enumerate(products, 1):
112
+ try:
113
+ safe_product_name = repository.safe_log_string(product.get('name', 'unknown'))
114
+ logger.debug(f"Processing product {product_index}/{len(products)} for brand '{safe_brand_name}': {safe_product_name}")
115
+ await process_product(product, brand_id, store_chains, repository, results)
116
+ except Exception as e:
117
+ error_msg = f"Error processing product {product.get('ean', 'unknown')} for brand '{safe_brand_name}': {str(e)}"
118
+ logger.error(error_msg)
119
+ results["errors"].append(error_msg)
120
+ continue
121
+
122
+ logger.info(f"Completed processing brand '{safe_brand_name}' - Products: {len(products)}")
123
+
124
+ # Add small delay to avoid rate limiting
125
+ await asyncio.sleep(0.1)
126
+
127
+ # Log progress every 10 brands
128
+ if index % 10 == 0:
129
+ logger.info(f"Progress update: {index}/{len(brands)} brands processed")
130
+ logger.info(f"Current stats - Products inserted: {results['total_products_inserted']}, Updated: {results['total_products_updated']}, Store products: {results['total_store_products_created']}, Price records: {results['total_price_records_inserted']}")
131
+
132
+ except Exception as e:
133
+ error_msg = f"Error processing brand {safe_brand_name}: {str(e)}"
134
+ logger.error(error_msg)
135
+ results["errors"].append(error_msg)
136
+ continue
137
+
138
+ logger.info("=== Database population completed ===")
139
+ logger.info(f"Final results: {results}")
140
+
141
+ return JSONResponse(content={
142
+ "message": "Database population completed",
143
+ "results": results
144
+ })
145
+
146
+ except Exception as e:
147
+ logger.error(f"Critical error during database population: {str(e)}")
148
+ raise HTTPException(status_code=500, detail=f"Failed to populate database: {str(e)}")
149
+
150
+ async def process_product(product: Dict, brand_id: str, store_chains: List[Dict],
151
+ repository: CijeneRepository, results: Dict):
152
+ """Process a single product and its price data"""
153
+
154
+ ean = product.get("ean", "")
155
+ name = product.get("name", "")
156
+ quantity = product.get("quantity", "")
157
+ unit = product.get("unit", "")
158
+ chains = product.get("chains", [])
159
+
160
+ if not ean:
161
+ safe_name = repository.safe_log_string(name)
162
+ logger.warning(f"Skipping product without EAN: {safe_name}")
163
+ return
164
+
165
+ safe_name = repository.safe_log_string(name)
166
+ logger.debug(f"Processing product: {safe_name} (EAN: {ean})")
167
+
168
+ # Check if product already exists (by EAN or product_name+brand_id)
169
+ existing_product_id = repository.check_product_exists(ean, name, brand_id)
170
+
171
+ if existing_product_id:
172
+ product_id = existing_product_id
173
+ results["total_products_updated"] += 1
174
+ logger.debug(f"Product exists, using existing ID: {product_id}")
175
+ else:
176
+ # Create new product
177
+ weight = repository.combine_quantity_unit(quantity, unit)
178
+
179
+ product_data = {
180
+ "product_ean": ean,
181
+ "product_name": name,
182
+ "product_weight": weight,
183
+ "product_quantity": 1, # Add default quantity
184
+ "brand_id": brand_id
185
+ }
186
+
187
+ product_id = repository.insert_product(product_data)
188
+ results["total_products_inserted"] += 1
189
+ logger.debug(f"Created new product with ID: {product_id}")
190
+
191
+ # Process price data for each chain
192
+ today = datetime.now().date().isoformat()
193
+ logger.debug(f"Processing {len(chains)} price entries for product {safe_name}")
194
+
195
+ price_records_for_product = 0
196
+
197
+ for chain_data in chains:
198
+ chain_name = chain_data.get("chain", "")
199
+ min_price = chain_data.get("min_price")
200
+ max_price = chain_data.get("max_price")
201
+ avg_price = chain_data.get("avg_price")
202
+
203
+ if not chain_name:
204
+ logger.debug("Skipping chain entry without name")
205
+ continue
206
+
207
+ # Find matching store chain
208
+ store_chain_id = repository.find_matching_store_chain(chain_name, store_chains)
209
+
210
+ if not store_chain_id:
211
+ safe_chain_name = repository.safe_log_string(chain_name)
212
+ logger.debug(f"No matching store chain found for '{safe_chain_name}', skipping price entry")
213
+ continue
214
+
215
+ # Get or create store_product mapping
216
+ try:
217
+ store_product_id = repository.get_or_create_store_product(product_id, store_chain_id)
218
+ results["total_store_products_created"] += 1
219
+ except Exception as e:
220
+ logger.error(f"Failed to get/create store_product mapping: {str(e)}")
221
+ continue
222
+
223
+ # Validate price data
224
+ try:
225
+ min_price_float = float(min_price) if min_price else None
226
+ max_price_float = float(max_price) if max_price else None
227
+ avg_price_float = float(avg_price) if avg_price else None
228
+
229
+ if not any([min_price_float, max_price_float, avg_price_float]):
230
+ safe_chain_name = repository.safe_log_string(chain_name)
231
+ logger.debug(f"No valid price data for chain '{safe_chain_name}', skipping")
232
+ continue
233
+
234
+ except (ValueError, TypeError) as e:
235
+ safe_chain_name = repository.safe_log_string(chain_name)
236
+ logger.warning(f"Invalid price data for chain '{safe_chain_name}': {e}")
237
+ continue
238
+
239
+ # Insert price history - FIXED: use store_product_id and correct column name
240
+ price_data = {
241
+ "store_product_id": store_product_id, # CHANGED from product_id
242
+ "price_date": today,
243
+ "min_price": min_price_float,
244
+ "max_price": max_price_float,
245
+ "avg_price": avg_price_float
246
+ }
247
+
248
+ repository.insert_price_history(price_data)
249
+ results["total_price_records_inserted"] += 1
250
+ price_records_for_product += 1
251
+
252
+ logger.debug(f"Completed processing product {safe_name}: {price_records_for_product} price records inserted")
253
+
254
+ @router.post("/test-brand/{brand_name}")
255
+ async def test_single_brand(brand_name: str):
256
+ """
257
+ Test endpoint that processes a single brand the same way as populate-database
258
+ Fetches products and inserts them into the database
259
+ """
260
+ safe_brand_name = CijeneRepository().safe_log_string(brand_name)
261
+ logger.info(f"=== Starting single brand processing for: '{safe_brand_name}' ===")
262
+ repository = CijeneRepository()
263
+
264
+ try:
265
+ # Get all store chains
266
+ logger.info("Fetching store chains for processing...")
267
+ store_chains = repository.get_all_store_chains()
268
+
269
+ # Find the specific brand in database
270
+ logger.info(f"Looking up brand '{safe_brand_name}' in database...")
271
+ brands = repository.get_all_brands()
272
+ target_brand = None
273
+
274
+ for brand in brands:
275
+ if brand['brand_name'].lower() == brand_name.lower():
276
+ target_brand = brand
277
+ break
278
+
279
+ if not target_brand:
280
+ logger.error(f"Brand '{safe_brand_name}' not found in database")
281
+ raise HTTPException(status_code=404, detail=f"Brand '{brand_name}' not found in database")
282
+
283
+ brand_id = target_brand['brand_id']
284
+ logger.info(f"Found brand '{safe_brand_name}' with ID: {brand_id}")
285
+
286
+ results = {
287
+ "brand_name": brand_name,
288
+ "brand_id": brand_id,
289
+ "total_products_found": 0,
290
+ "total_products_inserted": 0,
291
+ "total_products_updated": 0,
292
+ "total_store_products_created": 0,
293
+ "total_price_records_inserted": 0,
294
+ "store_chains_matched": [],
295
+ "store_chains_not_matched": [],
296
+ "errors": []
297
+ }
298
+
299
+ # API configuration
300
+ base_url = "https://api.cijene.dev/v1/products/"
301
+ headers = {
302
+ "Authorization": "Bearer aibeeboh2cuataChi2doMa2Aacah0eli",
303
+ "Content-Type": "application/json"
304
+ }
305
+
306
+ # Make API request for this brand
307
+ url = f"{base_url}?q={brand_name}"
308
+ logger.info(f"Making API request to: {url}")
309
+
310
+ async with httpx.AsyncClient(timeout=30.0) as client:
311
+ response = await client.get(url, headers=headers)
312
+
313
+ if response.status_code != 200:
314
+ error_msg = f"API error for brand {safe_brand_name}: HTTP {response.status_code}"
315
+ logger.error(error_msg)
316
+ raise HTTPException(status_code=response.status_code, detail=error_msg)
317
+
318
+ logger.info(f"API request successful for brand '{safe_brand_name}'")
319
+ data = response.json()
320
+ products = data.get("products", [])
321
+
322
+ results["total_products_found"] = len(products)
323
+ logger.info(f"Found {len(products)} products for brand '{safe_brand_name}'")
324
+
325
+ # Process each product
326
+ for product_index, product in enumerate(products, 1):
327
+ try:
328
+ safe_product_name = repository.safe_log_string(product.get('name', 'unknown'))
329
+ logger.info(f"Processing product {product_index}/{len(products)}: {safe_product_name}")
330
+
331
+ # Process product using the same function as populate-database
332
+ await process_product(product, brand_id, store_chains, repository, results)
333
+
334
+ except Exception as e:
335
+ error_msg = f"Error processing product {product.get('ean', 'unknown')}: {str(e)}"
336
+ logger.error(error_msg)
337
+ results["errors"].append(error_msg)
338
+ continue
339
+
340
+ # Determine which store chains were matched vs not matched
341
+ api_chain_names = set()
342
+ for product in products:
343
+ for chain_data in product.get("chains", []):
344
+ if chain_data.get("chain"):
345
+ api_chain_names.add(chain_data.get("chain"))
346
+
347
+ db_chain_names = {repository.normalize_string(sc['store_chain_name']): sc['store_chain_name']
348
+ for sc in store_chains}
349
+
350
+ for api_chain in api_chain_names:
351
+ normalized_api_chain = repository.normalize_string(api_chain)
352
+ if normalized_api_chain in db_chain_names:
353
+ if api_chain not in [item['api_name'] for item in results["store_chains_matched"]]:
354
+ results["store_chains_matched"].append({
355
+ "api_name": api_chain,
356
+ "db_name": db_chain_names[normalized_api_chain]
357
+ })
358
+ else:
359
+ if api_chain not in results["store_chains_not_matched"]:
360
+ results["store_chains_not_matched"].append(api_chain)
361
+
362
+ logger.info(f"=== Completed processing brand '{safe_brand_name}' ===")
363
+ # Use safe logging for results
364
+ safe_results = {
365
+ **results,
366
+ "brand_name": repository.safe_log_string(results["brand_name"])
367
+ }
368
+ logger.info(f"Results: {safe_results}")
369
+
370
+ return JSONResponse(content={
371
+ "message": f"Successfully processed brand '{brand_name}'",
372
+ "results": results
373
+ })
374
+
375
+ except HTTPException:
376
+ raise
377
+ except Exception as e:
378
+ logger.error(f"Critical error processing brand '{safe_brand_name}': {str(e)}")
379
+ raise HTTPException(status_code=500, detail=f"Failed to process brand: {str(e)}")
380
+
381
+ # Keep the original test function as a separate endpoint for just fetching data
382
+ @router.get("/fetch-brand-data/{brand_name}")
383
+ async def fetch_brand_data_only(brand_name: str):
384
+ """Fetch raw API data for a single brand without processing"""
385
+ safe_brand_name = CijeneRepository().safe_log_string(brand_name)
386
+ logger.info(f"Fetching raw data for brand: {safe_brand_name}")
387
+
388
+ try:
389
+ url = f"https://api.cijene.dev/v1/products/?q={brand_name}"
390
+ headers = {
391
+ "Authorization": "Bearer aibeeboh2cuataChi2doMa2Aacah0eli",
392
+ "Content-Type": "application/json"
393
+ }
394
+
395
+ logger.info(f"Making API request to: {url}")
396
+
397
+ async with httpx.AsyncClient(timeout=30.0) as client:
398
+ response = await client.get(url, headers=headers)
399
+
400
+ if response.status_code != 200:
401
+ logger.error(f"API request failed with status: {response.status_code}")
402
+ raise HTTPException(status_code=response.status_code, detail="API request failed")
403
+
404
+ data = response.json()
405
+ products_count = len(data.get("products", []))
406
+ logger.info(f"Successfully fetched {products_count} products for brand '{safe_brand_name}'")
407
+
408
+ return data
409
+
410
+ except Exception as e:
411
+ logger.error(f"Error fetching data for brand '{safe_brand_name}': {str(e)}")
412
+ raise HTTPException(status_code=500, detail=f"Error fetching data: {str(e)}")
413
+
414
+ @router.get("/processing-logs")
415
+ async def get_processing_logs():
416
+ """Get recent processing logs"""
417
+ try:
418
+ # Read last 100 lines from log file
419
+ with open('cijene_processing.log', 'r', encoding='utf-8') as f:
420
+ lines = f.readlines()
421
+ return {"logs": lines[-100:]} # Return last 100 lines
422
+ except FileNotFoundError:
423
+ return {"logs": ["No log file found"]}
424
+ except Exception as e:
425
+ logger.error(f"Error reading logs: {str(e)}")
426
+ raise HTTPException(status_code=500, detail="Error reading logs")
api/main.py CHANGED
@@ -4,6 +4,7 @@ from config.settings import API_HOST, API_PORT
4
  from api.product_routes import router as product_router
5
  from api.receipt_routes import router as receipt_router
6
  from api.scrape_routes import router as scrape_router
 
7
 
8
  # Initialize FastAPI
9
  app = FastAPI(title="SupaKuna API")
@@ -22,6 +23,7 @@ app.add_middleware(
22
  app.include_router(product_router)
23
  app.include_router(receipt_router)
24
  app.include_router(scrape_router)
 
25
 
26
  @app.get("/", tags=["Health"])
27
  def health_check():
 
4
  from api.product_routes import router as product_router
5
  from api.receipt_routes import router as receipt_router
6
  from api.scrape_routes import router as scrape_router
7
+ from api.cijene_routes import router as cijene_router
8
 
9
  # Initialize FastAPI
10
  app = FastAPI(title="SupaKuna API")
 
23
  app.include_router(product_router)
24
  app.include_router(receipt_router)
25
  app.include_router(scrape_router)
26
+ app.include_router(cijene_router)
27
 
28
  @app.get("/", tags=["Health"])
29
  def health_check():
db/cijene_repository.py ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import datetime
2
+ from typing import List, Dict, Optional
3
+ import re
4
+ import unidecode
5
+ import logging
6
+ from db.supabase_client import SupabaseClient
7
+
8
+ # Configure logger
9
+ logger = logging.getLogger(__name__)
10
+
11
+ class CijeneRepository:
12
+ def __init__(self):
13
+ self.supabase = SupabaseClient().get_client()
14
+
15
+ def get_all_brands(self) -> List[Dict]:
16
+ """Get all brands from the database"""
17
+ try:
18
+ logger.info("Fetching all brands from database...")
19
+ response = self.supabase.table("brands").select("brand_id, brand_name").execute()
20
+ logger.info(f"Successfully fetched {len(response.data)} brands")
21
+ return response.data
22
+ except Exception as e:
23
+ logger.error(f"Error fetching brands: {str(e)}")
24
+ raise Exception(f"Error fetching brands: {str(e)}")
25
+
26
+ def get_all_store_chains(self) -> List[Dict]:
27
+ """Get all store chains from the database"""
28
+ try:
29
+ logger.info("Fetching all store chains from database...")
30
+ response = self.supabase.table("store_chains").select("store_chain_id, store_chain_name").execute()
31
+ logger.info(f"Successfully fetched {len(response.data)} store chains")
32
+ return response.data
33
+ except Exception as e:
34
+ logger.error(f"Error fetching store chains: {str(e)}")
35
+ raise Exception(f"Error fetching store chains: {str(e)}")
36
+
37
+ def normalize_string(self, text: str) -> str:
38
+ """Normalize string for comparison by removing special characters and converting to lowercase"""
39
+ if not text:
40
+ return ""
41
+ # Remove special characters and convert to lowercase
42
+ normalized = unidecode.unidecode(text)
43
+ normalized = re.sub(r'[^a-zA-Z0-9\s]', '', normalized)
44
+ normalized = re.sub(r'\s+', ' ', normalized).strip().lower()
45
+ return normalized
46
+
47
+ def safe_log_string(self, text: str) -> str:
48
+ """Safely encode string for logging to avoid Unicode errors"""
49
+ if not text:
50
+ return ""
51
+ try:
52
+ # Try to encode/decode to handle special characters
53
+ return text.encode('utf-8', errors='replace').decode('utf-8')
54
+ except:
55
+ # Fallback to ASCII if UTF-8 fails
56
+ return unidecode.unidecode(text)
57
+
58
+ def find_matching_store_chain(self, chain_name: str, store_chains: List[Dict]) -> Optional[str]:
59
+ """Find matching store chain ID by comparing normalized names"""
60
+ normalized_chain = self.normalize_string(chain_name)
61
+ safe_chain_name = self.safe_log_string(chain_name)
62
+ logger.debug(f"Looking for store chain match for: '{safe_chain_name}' (normalized: '{normalized_chain}')")
63
+
64
+ for store_chain in store_chains:
65
+ normalized_store_chain = self.normalize_string(store_chain['store_chain_name'])
66
+ if normalized_chain == normalized_store_chain:
67
+ safe_store_name = self.safe_log_string(store_chain['store_chain_name'])
68
+ logger.debug(f"Found match: '{safe_chain_name}' -> '{safe_store_name}' (ID: {store_chain['store_chain_id']})")
69
+ return store_chain['store_chain_id']
70
+
71
+ logger.warning(f"No matching store chain found for: '{safe_chain_name}'")
72
+ return None
73
+
74
+ def check_product_exists(self, ean: str, product_name: str, brand_id: str) -> Optional[str]:
75
+ """Check if product already exists by EAN or (product_name, brand_id) and return product_id if found"""
76
+ try:
77
+ # First check by EAN
78
+ logger.debug(f"Checking if product exists with EAN: {ean}")
79
+ response = self.supabase.table("products").select("product_id").eq("product_ean", ean).execute()
80
+ if response.data:
81
+ logger.debug(f"Product exists with EAN {ean}, ID: {response.data[0]['product_id']}")
82
+ return response.data[0]['product_id']
83
+
84
+ # Then check by product_name and brand_id
85
+ safe_name = self.safe_log_string(product_name)
86
+ logger.debug(f"Checking if product exists with name '{safe_name}' and brand_id: {brand_id}")
87
+ response = self.supabase.table("products").select("product_id").eq("product_name", product_name).eq("brand_id", brand_id).execute()
88
+ if response.data:
89
+ logger.debug(f"Product exists with name and brand, ID: {response.data[0]['product_id']}")
90
+ return response.data[0]['product_id']
91
+
92
+ logger.debug(f"Product with EAN {ean} or name '{safe_name}' does not exist")
93
+ return None
94
+ except Exception as e:
95
+ logger.error(f"Error checking product existence for EAN {ean}: {str(e)}")
96
+ raise Exception(f"Error checking product existence: {str(e)}")
97
+
98
+ def insert_product(self, product_data: Dict) -> str:
99
+ """Insert a new product and return the product_id"""
100
+ try:
101
+ safe_name = self.safe_log_string(product_data['product_name'])
102
+ logger.info(f"Inserting new product: {safe_name} (EAN: {product_data['product_ean']})")
103
+ response = self.supabase.table("products").insert(product_data).execute()
104
+ product_id = response.data[0]['product_id']
105
+ logger.info(f"Successfully inserted product with ID: {product_id}")
106
+ return product_id
107
+ except Exception as e:
108
+ safe_name = self.safe_log_string(product_data.get('product_name', 'unknown'))
109
+ logger.error(f"Error inserting product {safe_name}: {str(e)}")
110
+ raise Exception(f"Error inserting product: {str(e)}")
111
+
112
+ def get_or_create_store_product(self, product_id: str, store_chain_id: str) -> str:
113
+ """Get existing store_product or create new one, return store_product_id"""
114
+ try:
115
+ # Check if store_product mapping already exists
116
+ logger.debug(f"Checking if store_product exists for product {product_id} and store_chain {store_chain_id}")
117
+ response = self.supabase.table("store_products").select("store_product_id").eq("product_id", product_id).eq("store_chain_id", store_chain_id).execute()
118
+
119
+ if response.data:
120
+ store_product_id = response.data[0]['store_product_id']
121
+ logger.debug(f"Store_product exists with ID: {store_product_id}")
122
+ return store_product_id
123
+
124
+ # Create new store_product mapping
125
+ logger.debug(f"Creating new store_product mapping for product {product_id} and store_chain {store_chain_id}")
126
+ store_product_data = {
127
+ "product_id": product_id,
128
+ "store_chain_id": store_chain_id
129
+ }
130
+ response = self.supabase.table("store_products").insert(store_product_data).execute()
131
+ store_product_id = response.data[0]['store_product_id']
132
+ logger.debug(f"Successfully created store_product with ID: {store_product_id}")
133
+ return store_product_id
134
+
135
+ except Exception as e:
136
+ logger.error(f"Error getting/creating store_product for product {product_id} and store_chain {store_chain_id}: {str(e)}")
137
+ raise Exception(f"Error getting/creating store_product: {str(e)}")
138
+
139
+ def insert_price_history(self, price_data: Dict) -> None:
140
+ """Insert price history record"""
141
+ try:
142
+ logger.debug(f"Inserting price history for store_product {price_data['store_product_id']}")
143
+ self.supabase.table("product_price_history").insert(price_data).execute()
144
+ logger.debug("Price history inserted successfully")
145
+ except Exception as e:
146
+ logger.error(f"Error inserting price history for store_product {price_data.get('store_product_id', 'unknown')}: {str(e)}")
147
+ raise Exception(f"Error inserting price history: {str(e)}")
148
+
149
+ def combine_quantity_unit(self, quantity: str, unit: str) -> str:
150
+ """Combine quantity and unit into a single weight string without spaces"""
151
+ if not quantity and not unit:
152
+ return ""
153
+
154
+ # Clean quantity (remove commas, extra spaces)
155
+ clean_quantity = quantity.replace(',', '.').strip() if quantity else ""
156
+ clean_unit = unit.lower().strip() if unit else ""
157
+
158
+ # Combine without space
159
+ if clean_quantity and clean_unit:
160
+ result = f"{clean_quantity}{clean_unit}"
161
+ elif clean_quantity:
162
+ result = clean_quantity
163
+ elif clean_unit:
164
+ result = clean_unit
165
+ else:
166
+ result = ""
167
+
168
+ logger.debug(f"Combined quantity '{quantity}' and unit '{unit}' -> '{result}'")
169
+ return result
requirements.txt CHANGED
@@ -10,4 +10,6 @@ python-multipart
10
  google-cloud-vision
11
  python-dotenv
12
  supabase
13
- rembg
 
 
 
10
  google-cloud-vision
11
  python-dotenv
12
  supabase
13
+ rembg
14
+ httpx
15
+ unidecode