Franko Fišter committed on
Commit
1139dbb
·
1 Parent(s): 15b9bc1

Working dictionary check when upserting promo products

Browse files
db/receipt_repository.py CHANGED
@@ -2,7 +2,7 @@ import uuid
2
  import json
3
  from datetime import datetime
4
  from typing import Optional, Dict, Any
5
- from supabase_client import SupabaseClient
6
 
7
 
8
  class ReceiptRepository:
 
2
  import json
3
  from datetime import datetime
4
  from typing import Optional, Dict, Any
5
+ from db.supabase_client import SupabaseClient
6
 
7
 
8
  class ReceiptRepository:
db/scrape_repository.py CHANGED
@@ -1,72 +1,601 @@
1
  from typing import Dict, Any, List
2
- from datetime import datetime
3
- from supabase_client import SupabaseClient
 
 
 
 
 
4
 
5
 
6
  class PromoProductRepository:
7
  def __init__(self):
8
  self.supabase = SupabaseClient().get_client()
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  def upsert_multiple_products(self, products: List[Dict[str, Any]]) -> int:
11
  """
12
- Upsert multiple promo products in batches
13
- Returns the number of successfully upserted products
14
  """
15
- batch_size = 100 # Adjust based on Supabase capacity
16
- successfully_upserted = 0
 
 
 
 
 
 
17
  timestamp = datetime.now().isoformat()
18
 
19
- # Process in batches to avoid request size limitations
20
  for i in range(0, len(products), batch_size):
21
  batch = products[i:i+batch_size]
22
 
23
  for product in batch:
24
  store = product.get("store")
25
  name = product.get("name")
26
-
27
- formatted_promo_product = {
28
- "store": store,
29
- "picture_id": product.get("pictureId"),
30
- "name": name,
31
- "description": product.get("description", ""),
32
- "promo_start_date": product.get("promoStartDate"),
33
- "promo_end_date": product.get("promoEndDate"),
34
- "regular_price": product.get("regularPrice"),
35
- "promo_price": product.get("promoPrice"),
36
- "last_updated": timestamp
37
- }
38
 
39
  try:
40
- # Check if product exists with same store and name
41
- result = self.supabase.table("promo_products").select("*") \
42
- .eq("store", store) \
43
- .eq("name", name) \
44
- .execute()
45
 
46
- if result.data and len(result.data) > 0:
47
- # Product exists, update it
48
- record_id = result.data[0]["id"]
49
- self.supabase.table("promo_products") \
50
- .update(formatted_promo_product) \
51
- .eq("id", record_id) \
52
- .execute()
53
-
54
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  else:
56
- # Product doesn't exist, insert it
57
- self.supabase.table("promo_products") \
58
- .insert(formatted_promo_product) \
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  .execute()
60
-
61
- successfully_upserted += 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
  # Print progress periodically
64
- if successfully_upserted % 50 == 0:
65
- print(f"Processed {successfully_upserted} products so far...")
 
66
 
67
  except Exception as e:
68
- print(f"Failed to upsert product '{name}' from '{store}': {str(e)}")
69
- # Continue with next product instead of failing completely
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
 
71
- print(f"Successfully upserted {successfully_upserted} products")
72
- return successfully_upserted
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from typing import Dict, Any, List
2
+ from datetime import datetime, timedelta
3
+ import time
4
+ import requests
5
+ from io import BytesIO
6
+ import asyncio
7
+ from db.supabase_client import SupabaseClient
8
+ from utils.image_processing import process_and_store_product_image
9
 
10
 
11
  class PromoProductRepository:
12
  def __init__(self):
13
  self.supabase = SupabaseClient().get_client()
14
 
15
+ def fix_promo_date(self, promo_date: str, date_type: str = "start") -> str:
16
+ """Replace invalid promo dates with appropriate fallback dates"""
17
+ if promo_date is None:
18
+ fallback_date = datetime.now() if date_type == "start" else datetime.now() + timedelta(days=7)
19
+ print(f"โš ๏ธ {date_type} date is None, using fallback: {fallback_date.isoformat()}")
20
+ return fallback_date.isoformat()
21
+
22
+ try:
23
+ # Parse the date string
24
+ dt = datetime.fromisoformat(promo_date.replace('Z', '+00:00'))
25
+
26
+ # Check for Unix epoch start date (1970-01-01)
27
+ if dt.year == 1970 and dt.month == 1 and dt.day == 1:
28
+ fallback_date = datetime.now() if date_type == "start" else datetime.now() + timedelta(days=7)
29
+ print(f"โš ๏ธ {date_type} date is Unix epoch (1970), using fallback: {fallback_date.isoformat()}")
30
+ return fallback_date.isoformat()
31
+
32
+ # Check for dates too far in the past (more than 1 year ago)
33
+ if dt < datetime.now() - timedelta(days=365):
34
+ fallback_date = datetime.now() if date_type == "start" else datetime.now() + timedelta(days=7)
35
+ print(f"โš ๏ธ {date_type} date too old ({dt.date()}), using fallback: {fallback_date.isoformat()}")
36
+ return fallback_date.isoformat()
37
+
38
+ # Check for dates too far in the future (more than 1 year from now)
39
+ if dt > datetime.now() + timedelta(days=365):
40
+ fallback_date = datetime.now() if date_type == "start" else datetime.now() + timedelta(days=7)
41
+ print(f"โš ๏ธ {date_type} date too far in future ({dt.date()}), using fallback: {fallback_date.isoformat()}")
42
+ return fallback_date.isoformat()
43
+
44
+ return promo_date
45
+
46
+ except Exception as e:
47
+ # If parsing fails, replace with fallback
48
+ fallback_date = datetime.now() if date_type == "start" else datetime.now() + timedelta(days=7)
49
+ print(f"โš ๏ธ {date_type} date parsing failed ({promo_date}), using fallback: {fallback_date.isoformat()}")
50
+ return fallback_date.isoformat()
51
+
52
+ def check_dictionary(self, product_name: str, store: str) -> str | None:
53
+ """Check dictionary for existing product match"""
54
+ if not product_name or not store:
55
+ return None
56
+
57
+ # Clean and format store name for column lookup
58
+ store_key = store.lower().strip()
59
+ column_name = f"promo_input_{store_key}"
60
+
61
+ try:
62
+ # Use a more explicit query approach
63
+ query = self.supabase.table("product_input_dictionary").select("product_id")
64
+
65
+ # Apply the filter dynamically
66
+ result = query.filter(column_name, "eq", product_name).execute()
67
+
68
+ # Validate the response structure
69
+ if (result and
70
+ hasattr(result, 'data') and
71
+ result.data is not None and
72
+ len(result.data) > 0):
73
+
74
+ product_id = result.data[0].get("product_id")
75
+ if product_id:
76
+ print(f"โœ… Found existing product ID {product_id} for '{product_name}' in column '{column_name}'")
77
+ return product_id
78
+
79
+ print(f"๐Ÿ“ No match found for '{product_name}' in column '{column_name}'")
80
+ return None
81
+
82
+ except Exception as e:
83
+ print(f"โŒ Error checking dictionary for '{product_name}' in column '{column_name}': {e}")
84
+ return None
85
+
86
+ def normalize_store_name(self, name: str) -> str:
87
+ """Helper function for relaxed string comparison"""
88
+ if not name:
89
+ return ""
90
+ import unicodedata
91
+ normalized = unicodedata.normalize('NFD', name.lower())
92
+ return ''.join(c for c in normalized if unicodedata.category(c) != 'Mn' and c.isalnum())
93
+
94
+ def get_all_store_chains(self) -> List[Dict]:
95
+ """Get all store chains"""
96
+ try:
97
+ result = self.supabase.table("store_chains") \
98
+ .select("store_chain_id, store_chain_name") \
99
+ .execute()
100
+
101
+ return [{"id": chain["store_chain_id"], "name": chain["store_chain_name"]}
102
+ for chain in result.data]
103
+ except Exception as e:
104
+ print(f"Error fetching store chains: {e}")
105
+ return []
106
+
107
+ def get_stores_by_chain(self, chain_id: str) -> List[Dict]:
108
+ """Get stores for a specific chain"""
109
+ try:
110
+ result = self.supabase.table("stores") \
111
+ .select("store_id, store_location, store_address") \
112
+ .eq("store_chain_id", chain_id) \
113
+ .execute()
114
+
115
+ return [{"id": store["store_id"],
116
+ "location": store["store_location"],
117
+ "address": store["store_address"]}
118
+ for store in result.data]
119
+ except Exception as e:
120
+ print(f"Error fetching stores for chain {chain_id}: {e}")
121
+ return []
122
+
123
+ def validate_date_range(self, start_date: str, end_date: str) -> bool:
124
+ """Validate and limit date range to prevent timeout issues"""
125
+ try:
126
+ start_dt = datetime.fromisoformat(start_date.replace('Z', '+00:00'))
127
+ end_dt = datetime.fromisoformat(end_date.replace('Z', '+00:00'))
128
+
129
+ # Calculate number of days
130
+ days_diff = (end_dt - start_dt).days + 1
131
+
132
+ if days_diff > 90: # Limit to 90 days to prevent timeouts
133
+ print(f"โš ๏ธ Date range too large ({days_diff} days), limiting to 90 days")
134
+ return False
135
+
136
+ if days_diff < 1: # End date before start date
137
+ print(f"โš ๏ธ Invalid date range (end before start), skipping")
138
+ return False
139
+
140
+ print(f"๐Ÿ“… Date range validated: {days_diff} days ({start_dt.date()} to {end_dt.date()})")
141
+ return True
142
+
143
+ except Exception as e:
144
+ print(f"โŒ Error validating date range: {e}")
145
+ return False
146
+
147
+ def process_single_store_pricing(self, store_id: str, product_id: str,
148
+ start_date: str, end_date: str, price: float) -> bool:
149
+ """Process pricing for a single store with enhanced timeout handling"""
150
+ max_retries = 3
151
+ retry_delay = 2
152
+
153
+ # Validate date range first
154
+ if not self.validate_date_range(start_date, end_date):
155
+ print(f"โŒ Skipping store {store_id} due to invalid date range")
156
+ return False
157
+
158
+ for attempt in range(max_retries):
159
+ try:
160
+ print(f" ๐Ÿ”„ Attempt {attempt + 1}: Processing store {store_id}")
161
+
162
+ # Check if store-product relationship exists with timeout
163
+ print(f" ๐Ÿ“Š Checking store-product relationship...")
164
+ store_product_result = self.supabase.table("store_products") \
165
+ .select("store_product_id") \
166
+ .eq("store_id", store_id) \
167
+ .eq("product_id", product_id) \
168
+ .maybe_single() \
169
+ .execute()
170
+
171
+ if store_product_result.data:
172
+ store_product_id = store_product_result.data["store_product_id"]
173
+ print(f" โœ… Found existing store-product relationship: {store_product_id}")
174
+ else:
175
+ # Create new store-product relationship
176
+ print(f" โž• Creating new store-product relationship...")
177
+ new_store_product = self.supabase.table("store_products") \
178
+ .insert({"store_id": store_id, "product_id": product_id}) \
179
+ .select("store_product_id") \
180
+ .single() \
181
+ .execute()
182
+ store_product_id = new_store_product.data["store_product_id"]
183
+ print(f" โœ… Created store-product relationship: {store_product_id}")
184
+
185
+ # Count existing entries first to understand the scope
186
+ print(f" ๐Ÿ” Checking existing price history entries...")
187
+ existing_count_result = self.supabase.table("product_price_history") \
188
+ .select("*", count="exact") \
189
+ .eq("store_product_id", store_product_id) \
190
+ .gte("price_date", start_date) \
191
+ .lte("price_date", end_date) \
192
+ .execute()
193
+
194
+ existing_count = existing_count_result.count if existing_count_result.count else 0
195
+ print(f" ๐Ÿ“ˆ Found {existing_count} existing price entries to delete")
196
+
197
+ # Delete existing entries in smaller batches if there are many
198
+ if existing_count > 0:
199
+ print(f" ๐Ÿ—‘๏ธ Deleting {existing_count} existing entries...")
200
+ if existing_count > 100:
201
+ # For large deletions, do it in smaller chunks
202
+ print(f" โš ๏ธ Large deletion detected, processing in chunks...")
203
+ # Delete in 30-day chunks to avoid timeouts
204
+ current_start = datetime.fromisoformat(start_date.replace('Z', '+00:00'))
205
+ end_dt = datetime.fromisoformat(end_date.replace('Z', '+00:00'))
206
+
207
+ while current_start <= end_dt:
208
+ chunk_end = min(current_start + timedelta(days=30), end_dt)
209
+ chunk_start_str = current_start.strftime("%Y-%m-%d")
210
+ chunk_end_str = chunk_end.strftime("%Y-%m-%d")
211
+
212
+ print(f" ๐Ÿ—‘๏ธ Deleting chunk: {chunk_start_str} to {chunk_end_str}")
213
+ self.supabase.table("product_price_history") \
214
+ .delete() \
215
+ .eq("store_product_id", store_product_id) \
216
+ .gte("price_date", chunk_start_str) \
217
+ .lte("price_date", chunk_end_str) \
218
+ .execute()
219
+
220
+ current_start = chunk_end + timedelta(days=1)
221
+ time.sleep(0.2) # Small delay between chunks
222
+ else:
223
+ # Small deletion, do it all at once
224
+ self.supabase.table("product_price_history") \
225
+ .delete() \
226
+ .eq("store_product_id", store_product_id) \
227
+ .gte("price_date", start_date) \
228
+ .lte("price_date", end_date) \
229
+ .execute()
230
+
231
+ # Create price history entries in very small batches
232
+ print(f" ๐Ÿ“Š Creating new price history entries...")
233
+ start_dt = datetime.fromisoformat(start_date.replace('Z', '+00:00'))
234
+ end_dt = datetime.fromisoformat(end_date.replace('Z', '+00:00'))
235
+
236
+ current_date = start_dt
237
+ batch_size = 25 # Very small batch size for Konzum
238
+ price_entries = []
239
+ total_days = (end_dt - start_dt).days + 1
240
+ processed_days = 0
241
+
242
+ while current_date <= end_dt:
243
+ price_entries.append({
244
+ "store_product_id": store_product_id,
245
+ "current_price": price,
246
+ "price_date": current_date.strftime("%Y-%m-%d")
247
+ })
248
+ current_date += timedelta(days=1)
249
+ processed_days += 1
250
+
251
+ # Insert in small batches
252
+ if len(price_entries) >= batch_size:
253
+ print(f" ๐Ÿ“ˆ Inserting batch ({processed_days}/{total_days} days)")
254
+ self.supabase.table("product_price_history") \
255
+ .insert(price_entries) \
256
+ .execute()
257
+ price_entries = []
258
+ time.sleep(0.3) # Longer delay for Konzum
259
+
260
+ # Insert remaining entries
261
+ if price_entries:
262
+ print(f" ๐Ÿ“ˆ Inserting final batch ({processed_days}/{total_days} days)")
263
+ self.supabase.table("product_price_history") \
264
+ .insert(price_entries) \
265
+ .execute()
266
+
267
+ print(f" โœ… Successfully processed store {store_id}")
268
+ return True
269
+
270
+ except Exception as e:
271
+ error_msg = str(e)
272
+ if ("520" in error_msg or "timeout" in error_msg.lower()) and attempt < max_retries - 1:
273
+ print(f" โš ๏ธ Timeout/520 error on attempt {attempt + 1}, retrying in {retry_delay}s...")
274
+ time.sleep(retry_delay)
275
+ retry_delay *= 2 # Exponential backoff
276
+ continue
277
+ else:
278
+ print(f" โŒ Error processing store {store_id}: {e}")
279
+ return False
280
+
281
+ return False
282
+
283
+ def process_product_pricing(self, product_id: str, store_name: str, start_date: str,
284
+ end_date: str, promo_price: float, regular_price: float) -> bool:
285
+ """Process product pricing for date range across all stores in a chain"""
286
+ if not product_id or not store_name:
287
+ print("Missing required parameters for price processing")
288
+ return False
289
+
290
+ try:
291
+ print(f"Starting price processing for product ID: {product_id}")
292
+
293
+ # Fix invalid dates BEFORE processing
294
+ print(f"๐Ÿ“… Original dates - Start: {start_date}, End: {end_date}")
295
+ fixed_start_date = self.fix_promo_date(start_date, "start")
296
+ fixed_end_date = self.fix_promo_date(end_date, "end")
297
+ print(f"๐Ÿ“… Fixed dates - Start: {fixed_start_date}, End: {fixed_end_date}")
298
+
299
+ # Use the fixed dates
300
+ start_date = fixed_start_date
301
+ end_date = fixed_end_date
302
+
303
+ # Get all store chains
304
+ store_chains = self.get_all_store_chains()
305
+
306
+ # Normalize the promo store name
307
+ promo_store_normalized = self.normalize_store_name(store_name)
308
+
309
+ # Find matching store chain with relaxed comparison
310
+ matched_chain = None
311
+ for chain in store_chains:
312
+ chain_normalized = self.normalize_store_name(chain["name"])
313
+
314
+ if (promo_store_normalized in chain_normalized or
315
+ chain_normalized in promo_store_normalized):
316
+ matched_chain = chain
317
+ print(f"โœ… Matched store chain: {matched_chain['name']} (ID: {matched_chain['id']})")
318
+ break
319
+
320
+ if not matched_chain:
321
+ print("No matching store chain found")
322
+ return False
323
+
324
+ # Get stores for the matched chain
325
+ stores_in_chain = self.get_stores_by_chain(matched_chain["id"])
326
+
327
+ if not stores_in_chain:
328
+ print(f"No stores found for chain ID: {matched_chain['id']}")
329
+ return False
330
+
331
+ # Use promo price if available, otherwise use regular price
332
+ price_to_use = promo_price if promo_price and promo_price > 0 else regular_price or 0
333
+
334
+ successful_stores = 0
335
+ total_stores = len(stores_in_chain)
336
+
337
+ # Special handling for Konzum (longer delays)
338
+ is_konzum = "konzum" in matched_chain["name"].lower()
339
+ store_delay = 2.0 if is_konzum else 0.5
340
+
341
+ print(f"๐Ÿ“Š Processing {total_stores} stores for {matched_chain['name']}")
342
+ if is_konzum:
343
+ print(f"โš ๏ธ Konzum detected - using enhanced timeout handling")
344
+
345
+ # Process each store individually with delays
346
+ for i, store in enumerate(stores_in_chain):
347
+ print(f"Processing store {i+1}/{total_stores}: {store['location']} (ID: {store['id']})")
348
+
349
+ success = self.process_single_store_pricing(
350
+ store_id=store["id"],
351
+ product_id=product_id,
352
+ start_date=start_date,
353
+ end_date=end_date,
354
+ price=price_to_use
355
+ )
356
+
357
+ if success:
358
+ successful_stores += 1
359
+ print(f" โœ… Store {i+1}/{total_stores} completed successfully")
360
+ else:
361
+ print(f" โŒ Store {i+1}/{total_stores} failed")
362
+
363
+ # Add delay between stores (longer for Konzum)
364
+ if i < total_stores - 1: # Don't sleep after the last store
365
+ print(f" โณ Waiting {store_delay}s before next store...")
366
+ time.sleep(store_delay)
367
+
368
+ success_rate = successful_stores / total_stores if total_stores > 0 else 0
369
+ print(f"โœ… Completed price processing: {successful_stores}/{total_stores} stores ({success_rate:.1%})")
370
+
371
+ # Consider it successful if at least 70% of stores were updated (lower threshold for Konzum)
372
+ threshold = 0.7 if is_konzum else 0.8
373
+ return success_rate >= threshold
374
+
375
+ except Exception as e:
376
+ print(f"Error processing product pricing: {e}")
377
+ return False
378
+
379
+ def process_product_image_sync(self, picture_id: str, product_id: str) -> bool:
380
+ """Process product image using direct function calls - sync wrapper"""
381
+ if not picture_id or not product_id:
382
+ print("No image or product ID provided for image processing")
383
+ return False
384
+
385
+ try:
386
+ print(f"๐Ÿ–ผ๏ธ Processing image for product ID: {product_id}")
387
+
388
+ # Get the original image URL (same pattern as admin dashboard)
389
+ original_image_url = f"https://backend.360promo.hr/contents/products/{picture_id}.jpg"
390
+
391
+ # Fetch the image
392
+ print(f"๐Ÿ“ฅ Downloading image from: {original_image_url}")
393
+ response = requests.get(original_image_url, timeout=30)
394
+
395
+ if not response.ok:
396
+ print(f"โŒ Failed to fetch image: HTTP {response.status_code}")
397
+ return False
398
+
399
+ # Create a mock UploadFile object from the downloaded image
400
+ class MockUploadFile:
401
+ def __init__(self, content: bytes, filename: str):
402
+ self.file = BytesIO(content)
403
+ self.filename = filename
404
+ self.content_type = "image/jpeg"
405
+
406
+ async def read(self) -> bytes:
407
+ self.file.seek(0)
408
+ return self.file.read()
409
+
410
+ mock_file = MockUploadFile(response.content, f"product_{picture_id}.jpg")
411
+
412
+ # Run the async function in a new event loop
413
+ async def process_image():
414
+ return await process_and_store_product_image(
415
+ file=mock_file,
416
+ remove_bg=True,
417
+ upscale=True,
418
+ scale_factor=2,
419
+ process_order="remove_first",
420
+ product_id=product_id
421
+ )
422
+
423
+ # Process the image directly using the imported function
424
+ print(f"๐Ÿ”„ Processing image directly...")
425
+
426
+ # Check if we're in an event loop
427
+ try:
428
+ loop = asyncio.get_running_loop()
429
+ # We're in an async context, run in thread pool
430
+ import concurrent.futures
431
+ with concurrent.futures.ThreadPoolExecutor() as executor:
432
+ future = executor.submit(asyncio.run, process_image())
433
+ result = future.result(timeout=60)
434
+ except RuntimeError:
435
+ # No event loop running, we can use asyncio.run
436
+ result = asyncio.run(process_image())
437
+
438
+ if result.get('status') == 'success':
439
+ print(f"โœ… Image processed successfully: {result.get('image_url')}")
440
+ return True
441
+ else:
442
+ print(f"โŒ Image processing failed: {result}")
443
+ return False
444
+
445
+ except Exception as e:
446
+ print(f"โŒ Error processing product image: {e}")
447
+ return False
448
+
449
  def upsert_multiple_products(self, products: List[Dict[str, Any]]) -> int:
450
  """
451
+ Upsert multiple promo products in batches with dictionary check and image processing
452
+ Returns the number of successfully processed products
453
  """
454
+ batch_size = 100
455
+ successfully_processed = 0
456
+ automatically_adjusted = 0 # Counter for products found in dictionary
457
+ upserted_to_promo = 0 # Counter for products added to promo_products table
458
+ failed_pricing_updates = 0 # Counter for failed pricing updates
459
+ images_processed = 0 # Counter for successfully processed images
460
+ images_failed = 0 # Counter for failed image processing
461
+ date_fixes = 0 # Counter for fixed dates
462
  timestamp = datetime.now().isoformat()
463
 
 
464
  for i in range(0, len(products), batch_size):
465
  batch = products[i:i+batch_size]
466
 
467
  for product in batch:
468
  store = product.get("store")
469
  name = product.get("name")
470
+ picture_id = product.get("pictureId")
 
 
 
 
 
 
 
 
 
 
 
471
 
472
  try:
473
+ # Check dictionary first
474
+ existing_product_id = self.check_dictionary(name, store)
 
 
 
475
 
476
+ if existing_product_id:
477
+ # Product exists in dictionary - update pricing and process image
478
+ print(f"Found existing product ID {existing_product_id} for '{name}' from '{store}' - updating pricing and processing image")
479
+
480
+ # Check if dates need fixing
481
+ original_start = product.get("promoStartDate")
482
+ original_end = product.get("promoEndDate")
483
+
484
+ if (original_start is None or
485
+ original_start == "1970-01-01T00:00:00Z" or
486
+ original_end is None or
487
+ original_end == "1970-01-01T00:00:00Z"):
488
+ date_fixes += 1
489
+
490
+ # Process pricing
491
+ pricing_success = self.process_product_pricing(
492
+ product_id=existing_product_id,
493
+ store_name=store,
494
+ start_date=product.get("promoStartDate"),
495
+ end_date=product.get("promoEndDate"),
496
+ promo_price=product.get("promoPrice"),
497
+ regular_price=product.get("regularPrice")
498
+ )
499
+
500
+ # Process image if available (using sync wrapper)
501
+ image_success = False
502
+ if picture_id:
503
+ image_success = self.process_product_image_sync(picture_id, existing_product_id)
504
+ if image_success:
505
+ images_processed += 1
506
+ print(f"๐Ÿ–ผ๏ธ Successfully processed image for: {name}")
507
+ else:
508
+ images_failed += 1
509
+ print(f"๐Ÿ–ผ๏ธ Failed to process image for: {name}")
510
+
511
+ if pricing_success:
512
+ successfully_processed += 1
513
+ automatically_adjusted += 1
514
+ print(f"โœ… Automatically adjusted pricing for: {name}")
515
+ else:
516
+ failed_pricing_updates += 1
517
+ print(f"โŒ Failed to update pricing for: {name}")
518
  else:
519
+ # Product not in dictionary - proceed with normal upsert to promo_products
520
+ formatted_promo_product = {
521
+ "store": store,
522
+ "picture_id": product.get("pictureId"),
523
+ "name": name,
524
+ "description": product.get("description", ""),
525
+ "promo_start_date": product.get("promoStartDate"),
526
+ "promo_end_date": product.get("promoEndDate"),
527
+ "regular_price": product.get("regularPrice"),
528
+ "promo_price": product.get("promoPrice"),
529
+ "last_updated": timestamp
530
+ }
531
+
532
+ # Check if product exists in promo_products
533
+ result = self.supabase.table("promo_products").select("*") \
534
+ .eq("store", store) \
535
+ .eq("name", name) \
536
  .execute()
537
+
538
+ if result.data and len(result.data) > 0:
539
+ # Update existing promo product
540
+ record_id = result.data[0]["id"]
541
+ self.supabase.table("promo_products") \
542
+ .update(formatted_promo_product) \
543
+ .eq("id", record_id) \
544
+ .execute()
545
+ print(f"๐Ÿ”„ Updated existing promo product: {name}")
546
+ else:
547
+ # Insert new promo product
548
+ self.supabase.table("promo_products") \
549
+ .insert(formatted_promo_product) \
550
+ .execute()
551
+ print(f"โž• Inserted new promo product: {name}")
552
+
553
+ successfully_processed += 1
554
+ upserted_to_promo += 1
555
 
556
  # Print progress periodically
557
+ total_processed = successfully_processed + failed_pricing_updates
558
+ if total_processed % 50 == 0:
559
+ print(f"Processed {total_processed} / {len(products)} products so far...")
560
 
561
  except Exception as e:
562
+ print(f"Failed to process product '{name}' from '{store}': {str(e)}")
563
+ continue
564
+
565
+ # Detailed summary logging
566
+ total_processed = successfully_processed + failed_pricing_updates
567
+ print(f"\n{'='*60}")
568
+ print(f"SCRAPING PROCESS SUMMARY")
569
+ print(f"{'='*60}")
570
+ print(f"๐Ÿ“Š Total products processed: {len(products)}")
571
+ print(f"โœ… Successfully processed: {successfully_processed}")
572
+ print(f"๐Ÿ”ง Automatically adjusted (existing products): {automatically_adjusted}")
573
+ print(f"๐Ÿ“‹ Upserted to promo_products table: {upserted_to_promo}")
574
+ print(f"โš ๏ธ Failed pricing updates: {failed_pricing_updates}")
575
+ print(f"๐Ÿ–ผ๏ธ Images successfully processed: {images_processed}")
576
+ print(f"๐Ÿ–ผ๏ธ Images failed to process: {images_failed}")
577
+ print(f"๐Ÿ“… Invalid dates fixed: {date_fixes}")
578
+ print(f"โŒ Failed to process: {len(products) - total_processed}")
579
+ print(f"{'='*60}")
580
+
581
+ if automatically_adjusted > 0:
582
+ print(f"๐ŸŽฏ {automatically_adjusted} products were found in the dictionary and had their pricing automatically updated across all stores in their respective chains.")
583
+
584
+ if images_processed > 0:
585
+ print(f"๐Ÿ–ผ๏ธ {images_processed} product images were successfully processed and updated.")
586
 
587
+ if images_failed > 0:
588
+ print(f"โš ๏ธ {images_failed} product images failed to process.")
589
+
590
+ if date_fixes > 0:
591
+ print(f"๐Ÿ“… {date_fixes} products had invalid dates (null/1970) that were automatically corrected.")
592
+
593
+ if upserted_to_promo > 0:
594
+ print(f"๐Ÿ“ {upserted_to_promo} products were added/updated in the temporary promo_products table for manual review.")
595
+
596
+ if failed_pricing_updates > 0:
597
+ print(f"โš ๏ธ {failed_pricing_updates} products had dictionary matches but failed pricing updates (likely due to API limits).")
598
+
599
+ print(f"{'='*60}\n")
600
+
601
+ return successfully_processed
utils/image_processing.py CHANGED
@@ -159,7 +159,7 @@ async def update_product_image(product_id: str, image_url: str) -> dict[str, any
159
 
160
  result = supabase.table("products").update({
161
  "product_image": image_url
162
- }).eq("id", product_id).execute()
163
 
164
  if not result.data:
165
  raise Exception(f"Failed to update product {product_id}")
 
159
 
160
  result = supabase.table("products").update({
161
  "product_image": image_url
162
+ }).eq("product_id", product_id).execute()
163
 
164
  if not result.data:
165
  raise Exception(f"Failed to update product {product_id}")