Merge pull request #4 from ff1574/product-image
Image processing, dictionary check in promo upsert
- api/main.py +2 -0
- api/product_routes.py +71 -2
- api/scrape_routes.py +158 -0
- db/receipt_repository.py +2 -1
- db/scrape_repository.py +601 -0
- requirements.txt +2 -1
- utils/image_processing.py +202 -0
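
The new surface area is two image-processing endpoints in api/product_routes.py and a background scrape trigger in api/scrape_routes.py. As a quick reference, a minimal client sketch might look like the following; the base URL, file path, and form values are placeholders and not part of this diff:

import requests

BASE_URL = "http://localhost:8000"  # assumption: wherever api/main.py is served

# Kick off the background promo scrape added in api/scrape_routes.py
print(requests.post(f"{BASE_URL}/scrape/promo", timeout=10).json())

# Process a single image via the new endpoint in api/product_routes.py
with open("sample_product.jpg", "rb") as f:  # placeholder file
    resp = requests.post(
        f"{BASE_URL}/process-image",
        files={"file": ("sample_product.jpg", f, "image/jpeg")},
        data={"remove_bg": "true", "upscale": "true",
              "scale_factor": "2", "process_order": "remove_first"},
        timeout=120,
    )
print(resp.json())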
api/main.py
CHANGED

@@ -3,6 +3,7 @@ from fastapi.middleware.cors import CORSMiddleware
 from config.settings import API_HOST, API_PORT
 from api.product_routes import router as product_router
 from api.receipt_routes import router as receipt_router
+from api.scrape_routes import router as scrape_router
 
 # Initialize FastAPI
 app = FastAPI(title="Product and Receipt API")
@@ -20,6 +21,7 @@ app.add_middleware(
 # Add routers
 app.include_router(product_router)
 app.include_router(receipt_router)
+app.include_router(scrape_router)
 
 @app.get("/", tags=["Health"])
 def health_check():
api/product_routes.py
CHANGED

@@ -1,7 +1,8 @@
-from fastapi import APIRouter, File, UploadFile, HTTPException
-from utils.image_processing import read_image_file
+from fastapi import APIRouter, File, UploadFile, HTTPException, Form
+from utils.image_processing import read_image_file, process_product_image
 from product_detector.detector import ObjectDetector
 from config.settings import MODEL_ONNX_PATH, CLASS_NAMES, INPUT_SIZE
+from utils.image_processing import process_and_store_product_image
 
 # Initialize the detector
 detector = ObjectDetector(
@@ -12,10 +13,12 @@ detector = ObjectDetector(
 
 router = APIRouter(tags=["Product Detection"])
 
+
 @router.options("/detect-product")
 async def detect_options():
     return {"Allow": "POST"}
 
+
 @router.post("/detect-product")
 async def detect_objects(file: UploadFile = File(...)):
     try:
@@ -31,3 +34,69 @@ async def detect_objects(file: UploadFile = File(...)):
         raise
     except Exception as e:
         raise HTTPException(500, f"Processing error: {str(e)}")
+
+
+@router.post("/process-image")
+async def process_image(
+    file: UploadFile = File(...),
+    remove_bg: bool = Form(True),
+    upscale: bool = Form(True),
+    scale_factor: int = Form(2),
+    process_order: str = Form("remove_first")
+):
+    """
+    Process product images by removing background and/or upscaling
+    """
+    try:
+        # Validate inputs
+        if scale_factor not in [2, 3, 4]:
+            raise HTTPException(400, "Scale factor must be 2, 3, or 4")
+
+        if process_order not in ["remove_first", "upscale_first"]:
+            raise HTTPException(400, "Process order must be 'remove_first' or 'upscale_first'")
+
+        if not file.content_type.startswith("image/"):
+            raise HTTPException(400, "File must be an image")
+
+        # Use the combined processing and storage function
+        result = await process_and_store_product_image(
+            file,
+            remove_bg=remove_bg,
+            upscale=upscale,
+            scale_factor=scale_factor,
+            process_order=process_order
+        )
+
+        return result
+    except HTTPException:
+        raise
+    except Exception as e:
+        raise HTTPException(500, f"Image processing error: {str(e)}")
+
+
+@router.post("/process-product-image")
+async def process_product_image_endpoint(
+    file: UploadFile = File(...),
+    remove_bg: bool = Form(True),
+    upscale: bool = Form(True),
+    scale_factor: int = Form(2),
+    process_order: str = Form("remove_first"),
+    product_id: str = Form(None)
+):
+    """
+    Process a product image and update the product record
+    """
+    try:
+        # Use the combined processing, storage and database function
+        result = await process_and_store_product_image(
+            file,
+            remove_bg=remove_bg,
+            upscale=upscale,
+            scale_factor=scale_factor,
+            process_order=process_order,
+            product_id=product_id
+        )
+
+        return result
+    except Exception as e:
+        raise HTTPException(500, f"Image processing error: {str(e)}")
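
The /process-product-image variant above additionally accepts a product_id and writes the resulting public image URL back to the products table. A hedged client sketch; the URL, file path, and UUID are placeholders:

import requests

with open("sample_product.jpg", "rb") as f:  # placeholder file
    resp = requests.post(
        "http://localhost:8000/process-product-image",  # placeholder host
        files={"file": ("sample_product.jpg", f, "image/jpeg")},
        data={
            "remove_bg": "true",
            "upscale": "true",
            "scale_factor": "3",
            "process_order": "upscale_first",
            "product_id": "00000000-0000-0000-0000-000000000000",  # placeholder UUID
        },
        timeout=180,
    )
print(resp.status_code, resp.json())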
api/scrape_routes.py
ADDED (158 lines)

from fastapi import APIRouter, HTTPException, Request, Depends, BackgroundTasks
from db.scrape_repository import PromoProductRepository
from utils.rate_limiter import RateLimiter
import requests
from concurrent.futures import ThreadPoolExecutor, as_completed

# Initialize rate limiter and repository
rate_limiter = RateLimiter(max_requests=5)  # Lower limit for scraping operations
promo_repository = PromoProductRepository()

router = APIRouter(prefix="/scrape", tags=["Data Scraping"])

RETAILER_MAPPING = {
    8: "Studenac",
    4: "Konzum",
    3: "Kaufland",
    9: "Tommy",
    109: "Spar",
    6: "Plodine",
    5: "Lidl"
}

def fetch_page(session, page):
    """Fetch a single page of products"""
    url = f"https://backend.360promo.hr/api/promotions/products?pageNumber={page}&sortBySalePercentage=False"
    try:
        print(f"📄 Fetching page {page}...")
        response = session.get(url, timeout=10)
        response.raise_for_status()
        return page, response.json()
    except Exception as e:
        print(f"❌ Error on page {page}: {str(e)}")
        return page, []

def fetch_all_products():
    products = []
    max_workers = 8  # Adjust based on API capacity

    with requests.Session() as session:
        # First, fetch page 1 to see if there's data
        _, page1_data = fetch_page(session, 1)
        if not page1_data:
            print("No data found on first page")
            return []

        products.extend(page1_data)

        # Set up for concurrent fetching of subsequent pages
        last_page_with_data = 1
        while True:
            # Determine next batch of pages to fetch
            start_page = last_page_with_data + 1
            end_page = start_page + max_workers - 1

            if start_page > 1000:  # Safety limit
                print("Reached maximum page limit")
                break

            pages_to_fetch = list(range(start_page, end_page + 1))

            # Fetch pages concurrently
            with ThreadPoolExecutor(max_workers=max_workers) as executor:
                futures = [executor.submit(fetch_page, session, page) for page in pages_to_fetch]

                # Process results
                new_data_found = False
                highest_page_with_data = 0

                for future in as_completed(futures):
                    page, data = future.result()
                    if data:  # If we got data
                        products.extend(data)
                        new_data_found = True
                        highest_page_with_data = max(highest_page_with_data, page)

            # If no new data was found in this batch, we're done
            if not new_data_found:
                break

            # Update the last page with data
            last_page_with_data = highest_page_with_data

    print(f"\n✅ Total products collected: {len(products)}")
    return products

def process_products(products):
    unified_products = []

    for product in products:
        retailer_id = product.get('retailerId')
        if not retailer_id or retailer_id not in RETAILER_MAPPING:
            continue

        store_name = RETAILER_MAPPING[retailer_id]
        price = product.get('promoPrice') or product.get('regularPrice')

        if price is None:
            continue

        item = {
            "store": store_name,
            "pictureId": product.get('id'),
            "name": product.get('name', 'Unknown Product'),
            "description": product.get('description', ''),
            "promoStartDate": product.get('promoStartDate'),
            "promoEndDate": product.get('promoEndDate'),
            "regularPrice": product.get('regularPrice'),
            "promoPrice": product.get('promoPrice')
        }

        unified_products.append(item)

    return unified_products

async def scrape_and_store_products():
    """Background task to scrape products and store them in the database"""
    try:
        # Fetch products from the API
        products = fetch_all_products()
        if not products:
            print("No products found to scrape")
            return 0

        # Process products into standardized format
        unified_products = process_products(products)
        if not unified_products:
            print("No valid products found to store")
            return 0

        # Store products in Supabase
        stored_count = promo_repository.upsert_multiple_products(unified_products)
        print(f"Successfully stored {stored_count} products")
        return stored_count

    except Exception as e:
        print(f"Error during scraping: {str(e)}")
        return 0

@router.post("/promo")
async def trigger_promo_scrape(
    background_tasks: BackgroundTasks,
    request: Request
):
    """
    Admin only: Trigger a promotional product scraping operation.
    This runs in the background to avoid timeout issues.
    """
    try:
        # Add scraping task to background tasks
        background_tasks.add_task(scrape_and_store_products)

        return {
            "status": "success",
            "message": "Promotional product scraping started. Results will be stored in the database."
        }
    except Exception as e:
        print(f"ERROR: {str(e)}")
        raise HTTPException(500, f"Failed to start scraping operation: {str(e)}")
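
Because scrape_and_store_products is a plain async function, it can also be run outside FastAPI, for example from a one-off script or cron job. A minimal sketch, assuming the project root is on PYTHONPATH and Supabase credentials are configured:

import asyncio
from api.scrape_routes import scrape_and_store_products

if __name__ == "__main__":
    # Runs the same fetch -> process -> upsert pipeline the endpoint triggers
    stored = asyncio.run(scrape_and_store_products())
    print(f"Stored {stored} products")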
db/receipt_repository.py
CHANGED

@@ -2,7 +2,8 @@ import uuid
 import json
 from datetime import datetime
 from typing import Optional, Dict, Any
-from .supabase_client import SupabaseClient
+from db.supabase_client import SupabaseClient
+
 
 class ReceiptRepository:
     def __init__(self):
db/scrape_repository.py
ADDED (601 lines)

from typing import Dict, Any, List
from datetime import datetime, timedelta
import time
import requests
from io import BytesIO
import asyncio
from db.supabase_client import SupabaseClient
from utils.image_processing import process_and_store_product_image


class PromoProductRepository:
    def __init__(self):
        self.supabase = SupabaseClient().get_client()

    def fix_promo_date(self, promo_date: str, date_type: str = "start") -> str:
        """Replace invalid promo dates with appropriate fallback dates"""
        if promo_date is None:
            fallback_date = datetime.now() if date_type == "start" else datetime.now() + timedelta(days=7)
            print(f"⚠️ {date_type} date is None, using fallback: {fallback_date.isoformat()}")
            return fallback_date.isoformat()

        try:
            # Parse the date string
            dt = datetime.fromisoformat(promo_date.replace('Z', '+00:00'))

            # Check for Unix epoch start date (1970-01-01)
            if dt.year == 1970 and dt.month == 1 and dt.day == 1:
                fallback_date = datetime.now() if date_type == "start" else datetime.now() + timedelta(days=7)
                print(f"⚠️ {date_type} date is Unix epoch (1970), using fallback: {fallback_date.isoformat()}")
                return fallback_date.isoformat()

            # Check for dates too far in the past (more than 1 year ago)
            if dt < datetime.now() - timedelta(days=365):
                fallback_date = datetime.now() if date_type == "start" else datetime.now() + timedelta(days=7)
                print(f"⚠️ {date_type} date too old ({dt.date()}), using fallback: {fallback_date.isoformat()}")
                return fallback_date.isoformat()

            # Check for dates too far in the future (more than 1 year from now)
            if dt > datetime.now() + timedelta(days=365):
                fallback_date = datetime.now() if date_type == "start" else datetime.now() + timedelta(days=7)
                print(f"⚠️ {date_type} date too far in future ({dt.date()}), using fallback: {fallback_date.isoformat()}")
                return fallback_date.isoformat()

            return promo_date

        except Exception as e:
            # If parsing fails, replace with fallback
            fallback_date = datetime.now() if date_type == "start" else datetime.now() + timedelta(days=7)
            print(f"⚠️ {date_type} date parsing failed ({promo_date}), using fallback: {fallback_date.isoformat()}")
            return fallback_date.isoformat()

    def check_dictionary(self, product_name: str, store: str) -> str | None:
        """Check dictionary for existing product match"""
        if not product_name or not store:
            return None

        # Clean and format store name for column lookup
        store_key = store.lower().strip()
        column_name = f"promo_input_{store_key}"

        try:
            # Use a more explicit query approach
            query = self.supabase.table("product_input_dictionary").select("product_id")

            # Apply the filter dynamically
            result = query.filter(column_name, "eq", product_name).execute()

            # Validate the response structure
            if (result and
                    hasattr(result, 'data') and
                    result.data is not None and
                    len(result.data) > 0):

                product_id = result.data[0].get("product_id")
                if product_id:
                    print(f"✅ Found existing product ID {product_id} for '{product_name}' in column '{column_name}'")
                    return product_id

            print(f"📝 No match found for '{product_name}' in column '{column_name}'")
            return None

        except Exception as e:
            print(f"❌ Error checking dictionary for '{product_name}' in column '{column_name}': {e}")
            return None

    def normalize_store_name(self, name: str) -> str:
        """Helper function for relaxed string comparison"""
        if not name:
            return ""
        import unicodedata
        normalized = unicodedata.normalize('NFD', name.lower())
        return ''.join(c for c in normalized if unicodedata.category(c) != 'Mn' and c.isalnum())

    def get_all_store_chains(self) -> List[Dict]:
        """Get all store chains"""
        try:
            result = self.supabase.table("store_chains") \
                .select("store_chain_id, store_chain_name") \
                .execute()

            return [{"id": chain["store_chain_id"], "name": chain["store_chain_name"]}
                    for chain in result.data]
        except Exception as e:
            print(f"Error fetching store chains: {e}")
            return []

    def get_stores_by_chain(self, chain_id: str) -> List[Dict]:
        """Get stores for a specific chain"""
        try:
            result = self.supabase.table("stores") \
                .select("store_id, store_location, store_address") \
                .eq("store_chain_id", chain_id) \
                .execute()

            return [{"id": store["store_id"],
                     "location": store["store_location"],
                     "address": store["store_address"]}
                    for store in result.data]
        except Exception as e:
            print(f"Error fetching stores for chain {chain_id}: {e}")
            return []

    def validate_date_range(self, start_date: str, end_date: str) -> bool:
        """Validate and limit date range to prevent timeout issues"""
        try:
            start_dt = datetime.fromisoformat(start_date.replace('Z', '+00:00'))
            end_dt = datetime.fromisoformat(end_date.replace('Z', '+00:00'))

            # Calculate number of days
            days_diff = (end_dt - start_dt).days + 1

            if days_diff > 90:  # Limit to 90 days to prevent timeouts
                print(f"⚠️ Date range too large ({days_diff} days), limiting to 90 days")
                return False

            if days_diff < 1:  # End date before start date
                print(f"⚠️ Invalid date range (end before start), skipping")
                return False

            print(f"📅 Date range validated: {days_diff} days ({start_dt.date()} to {end_dt.date()})")
            return True

        except Exception as e:
            print(f"❌ Error validating date range: {e}")
            return False

    def process_single_store_pricing(self, store_id: str, product_id: str,
                                     start_date: str, end_date: str, price: float) -> bool:
        """Process pricing for a single store with enhanced timeout handling"""
        max_retries = 3
        retry_delay = 2

        # Validate date range first
        if not self.validate_date_range(start_date, end_date):
            print(f"❌ Skipping store {store_id} due to invalid date range")
            return False

        for attempt in range(max_retries):
            try:
                print(f"  🔄 Attempt {attempt + 1}: Processing store {store_id}")

                # Check if store-product relationship exists with timeout
                print(f"  📊 Checking store-product relationship...")
                store_product_result = self.supabase.table("store_products") \
                    .select("store_product_id") \
                    .eq("store_id", store_id) \
                    .eq("product_id", product_id) \
                    .maybe_single() \
                    .execute()

                if store_product_result.data:
                    store_product_id = store_product_result.data["store_product_id"]
                    print(f"  ✅ Found existing store-product relationship: {store_product_id}")
                else:
                    # Create new store-product relationship
                    print(f"  ➕ Creating new store-product relationship...")
                    new_store_product = self.supabase.table("store_products") \
                        .insert({"store_id": store_id, "product_id": product_id}) \
                        .select("store_product_id") \
                        .single() \
                        .execute()
                    store_product_id = new_store_product.data["store_product_id"]
                    print(f"  ✅ Created store-product relationship: {store_product_id}")

                # Count existing entries first to understand the scope
                print(f"  🔍 Checking existing price history entries...")
                existing_count_result = self.supabase.table("product_price_history") \
                    .select("*", count="exact") \
                    .eq("store_product_id", store_product_id) \
                    .gte("price_date", start_date) \
                    .lte("price_date", end_date) \
                    .execute()

                existing_count = existing_count_result.count if existing_count_result.count else 0
                print(f"  📈 Found {existing_count} existing price entries to delete")

                # Delete existing entries in smaller batches if there are many
                if existing_count > 0:
                    print(f"  🗑️ Deleting {existing_count} existing entries...")
                    if existing_count > 100:
                        # For large deletions, do it in smaller chunks
                        print(f"  ⚠️ Large deletion detected, processing in chunks...")
                        # Delete in 30-day chunks to avoid timeouts
                        current_start = datetime.fromisoformat(start_date.replace('Z', '+00:00'))
                        end_dt = datetime.fromisoformat(end_date.replace('Z', '+00:00'))

                        while current_start <= end_dt:
                            chunk_end = min(current_start + timedelta(days=30), end_dt)
                            chunk_start_str = current_start.strftime("%Y-%m-%d")
                            chunk_end_str = chunk_end.strftime("%Y-%m-%d")

                            print(f"  🗑️ Deleting chunk: {chunk_start_str} to {chunk_end_str}")
                            self.supabase.table("product_price_history") \
                                .delete() \
                                .eq("store_product_id", store_product_id) \
                                .gte("price_date", chunk_start_str) \
                                .lte("price_date", chunk_end_str) \
                                .execute()

                            current_start = chunk_end + timedelta(days=1)
                            time.sleep(0.2)  # Small delay between chunks
                    else:
                        # Small deletion, do it all at once
                        self.supabase.table("product_price_history") \
                            .delete() \
                            .eq("store_product_id", store_product_id) \
                            .gte("price_date", start_date) \
                            .lte("price_date", end_date) \
                            .execute()

                # Create price history entries in very small batches
                print(f"  📊 Creating new price history entries...")
                start_dt = datetime.fromisoformat(start_date.replace('Z', '+00:00'))
                end_dt = datetime.fromisoformat(end_date.replace('Z', '+00:00'))

                current_date = start_dt
                batch_size = 25  # Very small batch size for Konzum
                price_entries = []
                total_days = (end_dt - start_dt).days + 1
                processed_days = 0

                while current_date <= end_dt:
                    price_entries.append({
                        "store_product_id": store_product_id,
                        "current_price": price,
                        "price_date": current_date.strftime("%Y-%m-%d")
                    })
                    current_date += timedelta(days=1)
                    processed_days += 1

                    # Insert in small batches
                    if len(price_entries) >= batch_size:
                        print(f"  📈 Inserting batch ({processed_days}/{total_days} days)")
                        self.supabase.table("product_price_history") \
                            .insert(price_entries) \
                            .execute()
                        price_entries = []
                        time.sleep(0.3)  # Longer delay for Konzum

                # Insert remaining entries
                if price_entries:
                    print(f"  📈 Inserting final batch ({processed_days}/{total_days} days)")
                    self.supabase.table("product_price_history") \
                        .insert(price_entries) \
                        .execute()

                print(f"  ✅ Successfully processed store {store_id}")
                return True

            except Exception as e:
                error_msg = str(e)
                if ("520" in error_msg or "timeout" in error_msg.lower()) and attempt < max_retries - 1:
                    print(f"  ⚠️ Timeout/520 error on attempt {attempt + 1}, retrying in {retry_delay}s...")
                    time.sleep(retry_delay)
                    retry_delay *= 2  # Exponential backoff
                    continue
                else:
                    print(f"  ❌ Error processing store {store_id}: {e}")
                    return False

        return False

    def process_product_pricing(self, product_id: str, store_name: str, start_date: str,
                                end_date: str, promo_price: float, regular_price: float) -> bool:
        """Process product pricing for date range across all stores in a chain"""
        if not product_id or not store_name:
            print("Missing required parameters for price processing")
            return False

        try:
            print(f"Starting price processing for product ID: {product_id}")

            # Fix invalid dates BEFORE processing
            print(f"📅 Original dates - Start: {start_date}, End: {end_date}")
            fixed_start_date = self.fix_promo_date(start_date, "start")
            fixed_end_date = self.fix_promo_date(end_date, "end")
            print(f"📅 Fixed dates - Start: {fixed_start_date}, End: {fixed_end_date}")

            # Use the fixed dates
            start_date = fixed_start_date
            end_date = fixed_end_date

            # Get all store chains
            store_chains = self.get_all_store_chains()

            # Normalize the promo store name
            promo_store_normalized = self.normalize_store_name(store_name)

            # Find matching store chain with relaxed comparison
            matched_chain = None
            for chain in store_chains:
                chain_normalized = self.normalize_store_name(chain["name"])

                if (promo_store_normalized in chain_normalized or
                        chain_normalized in promo_store_normalized):
                    matched_chain = chain
                    print(f"✅ Matched store chain: {matched_chain['name']} (ID: {matched_chain['id']})")
                    break

            if not matched_chain:
                print("No matching store chain found")
                return False

            # Get stores for the matched chain
            stores_in_chain = self.get_stores_by_chain(matched_chain["id"])

            if not stores_in_chain:
                print(f"No stores found for chain ID: {matched_chain['id']}")
                return False

            # Use promo price if available, otherwise use regular price
            price_to_use = promo_price if promo_price and promo_price > 0 else regular_price or 0

            successful_stores = 0
            total_stores = len(stores_in_chain)

            # Special handling for Konzum (longer delays)
            is_konzum = "konzum" in matched_chain["name"].lower()
            store_delay = 2.0 if is_konzum else 0.5

            print(f"📊 Processing {total_stores} stores for {matched_chain['name']}")
            if is_konzum:
                print(f"⚠️ Konzum detected - using enhanced timeout handling")

            # Process each store individually with delays
            for i, store in enumerate(stores_in_chain):
                print(f"Processing store {i+1}/{total_stores}: {store['location']} (ID: {store['id']})")

                success = self.process_single_store_pricing(
                    store_id=store["id"],
                    product_id=product_id,
                    start_date=start_date,
                    end_date=end_date,
                    price=price_to_use
                )

                if success:
                    successful_stores += 1
                    print(f"  ✅ Store {i+1}/{total_stores} completed successfully")
                else:
                    print(f"  ❌ Store {i+1}/{total_stores} failed")

                # Add delay between stores (longer for Konzum)
                if i < total_stores - 1:  # Don't sleep after the last store
                    print(f"  ⏳ Waiting {store_delay}s before next store...")
                    time.sleep(store_delay)

            success_rate = successful_stores / total_stores if total_stores > 0 else 0
            print(f"✅ Completed price processing: {successful_stores}/{total_stores} stores ({success_rate:.1%})")

            # Consider it successful if at least 70% of stores were updated (lower threshold for Konzum)
            threshold = 0.7 if is_konzum else 0.8
            return success_rate >= threshold

        except Exception as e:
            print(f"Error processing product pricing: {e}")
            return False

    def process_product_image_sync(self, picture_id: str, product_id: str) -> bool:
        """Process product image using direct function calls - sync wrapper"""
        if not picture_id or not product_id:
            print("No image or product ID provided for image processing")
            return False

        try:
            print(f"🖼️ Processing image for product ID: {product_id}")

            # Get the original image URL (same pattern as admin dashboard)
            original_image_url = f"https://backend.360promo.hr/contents/products/{picture_id}.jpg"

            # Fetch the image
            print(f"📥 Downloading image from: {original_image_url}")
            response = requests.get(original_image_url, timeout=30)

            if not response.ok:
                print(f"❌ Failed to fetch image: HTTP {response.status_code}")
                return False

            # Create a mock UploadFile object from the downloaded image
            class MockUploadFile:
                def __init__(self, content: bytes, filename: str):
                    self.file = BytesIO(content)
                    self.filename = filename
                    self.content_type = "image/jpeg"

                async def read(self) -> bytes:
                    self.file.seek(0)
                    return self.file.read()

            mock_file = MockUploadFile(response.content, f"product_{picture_id}.jpg")

            # Run the async function in a new event loop
            async def process_image():
                return await process_and_store_product_image(
                    file=mock_file,
                    remove_bg=True,
                    upscale=True,
                    scale_factor=2,
                    process_order="remove_first",
                    product_id=product_id
                )

            # Process the image directly using the imported function
            print(f"🔄 Processing image directly...")

            # Check if we're in an event loop
            try:
                loop = asyncio.get_running_loop()
                # We're in an async context, run in thread pool
                import concurrent.futures
                with concurrent.futures.ThreadPoolExecutor() as executor:
                    future = executor.submit(asyncio.run, process_image())
                    result = future.result(timeout=60)
            except RuntimeError:
                # No event loop running, we can use asyncio.run
                result = asyncio.run(process_image())

            if result.get('status') == 'success':
                print(f"✅ Image processed successfully: {result.get('image_url')}")
                return True
            else:
                print(f"❌ Image processing failed: {result}")
                return False

        except Exception as e:
            print(f"❌ Error processing product image: {e}")
            return False

    def upsert_multiple_products(self, products: List[Dict[str, Any]]) -> int:
        """
        Upsert multiple promo products in batches with dictionary check and image processing
        Returns the number of successfully processed products
        """
        batch_size = 100
        successfully_processed = 0
        automatically_adjusted = 0  # Counter for products found in dictionary
        upserted_to_promo = 0  # Counter for products added to promo_products table
        failed_pricing_updates = 0  # Counter for failed pricing updates
        images_processed = 0  # Counter for successfully processed images
        images_failed = 0  # Counter for failed image processing
        date_fixes = 0  # Counter for fixed dates
        timestamp = datetime.now().isoformat()

        for i in range(0, len(products), batch_size):
            batch = products[i:i+batch_size]

            for product in batch:
                store = product.get("store")
                name = product.get("name")
                picture_id = product.get("pictureId")

                try:
                    # Check dictionary first
                    existing_product_id = self.check_dictionary(name, store)

                    if existing_product_id:
                        # Product exists in dictionary - update pricing and process image
                        print(f"Found existing product ID {existing_product_id} for '{name}' from '{store}' - updating pricing and processing image")

                        # Check if dates need fixing
                        original_start = product.get("promoStartDate")
                        original_end = product.get("promoEndDate")

                        if (original_start is None or
                                original_start == "1970-01-01T00:00:00Z" or
                                original_end is None or
                                original_end == "1970-01-01T00:00:00Z"):
                            date_fixes += 1

                        # Process pricing
                        pricing_success = self.process_product_pricing(
                            product_id=existing_product_id,
                            store_name=store,
                            start_date=product.get("promoStartDate"),
                            end_date=product.get("promoEndDate"),
                            promo_price=product.get("promoPrice"),
                            regular_price=product.get("regularPrice")
                        )

                        # Process image if available (using sync wrapper)
                        image_success = False
                        if picture_id:
                            image_success = self.process_product_image_sync(picture_id, existing_product_id)
                            if image_success:
                                images_processed += 1
                                print(f"🖼️ Successfully processed image for: {name}")
                            else:
                                images_failed += 1
                                print(f"🖼️ Failed to process image for: {name}")

                        if pricing_success:
                            successfully_processed += 1
                            automatically_adjusted += 1
                            print(f"✅ Automatically adjusted pricing for: {name}")
                        else:
                            failed_pricing_updates += 1
                            print(f"❌ Failed to update pricing for: {name}")
                    else:
                        # Product not in dictionary - proceed with normal upsert to promo_products
                        formatted_promo_product = {
                            "store": store,
                            "picture_id": product.get("pictureId"),
                            "name": name,
                            "description": product.get("description", ""),
                            "promo_start_date": product.get("promoStartDate"),
                            "promo_end_date": product.get("promoEndDate"),
                            "regular_price": product.get("regularPrice"),
                            "promo_price": product.get("promoPrice"),
                            "last_updated": timestamp
                        }

                        # Check if product exists in promo_products
                        result = self.supabase.table("promo_products").select("*") \
                            .eq("store", store) \
                            .eq("name", name) \
                            .execute()

                        if result.data and len(result.data) > 0:
                            # Update existing promo product
                            record_id = result.data[0]["id"]
                            self.supabase.table("promo_products") \
                                .update(formatted_promo_product) \
                                .eq("id", record_id) \
                                .execute()
                            print(f"🔄 Updated existing promo product: {name}")
                        else:
                            # Insert new promo product
                            self.supabase.table("promo_products") \
                                .insert(formatted_promo_product) \
                                .execute()
                            print(f"➕ Inserted new promo product: {name}")

                        successfully_processed += 1
                        upserted_to_promo += 1

                    # Print progress periodically
                    total_processed = successfully_processed + failed_pricing_updates
                    if total_processed % 50 == 0:
                        print(f"Processed {total_processed} / {len(products)} products so far...")

                except Exception as e:
                    print(f"Failed to process product '{name}' from '{store}': {str(e)}")
                    continue

        # Detailed summary logging
        total_processed = successfully_processed + failed_pricing_updates
        print(f"\n{'='*60}")
        print(f"SCRAPING PROCESS SUMMARY")
        print(f"{'='*60}")
        print(f"📊 Total products processed: {len(products)}")
        print(f"✅ Successfully processed: {successfully_processed}")
        print(f"🔧 Automatically adjusted (existing products): {automatically_adjusted}")
        print(f"📋 Upserted to promo_products table: {upserted_to_promo}")
        print(f"⚠️ Failed pricing updates: {failed_pricing_updates}")
        print(f"🖼️ Images successfully processed: {images_processed}")
        print(f"🖼️ Images failed to process: {images_failed}")
        print(f"📅 Invalid dates fixed: {date_fixes}")
        print(f"❌ Failed to process: {len(products) - total_processed}")
        print(f"{'='*60}")

        if automatically_adjusted > 0:
            print(f"🎯 {automatically_adjusted} products were found in the dictionary and had their pricing automatically updated across all stores in their respective chains.")

        if images_processed > 0:
            print(f"🖼️ {images_processed} product images were successfully processed and updated.")

        if images_failed > 0:
            print(f"⚠️ {images_failed} product images failed to process.")

        if date_fixes > 0:
            print(f"📅 {date_fixes} products had invalid dates (null/1970) that were automatically corrected.")

        if upserted_to_promo > 0:
            print(f"📝 {upserted_to_promo} products were added/updated in the temporary promo_products table for manual review.")

        if failed_pricing_updates > 0:
            print(f"⚠️ {failed_pricing_updates} products had dictionary matches but failed pricing updates (likely due to API limits).")

        print(f"{'='*60}\n")

        return successfully_processed
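
For context on the date handling above, the fallback rules in fix_promo_date can be exercised in isolation. A small sketch (assumes Supabase credentials are available, since the constructor opens a client; the inputs are illustrative):

from db.scrape_repository import PromoProductRepository

repo = PromoProductRepository()
print(repo.fix_promo_date(None, "start"))                  # None -> current timestamp
print(repo.fix_promo_date("1970-01-01T00:00:00Z", "end"))  # epoch date -> now + 7 days
print(repo.fix_promo_date("not-a-date", "end"))            # parse failure -> now + 7 days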
requirements.txt
CHANGED

@@ -9,4 +9,5 @@ ultralytics
 python-multipart
 google-cloud-vision
 python-dotenv
-supabase
+supabase
+rembg
utils/image_processing.py
CHANGED

@@ -1,6 +1,15 @@
 import numpy as np
 import cv2
 from fastapi import UploadFile, HTTPException
+from rembg import remove
+import time
+import uuid
+from typing import Tuple, Optional
+from db.supabase_client import SupabaseClient
+
+# Initialize Supabase client
+supabase = SupabaseClient().get_client()
+
 
 async def read_image_file(file: UploadFile) -> np.ndarray:
     """Read and process an image file from FastAPI UploadFile"""
@@ -14,3 +23,196 @@ async def read_image_file(file: UploadFile) -> np.ndarray:
         raise HTTPException(400, "Invalid image data")
 
     return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+
+
+def remove_background(image_bytes: bytes) -> bytes:
+    """Remove white background from image using rembg"""
+    try:
+        return remove(image_bytes,
+                      alpha_matting=True,
+                      alpha_matting_background_threshold=5,
+                      alpha_matting_foreground_threshold=220,
+                      alpha_matting_erode_size=5)
+    except Exception as e:
+        print(f"Error removing background: {str(e)}")
+        raise Exception(f"Background removal error: {str(e)}")
+
+
+def upscale_image(image_bytes: bytes, scale_factor: int = 2) -> bytes:
+    """Upscale image using OpenCV"""
+    try:
+        # Create a numpy array from the image bytes
+        nparr = np.frombuffer(image_bytes, np.uint8)
+        img = cv2.imdecode(nparr, cv2.IMREAD_UNCHANGED)
+
+        # Handle images with alpha channel
+        if len(img.shape) > 2 and img.shape[2] == 4:
+            # Split channels
+            b, g, r, a = cv2.split(img)
+
+            # Scale RGB channels
+            rgb_channels = cv2.merge([b, g, r])
+            scaled_rgb = cv2.resize(rgb_channels, None, fx=scale_factor, fy=scale_factor,
+                                    interpolation=cv2.INTER_CUBIC)
+
+            # Scale alpha channel separately
+            scaled_alpha = cv2.resize(a, None, fx=scale_factor, fy=scale_factor,
+                                      interpolation=cv2.INTER_CUBIC)
+
+            # Merge channels back together
+            scaled_img = cv2.merge([
+                scaled_rgb[:, :, 0],
+                scaled_rgb[:, :, 1],
+                scaled_rgb[:, :, 2],
+                scaled_alpha
+            ])
+        else:
+            # Regular RGB image
+            scaled_img = cv2.resize(img, None, fx=scale_factor, fy=scale_factor,
+                                    interpolation=cv2.INTER_CUBIC)
+
+        # Encode the image back to bytes
+        success, buffer = cv2.imencode('.png', scaled_img)
+        if not success:
+            raise Exception("Failed to encode upscaled image")
+
+        return buffer.tobytes()
+    except Exception as e:
+        print(f"Error upscaling image: {str(e)}")
+        raise Exception(f"Image upscaling error: {str(e)}")
+
+
+async def process_product_image(
+    file: UploadFile,
+    remove_bg: bool = True,
+    upscale: bool = True,
+    scale_factor: int = 2,
+    process_order: str = "remove_first"
+) -> Tuple[bytes, str]:
+    """Process a product image with background removal and upscaling"""
+    # Read the file content
+    content = await file.read()
+    file.file.seek(0)  # Reset file pointer for potential reuse
+
+    # Create a descriptive filename with timestamp for uniqueness
+    timestamp = int(time.time())
+    original_filename = file.filename.split('.')
+    base_name = original_filename[0] if len(original_filename) > 0 else 'product'
+    extension = 'png'  # Always use PNG to preserve transparency
+
+    # Process the image based on the parameters and order
+    processed_content = content
+
+    if process_order == "remove_first" and remove_bg and upscale:
+        processed_content = remove_background(processed_content)
+        processed_content = upscale_image(processed_content, scale_factor)
+    elif process_order == "upscale_first" and remove_bg and upscale:
+        processed_content = upscale_image(processed_content, scale_factor)
+        processed_content = remove_background(processed_content)
+    elif remove_bg:
+        processed_content = remove_background(processed_content)
+    elif upscale:
+        processed_content = upscale_image(processed_content, scale_factor)
+
+    # Create descriptive filename with processing info
+    processed_filename = f"{base_name}_{'nobg' if remove_bg else ''}_{'upx' + str(scale_factor) if upscale else ''}_{timestamp}.{extension}"
+
+    return processed_content, processed_filename
+
+
+async def upload_processed_image(
+    processed_image: bytes,
+    filename: str,
+    bucket: str = "product-images"
+) -> Tuple[str, str]:
+    """
+    Upload a processed image to Supabase Storage
+
+    Returns:
+        Tuple[str, str]: (image_path, image_url)
+    """
+    # Generate a unique ID for the image
+    image_id = str(uuid.uuid4())
+    image_path = f"{image_id}_{filename}"
+
+    # Upload the processed image to Supabase Storage
+    supabase.storage.from_(bucket).upload(
+        file=processed_image,
+        path=image_path,
+        file_options={"content-type": "image/png", "upsert": "true"}
+    )
+
+    # Get the public URL for the uploaded image
+    image_url = supabase.storage.from_(bucket).get_public_url(image_path)
+
+    return image_path, image_url
+
+async def update_product_image(product_id: str, image_url: str) -> dict[str, any]:
+    """
+    Update the product_image field for a product
+
+    Returns:
+        Dict[str, Any]: The updated product data
+    """
+    if not product_id:
+        raise ValueError("Product ID is required")
+
+    result = supabase.table("products").update({
+        "product_image": image_url
+    }).eq("product_id", product_id).execute()
+
+    if not result.data:
+        raise Exception(f"Failed to update product {product_id}")
+
+    return result.data[0]
+
+async def process_and_store_product_image(
+    file: UploadFile,
+    remove_bg: bool = True,
+    upscale: bool = True,
+    scale_factor: int = 2,
+    process_order: str = "remove_first",
+    product_id: Optional[str] = None
+) -> dict[str, any]:
+    """
+    Complete workflow for processing a product image and storing it
+
+    This function:
+    1. Processes the image (remove background, upscale)
+    2. Uploads it to storage
+    3. Updates the product record if product_id is provided
+
+    Returns:
+        Dict[str, Any]: Result with status, urls, and processing info
+    """
+    # Process the image
+    processed_image, filename = await process_product_image(
+        file,
+        remove_bg=remove_bg,
+        upscale=upscale,
+        scale_factor=scale_factor,
+        process_order=process_order
+    )
+
+    # Upload to storage
+    image_path, image_url = await upload_processed_image(processed_image, filename)
+
+    # Update product record if needed
+    product_data = None
+    if product_id:
+        product_data = await update_product_image(product_id, image_url)
+
+    # Return comprehensive result
+    return {
+        "status": "success",
+        "message": "Image processed successfully",
+        "image_url": image_url,
+        "image_path": image_path,
+        "product_data": product_data,
+        "processing": {
+            "background_removed": remove_bg,
+            "upscaled": upscale,
+            "scale_factor": scale_factor if upscale else None,
+            "process_order": process_order
+        }
+    }
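
The pipeline in process_and_store_product_image can also be driven without an HTTP request by passing any object that exposes read()/filename/file, mirroring the MockUploadFile pattern used in db/scrape_repository.py. A minimal sketch with placeholder paths, assuming Supabase storage and the "product-images" bucket are configured:

import asyncio
from io import BytesIO
from utils.image_processing import process_and_store_product_image

class LocalUpload:
    """Duck-typed stand-in for fastapi.UploadFile (read/filename/file/content_type)."""
    def __init__(self, content: bytes, filename: str):
        self.file = BytesIO(content)
        self.filename = filename
        self.content_type = "image/png"

    async def read(self) -> bytes:
        self.file.seek(0)
        return self.file.read()

async def main():
    with open("sample_product.png", "rb") as f:  # placeholder path
        upload = LocalUpload(f.read(), "sample_product.png")
    # Remove background, then upscale 2x, upload, and print the public URL
    result = await process_and_store_product_image(upload, remove_bg=True, upscale=True)
    print(result["image_url"])

asyncio.run(main())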