Franko Fišter committed · Commit e245176 · Parent(s): 4c7debd

Working scraping endpoint, WIP product image scraping and processing

Files changed:
- api/main.py +2 -0
- api/product_routes.py +128 -2
- api/scrape_routes.py +158 -0
- db/scrape_repository.py +69 -0
- requirements.txt +2 -1
- utils/image_processing.py +94 -0

api/main.py CHANGED

@@ -3,6 +3,7 @@ from fastapi.middleware.cors import CORSMiddleware
 from config.settings import API_HOST, API_PORT
 from api.product_routes import router as product_router
 from api.receipt_routes import router as receipt_router
+from api.scrape_routes import router as scrape_router
 
 # Initialize FastAPI
 app = FastAPI(title="Product and Receipt API")
@@ -20,6 +21,7 @@ app.add_middleware(
 # Add routers
 app.include_router(product_router)
 app.include_router(receipt_router)
+app.include_router(scrape_router)
 
 @app.get("/", tags=["Health"])
 def health_check():

api/product_routes.py CHANGED

@@ -1,7 +1,10 @@
-from fastapi import APIRouter, File, UploadFile, HTTPException
-from utils.image_processing import read_image_file
+from fastapi import APIRouter, File, UploadFile, HTTPException, Form
+from utils.image_processing import read_image_file, process_product_image
 from product_detector.detector import ObjectDetector
 from config.settings import MODEL_ONNX_PATH, CLASS_NAMES, INPUT_SIZE
+from typing import Optional
+from db.supabase_client import SupabaseClient
+import uuid
 
 # Initialize the detector
 detector = ObjectDetector(
@@ -31,3 +34,126 @@ async def detect_objects(file: UploadFile = File(...)):
         raise
     except Exception as e:
         raise HTTPException(500, f"Processing error: {str(e)}")
+
+@router.post("/process-image")
+async def process_image(
+    file: UploadFile = File(...),
+    remove_bg: bool = Form(True),
+    upscale: bool = Form(True),
+    scale_factor: int = Form(2),
+    process_order: str = Form("remove_first")
+):
+    """
+    Process product images by removing background and/or upscaling
+
+    - remove_bg: Whether to remove the white background
+    - upscale: Whether to upscale the image
+    - scale_factor: Scale factor for upscaling (2, 3, or 4)
+    - process_order: Order of operations ('remove_first' or 'upscale_first')
+    """
+    try:
+        # Validate inputs
+        if scale_factor not in [2, 3, 4]:
+            raise HTTPException(400, "Scale factor must be 2, 3, or 4")
+
+        if process_order not in ["remove_first", "upscale_first"]:
+            raise HTTPException(400, "Process order must be 'remove_first' or 'upscale_first'")
+
+        if not file.content_type.startswith("image/"):
+            raise HTTPException(400, "File must be an image")
+
+        # Process the image
+        processed_image, filename = await process_product_image(
+            file,
+            remove_bg=remove_bg,
+            upscale=upscale,
+            scale_factor=scale_factor,
+            process_order=process_order
+        )
+
+        # Generate a unique ID for the image
+        image_id = str(uuid.uuid4())
+        image_path = f"{image_id}_{filename}"
+
+        # Upload the processed image to Supabase Storage
+        supabase.storage.from_("product-images").upload(
+            file=processed_image,
+            path=image_path,
+            file_options={"content-type": "image/png", "upsert": "true"}
+        )
+
+        # Get the public URL for the uploaded image
+        image_url = supabase.storage.from_("product-images").get_public_url(image_path)
+
+        return {
+            "status": "success",
+            "message": "Image processed successfully",
+            "image_url": image_url,
+            "image_path": image_path,
+            "processing": {
+                "background_removed": remove_bg,
+                "upscaled": upscale,
+                "scale_factor": scale_factor if upscale else None,
+                "process_order": process_order
+            }
+        }
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        raise HTTPException(500, f"Image processing error: {str(e)}")
+
+
+@router.post("/process-product-image")
+async def process_product_image_endpoint(
+    file: UploadFile = File(...),
+    remove_bg: bool = Form(True),
+    upscale: bool = Form(True),
+    scale_factor: int = Form(2),
+    process_order: str = Form("remove_first"),
+    product_id: str = Form(None)
+):
+    """
+    Process a product image by removing background and upscaling,
+    then save to Supabase Storage
+    """
+    try:
+        # Process the image
+        processed_image, filename = await process_product_image(
+            file,
+            remove_bg=remove_bg,
+            upscale=upscale,
+            scale_factor=scale_factor,
+            process_order=process_order
+        )
+
+        # Generate a unique ID for the image
+        image_id = str(uuid.uuid4())
+        image_path = f"{image_id}_{filename}"
+
+        # Upload the processed image to Supabase Storage
+        supabase.storage.from_("product-images").upload(
+            file=processed_image,
+            path=image_path,
+            file_options={"content-type": "image/png", "upsert": "true"}
+        )
+
+        # Get the public URL for the uploaded image
+        image_url = supabase.storage.from_("product-images").get_public_url(image_path)
+
+        # If product_id is provided, update the product record
+        if product_id:
+            # Update product_image column in the database
+            result = supabase.table("products").update({
+                "product_image": image_url
+            }).eq("id", product_id).execute()
+
+        return {
+            "status": "success",
+            "message": "Image processed successfully",
+            "image_url": image_url,
+            "image_path": image_path
+        }
+
+    except Exception as e:
+        raise HTTPException(500, f"Image processing error: {str(e)}")
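
Note: both new endpoints reference a module-level supabase client that is not created in the hunks shown; unless it is initialized in the unchanged middle of the file, the handlers would raise NameError on first use. A minimal sketch of the missing initialization, mirroring how db/scrape_repository.py (added in this same commit) obtains its client; the get_client() call is assumed from that file:

# Sketch only: mirrors PromoProductRepository.__init__ in db/scrape_repository.py.
supabase = SupabaseClient().get_client()

(The new but unused Optional import also hints that product_id: Optional[str] = Form(None) may have been intended.)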

api/scrape_routes.py ADDED

@@ -0,0 +1,158 @@
+from fastapi import APIRouter, HTTPException, Request, Depends, BackgroundTasks
+from db.scrape_repository import PromoProductRepository
+from utils.rate_limiter import RateLimiter
+import requests
+from concurrent.futures import ThreadPoolExecutor, as_completed
+
+# Initialize rate limiter and repository
+rate_limiter = RateLimiter(max_requests=5)  # Lower limit for scraping operations
+promo_repository = PromoProductRepository()
+
+router = APIRouter(prefix="/scrape", tags=["Data Scraping"])
+
+RETAILER_MAPPING = {
+    8: "Studenac",
+    4: "Konzum",
+    3: "Kaufland",
+    9: "Tommy",
+    109: "Spar",
+    6: "Plodine",
+    5: "Lidl"
+}
+
+def fetch_page(session, page):
+    """Fetch a single page of products"""
+    url = f"https://backend.360promo.hr/api/promotions/products?pageNumber={page}&sortBySalePercentage=False"
+    try:
+        print(f"📄 Fetching page {page}...")
+        response = session.get(url, timeout=10)
+        response.raise_for_status()
+        return page, response.json()
+    except Exception as e:
+        print(f"❌ Error on page {page}: {str(e)}")
+        return page, []
+
+def fetch_all_products():
+    products = []
+    max_workers = 8  # Adjust based on API capacity
+
+    with requests.Session() as session:
+        # First, fetch page 1 to see if there's data
+        _, page1_data = fetch_page(session, 1)
+        if not page1_data:
+            print("No data found on first page")
+            return []
+
+        products.extend(page1_data)
+
+        # Set up for concurrent fetching of subsequent pages
+        last_page_with_data = 1
+        while True:
+            # Determine next batch of pages to fetch
+            start_page = last_page_with_data + 1
+            end_page = start_page + max_workers - 1
+
+            if start_page > 1000:  # Safety limit
+                print("Reached maximum page limit")
+                break
+
+            pages_to_fetch = list(range(start_page, end_page + 1))
+
+            # Fetch pages concurrently
+            with ThreadPoolExecutor(max_workers=max_workers) as executor:
+                futures = [executor.submit(fetch_page, session, page) for page in pages_to_fetch]
+
+                # Process results
+                new_data_found = False
+                highest_page_with_data = 0
+
+                for future in as_completed(futures):
+                    page, data = future.result()
+                    if data:  # If we got data
+                        products.extend(data)
+                        new_data_found = True
+                        highest_page_with_data = max(highest_page_with_data, page)
+
+            # If no new data was found in this batch, we're done
+            if not new_data_found:
+                break
+
+            # Update the last page with data
+            last_page_with_data = highest_page_with_data
+
+    print(f"\n✅ Total products collected: {len(products)}")
+    return products
+
+def process_products(products):
+    unified_products = []
+
+    for product in products:
+        retailer_id = product.get('retailerId')
+        if not retailer_id or retailer_id not in RETAILER_MAPPING:
+            continue
+
+        store_name = RETAILER_MAPPING[retailer_id]
+        price = product.get('promoPrice') or product.get('regularPrice')
+
+        if price is None:
+            continue
+
+        item = {
+            "store": store_name,
+            "pictureId": product.get('id'),
+            "name": product.get('name', 'Unknown Product'),
+            "description": product.get('description', ''),
+            "promoStartDate": product.get('promoStartDate'),
+            "promoEndDate": product.get('promoEndDate'),
+            "regularPrice": product.get('regularPrice'),
+            "promoPrice": product.get('promoPrice')
+        }
+
+        unified_products.append(item)
+
+    return unified_products
+
+async def scrape_and_store_products():
+    """Background task to scrape products and store them in the database"""
+    try:
+        # Fetch products from the API
+        products = fetch_all_products()
+        if not products:
+            print("No products found to scrape")
+            return 0
+
+        # Process products into standardized format
+        unified_products = process_products(products)
+        if not unified_products:
+            print("No valid products found to store")
+            return 0
+
+        # Store products in Supabase
+        stored_count = promo_repository.upsert_multiple_products(unified_products)
+        print(f"Successfully stored {stored_count} products")
+        return stored_count
+
+    except Exception as e:
+        print(f"Error during scraping: {str(e)}")
+        return 0
+
+@router.post("/promo")
+async def trigger_promo_scrape(
+    background_tasks: BackgroundTasks,
+    request: Request
+):
+    """
+    Admin only: Trigger a promotional product scraping operation.
+    This runs in the background to avoid timeout issues.
+    """
+    try:
+        # Add scraping task to background tasks
+        background_tasks.add_task(scrape_and_store_products)
+
+        return {
+            "status": "success",
+            "message": "Promotional product scraping started. Results will be stored in the database."
+        }
+    except Exception as e:
+        print(f"ERROR: {str(e)}")
+        raise HTTPException(500, f"Failed to start scraping operation: {str(e)}")
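
Two notes on this module as committed. rate_limiter is instantiated but never attached to the route, so /scrape/promo is effectively unthrottled (how RateLimiter is meant to be wired in depends on utils/rate_limiter.py, which this commit doesn't touch). Also, scrape_and_store_products is declared async but does blocking I/O (requests calls, thread-pool joins), so while it runs it stalls the event loop; declared as a plain def, Starlette's BackgroundTasks would run it in a worker thread instead. A hypothetical trigger script, assuming the app serves on localhost:8000:

# Hypothetical trigger; assumes the FastAPI app from api/main.py is running on localhost:8000.
import requests

resp = requests.post("http://localhost:8000/scrape/promo", timeout=10)
resp.raise_for_status()
print(resp.json())  # the scrape itself continues in a background task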

db/scrape_repository.py ADDED

@@ -0,0 +1,69 @@
+from typing import Dict, Any, List
+from datetime import datetime
+from .supabase_client import SupabaseClient
+
+class PromoProductRepository:
+    def __init__(self):
+        self.supabase = SupabaseClient().get_client()
+
+    def upsert_multiple_products(self, products: List[Dict[str, Any]]) -> int:
+        """
+        Upsert multiple promo products in batches
+        Returns the number of successfully upserted products
+        """
+        batch_size = 100  # Adjust based on Supabase capacity
+        successfully_upserted = 0
+        timestamp = datetime.now().isoformat()
+
+        # Process in batches to avoid request size limitations
+        for i in range(0, len(products), batch_size):
+            batch = products[i:i+batch_size]
+
+            for product in batch:
+                store = product.get("store")
+                name = product.get("name")
+
+                formatted_product = {
+                    "store": store,
+                    "picture_id": product.get("pictureId"),
+                    "name": name,
+                    "description": product.get("description", ""),
+                    "promo_start_date": product.get("promoStartDate"),
+                    "promo_end_date": product.get("promoEndDate"),
+                    "regular_price": product.get("regularPrice"),
+                    "promo_price": product.get("promoPrice"),
+                    "last_updated": timestamp
+                }
+
+                try:
+                    # Check if product exists with same store and name
+                    result = self.supabase.table("promo_products").select("*") \
+                        .eq("store", store) \
+                        .eq("name", name) \
+                        .execute()
+
+                    if result.data and len(result.data) > 0:
+                        # Product exists, update it
+                        record_id = result.data[0]["id"]
+                        self.supabase.table("promo_products") \
+                            .update(formatted_product) \
+                            .eq("id", record_id) \
+                            .execute()
+                    else:
+                        # Product doesn't exist, insert it
+                        self.supabase.table("promo_products") \
+                            .insert(formatted_product) \
+                            .execute()
+
+                    successfully_upserted += 1
+
+                    # Print progress periodically
+                    if successfully_upserted % 50 == 0:
+                        print(f"Processed {successfully_upserted} products so far...")
+
+                except Exception as e:
+                    print(f"Failed to upsert product '{name}' from '{store}': {str(e)}")
+                    # Continue with next product instead of failing completely
+
+        print(f"Successfully upserted {successfully_upserted} products")
+        return successfully_upserted
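
The select-then-update/insert above costs two round trips per product, so batch_size only chunks the Python loop rather than reducing request count. If promo_products has (or can be given) a unique constraint on (store, name), supabase-py's native upsert can send a whole batch in one request; a sketch under that assumption:

# Sketch only: one request per batch; assumes UNIQUE (store, name) exists on
# promo_products so PostgREST can resolve conflicts server-side.
formatted_batch = [self._format(p, timestamp) for p in batch]  # hypothetical helper producing formatted_product dicts
self.supabase.table("promo_products") \
    .upsert(formatted_batch, on_conflict="store,name") \
    .execute()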

requirements.txt CHANGED

@@ -9,4 +9,5 @@ ultralytics
 python-multipart
 google-cloud-vision
 python-dotenv
-supabase
+supabase
+rembg
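
A deployment note on the new rembg dependency: it downloads its U2-Net model weights on first use, so the first background-removal request can be slow or fail in a network-restricted runtime. Recent rembg versions expose new_session() for pre-fetching the weights at startup; a hedged sketch:

# Sketch: warm rembg's model cache at startup so the first request
# doesn't pay the download cost; assumes rembg exposes new_session().
from rembg import new_session

_session = new_session("u2net")  # downloads weights into the local cache if missing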

utils/image_processing.py CHANGED

@@ -1,6 +1,12 @@
 import numpy as np
 import cv2
 from fastapi import UploadFile, HTTPException
+from PIL import Image
+import io
+from rembg import remove
+import time
+import uuid
+from typing import Tuple, Optional
 
 async def read_image_file(file: UploadFile) -> np.ndarray:
     """Read and process an image file from FastAPI UploadFile"""
@@ -14,3 +20,91 @@ async def read_image_file(file: UploadFile) -> np.ndarray:
         raise HTTPException(400, "Invalid image data")
 
     return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+
+def remove_background(image_bytes: bytes) -> bytes:
+    """Remove white background from image using rembg"""
+    try:
+        return remove(image_bytes)
+    except Exception as e:
+        print(f"Error removing background: {str(e)}")
+        raise Exception(f"Background removal error: {str(e)}")
+
+def upscale_image(image_bytes: bytes, scale_factor: int = 2) -> bytes:
+    """Upscale image using OpenCV"""
+    try:
+        # Create a numpy array from the image bytes
+        nparr = np.frombuffer(image_bytes, np.uint8)
+        img = cv2.imdecode(nparr, cv2.IMREAD_UNCHANGED)
+
+        # Handle images with alpha channel
+        if len(img.shape) > 2 and img.shape[2] == 4:
+            # Split channels
+            b, g, r, a = cv2.split(img)
+
+            # Scale RGB channels
+            rgb_channels = cv2.merge([b, g, r])
+            scaled_rgb = cv2.resize(rgb_channels, None, fx=scale_factor, fy=scale_factor,
+                                    interpolation=cv2.INTER_CUBIC)
+
+            # Scale alpha channel separately
+            scaled_alpha = cv2.resize(a, None, fx=scale_factor, fy=scale_factor,
+                                      interpolation=cv2.INTER_CUBIC)
+
+            # Merge channels back together
+            scaled_img = cv2.merge([
+                scaled_rgb[:, :, 0],
+                scaled_rgb[:, :, 1],
+                scaled_rgb[:, :, 2],
+                scaled_alpha
+            ])
+        else:
+            # Regular RGB image
+            scaled_img = cv2.resize(img, None, fx=scale_factor, fy=scale_factor,
+                                    interpolation=cv2.INTER_CUBIC)
+
+        # Encode the image back to bytes
+        success, buffer = cv2.imencode('.png', scaled_img)
+        if not success:
+            raise Exception("Failed to encode upscaled image")
+
+        return buffer.tobytes()
+    except Exception as e:
+        print(f"Error upscaling image: {str(e)}")
+        raise Exception(f"Image upscaling error: {str(e)}")
+
+async def process_product_image(
+    file: UploadFile,
+    remove_bg: bool = True,
+    upscale: bool = True,
+    scale_factor: int = 2,
+    process_order: str = "remove_first"
+) -> Tuple[bytes, str]:
+    """Process a product image with background removal and upscaling"""
+    # Read the file content
+    content = await file.read()
+    file.file.seek(0)  # Reset file pointer for potential reuse
+
+    # Create a descriptive filename with timestamp for uniqueness
+    timestamp = int(time.time())
+    original_filename = file.filename.split('.')
+    base_name = original_filename[0] if len(original_filename) > 0 else 'product'
+    extension = 'png'  # Always use PNG to preserve transparency
+
+    # Process the image based on the parameters and order
+    processed_content = content
+
+    if process_order == "remove_first" and remove_bg and upscale:
+        processed_content = remove_background(processed_content)
+        processed_content = upscale_image(processed_content, scale_factor)
+    elif process_order == "upscale_first" and remove_bg and upscale:
+        processed_content = upscale_image(processed_content, scale_factor)
+        processed_content = remove_background(processed_content)
+    elif remove_bg:
+        processed_content = remove_background(processed_content)
+    elif upscale:
+        processed_content = upscale_image(processed_content, scale_factor)
+
+    # Create descriptive filename with processing info
+    processed_filename = f"{base_name}_{'nobg' if remove_bg else ''}_{'upx' + str(scale_factor) if upscale else ''}_{timestamp}.{extension}"
+
+    return processed_content, processed_filename
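
One robustness note on upscale_image: cv2.imdecode returns None for bytes it cannot decode, so a corrupt upload would surface as an unhelpful AttributeError at img.shape. A small guard makes the failure explicit; a sketch:

# Guard sketch for upscale_image: fail fast when OpenCV cannot decode the input.
nparr = np.frombuffer(image_bytes, np.uint8)
img = cv2.imdecode(nparr, cv2.IMREAD_UNCHANGED)
if img is None:
    raise Exception("Upscaling failed: input is not a decodable image")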