wisdom anthony committed on
Commit
78d6b0b
·
1 Parent(s): 1f9b0e6

Files deleted

Browse files
api/product_routes.py CHANGED
@@ -1,7 +1,6 @@
1
  from fastapi import APIRouter, File, UploadFile, HTTPException, Form
2
  from utils.image_processing import read_image_file, process_product_image
3
- # from product_detector.detector import ObjectDetector # Temporarily disabled - model corrupted
4
- from product_detector.mock_detector import MockObjectDetector as ObjectDetector
5
  from config.settings import MODEL_ONNX_PATH, CLASS_NAMES, INPUT_SIZE
6
  from utils.image_processing import process_and_store_product_image
7
 
 
1
  from fastapi import APIRouter, File, UploadFile, HTTPException, Form
2
  from utils.image_processing import read_image_file, process_product_image
3
+ from product_detector.detector import ObjectDetector
 
4
  from config.settings import MODEL_ONNX_PATH, CLASS_NAMES, INPUT_SIZE
5
  from utils.image_processing import process_and_store_product_image
6
 
db/similarity_repository.py CHANGED
@@ -234,39 +234,6 @@ class SimilarityRepository:
234
  return self._get_sample_promo_products()
235
 
236
 
237
- def update_product_image(self, product_id: str, image_url: str) -> bool:
238
- """
239
- Update product image in database
240
-
241
- Args:
242
- product_id: Product ID to update
243
- image_url: New image URL
244
-
245
- Returns:
246
- True if successful, False otherwise
247
- """
248
- if not self.supabase:
249
- logger.error("❌ No Supabase connection")
250
- return False
251
-
252
- try:
253
- logger.info(f"📊 Updating product {product_id} with image URL")
254
-
255
- result = self.supabase.table('products').update({
256
- 'product_image': image_url
257
- }).eq('product_id', product_id).execute()
258
-
259
- if result.data:
260
- logger.info(f"✅ Updated product {product_id} with image")
261
- return True
262
- else:
263
- logger.error(f"❌ Failed to update product {product_id}")
264
- return False
265
-
266
- except Exception as e:
267
- logger.error(f"❌ Database update error for product {product_id}: {e}")
268
- return False
269
-
270
  def get_products_without_images(self, limit: Optional[int] = None) -> List[Dict[str, Any]]:
271
  """
272
  Get products that don't have images
 
234
  return self._get_sample_promo_products()
235
 
236
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237
  def get_products_without_images(self, limit: Optional[int] = None) -> List[Dict[str, Any]]:
238
  """
239
  Get products that don't have images
product_detector/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Product detector package
product_detector/mock_detector.py DELETED
@@ -1,33 +0,0 @@
1
- import numpy as np
2
- from typing import List, Dict
3
- import warnings
4
-
5
- class MockObjectDetector:
6
- """
7
- Mock Object Detector to temporarily replace the broken ONNX model
8
- Returns dummy detection results to keep the server running
9
- """
10
-
11
- def __init__(self, model_path: str, class_names: List[str], input_size: int = 640):
12
- self.class_names = class_names
13
- self.input_size = input_size
14
- print(f"🔧 Mock detector initialized - model file was corrupted")
15
- print(f"📝 Available classes: {class_names}")
16
-
17
- def predict(self, image: np.ndarray) -> List[Dict]:
18
- """
19
- Mock prediction method - returns sample detections
20
- Replace this with real detector once model is fixed
21
- """
22
- # Return mock detection results
23
- mock_detections = [
24
- {
25
- "class": "product" if len(self.class_names) > 0 else "unknown",
26
- "confidence": 0.85,
27
- "bbox": [100, 100, 300, 250], # x1, y1, x2, y2
28
- "bbox_normalized": [0.3, 0.3, 0.4, 0.5] # center_x, center_y, width, height (normalized)
29
- }
30
- ]
31
-
32
- print(f"🔍 Mock detection completed - found {len(mock_detections)} objects")
33
- return mock_detections
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
similarity_engine/enhanced_image_processor.py DELETED
@@ -1,531 +0,0 @@
1
- """
2
- Enhanced Image Processor - Multiple Sources & Flexible Processing
3
- Supports promo products, manual uploads, URL sources, Google Images, and more
4
- """
5
-
6
- import os
7
- import logging
8
- import requests
9
- import time
10
- from typing import List, Dict, Any, Optional, Tuple
11
- import sys
12
- import os
13
-
14
- # Add parent directory to path
15
- sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
16
-
17
- from similarity_core import calculate_similarity, calculate_confidence
18
- from db.similarity_repository import get_similarity_repository
19
-
20
- # Configure logging
21
- logging.basicConfig(level=logging.INFO)
22
- logger = logging.getLogger(__name__)
23
-
24
- class EnhancedImageProcessor:
25
- """Enhanced image processor with multiple sources and flexible options"""
26
-
27
- def __init__(self):
28
- """Initialize the image processor"""
29
- self.repository = get_similarity_repository()
30
- self.processing_stats = {
31
- 'total_processed': 0,
32
- 'successful': 0,
33
- 'failed': 0,
34
- 'skipped': 0
35
- }
36
-
37
- def find_high_similarity_matches(
38
- self,
39
- source_products: List[Dict],
40
- target_products: List[Dict],
41
- threshold: float = 0.95,
42
- source_type: str = "promo"
43
- ) -> List[Dict[str, Any]]:
44
- """
45
- Find high similarity matches between source and target products
46
-
47
- Args:
48
- source_products: Products with images (promo, manual, etc.)
49
- target_products: Database products to match against
50
- threshold: Similarity threshold
51
- source_type: Type of source ("promo", "manual", "google", etc.)
52
-
53
- Returns:
54
- List of high similarity matches
55
- """
56
- logger.info(f"🔍 Finding high similarity matches for {source_type} images")
57
- logger.info(f"📊 Source products: {len(source_products)}")
58
- logger.info(f"📊 Target products: {len(target_products)}")
59
- logger.info(f"🎯 Similarity threshold: {threshold}")
60
-
61
- matches = []
62
-
63
- for i, source_product in enumerate(source_products):
64
- source_name = source_product.get('name', '').strip()
65
- if not source_name:
66
- continue
67
-
68
- logger.info(f"📊 Analyzing {source_type} product {i+1}/{len(source_products)}: {source_name[:50]}...")
69
-
70
- for target_product in target_products:
71
- target_name = target_product.get('product_name', '').strip()
72
- if not target_name:
73
- continue
74
-
75
- similarity = calculate_similarity(source_name, target_name)
76
-
77
- if similarity >= threshold:
78
- confidence = calculate_confidence(similarity, source_name, target_name)
79
-
80
- match = {
81
- 'source_id': source_product.get('id'),
82
- 'source_name': source_name,
83
- 'source_type': source_type,
84
- 'target_product_id': target_product.get('product_id'),
85
- 'target_product_name': target_name,
86
- 'similarity': round(similarity, 3),
87
- 'confidence': round(confidence, 3),
88
- 'has_current_image': bool(target_product.get('product_image')),
89
- 'source_image_info': self._extract_image_info(source_product, source_type)
90
- }
91
-
92
- matches.append(match)
93
- logger.info(f" 🔍 HIGH MATCH: {source_name} ↔ {target_name} ({similarity:.3f})")
94
- break
95
-
96
- logger.info(f"✅ Found {len(matches)} high similarity matches")
97
- return matches
98
-
99
- def _extract_image_info(self, product: Dict, source_type: str) -> Dict[str, Any]:
100
- """Extract image information based on source type"""
101
- if source_type == "promo":
102
- picture_id = product.get('picture_id')
103
- return {
104
- 'picture_id': picture_id,
105
- 'image_url': f"https://backend.360promo.hr/contents/products/{picture_id}.jpg" if picture_id else None,
106
- 'store': product.get('store'),
107
- 'promo_price': product.get('promo_price'),
108
- 'regular_price': product.get('regular_price')
109
- }
110
- elif source_type == "manual":
111
- return {
112
- 'image_url': product.get('image_url'),
113
- 'original_filename': product.get('filename'),
114
- 'uploaded_by': product.get('uploaded_by')
115
- }
116
- elif source_type == "google":
117
- return {
118
- 'image_url': product.get('image_url'),
119
- 'source_page': product.get('source_page'),
120
- 'search_query': product.get('search_query')
121
- }
122
- elif source_type == "url":
123
- return {
124
- 'image_url': product.get('image_url'),
125
- 'source_domain': product.get('source_domain')
126
- }
127
- else:
128
- return {
129
- 'image_url': product.get('image_url', product.get('picture_url'))
130
- }
131
-
132
- def check_image_availability(self, image_url: str) -> bool:
133
- """Check if image URL is accessible"""
134
- try:
135
- response = requests.head(image_url, timeout=10)
136
- return response.status_code == 200
137
- except Exception as e:
138
- logger.warning(f"⚠️ Image not accessible: {image_url} - {e}")
139
- return False
140
-
141
- def process_image_from_url(
142
- self,
143
- image_url: str,
144
- product_id: str,
145
- processing_options: Dict[str, Any] = None
146
- ) -> Optional[str]:
147
- """
148
- Download and process image from URL
149
-
150
- Args:
151
- image_url: Source image URL
152
- product_id: Target product ID
153
- processing_options: Processing configuration
154
-
155
- Returns:
156
- Processed image URL or None if failed
157
- """
158
- if processing_options is None:
159
- processing_options = {
160
- 'remove_background': True,
161
- 'upscale_factor': 2,
162
- 'target_format': 'webp',
163
- 'quality': 85
164
- }
165
-
166
- try:
167
- logger.info(f"📥 Downloading image from: {image_url}")
168
-
169
- # Download image
170
- response = requests.get(image_url, timeout=30)
171
- if response.status_code != 200:
172
- logger.error(f"❌ Failed to download: HTTP {response.status_code}")
173
- return None
174
-
175
- logger.info("✅ Image downloaded successfully")
176
-
177
- # Try to process via backend endpoint
178
- processed_url = self._process_via_backend(
179
- response.content,
180
- product_id,
181
- processing_options
182
- )
183
-
184
- if processed_url:
185
- return processed_url
186
-
187
- # If processing fails, return original URL
188
- logger.warning("⚠️ Processing failed, using original URL")
189
- return image_url
190
-
191
- except Exception as e:
192
- logger.error(f"❌ Error processing image from URL: {e}")
193
- return None
194
-
195
- def _process_via_backend(
196
- self,
197
- image_content: bytes,
198
- product_id: str,
199
- options: Dict[str, Any]
200
- ) -> Optional[str]:
201
- """Process image via backend endpoint"""
202
- try:
203
- # Get backend endpoint
204
- endpoint = os.getenv('IMAGE_PROCESS_ENDPOINT', 'http://localhost:7860/products/process-product-image')
205
-
206
- files = {'file': ('image.jpg', image_content, 'image/jpeg')}
207
- data = {
208
- 'remove_bg': str(options.get('remove_background', True)).lower(),
209
- 'upscale': str(options.get('upscale_factor', 2) > 1).lower(),
210
- 'scale_factor': str(options.get('upscale_factor', 2)),
211
- 'process_order': 'remove_first',
212
- 'product_id': product_id
213
- }
214
-
215
- response = requests.post(endpoint, files=files, data=data, timeout=60)
216
-
217
- if response.status_code == 200:
218
- result = response.json()
219
- if result.get('status') == 'success':
220
- logger.info("✅ Image processed successfully via backend")
221
- return result.get('image_url')
222
-
223
- logger.warning(f"⚠️ Backend processing failed: {response.status_code}")
224
- return None
225
-
226
- except Exception as e:
227
- logger.warning(f"⚠️ Backend processing unavailable: {e}")
228
- return None
229
-
230
- def process_promo_images(
231
- self,
232
- similarity_threshold: float = 0.95,
233
- skip_existing: bool = True,
234
- max_products: Optional[int] = None
235
- ) -> Dict[str, int]:
236
- """
237
- Process images from promotional products
238
-
239
- Args:
240
- similarity_threshold: Minimum similarity for processing
241
- skip_existing: Skip products that already have images
242
- max_products: Maximum products to process
243
-
244
- Returns:
245
- Processing statistics
246
- """
247
- logger.info("🏷️ Starting promo image processing...")
248
-
249
- # Load promo products with images
250
- promo_products = self.repository.load_promo_products(with_images_only=True)
251
- if not promo_products:
252
- logger.error("❌ No promo products with images found")
253
- return self._get_empty_stats()
254
-
255
- # Load target products
256
- if skip_existing:
257
- target_products = self.repository.get_products_without_images(max_products)
258
- else:
259
- all_products = self.repository.load_all_products()
260
- target_products = all_products[:max_products] if max_products else all_products
261
-
262
- if not target_products:
263
- logger.error("❌ No target products found")
264
- return self._get_empty_stats()
265
-
266
- # Find matches
267
- matches = self.find_high_similarity_matches(
268
- promo_products,
269
- target_products,
270
- similarity_threshold,
271
- "promo"
272
- )
273
-
274
- return self._process_matches(matches, skip_existing)
275
-
276
- def process_manual_upload(
277
- self,
278
- image_file: bytes,
279
- filename: str,
280
- product_id: str,
281
- processing_options: Dict[str, Any] = None
282
- ) -> bool:
283
- """
284
- Process manually uploaded image
285
-
286
- Args:
287
- image_file: Image file content
288
- filename: Original filename
289
- product_id: Target product ID
290
- processing_options: Processing configuration
291
-
292
- Returns:
293
- True if successful
294
- """
295
- logger.info(f"📤 Processing manual upload for product {product_id}")
296
-
297
- try:
298
- # Process image
299
- processed_url = self._process_via_backend(
300
- image_file,
301
- product_id,
302
- processing_options or {}
303
- )
304
-
305
- if not processed_url:
306
- logger.error("❌ Failed to process uploaded image")
307
- return False
308
-
309
- # Update database
310
- success = self.repository.update_product_image(product_id, processed_url)
311
-
312
- if success:
313
- # Save metadata
314
- self.repository.save_image_metadata(product_id, {
315
- 'source_type': 'manual',
316
- 'original_filename': filename,
317
- 'processed_url': processed_url,
318
- 'upload_time': time.time()
319
- })
320
-
321
- logger.info(f"✅ Successfully attached manual upload to product {product_id}")
322
- return True
323
-
324
- return False
325
-
326
- except Exception as e:
327
- logger.error(f"❌ Error processing manual upload: {e}")
328
- return False
329
-
330
- def process_from_url_list(
331
- self,
332
- url_mappings: List[Dict[str, str]],
333
- processing_options: Dict[str, Any] = None
334
- ) -> Dict[str, int]:
335
- """
336
- Process images from a list of URL mappings
337
-
338
- Args:
339
- url_mappings: List of {'product_id': 'xxx', 'image_url': 'xxx'} mappings
340
- processing_options: Processing configuration
341
-
342
- Returns:
343
- Processing statistics
344
- """
345
- logger.info(f"🌐 Processing {len(url_mappings)} URL mappings...")
346
-
347
- stats = self._get_empty_stats()
348
- stats['total_processed'] = len(url_mappings)
349
-
350
- for mapping in url_mappings:
351
- product_id = mapping.get('product_id')
352
- image_url = mapping.get('image_url')
353
-
354
- if not product_id or not image_url:
355
- stats['failed'] += 1
356
- continue
357
-
358
- logger.info(f"📊 Processing URL for product {product_id}")
359
-
360
- # Check availability
361
- if not self.check_image_availability(image_url):
362
- stats['failed'] += 1
363
- continue
364
-
365
- # Process image
366
- processed_url = self.process_image_from_url(
367
- image_url,
368
- product_id,
369
- processing_options
370
- )
371
-
372
- if processed_url:
373
- # Update database
374
- if self.repository.update_product_image(product_id, processed_url):
375
- stats['successful'] += 1
376
-
377
- # Save metadata
378
- self.repository.save_image_metadata(product_id, {
379
- 'source_type': 'url',
380
- 'source_url': image_url,
381
- 'processed_url': processed_url,
382
- 'processing_time': time.time()
383
- })
384
- else:
385
- stats['failed'] += 1
386
- else:
387
- stats['failed'] += 1
388
-
389
- logger.info(f"✅ URL processing complete: {stats['successful']}/{stats['total_processed']} successful")
390
- return stats
391
-
392
- def search_and_attach_google_images(
393
- self,
394
- product_id: str,
395
- search_query: str,
396
- max_results: int = 3,
397
- require_approval: bool = True
398
- ) -> List[Dict[str, Any]]:
399
- """
400
- Search Google Images and find potential matches
401
-
402
- Args:
403
- product_id: Target product ID
404
- search_query: Search query for Google Images
405
- max_results: Maximum results to return
406
- require_approval: Whether manual approval is required
407
-
408
- Returns:
409
- List of potential image matches
410
- """
411
- logger.info(f"🔍 Google Image search for product {product_id}: '{search_query}'")
412
-
413
- # TODO: Implement Google Images API integration
414
- # For now, return mock results
415
- mock_results = [
416
- {
417
- 'image_url': f'https://example.com/mock-image-1.jpg',
418
- 'thumbnail_url': f'https://example.com/mock-thumb-1.jpg',
419
- 'source_page': f'https://example.com/product-page-1',
420
- 'title': f'Mock result for {search_query}',
421
- 'confidence': 0.85
422
- }
423
- ]
424
-
425
- logger.info(f"🔍 Found {len(mock_results)} potential Google Image matches")
426
- logger.warning("⚠️ Google Images integration not yet implemented - returning mock data")
427
-
428
- return mock_results
429
-
430
- def _process_matches(self, matches: List[Dict], skip_existing: bool = True) -> Dict[str, int]:
431
- """Process similarity matches and attach images"""
432
- stats = self._get_empty_stats()
433
- stats['total_processed'] = len(matches)
434
-
435
- if not matches:
436
- return stats
437
-
438
- # Filter existing if needed
439
- if skip_existing:
440
- to_process = [m for m in matches if not m['has_current_image']]
441
- stats['skipped'] = len(matches) - len(to_process)
442
- matches = to_process
443
-
444
- logger.info(f"📊 Processing images for {len(matches)} products...")
445
-
446
- for match in matches:
447
- product_id = match['target_product_id']
448
- image_info = match['source_image_info']
449
- image_url = image_info.get('image_url')
450
-
451
- if not image_url:
452
- stats['failed'] += 1
453
- continue
454
-
455
- logger.info(f"📊 Processing image for product {product_id}")
456
-
457
- # Check availability
458
- if not self.check_image_availability(image_url):
459
- stats['failed'] += 1
460
- continue
461
-
462
- # Process image
463
- processed_url = self.process_image_from_url(image_url, product_id)
464
-
465
- if processed_url and self.repository.update_product_image(product_id, processed_url):
466
- stats['successful'] += 1
467
-
468
- # Save metadata
469
- self.repository.save_image_metadata(product_id, {
470
- 'source_type': match['source_type'],
471
- 'similarity': match['similarity'],
472
- 'confidence': match['confidence'],
473
- 'source_info': image_info,
474
- 'processing_time': time.time()
475
- })
476
-
477
- logger.info(f"✅ Successfully attached image to product {product_id}")
478
- else:
479
- stats['failed'] += 1
480
-
481
- return stats
482
-
483
- def _get_empty_stats(self) -> Dict[str, int]:
484
- """Get empty statistics dictionary"""
485
- return {
486
- 'total_processed': 0,
487
- 'successful': 0,
488
- 'failed': 0,
489
- 'skipped': 0,
490
- 'unavailable': 0
491
- }
492
-
493
- def get_processing_report(self, stats: Dict[str, int]) -> Dict[str, Any]:
494
- """Generate processing report"""
495
- return {
496
- 'summary': {
497
- 'total_processed': stats['total_processed'],
498
- 'successful': stats['successful'],
499
- 'failed': stats['failed'],
500
- 'skipped': stats.get('skipped', 0),
501
- 'success_rate': (stats['successful'] / max(stats['total_processed'], 1)) * 100
502
- },
503
- 'timestamp': time.time(),
504
- 'recommendations': self._generate_recommendations(stats)
505
- }
506
-
507
- def _generate_recommendations(self, stats: Dict[str, int]) -> List[str]:
508
- """Generate recommendations based on processing stats"""
509
- recommendations = []
510
-
511
- if stats['failed'] > stats['successful']:
512
- recommendations.append("High failure rate - check image sources and processing settings")
513
-
514
- if stats.get('skipped', 0) > 0:
515
- recommendations.append(f"{stats['skipped']} products already had images - consider processing all products")
516
-
517
- if stats['successful'] > 0:
518
- recommendations.append(f"Successfully processed {stats['successful']} images - consider similar processing for remaining products")
519
-
520
- return recommendations
521
-
522
-
523
- # Global processor instance
524
- _processor = None
525
-
526
- def get_image_processor() -> EnhancedImageProcessor:
527
- """Get singleton image processor instance"""
528
- global _processor
529
- if _processor is None:
530
- _processor = EnhancedImageProcessor()
531
- return _processor