chelleboyer committed on
Commit
05117fd
·
1 Parent(s): 1534ad0

feat: improve file download reliability for Hugging Face Spaces

- Add retry logic with 3 attempts for downloads
- Add timeouts to prevent hanging
- Add progress tracking for downloads
- Improve error handling and logging
- Make JSON file download more resilient

Browse files
Files changed (1) hide show
  1. app.py +132 -83
app.py CHANGED
@@ -13,6 +13,7 @@ from huggingface_hub import hf_hub_download
13
  import easyocr
14
  import difflib
15
  import requests
 
16
 
17
  # Set up logging
18
  logging.basicConfig(level=logging.INFO)
@@ -27,18 +28,37 @@ REFERENCE_DIR = BASE_DIR / 'data' / 'planogram001'
27
  def download_file_from_github(url, local_path):
28
  """Download a file from GitHub and save it locally."""
29
  try:
 
 
 
 
 
30
  # Create directory if it doesn't exist
31
  os.makedirs(os.path.dirname(local_path), exist_ok=True)
32
 
33
- response = requests.get(url)
34
- response.raise_for_status()
 
35
 
36
- with open(local_path, 'wb') as f:
37
- f.write(response.content)
38
- logger.info(f"Successfully downloaded {url} to {local_path}")
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  except Exception as e:
40
  logger.error(f"Error downloading {url}: {str(e)}")
41
- raise
42
 
43
  def download_all_required_files():
44
  """Download all required files from GitHub repository."""
@@ -47,7 +67,17 @@ def download_all_required_files():
47
  # First download the JSON file to get the reference image names
48
  json_url = f"{base_url}/data/product_positions_adjusted_v10.json"
49
  json_path = "data/product_positions_adjusted_v10.json"
50
- download_file_from_github(json_url, json_path)
 
 
 
 
 
 
 
 
 
 
51
 
52
  # Read the JSON file to get reference image names
53
  global planogram_data
@@ -77,14 +107,26 @@ def download_all_required_files():
77
  # Create necessary directories
78
  os.makedirs("data/planogram001", exist_ok=True)
79
 
 
 
 
 
 
 
 
80
  for filename, local_path in files_to_download.items():
81
  url = f"{base_url}/data/{filename}"
82
- try:
83
- download_file_from_github(url, local_path)
84
- logger.info(f"Successfully downloaded {filename} to {local_path}")
85
- except Exception as e:
86
- logger.error(f"Failed to download {filename}: {str(e)}")
87
- raise
 
 
 
 
 
88
 
89
  # Initialize CLIP model and processor with a smaller model
90
  device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -167,9 +209,6 @@ except Exception as e:
167
  def load_reference_images():
168
  """Load all reference product images and pre-compute their features."""
169
  try:
170
- # First ensure all required files are downloaded
171
- download_all_required_files()
172
-
173
  # Now load the reference images
174
  for img_path in REFERENCE_DIR.glob('*.png'):
175
  if img_path.name == 'empty-space.png' or img_path.name == 'planogram.png':
@@ -292,18 +331,22 @@ def compare_images(ref_img, uploaded_img):
292
  @cl.on_chat_start
293
  async def start():
294
  """Initialize the chat session."""
295
- # Load reference images
296
- load_reference_images()
297
-
298
- ref_img = load_reference_image()
299
- if ref_img is None:
300
- await cl.Message(
301
- content="Error: Reference planogram image not found. Please ensure planogram.png exists in the project directory."
302
- ).send()
303
- return
304
-
305
- # Create a welcome message with instructions
306
- welcome_msg = """
 
 
 
 
307
  # Welcome to Planolyzer! 🛍️
308
 
309
  ## Quick Start:
@@ -313,61 +356,67 @@ async def start():
313
  - Adding empty spaces to the reference planogram
314
  - Moving items to wrong positions
315
  """
316
-
317
- # Send welcome message
318
- await cl.Message(content=welcome_msg).send()
319
-
320
- # Send reference planogram
321
- await cl.Message(
322
- content="## Reference Planogram:",
323
- elements=[
324
- cl.Image(
325
- name="planogram",
326
- path=str(REFERENCE_DIR / 'planogram.png'),
327
- display="inline",
328
- size="medium"
329
- ),
330
- cl.File(
331
- name="planogram.png",
332
- path=str(REFERENCE_DIR / 'planogram.png'),
333
- display="inline"
334
- ),
335
- cl.Image(
336
- name="empty_space",
337
- path=str(REFERENCE_DIR / 'empty-space.png'),
338
- display="inline",
339
- size="small"
340
- ),
341
- cl.File(
342
- name="empty-space.png",
343
- path=str(REFERENCE_DIR / 'empty-space.png'),
344
- display="inline"
345
- )
346
- ]
347
- ).send()
348
-
349
- # Send test image
350
- await cl.Message(
351
- content="## Test Image:",
352
- elements=[
353
- cl.Image(
354
- name="test_shelf",
355
- path=str(BASE_DIR / 'data' / 'test_shelf_image_cig_003.png'),
356
- display="inline",
357
- size="medium"
358
- ),
359
- cl.File(
360
- name="test_shelf_image_cig_003.png",
361
- path=str(BASE_DIR / 'data' / 'test_shelf_image_cig_003.png'),
362
- display="inline"
363
- )
364
- ]
365
- ).send()
366
-
367
- # Send additional instructions
368
- await cl.Message(
369
- content="Try downloading and uploading the test image first to see how the system works!"
370
- ).send()
 
 
 
 
 
 
371
 
372
  @cl.on_message
373
  async def main(message: cl.Message):
 
13
  import easyocr
14
  import difflib
15
  import requests
16
+ import time
17
 
18
  # Set up logging
19
  logging.basicConfig(level=logging.INFO)
 
28
  def download_file_from_github(url, local_path):
29
  """Download a file from GitHub and save it locally."""
30
  try:
31
+ # Skip if file already exists
32
+ if os.path.exists(local_path):
33
+ logger.info(f"File already exists at {local_path}, skipping download")
34
+ return True
35
+
36
  # Create directory if it doesn't exist
37
  os.makedirs(os.path.dirname(local_path), exist_ok=True)
38
 
39
+ # Add retry logic for Hugging Face Spaces
40
+ max_retries = 3
41
+ retry_delay = 2 # seconds
42
 
43
+ for attempt in range(max_retries):
44
+ try:
45
+ response = requests.get(url, timeout=30) # Add timeout
46
+ response.raise_for_status()
47
+
48
+ with open(local_path, 'wb') as f:
49
+ f.write(response.content)
50
+ logger.info(f"Successfully downloaded {url} to {local_path}")
51
+ return True
52
+ except requests.exceptions.RequestException as e:
53
+ if attempt < max_retries - 1:
54
+ logger.warning(f"Attempt {attempt + 1} failed: {str(e)}. Retrying in {retry_delay} seconds...")
55
+ time.sleep(retry_delay)
56
+ else:
57
+ raise
58
+
59
  except Exception as e:
60
  logger.error(f"Error downloading {url}: {str(e)}")
61
+ return False
62
 
63
  def download_all_required_files():
64
  """Download all required files from GitHub repository."""
 
67
  # First download the JSON file to get the reference image names
68
  json_url = f"{base_url}/data/product_positions_adjusted_v10.json"
69
  json_path = "data/product_positions_adjusted_v10.json"
70
+
71
+ # Try downloading JSON file with retries
72
+ json_downloaded = False
73
+ for attempt in range(3):
74
+ if download_file_from_github(json_url, json_path):
75
+ json_downloaded = True
76
+ break
77
+ time.sleep(2) # Wait before retry
78
+
79
+ if not json_downloaded:
80
+ raise ValueError("Failed to download required JSON file after multiple attempts")
81
 
82
  # Read the JSON file to get reference image names
83
  global planogram_data
 
107
  # Create necessary directories
108
  os.makedirs("data/planogram001", exist_ok=True)
109
 
110
+ # Track failed downloads
111
+ failed_downloads = []
112
+
113
+ # Download files with progress tracking
114
+ total_files = len(files_to_download)
115
+ downloaded_files = 0
116
+
117
  for filename, local_path in files_to_download.items():
118
  url = f"{base_url}/data/{filename}"
119
+ if download_file_from_github(url, local_path):
120
+ downloaded_files += 1
121
+ logger.info(f"Progress: {downloaded_files}/{total_files} files downloaded")
122
+ else:
123
+ failed_downloads.append(filename)
124
+
125
+ if failed_downloads:
126
+ logger.error(f"Failed to download the following files: {', '.join(failed_downloads)}")
127
+ raise ValueError(f"Failed to download {len(failed_downloads)} required files")
128
+
129
+ logger.info("Successfully downloaded all required files")
130
 
131
  # Initialize CLIP model and processor with a smaller model
132
  device = "cuda" if torch.cuda.is_available() else "cpu"
 
209
  def load_reference_images():
210
  """Load all reference product images and pre-compute their features."""
211
  try:
 
 
 
212
  # Now load the reference images
213
  for img_path in REFERENCE_DIR.glob('*.png'):
214
  if img_path.name == 'empty-space.png' or img_path.name == 'planogram.png':
 
331
  @cl.on_chat_start
332
  async def start():
333
  """Initialize the chat session."""
334
+ try:
335
+ # First ensure all required files are downloaded
336
+ download_all_required_files()
337
+
338
+ # Then load the reference images
339
+ load_reference_images()
340
+
341
+ ref_img = load_reference_image()
342
+ if ref_img is None:
343
+ await cl.Message(
344
+ content="Error: Reference planogram image not found. Please ensure planogram.png exists in the project directory."
345
+ ).send()
346
+ return
347
+
348
+ # Create a welcome message with instructions
349
+ welcome_msg = """
350
  # Welcome to Planolyzer! 🛍️
351
 
352
  ## Quick Start:
 
356
  - Adding empty spaces to the reference planogram
357
  - Moving items to wrong positions
358
  """
359
+
360
+ # Send welcome message
361
+ await cl.Message(content=welcome_msg).send()
362
+
363
+ # Send reference planogram
364
+ await cl.Message(
365
+ content="## Reference Planogram:",
366
+ elements=[
367
+ cl.Image(
368
+ name="planogram",
369
+ path=str(REFERENCE_DIR / 'planogram.png'),
370
+ display="inline",
371
+ size="medium"
372
+ ),
373
+ cl.File(
374
+ name="planogram.png",
375
+ path=str(REFERENCE_DIR / 'planogram.png'),
376
+ display="inline"
377
+ ),
378
+ cl.Image(
379
+ name="empty_space",
380
+ path=str(REFERENCE_DIR / 'empty-space.png'),
381
+ display="inline",
382
+ size="small"
383
+ ),
384
+ cl.File(
385
+ name="empty-space.png",
386
+ path=str(REFERENCE_DIR / 'empty-space.png'),
387
+ display="inline"
388
+ )
389
+ ]
390
+ ).send()
391
+
392
+ # Send test image
393
+ await cl.Message(
394
+ content="## Test Image:",
395
+ elements=[
396
+ cl.Image(
397
+ name="test_shelf",
398
+ path=str(BASE_DIR / 'data' / 'test_shelf_image_cig_003.png'),
399
+ display="inline",
400
+ size="medium"
401
+ ),
402
+ cl.File(
403
+ name="test_shelf_image_cig_003.png",
404
+ path=str(BASE_DIR / 'data' / 'test_shelf_image_cig_003.png'),
405
+ display="inline"
406
+ )
407
+ ]
408
+ ).send()
409
+
410
+ # Send additional instructions
411
+ await cl.Message(
412
+ content="Try downloading and uploading the test image first to see how the system works!"
413
+ ).send()
414
+
415
+ except Exception as e:
416
+ logger.error(f"Error initializing chat session: {str(e)}")
417
+ await cl.Message(
418
+ content=f"Error initializing chat session: {str(e)}"
419
+ ).send()
420
 
421
  @cl.on_message
422
  async def main(message: cl.Message):