Fred808 committed on
Commit
8580dc7
·
verified ·
1 Parent(s): 50add06

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +84 -40
app.py CHANGED
@@ -62,26 +62,26 @@ CAPTION_SERVERS = [
62
  "https://fred1012-fred1012-ptlc5u.hf.space/analyze",
63
  "https://fred1012-fred1012-u7lh57.hf.space/analyze",
64
  "https://fred1012-fred1012-q8djv1.hf.space/analyze",
65
- "https://fredalone-fredalone-ozugrp.hf.space/analyze",
66
- "https://fredalone-fredalone-9brxj2.hf.space/analyze",
67
- "https://fredalone-fredalone-p8vq9a.hf.space/analyze",
68
- "https://fredalone-fredalone-vbli2y.hf.space/analyze",
69
- "https://fredalone-fredalone-uggger.hf.space/analyze",
70
- "https://fredalone-fredalone-nmi7e8.hf.space/analyze",
71
- "https://fredalone-fredalone-d1f26d.hf.space/analyze",
72
- "https://fredalone-fredalone-461jp2.hf.space/analyze",
73
- "https://fredalone-fredalone-3enfg4.hf.space/analyze",
74
- "https://fredalone-fredalone-dqdbpv.hf.space/analyze",
75
- "https://fredalone-fredalone-ivtjua.hf.space/analyze",
76
- "https://fredalone-fredalone-6bezt2.hf.space/analyze",
77
- "https://fredalone-fredalone-e0wfnk.hf.space/analyze",
78
- "https://fredalone-fredalone-zu2t7j.hf.space/analyze",
79
- "https://fredalone-fredalone-dqtv1o.hf.space/analyze",
80
- "https://fredalone-fredalone-wclyog.hf.space/analyze",
81
- "https://fredalone-fredalone-t27vig.hf.space/analyze",
82
- "https://fredalone-fredalone-gahbxh.hf.space/analyze",
83
- "https://fredalone-fredalone-kw2po4.hf.space/analyze",
84
- "https://fredalone-fredalone-8h285h.hf.space/analyze"
85
  ]
86
  MODEL_TYPE = "Florence-2-large" # Explicitly request large model
87
  DATA_COLLECTION_SERVER = "https://fred808-flow.hf.space"
@@ -350,7 +350,7 @@ async def get_model_info():
350
  return model_info
351
 
352
  async def process_image(server: CaptionServer, course: str, image: Dict) -> Dict:
353
- """Process single image through one caption server"""
354
  if server.busy:
355
  return None
356
 
@@ -377,14 +377,21 @@ async def process_image(server: CaptionServer, course: str, image: Dict) -> Dict
377
  }
378
  print(f"Server {server.url} processed {image['filename']} in {processing_time:.2f}s ({server.fps:.2f} fps)")
379
  return metadata
 
 
 
 
 
380
 
 
 
 
381
  except Exception as e:
382
  print(f"Error processing {image['filename']} on {server.url}: {e}")
 
383
 
384
  finally:
385
  server.busy = False
386
-
387
- return None
388
 
389
  async def submit_to_dataset(course: str, metadata_list: List[Dict]):
390
  """Submit course results to dataset collection server"""
@@ -417,7 +424,7 @@ async def submit_to_dataset(course: str, metadata_list: List[Dict]):
417
  return None
418
 
419
  async def process_course(course: str, servers: List[CaptionServer]):
420
- """Process all images in a course using available servers"""
421
  if course not in processed_images:
422
  processed_images[course] = set()
423
  if course not in course_captions:
@@ -433,50 +440,87 @@ async def process_course(course: str, servers: List[CaptionServer]):
433
  return
434
 
435
  print(f"\nProcessing {len(images)} images for course {course}")
436
- remaining_images = [img for img in images if img['filename'] not in processed_images[course]]
437
 
438
- if not remaining_images:
 
 
 
 
 
 
 
439
  print(f"All images already processed for course {course}")
440
  return
441
 
442
- while remaining_images and is_processing:
 
 
 
443
  # Create tasks for each available server
444
  tasks = []
 
 
445
  for server in servers:
446
- if not server.busy and remaining_images:
447
- img = remaining_images[0]
448
- remaining_images = remaining_images[1:]
 
 
 
449
  tasks.append(process_image(server, course, img))
 
 
 
450
 
451
  if not tasks:
 
452
  await asyncio.sleep(0.1)
453
  continue
454
 
455
  # Process images in parallel across servers
456
  results = await asyncio.gather(*tasks)
457
 
458
- # Handle results
459
  has_new_results = False
460
- for result in results:
461
  if result:
462
- processed_images[course].add(result['image'])
 
463
  course_captions[course].append(result)
464
  has_new_results = True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
465
 
466
  # Save progress after each batch with new results
467
  if has_new_results:
468
  save_captions_to_file(course, course_captions[course])
469
 
470
  # Show progress
471
- total = len(images)
472
  done = len(processed_images[course])
473
- progress_percent = (done / total * 100) if total > 0 else 0
474
- print(f"\rProgress: {done}/{total} images ({progress_percent:.1f}%)", end="")
 
 
475
 
476
- if not remaining_images and len(processed_images[course]) == len(images):
477
- print(f"\nCourse {course} complete, submitting to dataset...")
478
- await submit_to_dataset(course, course_captions[course])
479
- # Don't clear the data, keep it for API queries
 
 
 
480
  break
481
 
482
  # Small delay to prevent overwhelming the servers
 
62
  "https://fred1012-fred1012-ptlc5u.hf.space/analyze",
63
  "https://fred1012-fred1012-u7lh57.hf.space/analyze",
64
  "https://fred1012-fred1012-q8djv1.hf.space/analyze",
65
+ "https://fredalone-fredalone-ozugrp.hf.space/analyze",
66
+ "https://fredalone-fredalone-9brxj2.hf.space/analyze",
67
+ "https://fredalone-fredalone-p8vq9a.hf.space/analyze",
68
+ "https://fredalone-fredalone-vbli2y.hf.space/analyze",
69
+ "https://fredalone-fredalone-uggger.hf.space/analyze",
70
+ "https://fredalone-fredalone-nmi7e8.hf.space/analyze",
71
+ "https://fredalone-fredalone-d1f26d.hf.space/analyze",
72
+ "https://fredalone-fredalone-461jp2.hf.space/analyze",
73
+ "https://fredalone-fredalone-3enfg4.hf.space/analyze",
74
+ "https://fredalone-fredalone-dqdbpv.hf.space/analyze",
75
+ "https://fredalone-fredalone-ivtjua.hf.space/analyze",
76
+ "https://fredalone-fredalone-6bezt2.hf.space/analyze",
77
+ "https://fredalone-fredalone-e0wfnk.hf.space/analyze",
78
+ "https://fredalone-fredalone-zu2t7j.hf.space/analyze",
79
+ "https://fredalone-fredalone-dqtv1o.hf.space/analyze",
80
+ "https://fredalone-fredalone-wclyog.hf.space/analyze",
81
+ "https://fredalone-fredalone-t27vig.hf.space/analyze",
82
+ "https://fredalone-fredalone-gahbxh.hf.space/analyze",
83
+ "https://fredalone-fredalone-kw2po4.hf.space/analyze",
84
+ "https://fredalone-fredalone-8h285h.hf.space/analyze"
85
  ]
86
  MODEL_TYPE = "Florence-2-large" # Explicitly request large model
87
  DATA_COLLECTION_SERVER = "https://fred808-flow.hf.space"
 
350
  return model_info
351
 
352
  async def process_image(server: CaptionServer, course: str, image: Dict) -> Dict:
353
+ """Process single image through one caption server with better error handling"""
354
  if server.busy:
355
  return None
356
 
 
377
  }
378
  print(f"Server {server.url} processed {image['filename']} in {processing_time:.2f}s ({server.fps:.2f} fps)")
379
  return metadata
380
+ else:
381
+ # Server responded but no caption (might be error or empty response)
382
+ error_msg = result.get('error', 'Unknown error') if result else 'No response'
383
+ print(f"Server {server.url} failed for {image['filename']}: {error_msg}")
384
+ return None
385
 
386
+ except asyncio.TimeoutError:
387
+ print(f"Server {server.url} timeout for {image['filename']}")
388
+ return None
389
  except Exception as e:
390
  print(f"Error processing {image['filename']} on {server.url}: {e}")
391
+ return None
392
 
393
  finally:
394
  server.busy = False
 
 
395
 
396
  async def submit_to_dataset(course: str, metadata_list: List[Dict]):
397
  """Submit course results to dataset collection server"""
 
424
  return None
425
 
426
  async def process_course(course: str, servers: List[CaptionServer]):
427
+ """Process all images in a course using available servers with retry logic"""
428
  if course not in processed_images:
429
  processed_images[course] = set()
430
  if course not in course_captions:
 
440
  return
441
 
442
  print(f"\nProcessing {len(images)} images for course {course}")
 
443
 
444
+ # Track images that need processing with retry count (5 retries)
445
+ pending_images = {
446
+ img['filename']: {'image': img, 'retries': 0, 'max_retries': 5}
447
+ for img in images
448
+ if img['filename'] not in processed_images[course]
449
+ }
450
+
451
+ if not pending_images:
452
  print(f"All images already processed for course {course}")
453
  return
454
 
455
+ failed_images = set()
456
+ total_images = len(images)
457
+
458
+ while pending_images and is_processing:
459
  # Create tasks for each available server
460
  tasks = []
461
+ assigned_images = []
462
+
463
  for server in servers:
464
+ if not server.busy and pending_images:
465
+ # Get the next pending image
466
+ filename, img_data = next(iter(pending_images.items()))
467
+ img = img_data['image']
468
+
469
+ # Assign this image to the server
470
  tasks.append(process_image(server, course, img))
471
+ assigned_images.append((filename, img))
472
+ # Remove from pending temporarily while it's being processed
473
+ del pending_images[filename]
474
 
475
  if not tasks:
476
+ # If no servers available, wait a bit
477
  await asyncio.sleep(0.1)
478
  continue
479
 
480
  # Process images in parallel across servers
481
  results = await asyncio.gather(*tasks)
482
 
483
+ # Handle results and retry logic
484
  has_new_results = False
485
+ for (filename, img), result in zip(assigned_images, results):
486
  if result:
487
+ # Success - image was processed
488
+ processed_images[course].add(filename)
489
  course_captions[course].append(result)
490
  has_new_results = True
491
+ print(f"✓ Successfully processed {filename}")
492
+ else:
493
+ # Failure - check if we should retry
494
+ current_retries = assigned_images[0][0] if assigned_images else 0
495
+ img_data = {'image': img, 'retries': current_retries + 1, 'max_retries': 5}
496
+
497
+ if img_data['retries'] <= img_data['max_retries']:
498
+ # Put back in pending for retry
499
+ pending_images[filename] = img_data
500
+ print(f"↻ Retry {img_data['retries']}/{img_data['max_retries']} for {filename}")
501
+ else:
502
+ # Max retries exceeded, mark as failed
503
+ failed_images.add(filename)
504
+ print(f"✗ Failed to process {filename} after {img_data['max_retries']} retries")
505
 
506
  # Save progress after each batch with new results
507
  if has_new_results:
508
  save_captions_to_file(course, course_captions[course])
509
 
510
  # Show progress
 
511
  done = len(processed_images[course])
512
+ pending_count = len(pending_images)
513
+ failed_count = len(failed_images)
514
+ progress_percent = (done / total_images * 100) if total_images > 0 else 0
515
+ print(f"\rProgress: {done}/{total_images} images ({progress_percent:.1f}%) - {pending_count} pending, {failed_count} failed", end="")
516
 
517
+ # Check if we're done (all images either processed or permanently failed)
518
+ if not pending_images:
519
+ if failed_images:
520
+ print(f"\nCourse {course} completed with {len(failed_images)} failed images: {list(failed_images)}")
521
+ else:
522
+ print(f"\nCourse {course} complete, submitting to dataset...")
523
+ await submit_to_dataset(course, course_captions[course])
524
  break
525
 
526
  # Small delay to prevent overwhelming the servers