Update app.py
Browse files
app.py
CHANGED
|
@@ -62,26 +62,26 @@ CAPTION_SERVERS = [
|
|
| 62 |
"https://fred1012-fred1012-ptlc5u.hf.space/analyze",
|
| 63 |
"https://fred1012-fred1012-u7lh57.hf.space/analyze",
|
| 64 |
"https://fred1012-fred1012-q8djv1.hf.space/analyze",
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
]
|
| 86 |
MODEL_TYPE = "Florence-2-large" # Explicitly request large model
|
| 87 |
DATA_COLLECTION_SERVER = "https://fred808-flow.hf.space"
|
|
@@ -350,7 +350,7 @@ async def get_model_info():
|
|
| 350 |
return model_info
|
| 351 |
|
| 352 |
async def process_image(server: CaptionServer, course: str, image: Dict) -> Dict:
|
| 353 |
-
"""Process single image through one caption server"""
|
| 354 |
if server.busy:
|
| 355 |
return None
|
| 356 |
|
|
@@ -377,14 +377,21 @@ async def process_image(server: CaptionServer, course: str, image: Dict) -> Dict
|
|
| 377 |
}
|
| 378 |
print(f"Server {server.url} processed {image['filename']} in {processing_time:.2f}s ({server.fps:.2f} fps)")
|
| 379 |
return metadata
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 380 |
|
|
|
|
|
|
|
|
|
|
| 381 |
except Exception as e:
|
| 382 |
print(f"Error processing {image['filename']} on {server.url}: {e}")
|
|
|
|
| 383 |
|
| 384 |
finally:
|
| 385 |
server.busy = False
|
| 386 |
-
|
| 387 |
-
return None
|
| 388 |
|
| 389 |
async def submit_to_dataset(course: str, metadata_list: List[Dict]):
|
| 390 |
"""Submit course results to dataset collection server"""
|
|
@@ -417,7 +424,7 @@ async def submit_to_dataset(course: str, metadata_list: List[Dict]):
|
|
| 417 |
return None
|
| 418 |
|
| 419 |
async def process_course(course: str, servers: List[CaptionServer]):
|
| 420 |
-
"""Process all images in a course using available servers"""
|
| 421 |
if course not in processed_images:
|
| 422 |
processed_images[course] = set()
|
| 423 |
if course not in course_captions:
|
|
@@ -433,50 +440,87 @@ async def process_course(course: str, servers: List[CaptionServer]):
|
|
| 433 |
return
|
| 434 |
|
| 435 |
print(f"\nProcessing {len(images)} images for course {course}")
|
| 436 |
-
remaining_images = [img for img in images if img['filename'] not in processed_images[course]]
|
| 437 |
|
| 438 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 439 |
print(f"All images already processed for course {course}")
|
| 440 |
return
|
| 441 |
|
| 442 |
-
|
|
|
|
|
|
|
|
|
|
| 443 |
# Create tasks for each available server
|
| 444 |
tasks = []
|
|
|
|
|
|
|
| 445 |
for server in servers:
|
| 446 |
-
if not server.busy and
|
| 447 |
-
|
| 448 |
-
|
|
|
|
|
|
|
|
|
|
| 449 |
tasks.append(process_image(server, course, img))
|
|
|
|
|
|
|
|
|
|
| 450 |
|
| 451 |
if not tasks:
|
|
|
|
| 452 |
await asyncio.sleep(0.1)
|
| 453 |
continue
|
| 454 |
|
| 455 |
# Process images in parallel across servers
|
| 456 |
results = await asyncio.gather(*tasks)
|
| 457 |
|
| 458 |
-
# Handle results
|
| 459 |
has_new_results = False
|
| 460 |
-
for result in results:
|
| 461 |
if result:
|
| 462 |
-
|
|
|
|
| 463 |
course_captions[course].append(result)
|
| 464 |
has_new_results = True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 465 |
|
| 466 |
# Save progress after each batch with new results
|
| 467 |
if has_new_results:
|
| 468 |
save_captions_to_file(course, course_captions[course])
|
| 469 |
|
| 470 |
# Show progress
|
| 471 |
-
total = len(images)
|
| 472 |
done = len(processed_images[course])
|
| 473 |
-
|
| 474 |
-
|
|
|
|
|
|
|
| 475 |
|
| 476 |
-
if
|
| 477 |
-
|
| 478 |
-
|
| 479 |
-
|
|
|
|
|
|
|
|
|
|
| 480 |
break
|
| 481 |
|
| 482 |
# Small delay to prevent overwhelming the servers
|
|
|
|
| 62 |
"https://fred1012-fred1012-ptlc5u.hf.space/analyze",
|
| 63 |
"https://fred1012-fred1012-u7lh57.hf.space/analyze",
|
| 64 |
"https://fred1012-fred1012-q8djv1.hf.space/analyze",
|
| 65 |
+
"https://fredalone-fredalone-ozugrp.hf.space/analyze",
|
| 66 |
+
"https://fredalone-fredalone-9brxj2.hf.space/analyze",
|
| 67 |
+
"https://fredalone-fredalone-p8vq9a.hf.space/analyze",
|
| 68 |
+
"https://fredalone-fredalone-vbli2y.hf.space/analyze",
|
| 69 |
+
"https://fredalone-fredalone-uggger.hf.space/analyze",
|
| 70 |
+
"https://fredalone-fredalone-nmi7e8.hf.space/analyze",
|
| 71 |
+
"https://fredalone-fredalone-d1f26d.hf.space/analyze",
|
| 72 |
+
"https://fredalone-fredalone-461jp2.hf.space/analyze",
|
| 73 |
+
"https://fredalone-fredalone-3enfg4.hf.space/analyze",
|
| 74 |
+
"https://fredalone-fredalone-dqdbpv.hf.space/analyze",
|
| 75 |
+
"https://fredalone-fredalone-ivtjua.hf.space/analyze",
|
| 76 |
+
"https://fredalone-fredalone-6bezt2.hf.space/analyze",
|
| 77 |
+
"https://fredalone-fredalone-e0wfnk.hf.space/analyze",
|
| 78 |
+
"https://fredalone-fredalone-zu2t7j.hf.space/analyze",
|
| 79 |
+
"https://fredalone-fredalone-dqtv1o.hf.space/analyze",
|
| 80 |
+
"https://fredalone-fredalone-wclyog.hf.space/analyze",
|
| 81 |
+
"https://fredalone-fredalone-t27vig.hf.space/analyze",
|
| 82 |
+
"https://fredalone-fredalone-gahbxh.hf.space/analyze",
|
| 83 |
+
"https://fredalone-fredalone-kw2po4.hf.space/analyze",
|
| 84 |
+
"https://fredalone-fredalone-8h285h.hf.space/analyze"
|
| 85 |
]
|
| 86 |
MODEL_TYPE = "Florence-2-large" # Explicitly request large model
|
| 87 |
DATA_COLLECTION_SERVER = "https://fred808-flow.hf.space"
|
|
|
|
| 350 |
return model_info
|
| 351 |
|
| 352 |
async def process_image(server: CaptionServer, course: str, image: Dict) -> Dict:
|
| 353 |
+
"""Process single image through one caption server with better error handling"""
|
| 354 |
if server.busy:
|
| 355 |
return None
|
| 356 |
|
|
|
|
| 377 |
}
|
| 378 |
print(f"Server {server.url} processed {image['filename']} in {processing_time:.2f}s ({server.fps:.2f} fps)")
|
| 379 |
return metadata
|
| 380 |
+
else:
|
| 381 |
+
# Server responded but no caption (might be error or empty response)
|
| 382 |
+
error_msg = result.get('error', 'Unknown error') if result else 'No response'
|
| 383 |
+
print(f"Server {server.url} failed for {image['filename']}: {error_msg}")
|
| 384 |
+
return None
|
| 385 |
|
| 386 |
+
except asyncio.TimeoutError:
|
| 387 |
+
print(f"Server {server.url} timeout for {image['filename']}")
|
| 388 |
+
return None
|
| 389 |
except Exception as e:
|
| 390 |
print(f"Error processing {image['filename']} on {server.url}: {e}")
|
| 391 |
+
return None
|
| 392 |
|
| 393 |
finally:
|
| 394 |
server.busy = False
|
|
|
|
|
|
|
| 395 |
|
| 396 |
async def submit_to_dataset(course: str, metadata_list: List[Dict]):
|
| 397 |
"""Submit course results to dataset collection server"""
|
|
|
|
| 424 |
return None
|
| 425 |
|
| 426 |
async def process_course(course: str, servers: List[CaptionServer]):
|
| 427 |
+
"""Process all images in a course using available servers with retry logic"""
|
| 428 |
if course not in processed_images:
|
| 429 |
processed_images[course] = set()
|
| 430 |
if course not in course_captions:
|
|
|
|
| 440 |
return
|
| 441 |
|
| 442 |
print(f"\nProcessing {len(images)} images for course {course}")
|
|
|
|
| 443 |
|
| 444 |
+
# Track images that need processing with retry count (5 retries)
|
| 445 |
+
pending_images = {
|
| 446 |
+
img['filename']: {'image': img, 'retries': 0, 'max_retries': 5}
|
| 447 |
+
for img in images
|
| 448 |
+
if img['filename'] not in processed_images[course]
|
| 449 |
+
}
|
| 450 |
+
|
| 451 |
+
if not pending_images:
|
| 452 |
print(f"All images already processed for course {course}")
|
| 453 |
return
|
| 454 |
|
| 455 |
+
failed_images = set()
|
| 456 |
+
total_images = len(images)
|
| 457 |
+
|
| 458 |
+
while pending_images and is_processing:
|
| 459 |
# Create tasks for each available server
|
| 460 |
tasks = []
|
| 461 |
+
assigned_images = []
|
| 462 |
+
|
| 463 |
for server in servers:
|
| 464 |
+
if not server.busy and pending_images:
|
| 465 |
+
# Get the next pending image
|
| 466 |
+
filename, img_data = next(iter(pending_images.items()))
|
| 467 |
+
img = img_data['image']
|
| 468 |
+
|
| 469 |
+
# Assign this image to the server
|
| 470 |
tasks.append(process_image(server, course, img))
|
| 471 |
+
assigned_images.append((filename, img))
|
| 472 |
+
# Remove from pending temporarily while it's being processed
|
| 473 |
+
del pending_images[filename]
|
| 474 |
|
| 475 |
if not tasks:
|
| 476 |
+
# If no servers available, wait a bit
|
| 477 |
await asyncio.sleep(0.1)
|
| 478 |
continue
|
| 479 |
|
| 480 |
# Process images in parallel across servers
|
| 481 |
results = await asyncio.gather(*tasks)
|
| 482 |
|
| 483 |
+
# Handle results and retry logic
|
| 484 |
has_new_results = False
|
| 485 |
+
for (filename, img), result in zip(assigned_images, results):
|
| 486 |
if result:
|
| 487 |
+
# Success - image was processed
|
| 488 |
+
processed_images[course].add(filename)
|
| 489 |
course_captions[course].append(result)
|
| 490 |
has_new_results = True
|
| 491 |
+
print(f"✓ Successfully processed {filename}")
|
| 492 |
+
else:
|
| 493 |
+
# Failure - check if we should retry
|
| 494 |
+
current_retries = assigned_images[0][0] if assigned_images else 0
|
| 495 |
+
img_data = {'image': img, 'retries': current_retries + 1, 'max_retries': 5}
|
| 496 |
+
|
| 497 |
+
if img_data['retries'] <= img_data['max_retries']:
|
| 498 |
+
# Put back in pending for retry
|
| 499 |
+
pending_images[filename] = img_data
|
| 500 |
+
print(f"↻ Retry {img_data['retries']}/{img_data['max_retries']} for {filename}")
|
| 501 |
+
else:
|
| 502 |
+
# Max retries exceeded, mark as failed
|
| 503 |
+
failed_images.add(filename)
|
| 504 |
+
print(f"✗ Failed to process {filename} after {img_data['max_retries']} retries")
|
| 505 |
|
| 506 |
# Save progress after each batch with new results
|
| 507 |
if has_new_results:
|
| 508 |
save_captions_to_file(course, course_captions[course])
|
| 509 |
|
| 510 |
# Show progress
|
|
|
|
| 511 |
done = len(processed_images[course])
|
| 512 |
+
pending_count = len(pending_images)
|
| 513 |
+
failed_count = len(failed_images)
|
| 514 |
+
progress_percent = (done / total_images * 100) if total_images > 0 else 0
|
| 515 |
+
print(f"\rProgress: {done}/{total_images} images ({progress_percent:.1f}%) - {pending_count} pending, {failed_count} failed", end="")
|
| 516 |
|
| 517 |
+
# Check if we're done (all images either processed or permanently failed)
|
| 518 |
+
if not pending_images:
|
| 519 |
+
if failed_images:
|
| 520 |
+
print(f"\nCourse {course} completed with {len(failed_images)} failed images: {list(failed_images)}")
|
| 521 |
+
else:
|
| 522 |
+
print(f"\nCourse {course} complete, submitting to dataset...")
|
| 523 |
+
await submit_to_dataset(course, course_captions[course])
|
| 524 |
break
|
| 525 |
|
| 526 |
# Small delay to prevent overwhelming the servers
|