Update app.py
Browse files
app.py
CHANGED
|
@@ -552,18 +552,19 @@ async def process_dataset_task(start_index: int):
|
|
| 552 |
# Final progress report for the current file
|
| 553 |
if len(all_captions) == len(image_paths):
|
| 554 |
print(f"[{FLOW_ID}] FINAL PROGRESS for {zip_full_name}: Successfully completed all {len(all_captions)} captions.")
|
| 555 |
-
current_file_success = True
|
| 556 |
else:
|
| 557 |
-
print(f"[{FLOW_ID}] FINAL PROGRESS for {zip_full_name}: Completed with partial result: {len(all_captions)}/{len(image_paths)} captions.
|
| 558 |
-
|
|
|
|
|
|
|
| 559 |
|
| 560 |
# 4. Upload Results
|
| 561 |
if all_captions:
|
| 562 |
print(f"[{FLOW_ID}] Uploading {len(all_captions)} captions for {zip_full_name}...")
|
| 563 |
if await upload_captions_to_hf(zip_full_name, all_captions):
|
| 564 |
print(f"[{FLOW_ID}] Successfully uploaded captions for {zip_full_name}.")
|
| 565 |
-
#
|
| 566 |
-
|
| 567 |
else:
|
| 568 |
print(f"[{FLOW_ID}] Failed to upload captions for {zip_full_name}.")
|
| 569 |
current_file_success = False
|
|
@@ -615,12 +616,22 @@ app = FastAPI(
|
|
| 615 |
async def startup_event():
|
| 616 |
print(f"Flow Server {FLOW_ID} started on port {FLOW_PORT}.")
|
| 617 |
|
| 618 |
-
#
|
| 619 |
progress = load_progress()
|
| 620 |
-
|
| 621 |
-
|
| 622 |
-
|
| 623 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 624 |
|
| 625 |
# Use a dummy BackgroundTasks object for the startup task
|
| 626 |
# Note: FastAPI's startup events can't directly use BackgroundTasks, but we can use asyncio.create_task
|
|
|
|
| 552 |
# Final progress report for the current file
|
| 553 |
if len(all_captions) == len(image_paths):
|
| 554 |
print(f"[{FLOW_ID}] FINAL PROGRESS for {zip_full_name}: Successfully completed all {len(all_captions)} captions.")
|
|
|
|
| 555 |
else:
|
| 556 |
+
print(f"[{FLOW_ID}] FINAL PROGRESS for {zip_full_name}: Completed with partial result: {len(all_captions)}/{len(image_paths)} captions.")
|
| 557 |
+
|
| 558 |
+
# Consider the file successful if we have any captions
|
| 559 |
+
current_file_success = len(all_captions) > 0
|
| 560 |
|
| 561 |
# 4. Upload Results
|
| 562 |
if all_captions:
|
| 563 |
print(f"[{FLOW_ID}] Uploading {len(all_captions)} captions for {zip_full_name}...")
|
| 564 |
if await upload_captions_to_hf(zip_full_name, all_captions):
|
| 565 |
print(f"[{FLOW_ID}] Successfully uploaded captions for {zip_full_name}.")
|
| 566 |
+
# Mark the file as successful: captions exist and the upload succeeded
|
| 567 |
+
current_file_success = True
|
| 568 |
else:
|
| 569 |
print(f"[{FLOW_ID}] Failed to upload captions for {zip_full_name}.")
|
| 570 |
current_file_success = False
|
|
|
|
| 616 |
async def startup_event():
|
| 617 |
print(f"Flow Server {FLOW_ID} started on port {FLOW_PORT}.")
|
| 618 |
|
| 619 |
+
# Get both local progress and HF state
|
| 620 |
progress = load_progress()
|
| 621 |
+
current_state = await download_hf_state()
|
| 622 |
+
|
| 623 |
+
# Get the next_download_index from HF state if available
|
| 624 |
+
hf_next_index = current_state.get("next_download_index", 0)
|
| 625 |
+
|
| 626 |
+
# If HF state has a positive next_download_index, use it instead of local progress (it is not compared against the local value)
|
| 627 |
+
if hf_next_index > 0:
|
| 628 |
+
start_index = hf_next_index
|
| 629 |
+
print(f"[{FLOW_ID}] Using next_download_index from HF state: {start_index}")
|
| 630 |
+
else:
|
| 631 |
+
# Fall back to local progress if HF state doesn't have a meaningful index
|
| 632 |
+
start_index = progress.get('last_processed_index', 0) + 1
|
| 633 |
+
if start_index < AUTO_START_INDEX:
|
| 634 |
+
start_index = AUTO_START_INDEX
|
| 635 |
|
| 636 |
# Use a dummy BackgroundTasks object for the startup task
|
| 637 |
# Note: FastAPI's startup events can't directly use BackgroundTasks, but we can use asyncio.create_task
|