Fred808 committed on
Commit
0639bd5
·
verified ·
1 Parent(s): 5a3cc2d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -10
app.py CHANGED
@@ -552,18 +552,19 @@ async def process_dataset_task(start_index: int):
552
  # Final progress report for the current file
553
  if len(all_captions) == len(image_paths):
554
  print(f"[{FLOW_ID}] FINAL PROGRESS for {zip_full_name}: Successfully completed all {len(all_captions)} captions.")
555
- current_file_success = True
556
  else:
557
- print(f"[{FLOW_ID}] FINAL PROGRESS for {zip_full_name}: Completed with partial result: {len(all_captions)}/{len(image_paths)} captions. Marking as partial failure.")
558
- current_file_success = False
 
 
559
 
560
  # 4. Upload Results
561
  if all_captions:
562
  print(f"[{FLOW_ID}] Uploading {len(all_captions)} captions for {zip_full_name}...")
563
  if await upload_captions_to_hf(zip_full_name, all_captions):
564
  print(f"[{FLOW_ID}] Successfully uploaded captions for {zip_full_name}.")
565
- # Partial success in captioning is still a success for the upload step
566
- pass
567
  else:
568
  print(f"[{FLOW_ID}] Failed to upload captions for {zip_full_name}.")
569
  current_file_success = False
@@ -615,12 +616,22 @@ app = FastAPI(
615
  async def startup_event():
616
  print(f"Flow Server {FLOW_ID} started on port {FLOW_PORT}.")
617
 
618
- # Automatically start the processing task
619
  progress = load_progress()
620
- # Start from the last processed index + 1, or the hardcoded AUTO_START_INDEX if the progress file is new/empty
621
- start_index = progress.get('last_processed_index', 0) + 1
622
- if start_index < AUTO_START_INDEX:
623
- start_index = AUTO_START_INDEX
 
 
 
 
 
 
 
 
 
 
624
 
625
  # Use a dummy BackgroundTasks object for the startup task
626
  # Note: FastAPI's startup events can't directly use BackgroundTasks, but we can use asyncio.create_task
 
552
  # Final progress report for the current file
553
  if len(all_captions) == len(image_paths):
554
  print(f"[{FLOW_ID}] FINAL PROGRESS for {zip_full_name}: Successfully completed all {len(all_captions)} captions.")
 
555
  else:
556
+ print(f"[{FLOW_ID}] FINAL PROGRESS for {zip_full_name}: Completed with partial result: {len(all_captions)}/{len(image_paths)} captions.")
557
+
558
+ # Consider the file successful if we have any captions
559
+ current_file_success = len(all_captions) > 0
560
 
561
  # 4. Upload Results
562
  if all_captions:
563
  print(f"[{FLOW_ID}] Uploading {len(all_captions)} captions for {zip_full_name}...")
564
  if await upload_captions_to_hf(zip_full_name, all_captions):
565
  print(f"[{FLOW_ID}] Successfully uploaded captions for {zip_full_name}.")
566
+ # Keep current_file_success as True since we have captions and successfully uploaded them
567
+ current_file_success = True
568
  else:
569
  print(f"[{FLOW_ID}] Failed to upload captions for {zip_full_name}.")
570
  current_file_success = False
 
616
  async def startup_event():
617
  print(f"Flow Server {FLOW_ID} started on port {FLOW_PORT}.")
618
 
619
+ # Get both local progress and HF state
620
  progress = load_progress()
621
+ current_state = await download_hf_state()
622
+
623
+ # Get the next_download_index from HF state if available
624
+ hf_next_index = current_state.get("next_download_index", 0)
625
+
626
+ # If HF state provides a positive next_download_index, prefer it over local progress (no comparison with the local index is made)
627
+ if hf_next_index > 0:
628
+ start_index = hf_next_index
629
+ print(f"[{FLOW_ID}] Using next_download_index from HF state: {start_index}")
630
+ else:
631
+ # Fall back to local progress if HF state doesn't have a meaningful index
632
+ start_index = progress.get('last_processed_index', 0) + 1
633
+ if start_index < AUTO_START_INDEX:
634
+ start_index = AUTO_START_INDEX
635
 
636
  # Use a dummy BackgroundTasks object for the startup task
637
  # Note: FastAPI's startup events can't directly use BackgroundTasks, but we can use asyncio.create_task