#!/usr/bin/env bash
#
# Pack cached sample_*.pt files into numbered zip archives.
#
# Entries directly inside CACHE_DIR that match sample_*.pt are split into
# batches of at most BATCH_SIZE, and each batch is MOVED into
# OUTPUT_DIR/cache_batch_NNN.zip, so disk space is reclaimed batch by batch.
#
# Each setting can be overridden from the environment:
#   CACHE_DIR   directory to scan           (default: data/cache)
#   OUTPUT_DIR  where archives are written  (default: data/archives)
#   BATCH_SIZE  max files per archive       (default: 60000)
#
# Requires: zip, find, sort, du, and a split that supports -d (GNU coreutils).
#
# NOTE: if cache_batch_NNN.zip already exists from an earlier run, zip adds the
# new files to that archive instead of replacing it.

set -euo pipefail

CACHE_DIR="${CACHE_DIR:-data/cache}"
OUTPUT_DIR="${OUTPUT_DIR:-data/archives}"
BATCH_SIZE="${BATCH_SIZE:-60000}" # Smaller for frequent updates

# Print an error message to stderr and exit with status 1.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

#######################################
# Archive every sample_*.pt found directly in a directory, batch by batch.
#
# The body runs in a subshell, so its variables, its EXIT trap and any `exit`
# stay local to one call: the function simply returns the subshell's status.
#
# Arguments:
#   $1 - directory to scan for sample_*.pt
#   $2 - directory that receives cache_batch_NNN.zip (created if missing)
#   $3 - maximum number of files per archive (positive integer)
# Outputs:
#   progress on stdout, warnings and errors on stderr
# Returns:
#   0 when all batches were archived or there was nothing to do;
#   1 on invalid arguments or as soon as any step fails
#######################################
archive_cache() (
  cache_dir=${1:?cache directory is required}
  output_dir=${2:?output directory is required}
  batch_size=${3:?batch size is required}

  [[ "$batch_size" =~ ^[1-9][0-9]*$ ]] \
    || die "batch size must be a positive integer, got '$batch_size'"

  echo "========================================================"
  printf '%s\n' "Archiving '$cache_dir' into multiple zip files..."
  printf '%s\n' "Batch Size: $batch_size files per archive"
  echo "========================================================"

  # Scratch lists live in a private directory rather than the current one, so
  # leftovers from an interrupted run can never be mistaken for fresh batches.
  work_dir=$(mktemp -d) || die "could not create a temporary directory"
  trap 'rm -rf -- "$work_dir"' EXIT
  all_files="$work_dir/all_files.txt"

  echo "Scanning for .pt files..."
  if [[ -d "$cache_dir" ]]; then
    # Sorted so that the contents of each batch are reproducible.
    find "$cache_dir" -maxdepth 1 -name 'sample_*.pt' \
      | LC_ALL=C sort > "$all_files" \
      || die "failed to scan $cache_dir"
  else
    printf 'WARNING: cache directory %s does not exist\n' "$cache_dir" >&2
    : > "$all_files"
  fi

  total_files=$(($(wc -l < "$all_files")))
  printf '%s\n' "Found $total_files .pt files."

  if ((total_files == 0)); then
    echo "No files found to archive."
    exit 0
  fi

  command -v zip > /dev/null || die "zip is not installed"
  mkdir -p -- "$output_dir" || die "cannot create $output_dir"

  # One list file per batch; with -a 3 split fails past 1000 batches, which is
  # reported here instead of silently archiving only part of the cache.
  split -l "$batch_size" -d -a 3 "$all_files" "$work_dir/file_list_part_" \
    || die "could not split the file list into batches"

  echo "Starting sequential archiving..."

  for list_file in "$work_dir"/file_list_part_*; do
    part_num=${list_file##*_}
    zip_name="$output_dir/cache_batch_$part_num.zip"
    batch_count=$(($(wc -l < "$list_file")))

    printf '%s\n' \
      "[$(date +%T)] Starting batch $part_num ($batch_count files) -> $zip_name"

    # -1: fastest compression      -q: quiet
    # -j: store names without directories
    # -m: move into the archive; zip deletes the originals only once the
    #     archive has been created without error, freeing space as we go
    # -@: read the file names from stdin, one per line
    zip -1 -mq -j "$zip_name" -@ < "$list_file" \
      || die "zip failed for batch $part_num; unarchived files remain in $cache_dir"

    # The exit status is the real check; this only guards against a zip that
    # reports success without leaving an archive behind.
    [[ -f "$zip_name" ]] || die "Failed to create $zip_name"

    size=$(du -h "$zip_name" | cut -f1)
    printf '%s\n' "[$(date +%T)] Finished batch $part_num (Size: $size)"
  done

  echo "========================================================"
  printf '%s\n' "Done! Archives are in $output_dir"
  echo "========================================================"
)

archive_cache "$CACHE_DIR" "$OUTPUT_DIR" "$BATCH_SIZE"