objaverse_rendering_set / hf_upload_173.sh
benzlxs's picture
Upload hf_upload_173.sh with huggingface_hub
eb2cff2 verified
#!/bin/bash
# ---------------------------------------------------------------------------
# Settings
# ---------------------------------------------------------------------------

# Where the archives live locally and where they are uploaded to.
LOCAL_DIR="zip_folder"
REPO_TYPE="dataset"
REPO_ID="benzlxs/objaverse_rendering_set"

# Every log_message call appends a timestamped line to this file.
LOG_FILE="upload_log_173.txt"

# File-stability polling.
STABILITY_THRESHOLD=2          # Number of consecutive checks with same size before considering stable
STABILITY_CHECK_INTERVAL=150   # Seconds between size checks

# Enable hf_transfer for faster uploads (must be exported so the CLI sees it).
HF_HUB_ENABLE_HF_TRANSFER=1
export HF_HUB_ENABLE_HF_TRANSFER

# Colour codes for output. They are stored as literal backslash sequences and
# only become terminal escapes when printed with escape interpretation enabled.
NC="\033[0m" # No Color
RED="\033[0;31m"
YELLOW="\033[1;33m"
GREEN="\033[0;32m"
# Function to log messages
#
# Prints a message to stdout with backslash escapes (colour codes, \n)
# interpreted, and appends a timestamped, colour-free copy to the log file.
# Globals:   LOG_FILE (read) - path of the file the log line is appended to
# Arguments: $1 - message text; may contain literal \033[...m colour codes
#                 and a leading \n used as a visual separator on the console
# Outputs:   rendered message on stdout; one "[timestamp] text" entry in LOG_FILE
log_message() {
  local msg=$1
  local plain

  # %b expands the same backslash escapes as `echo -e`, but cannot mistake a
  # message such as "-n" for an echo option.
  printf '%b\n' "$msg"

  # The log file is not a terminal: drop the (still literal) colour codes and
  # any leading \n separators so the file stays readable and grep-friendly.
  plain=$(printf '%s\n' "$msg" \
    | sed -e 's/\\033\[[0-9;]*m//g' -e 's/^\(\\n\)*//')

  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$plain" >> "$LOG_FILE"
}
# Function to check if file size is stable
#
# Polls the size of a file until it has been identical and non-zero for
# STABILITY_THRESHOLD consecutive checks, i.e. until whatever is writing the
# file appears to have finished.
# Globals:   STABILITY_CHECK_INTERVAL (read) - seconds to sleep between checks
#            STABILITY_THRESHOLD (read)      - consecutive equal sizes required
# Arguments: $1 - path of the file to monitor
# Outputs:   progress messages via log_message
# Returns:   0 once the size is stable. There is no timeout: the function keeps
#            polling for as long as the file is missing, empty or still changing.
is_file_stable() {
  local file=$1
  local prev_size=0
  local stable_count=0
  local current_size=""   # local so the size does not leak into the caller's scope

  log_message "${YELLOW}Monitoring $file for stability...${NC}"

  while (( stable_count < STABILITY_THRESHOLD )); do
    if [[ ! -f "$file" ]]; then
      # A file that vanished breaks the run of consecutive equal readings.
      stable_count=0
      log_message "${YELLOW}File $file not found yet, waiting...${NC}"
      sleep "$STABILITY_CHECK_INTERVAL"
      continue
    fi

    # Try the BSD/macOS stat syntax first, then the GNU coreutils syntax.
    current_size=$(stat -f%z "$file" 2>/dev/null || stat -c%s "$file" 2>/dev/null)

    # Both stat calls can fail (e.g. the file was removed after the -f test);
    # never feed an empty or non-numeric value into the arithmetic below.
    if [[ ! "$current_size" =~ ^[0-9]+$ ]]; then
      stable_count=0
      log_message "${YELLOW}Could not read size of $file, retrying...${NC}"
      sleep "$STABILITY_CHECK_INTERVAL"
      continue
    fi

    if (( current_size == prev_size && current_size > 0 )); then
      stable_count=$((stable_count + 1))
      log_message "Size unchanged: $current_size bytes (check $stable_count/$STABILITY_THRESHOLD)"
    else
      stable_count=0
      prev_size=$current_size
      log_message "Size changed to: $current_size bytes, resetting counter"
    fi

    # Skip the final sleep once the threshold has been reached.
    if (( stable_count < STABILITY_THRESHOLD )); then
      sleep "$STABILITY_CHECK_INTERVAL"
    fi
  done

  log_message "${GREEN}File $file is stable at $current_size bytes${NC}"
  return 0
}
# Function to wait for file to exist
#
# Polls until the given path is a regular file or the timeout elapses.
# Arguments: $1 - path of the file to wait for
#            $2 - optional maximum wait in seconds (default 3600, i.e. 1 hour)
#            $3 - optional poll interval in seconds (default 10)
# Outputs:   progress and timeout messages via log_message
# Returns:   0 if the file exists, 1 if it still does not exist after the timeout
wait_for_file() {
  local file=$1
  local max_wait=${2:-3600}
  local poll_interval=${3:-10}
  local waited=0

  # Fall back to the defaults on malformed values; a zero poll interval in
  # particular would never advance the elapsed-time counter.
  [[ "$max_wait" =~ ^(0|[1-9][0-9]*)$ ]] || max_wait=3600
  [[ "$poll_interval" =~ ^[1-9][0-9]*$ ]] || poll_interval=10

  while [[ ! -f "$file" ]] && (( waited < max_wait )); do
    log_message "${YELLOW}Waiting for $file to be created...${NC}"
    sleep "$poll_interval"
    waited=$((waited + poll_interval))
  done

  if [[ ! -f "$file" ]]; then
    log_message "${RED}Timeout waiting for $file${NC}"
    return 1
  fi
  return 0
}
# Main upload loop
#
# For every archive ID in the batch: wait for ${LOCAL_DIR}/<id>.tar to appear,
# wait for its size to settle, then upload it (up to 3 attempts, 30 seconds
# apart). Archives that time out or fail to upload are collected, reported at
# the end, and make the script exit with a non-zero status.
log_message "${GREEN}=== Starting Hugging Face Upload Script ===${NC}"
log_message "Repository: $REPO_ID"
log_message "Local directory: $LOCAL_DIR"

# Fail fast: without the CLI every upload attempt would fail, but only after
# the script had already waited for each archive to appear and stabilise.
if ! command -v huggingface-cli > /dev/null 2>&1; then
  log_message "${RED}huggingface-cli not found in PATH, aborting${NC}"
  exit 1
fi

# Alternative batch lists, currently disabled:
# for i in 000-068 000-078 000-088 000-100 000-112 000-122 000-132 000-142 000-152 000-009 000-019 000-029 000-039 000-049 000-059 000-069 000-079 000-089 000-102 000-113 000-123 000-133 000-143 000-153
# for i in 000-068 000-078 000-088 000-100 000-112 000-122 000-132 000-142 000-152 000-009 000-019 000-029 000-039 000-059 000-069 000-079 000-089 000-102 000-113 000-123 000-133 000-143 000-153
# for i in 000-085 000-096 000-108 000-119 000-129 000-139 000-149 000-159 000-006 000-016 000-026 000-036 000-046 000-056 000-066 000-076 000-086 000-097 000-109 000-120 000-130 000-140 000-150 000-007 000-017 000-027 000-037 000-047 000-057 000-067 000-077 000-087 000-098 000-110 000-121 000-131 000-141 000-151 000-008 000-018 000-028 000-038 000-048 000-058
# for i in 000-010 000-020 000-030 000-040 000-050 000-060 000-070 000-080 000-090 000-103 000-114 000-124 000-134 000-144 000-154 000-001 000-011 000-021 000-031 000-041 000-051 000-061 000-071
archive_ids=(
  000-126 000-136 000-146 000-156 000-003 000-013 000-023 000-033
  000-043 000-053 000-063 000-073 000-083 000-094 000-106 000-117
  000-127 000-137 000-147 000-157 000-004 000-014 000-024 000-034
)

# Archives that were skipped or could not be uploaded, for the final summary.
failed_uploads=()

for i in "${archive_ids[@]}"; do
  tar_file="${LOCAL_DIR}/${i}.tar"

  log_message "\n${GREEN}========================================${NC}"
  log_message "${GREEN}Processing: ${i}.tar${NC}"
  log_message "${GREEN}========================================${NC}"

  # Wait for the tar file to appear (if it doesn't exist yet)
  if [[ ! -f "$tar_file" ]]; then
    log_message "${YELLOW}File ${i}.tar doesn't exist yet, waiting for creation...${NC}"
    if ! wait_for_file "$tar_file"; then
      log_message "${RED}Skipping ${i}.tar - file was not created within timeout${NC}"
      failed_uploads+=("${i}.tar")
      continue
    fi
  fi

  # Wait for file to be stable (packing complete)
  if is_file_stable "$tar_file"; then
    # Get final file size for logging (BSD/macOS stat first, then GNU stat)
    file_size=$(stat -f%z "$tar_file" 2>/dev/null || stat -c%s "$tar_file" 2>/dev/null)
    # If stat failed, log 0 MB rather than breaking the arithmetic below.
    [[ "$file_size" =~ ^[0-9]+$ ]] || file_size=0
    file_size_mb=$((file_size / 1048576))
    log_message "${GREEN}Uploading ${i}.tar (${file_size_mb} MB) to Hugging Face...${NC}"

    # Upload with retry logic
    max_retries=3
    upload_success=false
    for (( attempt = 1; attempt <= max_retries; attempt++ )); do
      if huggingface-cli upload "$REPO_ID" "$tar_file" "${i}.tar" --repo-type="$REPO_TYPE"; then
        log_message "${GREEN}✓ Successfully uploaded ${i}.tar${NC}"
        upload_success=true
        # Optional: Delete local file after successful upload to save space
        # rm "$tar_file"
        # log_message "${YELLOW}Deleted local file ${i}.tar${NC}"
        break
      fi

      log_message "${RED}Upload failed for ${i}.tar (attempt $attempt/$max_retries)${NC}"
      # No point sleeping after the last attempt.
      if (( attempt < max_retries )); then
        log_message "Retrying in 30 seconds..."
        sleep 30
      fi
    done

    if [[ "$upload_success" == false ]]; then
      log_message "${RED}✗ Failed to upload ${i}.tar after $max_retries attempts${NC}"
      failed_uploads+=("${i}.tar")
    fi
  else
    log_message "${RED}File stability check failed for ${i}.tar${NC}"
    failed_uploads+=("${i}.tar")
  fi

  log_message "${GREEN}Completed processing ${i}.tar${NC}"
done

log_message "\n${GREEN}=== All files processed ===${NC}"
if (( ${#failed_uploads[@]} > 0 )); then
  log_message "${RED}${#failed_uploads[@]} file(s) were not uploaded: ${failed_uploads[*]}${NC}"
fi
log_message "Check $LOG_FILE for detailed logs"

# Report failures to the caller through the exit status.
if (( ${#failed_uploads[@]} > 0 )); then
  exit 1
fi