# booth-pic-api / backend / scraper / check_progress.py
# github-actions
# Deploy to HF (clean history with LFS)
# e666301
import os
import json
import time
# JSONL file whose lines count_progress() tallies as collected items.
# Relative path: resolved against the current working directory.
METADATA_PATH = "data/metadata.jsonl"
# Directory whose regular files count_progress() tallies as saved images.
IMAGES_DIR = "data/raw_images"
def _count_records(path):
    """Return the number of non-blank lines in the file at *path* (0 if missing)."""
    try:
        # Binary mode: we only count lines, so there is no need to decode.
        # This also avoids a decode error if another process is mid-write
        # and the file currently ends inside a multi-byte character.
        with open(path, "rb") as f:
            return sum(1 for line in f if line.strip())
    except FileNotFoundError:
        return 0


def _count_files(directory):
    """Return the number of regular files directly inside *directory* (0 if missing)."""
    try:
        with os.scandir(directory) as entries:
            # is_file() follows symlinks, matching os.path.isfile().
            return sum(1 for entry in entries if entry.is_file())
    except FileNotFoundError:
        return 0


def count_progress(metadata_path=None, images_dir=None, target=1000):
    """Print a snapshot of how many items and images exist on disk.

    Args:
        metadata_path: JSONL file to count records in. Defaults to
            ``METADATA_PATH``.
        images_dir: Directory to count regular files in (non-recursive).
            Defaults to ``IMAGES_DIR``.
        target: Item goal shown in the progress line.

    Returns:
        None. Results are written to stdout. A missing file or directory is
        reported as a count of 0; any other OS-level failure is reported as
        a single error line instead of the counts.
    """
    # Resolve defaults at call time so the module constants are only needed
    # when the caller does not supply explicit paths.
    if metadata_path is None:
        metadata_path = METADATA_PATH
    if images_dir is None:
        images_dir = IMAGES_DIR

    try:
        item_count = _count_records(metadata_path)
        image_count = _count_files(images_dir)
    except OSError as e:
        # Best-effort monitor: report the problem and let the caller retry.
        print(f"Error checking progress: {e}")
        return

    print(f"Items Collected: {item_count} / {target} (Target)")
    print(f"Images Saved: {image_count}")
    print("-" * 30)
# Script entry point: print a progress snapshot every few seconds until the
# user interrupts the process.
if __name__ == "__main__":
    print("Monitoring Scraping Progress... (Ctrl+C to stop)")
    try:
        while True:
            count_progress()
            time.sleep(5)  # Update every 5 seconds
    except KeyboardInterrupt:
        # The banner advertises Ctrl+C as the way to stop, so exit quietly
        # instead of dumping a traceback.
        print("\nMonitoring stopped.")