Spaces:
Running
Running
| import os | |
| import json | |
| import time | |
| METADATA_PATH = "data/metadata.jsonl" | |
| IMAGES_DIR = "data/raw_images" | |
def count_progress(metadata_path=None, images_dir=None, target=1000):
    """Print a snapshot of scraping progress and return the counts.

    Counts the non-blank lines of the metadata file and the regular files
    directly inside the images directory, then prints both figures.

    Args:
        metadata_path: Path of the metadata file to count lines in.
            Defaults to the module-level ``METADATA_PATH``.
        images_dir: Directory whose files are counted (not recursive).
            Defaults to the module-level ``IMAGES_DIR``.
        target: Number shown as the collection target in the output.

    Returns:
        ``(item_count, image_count)`` on success, or ``None`` when the
        counts could not be read (the error is printed, not raised).
    """
    # Resolve the defaults at call time so the module constants are only
    # needed when the caller does not supply explicit paths.
    if metadata_path is None:
        metadata_path = METADATA_PATH
    if images_dir is None:
        images_dir = IMAGES_DIR

    try:
        # Open directly instead of checking existence first: the file can
        # appear or vanish between a check and the open.
        try:
            with open(metadata_path, 'r', encoding='utf-8') as f:
                # Skip blank lines so a trailing empty line is not
                # reported as a collected item.
                item_count = sum(1 for line in f if line.strip())
        except FileNotFoundError:
            item_count = 0

        try:
            with os.scandir(images_dir) as entries:
                image_count = sum(1 for entry in entries if entry.is_file())
        except FileNotFoundError:
            image_count = 0
    except (OSError, UnicodeError) as e:
        # Best effort: report the problem and let the caller keep polling.
        print(f"Error checking progress: {e}")
        return None

    print(f"Items Collected: {item_count} / {target} (Target)")
    print(f"Images Saved: {image_count}")
    print("-" * 30)
    return item_count, image_count
if __name__ == "__main__":
    # Script entry point: print a progress snapshot every few seconds
    # until the user interrupts the process.
    print("Monitoring Scraping Progress... (Ctrl+C to stop)")
    try:
        while True:
            count_progress()
            time.sleep(5)  # Update every 5 seconds
    except KeyboardInterrupt:
        # The banner invites Ctrl+C, so exit quietly instead of
        # ending with a traceback.
        print("\nMonitoring stopped.")