import json
import logging
import time
from booth_scraper import BoothScraper
def process_target_batch():
    """Scrape every URL listed in ``target_urls.json``, one at a time.

    Reads the target list (a UTF-8 JSON array of URLs) from
    ``target_urls.json`` in the current directory, then hands each URL to
    ``BoothScraper.process_item``.  A failure on one URL is logged with its
    traceback and the batch continues with the next URL.

    Raises:
        FileNotFoundError / json.JSONDecodeError: if the target file is
            missing or malformed (these abort the batch before scraping).
    """
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
    )

    with open('target_urls.json', 'r', encoding='utf-8') as f:
        urls = json.load(f)

    scraper = BoothScraper(data_dir="data")
    total = len(urls)  # hoisted: invariant across the loop
    logging.info("Starting batch process for %d URLs...", total)

    for i, url in enumerate(urls, start=1):
        logging.info("[%d/%d] Processing: %s", i, total, url)
        try:
            # We use Requests-based scraping for detail pages for stability.
            # Likes will be re-detected if possible or just left 0.
            scraper.process_item(url, page=None, likes=0)
            time.sleep(1.5)  # Be polite to Booth
        except Exception:
            # Broad catch is deliberate: one bad URL must not abort the
            # whole batch.  logging.exception records the full traceback,
            # which logging.error(f"...{e}") was silently discarding.
            logging.exception("Failed to process %s", url)
# Entry point: run the batch only when executed as a script, not on import.
if __name__ == "__main__":
    process_target_batch()