File size: 915 Bytes
a06f06c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
import json
import logging
import time
from booth_scraper import BoothScraper

def process_target_batch():
    """Scrape every URL listed in ``target_urls.json`` via BoothScraper.

    Reads the JSON array of URLs, then processes each item sequentially with a
    polite delay between requests. A failure on one URL is logged with its
    traceback and the batch continues with the next URL.

    Raises:
        FileNotFoundError / json.JSONDecodeError: if target_urls.json is
            missing or malformed (deliberate: a bad input file should abort).
    """
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

    with open('target_urls.json', 'r', encoding='utf-8') as f:
        urls = json.load(f)

    scraper = BoothScraper(data_dir="data")
    total = len(urls)  # hoist loop invariant; also used in the progress prefix
    # Lazy %-args defer string formatting until the record is actually emitted.
    logging.info("Starting batch process for %d URLs...", total)

    for i, url in enumerate(urls, start=1):
        logging.info("[%d/%d] Processing: %s", i, total, url)
        try:
            # We use Requests-based scraping for detail pages for stability
            scraper.process_item(url, page=None, likes=0)  # Likes will be re-detected if possible or just left 0
            time.sleep(1.5)  # Be polite to Booth
        except Exception as e:
            # logging.exception records the full traceback, not just the message,
            # so per-URL failures stay diagnosable without stopping the batch.
            logging.exception("Failed to process %s: %s", url, e)

# Script entry point: run the batch only when executed directly, not on import.
if __name__ == "__main__":
    process_target_batch()