#!/usr/bin/env python3
"""
AdultDVDMarketplace Studio Scraper Bot

Usage:
    python main.py --studio-url "URL" --min-price 6
    python main.py --studio-name "Studio Name" --min-price 6
    python main.py --studio-url "URL" --min-price 6 --format excel --headless
    python main.py --studio-url "URL" --min-price 6 --max-items 100 --format both

Exit status:
    0    run finished normally (including "no qualifying products")
    1    bad credentials/arguments, authentication or navigation failure,
         or an unexpected error during scraping
    130  interrupted by the user (Ctrl-C)
"""

import argparse
import logging
import math
import sys
import time
from pathlib import Path

from config import BotConfig, DEFAULT_CREDENTIALS_FILE, load_credentials, save_credentials
from logger_setup import setup_logger
from browser_session import BrowserSession
from auth_handler import AuthHandler
from studio_navigator import StudioNavigator
from listing_scraper import ListingScraper
from product_scraper import ProductScraper
from export_manager import ExportManager

# Process exit codes used by main().
_EXIT_OK = 0
_EXIT_ERROR = 1
_EXIT_INTERRUPTED = 130  # conventional 128 + SIGINT


# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------

def _non_negative_int(text: str) -> int:
    """argparse ``type=`` converter: parse *text* as an integer >= 0."""
    try:
        value = int(text)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid integer value: {text!r}") from None
    if value < 0:
        raise argparse.ArgumentTypeError(f"must be >= 0, got {value}")
    return value


def _non_negative_float(text: str) -> float:
    """argparse ``type=`` converter: parse *text* as a finite float >= 0."""
    try:
        value = float(text)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid number: {text!r}") from None
    # NaN/inf parse successfully with float() but make every price comparison
    # meaningless, so reject them up front.
    if not math.isfinite(value) or value < 0:
        raise argparse.ArgumentTypeError(f"must be a finite number >= 0, got {text!r}")
    return value


def build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the scraper.

    Exactly one of ``--studio-url`` / ``--studio-name`` is required.
    Numeric options reject negative values at parse time so that bad input
    fails with a usage error instead of surfacing later inside the scraper.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    p = argparse.ArgumentParser(
        description="Scrape studio products from AdultDVDMarketplace",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=(
            "Examples:\n"
            ' python main.py --studio-url "https://www.adultdvdmarketplace.com/xcart/'
            'adult_dvd/dvd_search.php?type=studioid&search=1714&order_by=price" --min-price 6\n'
            ' python main.py --studio-name "20/20 Vision" --min-price 6\n'
            ' python main.py --studio-url "..." --min-price 6 --format excel --headless\n'
        ),
    )

    p.add_argument("--username", default=None, help="Website login username")
    p.add_argument("--password", default=None, help="Website login password")
    p.add_argument(
        "--credentials-file",
        default=DEFAULT_CREDENTIALS_FILE,
        help="Local credentials file path (default: .browser_state/credentials.json)",
    )

    studio_input = p.add_mutually_exclusive_group(required=True)
    studio_input.add_argument("--studio-url", help="Studio listing page URL")
    studio_input.add_argument(
        "--studio-name",
        help="Studio name to find from all studios directory",
    )

    p.add_argument(
        "--min-price",
        type=_non_negative_float,
        default=6.0,
        help="Minimum price threshold (default: 6.0)",
    )
    p.add_argument(
        "--format",
        choices=["csv", "excel", "both"],
        default="csv",
        help="Output file format (default: csv)",
    )
    p.add_argument("--output", default=None, help="Output file path (auto-named if omitted)")
    p.add_argument("--headless", action="store_true", help="Run browser headless (no window)")
    p.add_argument(
        "--max-items",
        type=_non_negative_int,
        default=None,
        help="Stop after N items (default: no limit)",
    )
    p.add_argument(
        "--timeout",
        type=_non_negative_int,
        default=30000,
        help="Browser timeout ms (default: 30000)",
    )
    p.add_argument(
        "--retries",
        type=_non_negative_int,
        default=3,
        help="Retries per product page (default: 3)",
    )
    return p


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

def _resolve_credentials(args: argparse.Namespace, logger: logging.Logger) -> tuple:
    """Return ``(username, password)`` from the CLI or the credentials loader.

    CLI values win when both are given; otherwise ``load_credentials`` is
    consulted. Credentials that came from the CLI or the environment are
    written to ``args.credentials_file`` via ``save_credentials``.

    Exits the process with status 1 when only one of ``--username`` /
    ``--password`` is supplied, or when no credentials can be found.
    """
    if bool(args.username) ^ bool(args.password):
        logger.error("Provide both --username and --password together")
        sys.exit(_EXIT_ERROR)

    if args.username and args.password:
        username, password, source = args.username, args.password, "cli"
    else:
        username, password, source = load_credentials(args.credentials_file)

    if not username or not password:
        logger.error(
            "Credentials not found. Use --username/--password or set "
            "ADULTDVD_USERNAME and ADULTDVD_PASSWORD"
        )
        logger.error(f"Checked credential file: {args.credentials_file}")
        sys.exit(_EXIT_ERROR)

    if source in ("cli", "env"):
        # NOTE(review): this persists the password to a local file — confirm
        # save_credentials restricts file permissions appropriately.
        save_credentials(username, password, args.credentials_file)
        logger.info(f"Credentials source: {source} (saved to local credentials file)")
    else:
        logger.info(f"Credentials source: {source}")

    return username, password


def _export_records(records: list, config: BotConfig, logger: logging.Logger) -> list:
    """Write *records* in the configured format(s) and return the output paths.

    Paths are returned as strings so callers can join/print them regardless
    of whether the exporter hands back ``str`` or ``Path`` objects.
    """
    written = []
    if config.output_format == "both":
        if config.output_file:
            # The two-format exporter is constructed without an output path,
            # so tell the user instead of silently dropping their --output.
            logger.warning("--output is not applied when --format is 'both'")
        for fmt, path in ExportManager().save_both(records).items():
            written.append(str(path))
            print(f" {fmt.upper()} → {path}")
    else:
        exporter = ExportManager(
            output_format=config.output_format,
            output_file=config.output_file,
        )
        out_path = exporter.save(records)
        if out_path:
            written.append(str(out_path))
            print(f"\n Output → {out_path}")
    return written


def _print_summary(records: list, output_paths: list, listing_scraper, succeeded: bool) -> None:
    """Print the end-of-run statistics block to stdout.

    *listing_scraper* may be ``None`` when the run stopped before the listing
    phase; its counters are then omitted.
    """
    with_upc = sum(1 for r in records if r.get("upc"))
    with_error = sum(1 for r in records if r.get("error"))

    print()
    print("=" * 50)
    # Don't claim completion after a crash or Ctrl-C.
    print(" SCRAPING COMPLETE" if succeeded else " SCRAPING ABORTED")
    print("=" * 50)
    if listing_scraper:
        print(f" Pages scanned : {listing_scraper.pages_scanned}")
        print(f" Price sort ok : {listing_scraper.sort_verified}")
        print(f" Products checked : {listing_scraper.checked}")
        print(f" Skipped below min : {listing_scraper.skipped_below_threshold}")
        print(f" Skipped no price : {listing_scraper.skipped_unknown_price}")
    print(f" Products scraped : {len(records)}")
    print(f" With UPC : {with_upc}")
    print(f" Errors : {with_error}")
    if output_paths:
        print(f" Output files : {', '.join(str(p) for p in output_paths)}")
    print(" Logs : logs/")
    print("=" * 50)


# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------

def main() -> None:
    """Run the scraper end to end: authenticate, locate studio, scrape, export.

    Phases:
        1. Authenticate with the site.
        2. Resolve the studio listing (by URL or by name lookup).
        3/4. Iterate qualifying listing entries and scrape each product page.
        5. Export the collected records.

    On Ctrl-C or an unexpected exception, any records gathered so far are
    saved to ``output_partial.csv`` / ``output_recovery.csv`` respectively,
    the browser session is stopped, the summary is printed, and the process
    exits with a non-zero status.
    """
    args = build_parser().parse_args()

    Path("logs").mkdir(exist_ok=True)
    logger = setup_logger()

    logger.info("=" * 60)
    logger.info("AdultDVDMarketplace Studio Scraper Bot")
    logger.info("=" * 60)
    logger.info(f"Studio URL : {args.studio_url or '-'}")
    logger.info(f"Studio Name : {args.studio_name or '-'}")
    logger.info(f"Min price : ${args.min_price:.2f}")
    logger.info(f"Format : {args.format}")
    logger.info(f"Headless : {args.headless}")

    username, password = _resolve_credentials(args, logger)

    config = BotConfig(
        username=username,
        password=password,
        studio_url=args.studio_url or "",
        min_price=args.min_price,
        output_format=args.format,
        output_file=args.output,
        headless=args.headless,
        max_items=args.max_items,
        timeout=args.timeout,
        retry_count=args.retries,
    )

    session = BrowserSession(
        headless=config.headless,
        state_dir=config.state_dir,
        timeout=config.timeout,
    )

    records = []
    output_paths = []
    listing_scraper = None
    exit_code = _EXIT_OK

    try:
        session.start()

        # ── Phase 1: Authenticate ────────────────────────────────────────
        logger.info("")
        logger.info("Phase 1 ▶ Authentication")
        auth = AuthHandler(session, config.username, config.password)
        if not auth.ensure_authenticated():
            logger.error("Authentication failed — aborting")
            sys.exit(_EXIT_ERROR)  # SystemExit still runs the finally block

        # ── Phase 2: Navigate to studio ─────────────────────────────────
        logger.info("")
        logger.info("Phase 2 ▶ Studio navigation")
        navigator = StudioNavigator(session)

        if args.studio_url:
            studio_url = config.get_studio_url_with_sort()
            if not navigator.navigate_to_studio_url(studio_url):
                logger.error("Could not open studio page — aborting")
                sys.exit(_EXIT_ERROR)
        else:
            # NOTE(review): the name-lookup result is used as returned; no
            # explicit navigation or sort parameter is applied here — confirm
            # find_studio_by_name already yields a price-sorted listing URL.
            studio_url = navigator.find_studio_by_name(args.studio_name)
            if not studio_url:
                logger.error(f"Could not find studio by name: {args.studio_name}")
                sys.exit(_EXIT_ERROR)

        # ── Phase 3: Scan listing pages ──────────────────────────────────
        logger.info("")
        logger.info("Phase 3 ▶ Listing scan")
        listing_scraper = ListingScraper(
            session=session,
            min_price=config.min_price,
            max_items=config.max_items,
        )
        product_scraper = ProductScraper(session=session, retry_count=config.retry_count)

        logger.info("")
        logger.info("Phase 4 ▶ Product detail scraping")

        for idx, pinfo in enumerate(listing_scraper.iter_qualifying_products(studio_url), 1):
            title_hint = pinfo.get("title") or pinfo.get("url", "")
            logger.info(f" [{idx}] {title_hint[:80]}")

            record = product_scraper.scrape_product(pinfo["url"])

            # Supplement blanks from listing data
            if not record["title"] and pinfo.get("title"):
                record["title"] = pinfo["title"]
            if not record["price"] and pinfo.get("price") is not None:
                record["price"] = f"${pinfo['price']:.2f}"

            records.append(record)
            time.sleep(0.3)  # brief pause between product page requests

        if not records:
            # Nothing qualified: report it and skip the export phase rather
            # than handing an empty record list to the exporter.
            logger.warning(f"No products found at or above ${config.min_price:.2f}")
            print(f"\nNo qualifying products found (min price = ${config.min_price:.2f}).")
        else:
            logger.info(f"Qualifying products: {len(records)}")

            # ── Phase 5: Export ──────────────────────────────────────────
            logger.info("")
            logger.info("Phase 5 ▶ Export")
            output_paths.extend(_export_records(records, config, logger))

    except KeyboardInterrupt:
        exit_code = _EXIT_INTERRUPTED
        logger.info("Interrupted by user")
        if records:
            path = ExportManager(output_format="csv", output_file="output_partial.csv").save(records)
            print(f"\nPartial results saved → {path}")

    except Exception as exc:
        # Top-level boundary: log with traceback, salvage what was scraped,
        # and report failure through the exit status below.
        exit_code = _EXIT_ERROR
        logger.error(f"Unexpected error: {exc}", exc_info=True)
        if records:
            path = ExportManager(output_format="csv", output_file="output_recovery.csv").save(records)
            print(f"\nRecovery file saved → {path}")

    finally:
        session.stop()

    # ── Summary ──────────────────────────────────────────────────────────
    _print_summary(records, output_paths, listing_scraper, succeeded=exit_code == _EXIT_OK)

    if exit_code != _EXIT_OK:
        sys.exit(exit_code)


if __name__ == "__main__":
    main()