ADM-Purchasing-Tools

Running

abdulsalam2121

Add automation bot with Flask and Playwright

5b29309 about 2 months ago

10.6 kB

	#!/usr/bin/env python3
	"""
	AdultDVDMarketplace Studio Scraper Bot
	Usage:
	python main.py --studio-url "URL" --min-price 6
	python main.py --studio-name "Studio Name" --min-price 6
	python main.py --studio-url "URL" --min-price 6 --format excel --headless
	python main.py --studio-url "URL" --min-price 6 --max-items 100 --format both
	"""
	import argparse
	import logging
	import sys
	import time
	from pathlib import Path

	from config import BotConfig, DEFAULT_CREDENTIALS_FILE, load_credentials, save_credentials
	from logger_setup import setup_logger
	from browser_session import BrowserSession
	from auth_handler import AuthHandler
	from studio_navigator import StudioNavigator
	from listing_scraper import ListingScraper
	from product_scraper import ProductScraper
	from export_manager import ExportManager


	# ---------------------------------------------------------------------------
	# CLI
	# ---------------------------------------------------------------------------

	def build_parser() -> argparse.ArgumentParser:
	    """Build the command-line parser for the studio scraper.

	    Options fall into three groups: login credentials (optional on the
	    command line), the studio selector (exactly one of ``--studio-url`` or
	    ``--studio-name`` is required), and scraping/export tuning knobs.

	    Returns:
	        A configured ``argparse.ArgumentParser``; the caller invokes
	        ``parse_args()`` on it.
	    """
	    # RawDescriptionHelpFormatter keeps the line breaks of this epilog intact.
	    usage_examples = (
	        "Examples:\n"
	        ' python main.py --studio-url "https://www.adultdvdmarketplace.com/xcart/'
	        'adult_dvd/dvd_search.php?type=studioid&search=1714&order_by=price" --min-price 6\n'
	        ' python main.py --studio-name "20/20 Vision" --min-price 6\n'
	        ' python main.py --studio-url "..." --min-price 6 --format excel --headless\n'
	    )
	    parser = argparse.ArgumentParser(
	        description="Scrape studio products from AdultDVDMarketplace",
	        formatter_class=argparse.RawDescriptionHelpFormatter,
	        epilog=usage_examples,
	    )

	    # Credentials: may be omitted here and resolved by the caller instead.
	    parser.add_argument("--username", default=None, help="Website login username")
	    parser.add_argument("--password", default=None, help="Website login password")
	    parser.add_argument(
	        "--credentials-file",
	        default=DEFAULT_CREDENTIALS_FILE,
	        help="Local credentials file path (default: .browser_state/credentials.json)",
	    )

	    # Studio selector: a direct listing URL or a name to look up, never both.
	    studio_selector = parser.add_mutually_exclusive_group(required=True)
	    studio_selector.add_argument("--studio-url", help="Studio listing page URL")
	    studio_selector.add_argument(
	        "--studio-name",
	        help="Studio name to find from all studios directory",
	    )

	    # Remaining options, registered in the order they appear in --help.
	    tuning_options = (
	        ("--min-price", {"type": float, "default": 6.0,
	                         "help": "Minimum price threshold (default: 6.0)"}),
	        ("--format", {"choices": ["csv", "excel", "both"], "default": "csv",
	                      "help": "Output file format (default: csv)"}),
	        ("--output", {"default": None,
	                      "help": "Output file path (auto-named if omitted)"}),
	        ("--headless", {"action": "store_true",
	                        "help": "Run browser headless (no window)"}),
	        ("--max-items", {"type": int, "default": None,
	                         "help": "Stop after N items (default: no limit)"}),
	        ("--timeout", {"type": int, "default": 30000,
	                       "help": "Browser timeout ms (default: 30000)"}),
	        ("--retries", {"type": int, "default": 3,
	                       "help": "Retries per product page (default: 3)"}),
	    )
	    for flag, spec in tuning_options:
	        parser.add_argument(flag, **spec)

	    return parser


	# ---------------------------------------------------------------------------
	# Main
	# ---------------------------------------------------------------------------

	def _resolve_credentials(args, logger):
	    """Return ``(username, password)`` for the run, exiting if none are usable.

	    Command-line values win when both are given; otherwise the credentials
	    come from ``load_credentials(args.credentials_file)``. Credentials whose
	    source is ``"cli"`` or ``"env"`` are written back with
	    ``save_credentials`` so later runs can reuse them.

	    Args:
	        args: Parsed command-line namespace from ``build_parser()``.
	        logger: Logger used for status and error messages.

	    Raises:
	        SystemExit: With status 1 when only one of username/password was
	            passed, or when no complete credential pair could be found.
	    """
	    # Exactly one of the pair on the command line is a user mistake.
	    if bool(args.username) ^ bool(args.password):
	        logger.error("Provide both --username and --password together")
	        sys.exit(1)

	    if args.username and args.password:
	        username = args.username
	        password = args.password
	        credential_source = "cli"
	    else:
	        username, password, credential_source = load_credentials(args.credentials_file)

	    if not username or not password:
	        logger.error("Credentials not found. Use --username/--password or set ADULTDVD_USERNAME and ADULTDVD_PASSWORD")
	        logger.error(f"Checked credential file: {args.credentials_file}")
	        sys.exit(1)

	    if credential_source in ("cli", "env"):
	        # NOTE(review): this persists the password to the local credentials
	        # file; confirm that save_credentials stores it acceptably.
	        save_credentials(username, password, args.credentials_file)
	        logger.info(f"Credentials source: {credential_source} (saved to local credentials file)")
	    else:
	        logger.info(f"Credentials source: {credential_source}")

	    return username, password


	def _print_summary(records, listing_scraper, output_paths) -> None:
	    """Print the end-of-run report to stdout.

	    Args:
	        records: Scraped product records (dicts); ``upc`` and ``error`` keys
	            are counted when truthy.
	        listing_scraper: The ``ListingScraper`` used for the run, or ``None``
	            when the run ended before one was created.
	        output_paths: Paths of the files written by the export phase.
	    """
	    with_upc = sum(1 for rec in records if rec.get("upc"))
	    with_error = sum(1 for rec in records if rec.get("error"))

	    print()
	    print("=" * 50)
	    print(" SCRAPING COMPLETE")
	    print("=" * 50)
	    # Listing statistics exist only if the run reached the listing scan.
	    if listing_scraper is not None:
	        print(f" Pages scanned : {listing_scraper.pages_scanned}")
	        print(f" Price sort ok : {listing_scraper.sort_verified}")
	        print(f" Products checked : {listing_scraper.checked}")
	        print(f" Skipped below min : {listing_scraper.skipped_below_threshold}")
	        print(f" Skipped no price : {listing_scraper.skipped_unknown_price}")
	    print(f" Products scraped : {len(records)}")
	    print(f" With UPC : {with_upc}")
	    print(f" Errors : {with_error}")
	    if output_paths:
	        # str() so the join also works if the exporter returns Path objects.
	        print(f" Output files : {', '.join(str(p) for p in output_paths)}")
	    print(" Logs : logs/")
	    print("=" * 50)


	def main() -> None:
	    """Run the scraper end to end: authenticate, scan, scrape, export, report.

	    The browser session is always stopped, and the summary is printed even
	    when the run is interrupted or fails part-way. Records collected before
	    an interruption or unexpected error are saved to ``output_partial.csv``
	    or ``output_recovery.csv`` respectively.

	    Raises:
	        SystemExit: Status 1 for missing credentials, failed authentication,
	            an unresolvable or unopenable studio page, or an unexpected
	            error; status 130 when interrupted with Ctrl-C.
	    """
	    args = build_parser().parse_args()

	    Path("logs").mkdir(exist_ok=True)
	    logger = setup_logger()

	    logger.info("=" * 60)
	    logger.info("AdultDVDMarketplace Studio Scraper Bot")
	    logger.info("=" * 60)
	    logger.info(f"Studio URL : {args.studio_url or '-'}")
	    logger.info(f"Studio Name : {args.studio_name or '-'}")
	    logger.info(f"Min price : ${args.min_price:.2f}")
	    logger.info(f"Format : {args.format}")
	    logger.info(f"Headless : {args.headless}")

	    username, password = _resolve_credentials(args, logger)

	    config = BotConfig(
	        username=username,
	        password=password,
	        studio_url=args.studio_url or "",
	        min_price=args.min_price,
	        output_format=args.format,
	        output_file=args.output,
	        headless=args.headless,
	        max_items=args.max_items,
	        timeout=args.timeout,
	        retry_count=args.retries,
	    )

	    session = BrowserSession(
	        headless=config.headless,
	        state_dir=config.state_dir,
	        timeout=config.timeout,
	    )

	    records = []
	    output_paths = []
	    listing_scraper = None
	    # Non-zero once the run is interrupted or fails, so the process does not
	    # report success after saving only partial results.
	    exit_code = 0

	    try:
	        session.start()

	        # ── Phase 1: Authenticate ────────────────────────────────────────
	        logger.info("")
	        logger.info("Phase 1 ▶ Authentication")
	        auth = AuthHandler(session, config.username, config.password)
	        if not auth.ensure_authenticated():
	            logger.error("Authentication failed — aborting")
	            sys.exit(1)

	        # ── Phase 2: Navigate to studio ─────────────────────────────────
	        logger.info("")
	        logger.info("Phase 2 ▶ Studio navigation")
	        navigator = StudioNavigator(session)

	        if args.studio_url:
	            studio_url = config.get_studio_url_with_sort()
	            if not navigator.navigate_to_studio_url(studio_url):
	                logger.error("Could not open studio page — aborting")
	                sys.exit(1)
	        else:
	            # NOTE(review): the name-lookup path never calls
	            # navigate_to_studio_url; presumably the listing scraper opens
	            # the URL itself — confirm.
	            studio_url = navigator.find_studio_by_name(args.studio_name)
	            if not studio_url:
	                logger.error(f"Could not find studio by name: {args.studio_name}")
	                sys.exit(1)

	        # ── Phase 3: Scan listing pages ──────────────────────────────────
	        logger.info("")
	        logger.info("Phase 3 ▶ Listing scan")
	        listing_scraper = ListingScraper(
	            session=session,
	            min_price=config.min_price,
	            max_items=config.max_items,
	        )
	        product_scraper = ProductScraper(session=session, retry_count=config.retry_count)

	        # ── Phase 4: Scrape each qualifying product ──────────────────────
	        logger.info("")
	        logger.info("Phase 4 ▶ Product detail scraping")

	        for idx, pinfo in enumerate(listing_scraper.iter_qualifying_products(studio_url), 1):
	            title_hint = pinfo.get("title") or pinfo.get("url", "")
	            logger.info(f" [{idx}] {title_hint[:80]}")

	            record = product_scraper.scrape_product(pinfo["url"])

	            # Supplement blanks from listing data; .get tolerates a record
	            # that lacks the key altogether.
	            if not record.get("title") and pinfo.get("title"):
	                record["title"] = pinfo["title"]
	            if not record.get("price") and pinfo.get("price") is not None:
	                record["price"] = f"${pinfo['price']:.2f}"

	            records.append(record)
	            # Short pause between product pages.
	            time.sleep(0.3)

	        if not records:
	            logger.warning(f"No products found at or above ${config.min_price:.2f}")
	            print(f"\nNo qualifying products found (min price = ${config.min_price:.2f}).")
	        else:
	            logger.info(f"Qualifying products: {len(records)}")

	        # ── Phase 5: Export ───────────────────────────────────────────────
	        logger.info("")
	        logger.info("Phase 5 ▶ Export")
	        if config.output_format == "both":
	            if args.output:
	                # save_both is called on a default ExportManager, so the
	                # requested path is not used; say so instead of dropping it.
	                logger.warning("--output is ignored when --format is 'both'; files are auto-named")
	            paths = ExportManager().save_both(records)
	            for fmt, path in paths.items():
	                output_paths.append(path)
	                print(f" {fmt.upper()} → {path}")
	        else:
	            mgr = ExportManager(
	                output_format=config.output_format,
	                output_file=config.output_file,
	            )
	            out_path = mgr.save(records)
	            if out_path:
	                output_paths.append(out_path)
	                print(f"\n Output → {out_path}")

	    except KeyboardInterrupt:
	        exit_code = 130  # conventional status for termination by SIGINT
	        logger.info("Interrupted by user")
	        if records:
	            path = ExportManager(output_format="csv", output_file="output_partial.csv").save(records)
	            print(f"\nPartial results saved → {path}")
	    except Exception as exc:
	        # Top-level boundary: log with traceback and salvage what we have.
	        exit_code = 1
	        logger.error(f"Unexpected error: {exc}", exc_info=True)
	        if records:
	            path = ExportManager(output_format="csv", output_file="output_recovery.csv").save(records)
	            print(f"\nRecovery file saved → {path}")
	    finally:
	        session.stop()

	    # ── Summary ───────────────────────────────────────────────────────────
	    _print_summary(records, listing_scraper, output_paths)

	    if exit_code:
	        sys.exit(exit_code)


	# Script entry point: run the scraper only when this file is executed
	# directly, not when it is imported as a module.
	if __name__ == "__main__":
	main()