Spaces:

mametarow
/

booth-pic-api

Running

booth-pic-api / backend /scraper /test_playwright_item.py

github-actions

Deploy to HF (clean history with LFS)

e666301 about 1 month ago

1.07 kB

	import logging
	import asyncio
	from playwright.sync_api import sync_playwright
	from booth_scraper import BoothScraper

	# Configure the root logger at INFO level before anything else runs
	# (presumably so the scraper's progress messages show up on the console).
	logging.basicConfig(level=logging.INFO)

	def test_full_process():
	    """Scrape one BOOTH item through Playwright and report what was stored.

	    Launches headless Chromium, hands a fresh page to
	    ``BoothScraper.process_item`` for a fixed item URL, then reads the last
	    non-blank record of ``data/metadata.jsonl`` (presumably the record that
	    ``process_item`` just appended -- confirm against ``BoothScraper``) and
	    prints its title, tags and description length, followed by a
	    SUCCESS/FAILURE line depending on whether the record carries any tags.

	    Raises:
	        FileNotFoundError: if ``data/metadata.jsonl`` does not exist.
	        json.JSONDecodeError: if the last non-blank line is not valid JSON.
	    """
	    import json

	    scraper = BoothScraper()
	    # Item with many tags and clear description
	    url = 'https://booth.pm/ja/items/4294026'

	    with sync_playwright() as p:
	        browser = p.chromium.launch(headless=True)
	        try:
	            page = browser.new_page()
	            scraper.process_item(url, page=page)
	        finally:
	            # Release the browser even when scraping raises.
	            browser.close()

	    # Verify the last line in metadata.jsonl. Stream the file and remember
	    # only the most recent non-blank line, so a trailing newline or an empty
	    # file does not break the check and the whole file is never held in memory.
	    last_record = None
	    with open('data/metadata.jsonl', 'r', encoding='utf-8') as f:
	        for line in f:
	            if line.strip():
	                last_record = line

	    print("\n--- TEST RESULT ---")
	    if last_record is None:
	        print("FAILURE: metadata.jsonl contains no records.")
	        return

	    data = json.loads(last_record)
	    tags = data.get('tags')
	    # ``or ''`` also covers a description stored explicitly as null.
	    description = data.get('description') or ''

	    print(f"Title: {data.get('title')}")
	    print(f"Tags: {tags}")
	    print(f"Desc (len): {len(description)}")
	    if tags:
	        print("SUCCESS: Tags found!")
	    else:
	        print("FAILURE: Tags still empty.")

	# Allow running this check directly as a script, outside a test runner.
	if __name__ == "__main__":
	    test_full_process()