Spaces:
Runtime error
Runtime error
File size: 3,988 Bytes
a4bb19d 91237b8 a4bb19d 91237b8 a4bb19d 91237b8 21dc073 91237b8 17bef4b 21dc073 1ef09d2 17bef4b 91237b8 21dc073 91237b8 17bef4b 21dc073 17bef4b 91237b8 a4bb19d 91237b8 a4bb19d d63acef a4bb19d 91237b8 a4bb19d 91237b8 a4bb19d 91237b8 a4bb19d 91237b8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 |
import os
import requests
from bs4 import BeautifulSoup
import json
from playwright.async_api import async_playwright
import asyncio
def create_design_directory(design_id):
    """Create the output directory for a design and return its path.

    Args:
        design_id: Identifier of the design; it becomes the name of the
            sub-directory under ``designs/``.

    Returns:
        The relative path ``designs/<design_id>`` as a string.
    """
    directory = f"designs/{design_id}"
    # exist_ok=True replaces the separate os.path.exists() check, which
    # could race with another process creating the same directory.
    os.makedirs(directory, exist_ok=True)
    return directory
def save_css(url, directory, timeout=30):
    """Download a stylesheet and write it to ``<directory>/style.css``.

    Args:
        url: Address of the CSS file to download.
        directory: Existing directory that receives ``style.css``.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Without a timeout, requests waits indefinitely on a stalled server.
    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of saving an error page as the stylesheet.
    response.raise_for_status()
    css_path = f"{directory}/style.css"
    with open(css_path, "w", encoding="utf-8") as f:
        f.write(response.text)
def save_metadata(metadata, directory):
    """Write the design metadata to ``<directory>/metadata.json``.

    Args:
        metadata: JSON-serializable object describing the design.
        directory: Existing directory that receives ``metadata.json``.
    """
    target = f"{directory}/metadata.json"
    with open(target, "w", encoding="utf-8") as handle:
        # Four-space indentation keeps the file human-readable.
        json.dump(metadata, handle, indent=4)
async def _capture_full_page(browser, url, path, width, settle_ms):
    """Render *url* at one viewport width and save a full-page PNG to *path*."""
    page = await browser.new_page(viewport={'width': width, 'height': 1080})
    try:
        await page.goto(url)
        # Wait for network to be idle (no requests for at least 500ms)
        await page.wait_for_load_state("networkidle")
        # Fixed extra pause so late-loading background images can finish.
        await page.wait_for_timeout(settle_ms)
        # Resize the viewport to the document height before capturing.
        height = await page.evaluate('document.body.scrollHeight')
        await page.set_viewport_size({'width': width, 'height': int(height)})
        await page.screenshot(path=path, full_page=True)
    finally:
        # Release the page even when navigation or capture fails.
        await page.close()


async def take_screenshot(url, directory):
    """Take screenshots of the design at desktop and mobile widths.

    Writes ``screenshot_desktop.png`` (1600px wide viewport) and
    ``screenshot_mobile.png`` (480px wide viewport) into *directory*.

    Args:
        url: Page to render.
        directory: Existing directory that receives both PNG files.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch()
        try:
            # Desktop screenshot (1600px width, 3000ms settle delay)
            await _capture_full_page(
                browser, url, f"{directory}/screenshot_desktop.png", 1600, 3000
            )
            # Mobile screenshot (480px width, 2000ms settle delay)
            await _capture_full_page(
                browser, url, f"{directory}/screenshot_mobile.png", 480, 2000
            )
        finally:
            # Close the browser even if one of the captures raised.
            await browser.close()
async def scrape_design(design_id):
    """Scrape a single design: its CSS, metadata and screenshots.

    Creates the design's output directory, fetches the design page,
    and then saves the stylesheet, a ``metadata.json`` file and the
    desktop/mobile screenshots into that directory.

    Args:
        design_id: Identifier of the design on csszengarden.com.

    Raises:
        requests.RequestException: If the design page cannot be fetched
            or the server answers with an HTTP error status.
    """
    # Create base URLs
    design_url = f"https://www.csszengarden.com/{design_id}"
    css_url = f"https://www.csszengarden.com/{design_id}/{design_id}.css"

    # Create directory for this design
    directory = create_design_directory(design_id)

    # Get design page.
    # NOTE(review): requests is blocking and stalls the event loop while it
    # waits; harmless while designs are scraped one at a time.
    response = requests.get(design_url, timeout=30)
    print(f"{design_id}: Response status: {response.status_code}")
    # Stop here on HTTP errors so no artifacts are saved for a missing design.
    response.raise_for_status()

    metadata = {
        "id": design_id,
        "url": design_url,
        "css_url": css_url,
    }

    # Save everything
    save_css(css_url, directory)
    save_metadata(metadata, directory)
    await take_screenshot(design_url, directory)
async def main(design_ids=None):
    """Scrape several designs one after another.

    A failure while scraping one design is reported and does not stop
    the remaining designs from being processed.

    Args:
        design_ids: Iterable of design IDs to scrape. Defaults to
            ``["221", "220", "219"]`` when omitted.

    Raises:
        OSError: If the ``designs`` output directory cannot be created,
            for example because a regular file of that name exists.
    """
    if design_ids is None:
        design_ids = ["221", "220", "219"]  # Add more IDs as needed

    # Create designs directory if it doesn't exist
    os.makedirs("designs", exist_ok=True)

    for design_id in design_ids:
        try:
            print(f"Scraping design {design_id}...")
            await scrape_design(design_id)
            print(f"Successfully scraped design {design_id}")
        except Exception as e:
            # Top-level boundary: report the failure and move on.
            print(f"Error scraping design {design_id}: {str(e)}")
# Run the scraper only when this file is executed as a script.
if __name__ == "__main__":
    asyncio.run(main())