File size: 3,988 Bytes
a4bb19d
 
 
 
91237b8
 
a4bb19d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91237b8
a4bb19d
91237b8
 
 
 
21dc073
91237b8
17bef4b
21dc073
 
1ef09d2
17bef4b
91237b8
 
21dc073
91237b8
 
 
 
 
17bef4b
21dc073
 
 
17bef4b
91237b8
 
 
 
 
 
a4bb19d
91237b8
a4bb19d
 
 
 
 
 
 
 
 
 
d63acef
a4bb19d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91237b8
a4bb19d
91237b8
a4bb19d
 
 
 
 
 
 
 
 
 
 
91237b8
a4bb19d
 
 
 
 
91237b8
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import os
import requests
from bs4 import BeautifulSoup
import json
from playwright.async_api import async_playwright
import asyncio

def create_design_directory(design_id):
    """Ensure the per-design output directory exists and return its path.

    Args:
        design_id: Identifier of the design, used as the directory name.

    Returns:
        The relative path "designs/<design_id>".
    """
    directory = f"designs/{design_id}"
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs
    # and also creates the parent "designs" directory if it is missing.
    os.makedirs(directory, exist_ok=True)
    return directory

def save_css(url, directory):
    """Download the stylesheet at *url* and save it as style.css in *directory*.

    Args:
        url: Absolute URL of the CSS file.
        directory: Existing directory to write style.css into.

    Raises:
        requests.HTTPError: If the server returns an error status, so a
            404/500 error page is not silently saved as the stylesheet.
        requests.Timeout: If the server does not respond within 30 seconds.
    """
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    css_path = f"{directory}/style.css"
    with open(css_path, "w", encoding="utf-8") as f:
        f.write(response.text)

def save_metadata(metadata, directory):
    """Save design metadata as pretty-printed JSON in *directory*/metadata.json.

    Args:
        metadata: JSON-serializable dict of design attributes.
        directory: Existing directory to write metadata.json into.
    """
    metadata_path = f"{directory}/metadata.json"
    with open(metadata_path, "w", encoding="utf-8") as f:
        # ensure_ascii=False keeps non-ASCII text (e.g. designer names)
        # human-readable instead of escaping it to \uXXXX sequences.
        json.dump(metadata, f, indent=4, ensure_ascii=False)

async def take_screenshot(url, directory):
    """Take full-page screenshots of the design at desktop and mobile widths.

    Saves screenshot_desktop.png (1600px viewport) and screenshot_mobile.png
    (480px viewport) into *directory*.

    Args:
        url: Page URL to capture.
        directory: Existing directory to write the PNG files into.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch()
        try:
            # Desktop capture (1600px wide); longer settle delay since the
            # desktop layout tends to load more/larger background images.
            await _capture_full_page(
                browser, url, width=1600, settle_ms=3000,
                path=f"{directory}/screenshot_desktop.png",
            )
            # Mobile capture (480px wide).
            await _capture_full_page(
                browser, url, width=480, settle_ms=2000,
                path=f"{directory}/screenshot_mobile.png",
            )
        finally:
            # Always release the browser, even if a capture fails.
            await browser.close()


async def _capture_full_page(browser, url, *, width, settle_ms, path):
    """Open *url* at the given viewport width and save a full-page PNG to *path*.

    Args:
        browser: A launched Playwright browser.
        url: Page URL to capture.
        width: Viewport width in pixels.
        settle_ms: Extra fixed delay after network idle, so lazily-loaded
            background images finish rendering before the shot.
        path: Output PNG file path.
    """
    page = await browser.new_page(viewport={'width': width, 'height': 1080})
    try:
        await page.goto(url)
        # Wait for network to be idle (no requests for at least 500ms),
        # then the extra settle delay for background images.
        await page.wait_for_load_state("networkidle")
        await page.wait_for_timeout(settle_ms)
        # Resize the viewport to the full document height so that
        # viewport-relative styles render correctly in the capture.
        height = await page.evaluate('document.body.scrollHeight')
        await page.set_viewport_size({'width': width, 'height': int(height)})
        await page.screenshot(path=path, full_page=True)
    finally:
        # Close each page instead of leaking it until browser shutdown.
        await page.close()

async def scrape_design(design_id):
    """Scrape a single CSS Zen Garden design: stylesheet, metadata, screenshots.

    Args:
        design_id: Numeric design identifier as a string (e.g. "221").

    Raises:
        requests.HTTPError: If the design page returns an error status.
        requests.Timeout: If the server does not respond within 30 seconds.
    """
    # Build the design page and stylesheet URLs.
    design_url = f"https://www.csszengarden.com/{design_id}"
    css_url = f"https://www.csszengarden.com/{design_id}/{design_id}.css"

    # Create directory for this design
    directory = create_design_directory(design_id)

    # Fetch the design page and fail fast on an error response instead of
    # scraping a 404/500 error page.
    response = requests.get(design_url, timeout=30)
    print(f"{design_id}: Response status: {response.status_code}")
    response.raise_for_status()

    # Metadata currently holds only the identifiers/URLs; extend here if
    # fields are later parsed out of the page HTML.
    metadata = {
        "id": design_id,
        "url": design_url,
        "css_url": css_url
    }

    # Save everything
    save_css(css_url, directory)
    save_metadata(metadata, directory)
    await take_screenshot(design_url, directory)

async def main():
    """Scrape a fixed list of CSS Zen Garden designs, one at a time.

    Each design is wrapped in its own try/except so one failure does not
    abort the remaining designs (best-effort batch).
    """
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs("designs", exist_ok=True)

    # List of design IDs to scrape
    design_ids = ["221", "220", "219"]  # Add more IDs as needed

    for design_id in design_ids:
        try:
            print(f"Scraping design {design_id}...")
            await scrape_design(design_id)
            print(f"Successfully scraped design {design_id}")
        except Exception as e:
            # Log and continue with the next design.
            print(f"Error scraping design {design_id}: {str(e)}")

if __name__ == "__main__":
    # Script entry point: drive the async scraper to completion.
    asyncio.run(main())