Technologic101 committed on
Commit
21dc073
·
1 Parent(s): 9cee9a1

task: adds scraper wait time to load background images

Browse files
data_collection/notebook.ipynb CHANGED
@@ -13,72 +13,15 @@
13
  },
14
  {
15
  "cell_type": "code",
16
- "execution_count": 3,
17
  "metadata": {},
18
  "outputs": [
19
  {
20
  "name": "stdout",
21
  "output_type": "stream",
22
  "text": [
23
- "Testing scraper with design 001...\n",
24
- "001: Response status: 200\n",
25
- "Success!\n",
26
- "Testing scraper with design 002...\n",
27
- "002: Response status: 200\n",
28
- "Success!\n",
29
- "Testing scraper with design 003...\n",
30
- "003: Response status: 200\n",
31
- "Success!\n",
32
- "Testing scraper with design 004...\n",
33
- "004: Response status: 200\n",
34
- "Success!\n",
35
- "Testing scraper with design 005...\n",
36
- "005: Response status: 200\n",
37
- "Success!\n",
38
- "Testing scraper with design 006...\n",
39
- "006: Response status: 200\n",
40
- "Success!\n",
41
- "Testing scraper with design 007...\n",
42
- "007: Response status: 200\n",
43
- "Success!\n",
44
- "Testing scraper with design 008...\n",
45
- "008: Response status: 200\n",
46
- "Success!\n",
47
- "Testing scraper with design 009...\n",
48
- "009: Response status: 200\n",
49
- "Success!\n",
50
- "Testing scraper with design 010...\n",
51
- "010: Response status: 200\n",
52
- "Success!\n",
53
- "Testing scraper with design 011...\n",
54
- "011: Response status: 200\n",
55
- "Success!\n",
56
- "Testing scraper with design 012...\n",
57
- "012: Response status: 200\n",
58
- "Success!\n",
59
- "Testing scraper with design 013...\n",
60
- "013: Response status: 200\n",
61
- "Success!\n",
62
- "Testing scraper with design 014...\n",
63
- "014: Response status: 200\n",
64
- "Success!\n",
65
- "Testing scraper with design 015...\n",
66
- "015: Response status: 200\n",
67
- "Success!\n",
68
- "Testing scraper with design 016...\n",
69
- "016: Response status: 200\n",
70
- "Success!\n",
71
- "Testing scraper with design 017...\n",
72
- "017: Response status: 200\n",
73
- "Success!\n",
74
- "Testing scraper with design 018...\n",
75
- "018: Response status: 200\n",
76
- "Success!\n",
77
- "Testing scraper with design 019...\n",
78
- "019: Response status: 200\n",
79
- "Success!\n",
80
- "Testing scraper with design 020...\n",
81
- "020: Response status: 200\n",
82
  "Success!\n"
83
  ]
84
  }
@@ -88,8 +31,8 @@
88
  "import asyncio\n",
89
  "\n",
90
  "# Pick from a range of 001 to 221\n",
91
- "test_set = [f\"{i:03d}\" for i in range(1, 21)]\n",
92
- "#test_set = [\"010\"]\n",
93
  "async def test_scraper(ids):\n",
94
  " for test_design_id in ids:\n",
95
  " try:\n",
 
13
  },
14
  {
15
  "cell_type": "code",
16
+ "execution_count": 5,
17
  "metadata": {},
18
  "outputs": [
19
  {
20
  "name": "stdout",
21
  "output_type": "stream",
22
  "text": [
23
+ "Testing scraper with design 112...\n",
24
+ "112: Response status: 200\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  "Success!\n"
26
  ]
27
  }
 
31
  "import asyncio\n",
32
  "\n",
33
  "# Pick from a range of 001 to 221\n",
34
+ "#test_set = [f\"{i:03d}\" for i in range(1, 21)]\n",
35
+ "test_set = [\"112\"]\n",
36
  "async def test_scraper(ids):\n",
37
  " for test_design_id in ids:\n",
38
  " try:\n",
data_collection/scraper.py CHANGED
@@ -31,27 +31,25 @@ async def take_screenshot(url, directory):
31
  browser = await p.chromium.launch()
32
 
33
  # Desktop screenshot (1920px width)
34
- page = await browser.new_page(viewport={'width': 1920, 'height': 1080})
35
  await page.goto(url)
36
  # Wait for network to be idle (no requests for at least 500ms)
37
- await page.wait_for_load_state()
38
-
39
- # Additional wait to ensure any animations/transitions complete
40
- #await page.wait_for_timeout(2000) # 2 second delay
41
 
42
  # Get full height
43
  height = await page.evaluate('document.body.scrollHeight')
44
- await page.set_viewport_size({'width': 1920, 'height': int(height)})
45
  await page.screenshot(path=f"{directory}/screenshot_desktop.png", full_page=True)
46
 
47
  # Mobile screenshot (480px width)
48
  page = await browser.new_page(viewport={'width': 480, 'height': 1080})
49
  await page.goto(url)
50
  # Wait for network to be idle (no requests for at least 500ms)
51
- await page.wait_for_load_state()
52
-
53
- # Additional wait to ensure any animations/transitions complete
54
- #await page.wait_for_timeout(2000) # 2 second delay
55
 
56
  # Get full height
57
  height = await page.evaluate('document.body.scrollHeight')
 
31
  browser = await p.chromium.launch()
32
 
33
  # Desktop screenshot (1920px width)
34
+ page = await browser.new_page(viewport={'width': 1600, 'height': 1080})
35
  await page.goto(url)
36
  # Wait for network to be idle (no requests for at least 500ms)
37
+ await page.wait_for_load_state("networkidle")
38
+ # Add a significant delay to ensure background images are loaded
39
+ await page.wait_for_timeout(2000)
 
40
 
41
  # Get full height
42
  height = await page.evaluate('document.body.scrollHeight')
43
+ await page.set_viewport_size({'width': 1600, 'height': int(height)})
44
  await page.screenshot(path=f"{directory}/screenshot_desktop.png", full_page=True)
45
 
46
  # Mobile screenshot (480px width)
47
  page = await browser.new_page(viewport={'width': 480, 'height': 1080})
48
  await page.goto(url)
49
  # Wait for network to be idle (no requests for at least 500ms)
50
+ await page.wait_for_load_state("networkidle")
51
+ # Add a significant delay to ensure background images are loaded
52
+ await page.wait_for_timeout(2000)
 
53
 
54
  # Get full height
55
  height = await page.evaluate('document.body.scrollHeight')