Nikhil Pravin Pise committed on
Commit
aa69bc4
·
1 Parent(s): e98cc10

fix: restore block_resources function in utils.py

Browse files
Files changed (1) hide show
  1. src/utils.py +16 -0
src/utils.py CHANGED
@@ -19,6 +19,22 @@ from urllib.parse import urlparse, urljoin
19
  T = TypeVar("T")
20
 
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  # =============================================================================
23
  # TIMING UTILITIES
24
  # =============================================================================
 
19
  T = TypeVar("T")
20
 
21
 
22
+ # =============================================================================
23
+ # PLAYWRIGHT UTILITIES
24
+ # =============================================================================
25
+
26
+
27
async def block_resources(route) -> None:
    """
    Route handler that drops non-essential requests to speed up scraping.

    Meant to be registered with Playwright's page.route(). Requests whose
    resource type is image, stylesheet, font or media are aborted; every
    other request is allowed to continue unchanged.

    Args:
        route: The intercepted route; must expose ``request.resource_type``
            and the awaitable ``abort()`` / ``continue_()`` methods.
    """
    kind = route.request.resource_type

    # Guard clause: anything outside the blocked categories passes through.
    if kind not in ("image", "stylesheet", "font", "media"):
        await route.continue_()
        return

    await route.abort()
36
+
37
+
38
  # =============================================================================
39
  # TIMING UTILITIES
40
  # =============================================================================