Technologic101 committed on
Commit
91237b8
·
1 Parent(s): a4bb19d

task: csszengarden scraper [poc]

Browse files
Files changed (5) hide show
  1. .gitignore +3 -0
  2. pyproject.toml +0 -2
  3. scraper.py +32 -37
  4. test_scraper.ipynb +40 -25
  5. uv.lock +1 -86
.gitignore CHANGED
@@ -12,3 +12,6 @@
12
 
13
  .chainlit/cache
14
 
 
 
 
 
12
 
13
  .chainlit/cache
14
 
15
+ /designs
16
+
17
+ __pycache__
pyproject.toml CHANGED
@@ -12,8 +12,6 @@ dependencies = [
12
  "ipykernel>=6.29.0",
13
  "beautifulsoup4>=4.12.0",
14
  "scrapy>=2.11.0",
15
- "selenium>=4.18.0",
16
- "selenium-wire>=5.1.0", # Added for scraper
17
  "playwright>=1.42.0",
18
  "pandas>=2.2.0",
19
  "numpy>=1.26.0",
 
12
  "ipykernel>=6.29.0",
13
  "beautifulsoup4>=4.12.0",
14
  "scrapy>=2.11.0",
 
 
15
  "playwright>=1.42.0",
16
  "pandas>=2.2.0",
17
  "numpy>=1.26.0",
scraper.py CHANGED
@@ -2,8 +2,8 @@ import os
2
  import requests
3
  from bs4 import BeautifulSoup
4
  import json
5
- from selenium import webdriver
6
- from selenium.webdriver.chrome.options import Options
7
 
8
  def create_design_directory(design_id):
9
  """Create a directory for the design if it doesn't exist"""
@@ -25,32 +25,30 @@ def save_metadata(metadata, directory):
25
  with open(metadata_path, "w", encoding="utf-8") as f:
26
  json.dump(metadata, f, indent=4)
27
 
28
- def take_screenshot(url, directory):
29
  """Take screenshots of the design at desktop and mobile widths"""
30
- chrome_options = Options()
31
- chrome_options.add_argument("--headless")
32
-
33
- driver = webdriver.Chrome(options=chrome_options)
34
-
35
- # Desktop screenshot (1920px width)
36
- driver.set_window_size(1920, 1080)
37
- driver.get(url)
38
- # Wait for page to load and get full height
39
- total_height = driver.execute_script("return document.body.scrollHeight")
40
- driver.set_window_size(1920, total_height)
41
- driver.save_screenshot(f"{directory}/screenshot_desktop.png")
42
-
43
- # Mobile screenshot (480px width)
44
- driver.set_window_size(480, 1080)
45
- driver.get(url)
46
- # Wait for page to load and get full height
47
- total_height = driver.execute_script("return document.body.scrollHeight")
48
- driver.set_window_size(480, total_height)
49
- driver.save_screenshot(f"{directory}/screenshot_mobile.png")
50
-
51
- driver.quit()
52
 
53
- def scrape_design(design_id):
54
  """Scrape a single design"""
55
  # Create base URLs
56
  design_url = f"https://www.csszengarden.com/{design_id}"
@@ -63,22 +61,19 @@ def scrape_design(design_id):
63
  response = requests.get(design_url)
64
  print(f"Response status: {response.status_code}")
65
 
66
- # Debug HTML content
67
- print("\nFirst 500 characters of response:")
68
- print(response.text[:500])
69
-
70
  soup = BeautifulSoup(response.text, "html.parser")
 
71
 
72
  # Debug found elements
73
  print("\nFound elements:")
74
- print(f"h1: {soup.select_one('h1')}")
75
- print(f"author: {soup.select_one('meta[name=\"author\"]')}")
76
 
77
  # Extract metadata with error handling
78
  try:
79
  metadata = {
80
  "id": design_id,
81
- "author": soup.select_one('meta[name="author"]')["content"] if soup.select_one('meta[name="author"]') else "Unknown Author",
82
  "url": design_url,
83
  "css_url": css_url
84
  }
@@ -89,9 +84,9 @@ def scrape_design(design_id):
89
  # Save everything
90
  save_css(css_url, directory)
91
  save_metadata(metadata, directory)
92
- take_screenshot(design_url, directory)
93
 
94
- def main():
95
  """Main function to scrape multiple designs"""
96
  # Create designs directory if it doesn't exist
97
  if not os.path.exists("designs"):
@@ -103,10 +98,10 @@ def main():
103
  for design_id in design_ids:
104
  try:
105
  print(f"Scraping design {design_id}...")
106
- scrape_design(design_id)
107
  print(f"Successfully scraped design {design_id}")
108
  except Exception as e:
109
  print(f"Error scraping design {design_id}: {str(e)}")
110
 
111
  if __name__ == "__main__":
112
- main()
 
2
  import requests
3
  from bs4 import BeautifulSoup
4
  import json
5
+ from playwright.async_api import async_playwright
6
+ import asyncio
7
 
8
  def create_design_directory(design_id):
9
  """Create a directory for the design if it doesn't exist"""
 
25
  with open(metadata_path, "w", encoding="utf-8") as f:
26
  json.dump(metadata, f, indent=4)
27
 
28
+ async def take_screenshot(url, directory):
29
  """Take screenshots of the design at desktop and mobile widths"""
30
+ async with async_playwright() as p:
31
+ browser = await p.chromium.launch()
32
+
33
+ # Desktop screenshot (1920px width)
34
+ page = await browser.new_page(viewport={'width': 1920, 'height': 1080})
35
+ await page.goto(url)
36
+ # Get full height
37
+ height = await page.evaluate('document.body.scrollHeight')
38
+ await page.set_viewport_size({'width': 1920, 'height': int(height)})
39
+ await page.screenshot(path=f"{directory}/screenshot_desktop.png", full_page=True)
40
+
41
+ # Mobile screenshot (480px width)
42
+ page = await browser.new_page(viewport={'width': 480, 'height': 1080})
43
+ await page.goto(url)
44
+ # Get full height
45
+ height = await page.evaluate('document.body.scrollHeight')
46
+ await page.set_viewport_size({'width': 480, 'height': int(height)})
47
+ await page.screenshot(path=f"{directory}/screenshot_mobile.png", full_page=True)
48
+
49
+ await browser.close()
 
 
50
 
51
+ async def scrape_design(design_id):
52
  """Scrape a single design"""
53
  # Create base URLs
54
  design_url = f"https://www.csszengarden.com/{design_id}"
 
61
  response = requests.get(design_url)
62
  print(f"Response status: {response.status_code}")
63
 
 
 
 
 
64
  soup = BeautifulSoup(response.text, "html.parser")
65
+ author_meta = soup.select_one('meta[name="author"]')
66
 
67
  # Debug found elements
68
  print("\nFound elements:")
69
+ print(f"h1: {soup.select_one('h1')}")
70
+ print(f"author: {author_meta['content'] if author_meta else None}")
71
 
72
  # Extract metadata with error handling
73
  try:
74
  metadata = {
75
  "id": design_id,
76
+ "author": author_meta["content"] if author_meta else "Unknown Author",
77
  "url": design_url,
78
  "css_url": css_url
79
  }
 
84
  # Save everything
85
  save_css(css_url, directory)
86
  save_metadata(metadata, directory)
87
+ await take_screenshot(design_url, directory)
88
 
89
+ async def main():
90
  """Main function to scrape multiple designs"""
91
  # Create designs directory if it doesn't exist
92
  if not os.path.exists("designs"):
 
98
  for design_id in design_ids:
99
  try:
100
  print(f"Scraping design {design_id}...")
101
+ await scrape_design(design_id)
102
  print(f"Successfully scraped design {design_id}")
103
  except Exception as e:
104
  print(f"Error scraping design {design_id}: {str(e)}")
105
 
106
  if __name__ == "__main__":
107
+ asyncio.run(main())
test_scraper.ipynb CHANGED
@@ -11,7 +11,16 @@
11
  },
12
  {
13
  "cell_type": "code",
14
- "execution_count": 2,
 
 
 
 
 
 
 
 
 
15
  "metadata": {},
16
  "outputs": [
17
  {
@@ -19,39 +28,28 @@
19
  "output_type": "stream",
20
  "text": [
21
  "Testing scraper with design 221...\n",
22
- "Response status: 200\n",
23
- "\n",
24
- "First 500 characters of response:\n",
25
- "<!DOCTYPE html>\n",
26
- "<html lang=\"en\">\n",
27
- "<head>\n",
28
- "\t<meta charset=\"utf-8\">\n",
29
- "\t<title>CSS Zen Garden: The Beauty of CSS Design</title>\n",
30
- "\n",
31
- "\t<link rel=\"stylesheet\" media=\"screen\" href=\"/221/221.css?v=8may2013\">\n",
32
- "\t<link rel=\"alternate\" type=\"application/rss+xml\" title=\"RSS\" href=\"http://www.csszengarden.com/zengarden.xml\">\n",
33
- "\n",
34
- "\t<meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\n",
35
- "\t<meta name=\"author\" content=\"Dave Shea\">\n",
36
- "\t<meta name=\"description\" content=\"A demonstration of what can be accomplished v\n",
37
- "\n",
38
- "Found elements:\n",
39
- "h1: <h1>CSS Zen Garden</h1>\n",
40
- "author: None\n",
41
- "author link: None\n",
42
  "Success!\n"
43
  ]
 
 
 
 
 
 
 
 
 
44
  }
45
  ],
46
  "source": [
47
- "from scraper import create_design_directory, save_css, save_metadata, take_screenshot, scrape_design\n",
48
  "\n",
49
  "# Test with a single design first\n",
50
  "test_design_id = \"221\"\n",
51
  "\n",
52
  "try:\n",
53
  " print(f\"Testing scraper with design {test_design_id}...\")\n",
54
- " scrape_design(test_design_id)\n",
55
  " print(\"Success!\")\n",
56
  "except Exception as e:\n",
57
  " print(f\"Error: {str(e)}\")"
@@ -59,9 +57,26 @@
59
  },
60
  {
61
  "cell_type": "code",
62
- "execution_count": null,
63
  "metadata": {},
64
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  "source": [
66
  "# If successful, let's check what we got\n",
67
  "import json\n",
 
11
  },
12
  {
13
  "cell_type": "code",
14
+ "execution_count": 1,
15
+ "metadata": {},
16
+ "outputs": [],
17
+ "source": [
18
+ "!playwright install chromium"
19
+ ]
20
+ },
21
+ {
22
+ "cell_type": "code",
23
+ "execution_count": 1,
24
  "metadata": {},
25
  "outputs": [
26
  {
 
28
  "output_type": "stream",
29
  "text": [
30
  "Testing scraper with design 221...\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  "Success!\n"
32
  ]
33
+ },
34
+ {
35
+ "name": "stderr",
36
+ "output_type": "stream",
37
+ "text": [
38
+ "/var/folders/02/z250w46j5_514v22h_ct_zq40000gn/T/ipykernel_37704/2179274543.py:8: RuntimeWarning: coroutine 'scrape_design' was never awaited\n",
39
+ " scrape_design(test_design_id)\n",
40
+ "RuntimeWarning: Enable tracemalloc to get the object allocation traceback\n"
41
+ ]
42
  }
43
  ],
44
  "source": [
45
+ "from scraper import scrape_design\n",
46
  "\n",
47
  "# Test with a single design first\n",
48
  "test_design_id = \"221\"\n",
49
  "\n",
50
  "try:\n",
51
  " print(f\"Testing scraper with design {test_design_id}...\")\n",
52
+ " await scrape_design(test_design_id)\n",
53
  " print(\"Success!\")\n",
54
  "except Exception as e:\n",
55
  " print(f\"Error: {str(e)}\")"
 
57
  },
58
  {
59
  "cell_type": "code",
60
+ "execution_count": 3,
61
  "metadata": {},
62
+ "outputs": [
63
+ {
64
+ "name": "stdout",
65
+ "output_type": "stream",
66
+ "text": [
67
+ "Files created:\n",
68
+ "['metadata.json', 'style.css']\n",
69
+ "\n",
70
+ "Metadata:\n",
71
+ "{\n",
72
+ " \"id\": \"221\",\n",
73
+ " \"author\": \"Dave Shea\",\n",
74
+ " \"url\": \"https://www.csszengarden.com/221\",\n",
75
+ " \"css_url\": \"https://www.csszengarden.com/221/221.css\"\n",
76
+ "}\n"
77
+ ]
78
+ }
79
+ ],
80
  "source": [
81
  "# If successful, let's check what we got\n",
82
  "import json\n",
uv.lock CHANGED
@@ -732,7 +732,7 @@ wheels = [
732
  [[package]]
733
  name = "imagineui"
734
  version = "0.1.0"
735
- source = { editable = "." }
736
  dependencies = [
737
  { name = "beautifulsoup4" },
738
  { name = "chainlit" },
@@ -748,7 +748,6 @@ dependencies = [
748
  { name = "python-dotenv" },
749
  { name = "requests" },
750
  { name = "scrapy" },
751
- { name = "selenium" },
752
  { name = "sentence-transformers" },
753
  { name = "torch" },
754
  { name = "transformers" },
@@ -770,7 +769,6 @@ requires-dist = [
770
  { name = "python-dotenv", specifier = ">=1.0.0" },
771
  { name = "requests", specifier = ">=2.28.1" },
772
  { name = "scrapy", specifier = ">=2.11.0" },
773
- { name = "selenium", specifier = ">=4.18.0" },
774
  { name = "sentence-transformers", specifier = ">=2.5.0" },
775
  { name = "torch", specifier = "==2.1.2" },
776
  { name = "transformers", specifier = ">=4.37.0" },
@@ -1861,18 +1859,6 @@ wheels = [
1861
  { url = "https://files.pythonhosted.org/packages/00/f8/bb60a4644287a544ec81df1699d5b965776bc9848d9029d9f9b3402ac8bb/orjson-3.10.15-cp311-cp311-win_amd64.whl", hash = "sha256:cf45e0214c593660339ef63e875f32ddd5aa3b4adc15e662cdb80dc49e194f8e", size = 133570 },
1862
  ]
1863
 
1864
- [[package]]
1865
- name = "outcome"
1866
- version = "1.3.0.post0"
1867
- source = { registry = "https://pypi.org/simple" }
1868
- dependencies = [
1869
- { name = "attrs" },
1870
- ]
1871
- sdist = { url = "https://files.pythonhosted.org/packages/98/df/77698abfac98571e65ffeb0c1fba8ffd692ab8458d617a0eed7d9a8d38f2/outcome-1.3.0.post0.tar.gz", hash = "sha256:9dcf02e65f2971b80047b377468e72a268e15c0af3cf1238e6ff14f7f91143b8", size = 21060 }
1872
- wheels = [
1873
- { url = "https://files.pythonhosted.org/packages/55/8b/5ab7257531a5d830fc8000c476e63c935488d74609b50f9384a643ec0a62/outcome-1.3.0.post0-py2.py3-none-any.whl", hash = "sha256:e771c5ce06d1415e356078d3bdd68523f284b4ce5419828922b6871e65eda82b", size = 10692 },
1874
- ]
1875
-
1876
  [[package]]
1877
  name = "overrides"
1878
  version = "7.7.0"
@@ -2248,15 +2234,6 @@ version = "2.1.2"
2248
  source = { registry = "https://pypi.org/simple" }
2249
  sdist = { url = "https://files.pythonhosted.org/packages/d5/7b/65f55513d3c769fd677f90032d8d8703e3dc17e88a41b6074d2177548bca/PyPyDispatcher-2.1.2.tar.gz", hash = "sha256:b6bec5dfcff9d2535bca2b23c80eae367b1ac250a645106948d315fcfa9130f2", size = 23224 }
2250
 
2251
- [[package]]
2252
- name = "pysocks"
2253
- version = "1.7.1"
2254
- source = { registry = "https://pypi.org/simple" }
2255
- sdist = { url = "https://files.pythonhosted.org/packages/bd/11/293dd436aea955d45fc4e8a35b6ae7270f5b8e00b53cf6c024c83b657a11/PySocks-1.7.1.tar.gz", hash = "sha256:3f8804571ebe159c380ac6de37643bb4685970655d3bba243530d6558b799aa0", size = 284429 }
2256
- wheels = [
2257
- { url = "https://files.pythonhosted.org/packages/8d/59/b4572118e098ac8e46e399a1dd0f2d85403ce8bbaad9ec79373ed6badaf9/PySocks-1.7.1-py3-none-any.whl", hash = "sha256:2725bd0a9925919b9b51739eea5f9e2bae91e83288108a9ad338b2e3a4435ee5", size = 16725 },
2258
- ]
2259
-
2260
  [[package]]
2261
  name = "python-dateutil"
2262
  version = "2.9.0.post0"
@@ -2606,23 +2583,6 @@ wheels = [
2606
  { url = "https://files.pythonhosted.org/packages/e8/43/2cc828e9b7a453d791afbe3ef36c951f4641fc1d886b6d39e9455c5468e0/Scrapy-2.12.0-py2.py3-none-any.whl", hash = "sha256:c33e2dc7da42e727390bacb32dd9938a54ac210fa71972b5c392754f478669cd", size = 311170 },
2607
  ]
2608
 
2609
- [[package]]
2610
- name = "selenium"
2611
- version = "4.29.0"
2612
- source = { registry = "https://pypi.org/simple" }
2613
- dependencies = [
2614
- { name = "certifi" },
2615
- { name = "trio" },
2616
- { name = "trio-websocket" },
2617
- { name = "typing-extensions" },
2618
- { name = "urllib3", extra = ["socks"] },
2619
- { name = "websocket-client" },
2620
- ]
2621
- sdist = { url = "https://files.pythonhosted.org/packages/1c/ab/09de87ef66a10a7d40417d4e93449eb892154d2dc6385187aa9298a2c09d/selenium-4.29.0.tar.gz", hash = "sha256:3a62f7ec33e669364a6c0562a701deb69745b569c50d55f1a912bf8eb33358ba", size = 985717 }
2622
- wheels = [
2623
- { url = "https://files.pythonhosted.org/packages/2f/a6/fc66ea71ec0769f72abdf15cb9ec9269517abe68a160839383ddff7478f1/selenium-4.29.0-py3-none-any.whl", hash = "sha256:ce5d26f1ddc1111641113653af33694c13947dd36c2df09cdd33f554351d372e", size = 9536642 },
2624
- ]
2625
-
2626
  [[package]]
2627
  name = "send2trash"
2628
  version = "1.8.3"
@@ -2704,15 +2664,6 @@ wheels = [
2704
  { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235 },
2705
  ]
2706
 
2707
- [[package]]
2708
- name = "sortedcontainers"
2709
- version = "2.4.0"
2710
- source = { registry = "https://pypi.org/simple" }
2711
- sdist = { url = "https://files.pythonhosted.org/packages/e8/c4/ba2f8066cceb6f23394729afe52f3bf7adec04bf9ed2c820b39e19299111/sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88", size = 30594 }
2712
- wheels = [
2713
- { url = "https://files.pythonhosted.org/packages/32/46/9cb0e58b2deb7f82b84065f37f3bffeb12413f947f9388e4cac22c4621ce/sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0", size = 29575 },
2714
- ]
2715
-
2716
  [[package]]
2717
  name = "soupsieve"
2718
  version = "2.6"
@@ -2982,37 +2933,6 @@ wheels = [
2982
  { url = "https://files.pythonhosted.org/packages/20/37/1f29af63e9c30156a3ed6ebc2754077016577c094f31de7b2631e5d379eb/transformers-4.49.0-py3-none-any.whl", hash = "sha256:6b4fded1c5fee04d384b1014495b4235a2b53c87503d7d592423c06128cbbe03", size = 9970275 },
2983
  ]
2984
 
2985
- [[package]]
2986
- name = "trio"
2987
- version = "0.29.0"
2988
- source = { registry = "https://pypi.org/simple" }
2989
- dependencies = [
2990
- { name = "attrs" },
2991
- { name = "cffi", marker = "implementation_name != 'pypy' and os_name == 'nt'" },
2992
- { name = "idna" },
2993
- { name = "outcome" },
2994
- { name = "sniffio" },
2995
- { name = "sortedcontainers" },
2996
- ]
2997
- sdist = { url = "https://files.pythonhosted.org/packages/a1/47/f62e62a1a6f37909aed0bf8f5d5411e06fa03846cfcb64540cd1180ccc9f/trio-0.29.0.tar.gz", hash = "sha256:ea0d3967159fc130acb6939a0be0e558e364fee26b5deeecc893a6b08c361bdf", size = 588952 }
2998
- wheels = [
2999
- { url = "https://files.pythonhosted.org/packages/c9/55/c4d9bea8b3d7937901958f65124123512419ab0eb73695e5f382521abbfb/trio-0.29.0-py3-none-any.whl", hash = "sha256:d8c463f1a9cc776ff63e331aba44c125f423a5a13c684307e828d930e625ba66", size = 492920 },
3000
- ]
3001
-
3002
- [[package]]
3003
- name = "trio-websocket"
3004
- version = "0.12.1"
3005
- source = { registry = "https://pypi.org/simple" }
3006
- dependencies = [
3007
- { name = "outcome" },
3008
- { name = "trio" },
3009
- { name = "wsproto" },
3010
- ]
3011
- sdist = { url = "https://files.pythonhosted.org/packages/8d/ba/ab932f5f520565caf948ccadade04f82daa33272b9629b7bc71fd1bb1a63/trio_websocket-0.12.1.tar.gz", hash = "sha256:d55ccd4d3eae27c494f3fdae14823317839bdcb8214d1173eacc4d42c69fc91b", size = 33547 }
3012
- wheels = [
3013
- { url = "https://files.pythonhosted.org/packages/d3/b9/b07ec357ba125ad26e1c07781b9d7f0414af85ad76e0d73617ddb5ce041c/trio_websocket-0.12.1-py3-none-any.whl", hash = "sha256:608ec746bb287e5d5a66baf483e41194193c5cf05ffaad6240e7d1fcd80d1e6f", size = 21216 },
3014
- ]
3015
-
3016
  [[package]]
3017
  name = "triton"
3018
  version = "2.1.0"
@@ -3115,11 +3035,6 @@ wheels = [
3115
  { url = "https://files.pythonhosted.org/packages/c8/19/4ec628951a74043532ca2cf5d97b7b14863931476d117c471e8e2b1eb39f/urllib3-2.3.0-py3-none-any.whl", hash = "sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df", size = 128369 },
3116
  ]
3117
 
3118
- [package.optional-dependencies]
3119
- socks = [
3120
- { name = "pysocks" },
3121
- ]
3122
-
3123
  [[package]]
3124
  name = "uvicorn"
3125
  version = "0.34.0"
 
732
  [[package]]
733
  name = "imagineui"
734
  version = "0.1.0"
735
+ source = { virtual = "." }
736
  dependencies = [
737
  { name = "beautifulsoup4" },
738
  { name = "chainlit" },
 
748
  { name = "python-dotenv" },
749
  { name = "requests" },
750
  { name = "scrapy" },
 
751
  { name = "sentence-transformers" },
752
  { name = "torch" },
753
  { name = "transformers" },
 
769
  { name = "python-dotenv", specifier = ">=1.0.0" },
770
  { name = "requests", specifier = ">=2.28.1" },
771
  { name = "scrapy", specifier = ">=2.11.0" },
 
772
  { name = "sentence-transformers", specifier = ">=2.5.0" },
773
  { name = "torch", specifier = "==2.1.2" },
774
  { name = "transformers", specifier = ">=4.37.0" },
 
1859
  { url = "https://files.pythonhosted.org/packages/00/f8/bb60a4644287a544ec81df1699d5b965776bc9848d9029d9f9b3402ac8bb/orjson-3.10.15-cp311-cp311-win_amd64.whl", hash = "sha256:cf45e0214c593660339ef63e875f32ddd5aa3b4adc15e662cdb80dc49e194f8e", size = 133570 },
1860
  ]
1861
 
 
 
 
 
 
 
 
 
 
 
 
 
1862
  [[package]]
1863
  name = "overrides"
1864
  version = "7.7.0"
 
2234
  source = { registry = "https://pypi.org/simple" }
2235
  sdist = { url = "https://files.pythonhosted.org/packages/d5/7b/65f55513d3c769fd677f90032d8d8703e3dc17e88a41b6074d2177548bca/PyPyDispatcher-2.1.2.tar.gz", hash = "sha256:b6bec5dfcff9d2535bca2b23c80eae367b1ac250a645106948d315fcfa9130f2", size = 23224 }
2236
 
 
 
 
 
 
 
 
 
 
2237
  [[package]]
2238
  name = "python-dateutil"
2239
  version = "2.9.0.post0"
 
2583
  { url = "https://files.pythonhosted.org/packages/e8/43/2cc828e9b7a453d791afbe3ef36c951f4641fc1d886b6d39e9455c5468e0/Scrapy-2.12.0-py2.py3-none-any.whl", hash = "sha256:c33e2dc7da42e727390bacb32dd9938a54ac210fa71972b5c392754f478669cd", size = 311170 },
2584
  ]
2585
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2586
  [[package]]
2587
  name = "send2trash"
2588
  version = "1.8.3"
 
2664
  { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235 },
2665
  ]
2666
 
 
 
 
 
 
 
 
 
 
2667
  [[package]]
2668
  name = "soupsieve"
2669
  version = "2.6"
 
2933
  { url = "https://files.pythonhosted.org/packages/20/37/1f29af63e9c30156a3ed6ebc2754077016577c094f31de7b2631e5d379eb/transformers-4.49.0-py3-none-any.whl", hash = "sha256:6b4fded1c5fee04d384b1014495b4235a2b53c87503d7d592423c06128cbbe03", size = 9970275 },
2934
  ]
2935
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2936
  [[package]]
2937
  name = "triton"
2938
  version = "2.1.0"
 
3035
  { url = "https://files.pythonhosted.org/packages/c8/19/4ec628951a74043532ca2cf5d97b7b14863931476d117c471e8e2b1eb39f/urllib3-2.3.0-py3-none-any.whl", hash = "sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df", size = 128369 },
3036
  ]
3037
 
 
 
 
 
 
3038
  [[package]]
3039
  name = "uvicorn"
3040
  version = "0.34.0"