Muttered3 committed on
Commit
2418b45
·
verified ·
1 Parent(s): 8a3c68a

Create scraper.py

Browse files
Files changed (1) hide show
  1. scraper.py +38 -0
scraper.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import random
3
+ import aiohttp
4
+ from parser import parse_html
5
+
6
# Pool of desktop browser User-Agent strings (Windows/Chrome, macOS/Safari,
# Linux/Chrome). fetch_word picks one at random for every request attempt.
USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/124.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 Safari/605.1.15",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 Chrome/123.0 Safari/537.36",
]
11
+
12
async def fetch_word(session: aiohttp.ClientSession, word: str) -> str:
    """Fetch the fragment.com page for *word* and return its parsed status.

    The URL ``https://fragment.com/username/{word}`` is requested in up to
    six rounds of at most three attempts each. Every attempt sends a
    User-Agent picked at random from ``USER_AGENTS``. A round ends early when
    a 200 response parses to ``"UNCERTAIN"``; rounds are separated by a
    random 0.8-2.0 s pause.

    Args:
        session: aiohttp client session used for every request.
        word: Username to look up; interpolated into the URL path unchanged.

    Returns:
        The first result of ``parse_html`` that is not ``"UNCERTAIN"``, or
        ``"UNCERTAIN"`` once all rounds are exhausted.

    Any ``Exception`` raised while requesting or parsing is logged at DEBUG
    level and treated as a failed attempt; it is never propagated.
    """
    # Function-scope import so this block does not depend on edits elsewhere.
    import logging

    log = logging.getLogger(__name__)
    url = f"https://fragment.com/username/{word}"
    # Loop-invariant: 15 s total budget per request, in aiohttp's documented form.
    request_timeout = aiohttp.ClientTimeout(total=15)

    for _ in range(6):
        for attempt in range(3):
            headers = {"User-Agent": random.choice(USER_AGENTS)}
            try:
                async with session.get(
                    url,
                    headers=headers,
                    allow_redirects=True,
                    timeout=request_timeout,
                ) as resp:
                    if resp.status == 429:
                        # Rate limited: exponential backoff (3 s, 6 s, 12 s)
                        # plus 1-3 s of jitter before the next attempt.
                        wait = (2 ** attempt * 3) + random.uniform(1, 3)
                        await asyncio.sleep(wait)
                        continue
                    if resp.status == 200:
                        html = await resp.text()
                        # resp.url is the final URL after any redirects.
                        status = parse_html(html, str(resp.url), word)
                        # Short random pause after each successful fetch.
                        await asyncio.sleep(random.uniform(0.15, 0.6))

                        if status != "UNCERTAIN":
                            return status
                        # NOTE(review): indentation was reconstructed from a
                        # flattened diff; this break is assumed to pair with
                        # the UNCERTAIN check above - confirm against the
                        # original file.
                        break
                    # Any other status falls through to the next attempt.
            except Exception as exc:
                # Best-effort retry loop: keep going, but leave a trace
                # instead of discarding the failure silently.
                log.debug("fetch_word(%r): attempt failed: %r", word, exc)

        # Pause between rounds before trying again.
        await asyncio.sleep(random.uniform(0.8, 2.0))

    return "UNCERTAIN"