Update app.py
Browse files
app.py
CHANGED
|
@@ -20,13 +20,16 @@ def proxy():
|
|
| 20 |
|
| 21 |
# 2. Browser-like Headers to minimize block probability
|
| 22 |
headers = {
|
| 23 |
-
|
|
|
|
|
|
|
|
|
|
| 24 |
}
|
| 25 |
|
| 26 |
try:
|
| 27 |
# 3. Fetch with http2=True and follow_redirects=True
|
| 28 |
-
with httpx.Client(http2=True, follow_redirects=True, timeout=
|
| 29 |
-
resp = client.get(target_url
|
| 30 |
|
| 31 |
# 4. Return the content back to your Airflow scraper
|
| 32 |
return Response(
|
|
|
|
| 20 |
|
| 21 |
# 2. Browser-like Headers to minimize block probability
|
| 22 |
headers = {
|
| 23 |
+
'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Mobile Safari/537.36',
|
| 24 |
+
'Accept-Language': 'en-US,en;q=0.5',
|
| 25 |
+
'Upgrade-Insecure-Requests': '1',
|
| 26 |
+
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
| 27 |
}
|
| 28 |
|
| 29 |
try:
|
| 30 |
# 3. Fetch with http2=True and follow_redirects=True
|
| 31 |
+
with httpx.Client(headers=headers, http2=True, follow_redirects=True, timeout=None) as client:
|
| 32 |
+
resp = client.get(target_url)
|
| 33 |
|
| 34 |
# 4. Return the content back to your Airflow scraper
|
| 35 |
return Response(
|