File size: 1,413 Bytes
2c74f93 351f81c 2c74f93 d7d369d 2c74f93 351f81c d7d369d 351f81c 2c74f93 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 | from flask import Flask, request, Response
import httpx
# Flask application object; view functions are registered on it via @app.route.
app = Flask(__name__)
import os

# Shared secret that callers must send as the `key` query parameter.
# Set the PROXY_SECRET_KEY environment variable to a strong, random value in
# any real deployment. The literal below is only a placeholder fallback so
# existing setups keep working; an empty variable is treated as unset so the
# secret can never silently become the empty string.
SECRET_KEY = os.environ.get("PROXY_SECRET_KEY") or "socio_secret_2026"
@app.route('/api/proxy')
def proxy():
    """Fetch a remote URL for an authenticated caller and relay the result.

    Query parameters:
        url: The URL to fetch.
        key: Shared secret; must match ``SECRET_KEY``.

    Returns:
        A ``Response`` carrying the upstream body (raw bytes), status code
        and content type on success; ``("Unauthorized", 401)`` when the key
        is missing or wrong; ``("Missing URL", 400)`` when no URL is given;
        ``(error message, 500)`` when the upstream request fails.
    """
    import hmac  # local import so this block does not depend on file-level edits

    target_url = request.args.get('url')
    key = request.args.get('key')

    # 1. Security check. compare_digest runs in constant time, so response
    #    timing does not reveal how much of the key matched. Comparing bytes
    #    avoids the TypeError compare_digest raises for non-ASCII str input.
    if key is None or not hmac.compare_digest(
        key.encode('utf-8'), SECRET_KEY.encode('utf-8')
    ):
        return "Unauthorized", 401

    if not target_url:
        return "Missing URL", 400

    # NOTE(review): target_url is fully caller-controlled, so anyone holding
    # the key can make this server request internal addresses (SSRF).
    # Consider a host allow-list if that matters for this deployment.

    # 2. Browser-like headers to minimize block probability.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Mobile Safari/537.36',
        'Accept-Language': 'en-US,en;q=0.5',
        'Upgrade-Insecure-Requests': '1',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    }

    # Bounded timeouts: with timeout=None a stalled upstream would tie up
    # this worker indefinitely.
    timeout = httpx.Timeout(30.0, connect=10.0)

    # 3. Fetch over HTTP/2 when available, following redirects. Only the
    #    network call sits inside the try so unrelated bugs are not masked.
    try:
        with httpx.Client(
            headers=headers,
            http2=True,
            follow_redirects=True,
            timeout=timeout,
        ) as client:
            resp = client.get(target_url)
    except (httpx.HTTPError, httpx.InvalidURL) as e:
        return str(e), 500

    # 4. Relay the upstream payload. Forward the raw bytes rather than
    #    resp.text: decoding and re-encoding as UTF-8 would corrupt binary
    #    bodies and contradict the charset in the forwarded content type.
    return Response(
        resp.content,
        status=resp.status_code,
        content_type=resp.headers.get('content-type', 'text/html'),
    )
if __name__ == '__main__':
    # Run the app directly (default host and port) when executed as a script.
    app.run()