"""Small authenticated HTTP fetch proxy.

Exposes ``GET /api/proxy?url=<target>&key=<secret>``: the server fetches
``target`` with browser-like headers and relays the upstream status code,
body, and content type back to the caller.
"""
import hmac
import os
from urllib.parse import urlsplit

import httpx
from flask import Flask, Response, request

app = Flask(__name__)

# Shared secret callers must pass as the ``key`` query parameter.
# Set the PROXY_SECRET_KEY environment variable to a strong, random value;
# the literal below is only a fallback so existing deployments keep working.
SECRET_KEY = os.environ.get("PROXY_SECRET_KEY", "socio_secret_2026")

# Upper bound (seconds) for each phase of the upstream request.  Without a
# bound, a stalled upstream would hold a worker forever.
UPSTREAM_TIMEOUT_SECONDS = 30.0

# Browser-like headers to minimize the probability of being blocked.
BROWSER_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Mobile Safari/537.36',
    'Accept-Language': 'en-US,en;q=0.5',
    'Upgrade-Insecure-Requests': '1',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
}


def _is_authorized(key):
    """Return True when *key* matches ``SECRET_KEY`` (constant-time compare)."""
    if key is None:
        return False
    # Compare as bytes: compare_digest rejects non-ASCII ``str`` arguments.
    return hmac.compare_digest(key.encode("utf-8"), SECRET_KEY.encode("utf-8"))


def _is_fetchable_url(url):
    """Return True when *url* is an absolute http(s) URL with a host."""
    try:
        parts = urlsplit(url)
    except ValueError:
        # e.g. a malformed IPv6 literal such as "http://[".
        return False
    return parts.scheme in ("http", "https") and bool(parts.netloc)


@app.route('/api/proxy')
def proxy():
    """Fetch the ``url`` query parameter and relay the upstream response.

    Query parameters:
        url: absolute http/https URL to fetch.
        key: shared secret; must equal ``SECRET_KEY``.

    Returns:
        401 when the key is missing or wrong, 400 when the URL is missing or
        is not an http(s) URL, 500 with the error text when the upstream
        fetch fails, otherwise the upstream status, body, and content type.
    """
    target_url = request.args.get('url')
    key = request.args.get('key')

    # 1. Security check (the key is verified before anything else).
    if not _is_authorized(key):
        return "Unauthorized", 401

    if not target_url:
        return "Missing URL", 400

    # NOTE(review): any authenticated caller can make this server fetch an
    # arbitrary host, including internal addresses (SSRF).  Only the scheme
    # is restricted here; add a host allow-list if that matters for the
    # deployment.
    if not _is_fetchable_url(target_url):
        return "Invalid URL", 400

    try:
        # 2. Fetch over HTTP/2 where available, following redirects.
        with httpx.Client(
            headers=BROWSER_HEADERS,
            http2=True,
            follow_redirects=True,
            timeout=UPSTREAM_TIMEOUT_SECONDS,
        ) as client:
            resp = client.get(target_url)
    except (httpx.HTTPError, httpx.InvalidURL) as exc:
        app.logger.exception("Upstream fetch failed for %s", target_url)
        return str(exc), 500

    # 3. Relay the raw bytes so the body still matches the forwarded
    # content type/charset and binary payloads are not corrupted by a
    # decode/re-encode round trip.
    return Response(
        resp.content,
        status=resp.status_code,
        content_type=resp.headers.get('content-type', 'text/html'),
    )


if __name__ == '__main__':
    app.run()