Spaces:
Sleeping
Sleeping
| from flask import Flask, request, jsonify, send_from_directory, make_response | |
| import os | |
| import time | |
| import random | |
| import base64 | |
| from datetime import datetime | |
| from werkzeug.utils import secure_filename | |
| import requests | |
# Flask application object that the rest of this module configures and runs.
app = Flask(__name__)

# Directory names are relative to the process working directory.
PUBLIC_DIR = 'public'
UPLOAD_DIR = 'uploads'

# Make sure both directories exist as soon as the module is imported, so the
# handlers below never have to create them.
for _directory in (PUBLIC_DIR, UPLOAD_DIR):
    os.makedirs(_directory, exist_ok=True)
del _directory

# Lower-case file extensions accepted by allowed_file().
ALLOWED_EXTENSIONS = {
    'txt', 'pdf', 'png', 'jpg', 'jpeg', 'gif', 'html', 'json', 'xml', 'csv',
}
# User-Agent strings sent with outgoing requests. get_url() walks this list
# in order, so the ordering is significant.
USER_AGENTS = [
    # Desktop browsers
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0',
    # NOTE(review): this entry does not look like a real Edge user agent
    # (no Chrome/Safari tokens) -- confirm whether that is intentional.
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Edge/120.0.0.0',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15',
    'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:121.0) Gecko/20100101 Firefox/121.0',
    # Mobile and tablet browsers
    'Mozilla/5.0 (iPhone; CPU iPhone OS 17_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Mobile/15E148 Safari/604.1',
    'Mozilla/5.0 (iPad; CPU OS 17_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Mobile/15E148 Safari/604.1',
    'Mozilla/5.0 (Linux; Android 13; SM-S918B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Mobile Safari/537.36',
    'Mozilla/5.0 (Linux; Android 13; Pixel 7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Mobile Safari/537.36',
]
def allowed_file(filename):
    """Return True when *filename* ends in an extension from ALLOWED_EXTENSIONS.

    The extension is whatever follows the last dot, compared
    case-insensitively. A name without any dot is never allowed.
    """
    if '.' not in filename:
        return False
    _, extension = filename.rsplit('.', 1)
    return extension.lower() in ALLOWED_EXTENSIONS
def index():
    """Return a JSON document naming the service and listing its endpoints.

    NOTE(review): no route decorator is visible on this function in this
    view of the file -- confirm how it is registered with the app.
    """
    endpoint_descriptions = {
        'POST /api/get': 'Fetch URL (auto-try all UAs until success)',
        'POST /api/upload': 'Upload file',
        'GET /api/files': 'List files',
    }
    body = {
        'message': 'Scraper API with Smart Cloudflare Bypass',
        'endpoints': endpoint_descriptions,
    }
    return jsonify(body)
# Substrings whose presence in a response body is treated as a Cloudflare
# challenge page rather than real content.
_CLOUDFLARE_MARKERS = ('Just a moment', 'Verifying you are human', 'cf-chl-widget')


def _browser_headers(user_agent):
    """Return browser-like request headers carrying the given User-Agent."""
    return {
        'User-Agent': user_agent,
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.9',
        # NOTE(review): 'br' is advertised explicitly; confirm a brotli
        # decoder is installed, otherwise brotli bodies may not be decoded.
        'Accept-Encoding': 'gzip, deflate, br',
        'DNT': '1',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1'
    }


def _is_cloudflare_challenge(html):
    """Return True when the body contains any known Cloudflare challenge marker."""
    return any(marker in html for marker in _CLOUDFLARE_MARKERS)


def _is_http_url(url):
    """Return True when *url* is a string with an http/https scheme and a host."""
    from urllib.parse import urlsplit

    if not isinstance(url, str):
        return False
    try:
        parts = urlsplit(url.strip())
    except ValueError:
        # urlsplit rejects some malformed inputs (e.g. a broken IPv6 literal).
        return False
    return parts.scheme in ('http', 'https') and bool(parts.netloc)


def get_url():
    """Fetch a caller-supplied URL, trying each entry of USER_AGENTS in order.

    Reads a JSON object from the request body with these keys:
        url: the address to fetch (required, must be http or https).
        html_only: when true, a successful response carries only the body as
            base64 plus the user agent and attempt count. Defaults to False.
        try_all: when false, a single random user agent is used via
            fetch_with_ua() instead of walking the whole list. Defaults to True.

    Returns:
        A JSON response. 400 when the body is not a JSON object or the URL is
        missing/invalid; 403 with the list of failed attempts when every user
        agent was rejected, challenged, timed out, or errored; otherwise the
        first response that was neither a 403 nor a Cloudflare challenge.

    NOTE(review): no route decorator is visible on this function in this view
    of the file; index() advertises it as 'POST /api/get' -- confirm.
    """
    # silent=True turns a missing, malformed, or wrongly-typed body into None
    # instead of raising, so the caller always gets a JSON error from here.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'success': False, 'error': 'JSON object body required'}), 400

    url = data.get('url')
    html_only = data.get('html_only', False)
    try_all = data.get('try_all', True)

    if not url:
        return jsonify({'success': False, 'error': 'URL required'}), 400
    # Reject unusable URLs up front rather than failing once per user agent
    # and reporting the result as a 403.
    # SECURITY NOTE(review): the server still fetches any http(s) address the
    # caller names, including internal ones (SSRF). Not restricted here.
    if not _is_http_url(url):
        return jsonify({'success': False, 'error': 'URL must be an http(s) URL'}), 400

    if not try_all:
        user_agent = random.choice(USER_AGENTS)
        return fetch_with_ua(url, user_agent, html_only)

    failed_attempts = []
    for user_agent in USER_AGENTS:
        try:
            response = requests.get(
                url,
                headers=_browser_headers(user_agent),
                timeout=15,
                allow_redirects=True,
            )

            if response.status_code == 403:
                failed_attempts.append({
                    'user_agent': user_agent,
                    'status': 403,
                    'reason': 'Forbidden'
                })
                continue

            html = response.text
            if _is_cloudflare_challenge(html):
                failed_attempts.append({
                    'user_agent': user_agent,
                    'status': response.status_code,
                    'reason': 'Cloudflare challenge detected'
                })
                continue

            if html_only:
                html_b64 = base64.b64encode(html.encode('utf-8')).decode('utf-8')
                return jsonify({
                    'success': True,
                    'html_base64': html_b64,
                    'user_agent': user_agent,
                    'attempts': len(failed_attempts) + 1
                })

            return jsonify({
                'success': True,
                'data': {
                    'html': html,
                    'status_code': response.status_code,
                    'url': str(response.url),
                    'user_agent': user_agent,
                    'attempts': len(failed_attempts) + 1,
                    'failed_attempts': failed_attempts,
                    'timestamp': datetime.now().isoformat()
                }
            })
        except requests.exceptions.Timeout:
            failed_attempts.append({
                'user_agent': user_agent,
                'status': 'timeout',
                'reason': 'Request timeout'
            })
            continue
        except Exception as e:
            # Best-effort boundary: record the failure and move on to the
            # next user agent instead of aborting the whole request.
            failed_attempts.append({
                'user_agent': user_agent,
                'status': 'error',
                'reason': str(e)
            })
            continue

    return jsonify({
        'success': False,
        'error': 'All user agents failed',
        'total_attempts': len(USER_AGENTS),
        'failed_attempts': failed_attempts
    }), 403
def fetch_with_ua(url, user_agent, html_only):
    """Fetch *url* once with the given User-Agent and wrap the result as JSON.

    Args:
        url: address to request.
        user_agent: value sent in the User-Agent header.
        html_only: when truthy, return only the base64-encoded body and the
            user agent; otherwise return the body together with the status
            code, final URL, user agent, and a timestamp.

    Returns:
        A JSON response on success, or a (JSON response, 500) pair carrying
        the exception text when anything raises.
    """
    request_headers = {
        'User-Agent': user_agent,
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.9',
        'Accept-Encoding': 'gzip, deflate, br',
        'DNT': '1',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
    }
    try:
        resp = requests.get(url, headers=request_headers, timeout=30, allow_redirects=True)
        body = resp.text

        if not html_only:
            details = {
                'html': body,
                'status_code': resp.status_code,
                'url': str(resp.url),
                'user_agent': user_agent,
                'timestamp': datetime.now().isoformat(),
            }
            return jsonify({'success': True, 'data': details})

        # Base64 output is pure ASCII, so it survives JSON transport unchanged.
        encoded_body = base64.b64encode(body.encode('utf-8')).decode('ascii')
        return jsonify({
            'success': True,
            'html_base64': encoded_body,
            'user_agent': user_agent,
        })
    except Exception as exc:
        return jsonify({'success': False, 'error': str(exc)}), 500
def _save_without_overwrite(file, timestamp, filename):
    """Save *file* into UPLOAD_DIR under a name no existing file uses.

    The first candidate is '<timestamp>_<filename>'. If that already exists,
    '<timestamp>_<n>_<filename>' is tried with n = 1, 2, ... Returns the
    (stored name, stored path) pair.
    """
    attempt = 0
    while True:
        if attempt == 0:
            candidate = f"{timestamp}_{filename}"
        else:
            candidate = f"{timestamp}_{attempt}_{filename}"
        path = os.path.join(UPLOAD_DIR, candidate)
        try:
            # 'x' = exclusive creation: fails instead of truncating a file
            # that another upload already stored under this name.
            destination = open(path, 'xb')
        except FileExistsError:
            attempt += 1
            continue
        with destination:
            file.save(destination)
        return candidate, path


def upload_file():
    """Store the uploaded 'file' form field under UPLOAD_DIR and describe it.

    Returns:
        A JSON response with the stored name, sanitized original name, size,
        download URL, and timestamp on success; 400 when no file was sent,
        no file name was given, or the extension is not allowed; 500 with the
        exception text when anything else raises.

    NOTE(review): no route decorator is visible on this function in this view
    of the file; index() advertises it as 'POST /api/upload' -- confirm.
    """
    try:
        if 'file' not in request.files:
            return jsonify({'success': False, 'error': 'No file'}), 400

        file = request.files['file']
        # Covers both an empty name and a missing (None) name.
        if not file.filename:
            return jsonify({'success': False, 'error': 'No file selected'}), 400
        if not allowed_file(file.filename):
            return jsonify({'success': False, 'error': 'File type not allowed'}), 400

        filename = secure_filename(file.filename)
        if not allowed_file(filename):
            # Sanitizing can strip the stem and the dot (e.g. a non-ASCII
            # stem), leaving a name without its extension. Rebuild a safe
            # name from the extension that was already validated above.
            extension = file.filename.rsplit('.', 1)[1].lower()
            filename = f"upload.{extension}"

        timestamp = int(time.time())
        unique_filename, filepath = _save_without_overwrite(file, timestamp, filename)
        file_size = os.path.getsize(filepath)

        return jsonify({
            'success': True,
            'data': {
                'filename': unique_filename,
                'original_filename': filename,
                'size': file_size,
                'size_mb': round(file_size / (1024 * 1024), 2),
                'url': f"{request.host_url}uploads/{unique_filename}",
                'timestamp': datetime.now().isoformat()
            }
        })
    except Exception as e:
        return jsonify({'success': False, 'error': str(e)}), 500
def list_files():
    """Return a JSON listing of the regular files found in UPLOAD_DIR.

    Each entry carries the file name, its size in bytes and in megabytes,
    a download URL built from the request host, and the modification time.
    Any exception is reported as a 500 JSON response with its text.

    NOTE(review): no route decorator is visible on this function in this view
    of the file; index() advertises it as 'GET /api/files' -- confirm.
    """
    try:
        listing = []
        for name in os.listdir(UPLOAD_DIR):
            path = os.path.join(UPLOAD_DIR, name)
            # Skip sub-directories and anything else that is not a file.
            if not os.path.isfile(path):
                continue
            info = os.stat(path)
            size_bytes = info.st_size
            listing.append({
                'filename': name,
                'size': size_bytes,
                'size_mb': round(size_bytes / (1024 * 1024), 2),
                'url': f"{request.host_url}uploads/{name}",
                'modified': datetime.fromtimestamp(info.st_mtime).isoformat(),
            })

        summary = {'total': len(listing), 'files': listing}
        return jsonify({'success': True, 'data': summary})
    except Exception as exc:
        return jsonify({'success': False, 'error': str(exc)}), 500
def serve_upload(filename):
    """Send the file called *filename* from UPLOAD_DIR.

    NOTE(review): uploads may include 'html' and 'xml' files, which are sent
    back from this application's own origin -- confirm that is intended.
    """
    upload_response = send_from_directory(UPLOAD_DIR, filename)
    return upload_response
def serve_file(filename):
    """Send the file called *filename* from PUBLIC_DIR."""
    public_response = send_from_directory(PUBLIC_DIR, filename)
    return public_response
if __name__ == '__main__':
    # The PORT environment variable overrides the default port 7860.
    listen_port = int(os.environ.get('PORT', 7860))
    # Bind to every interface so the server is reachable from outside the host.
    app.run(host='0.0.0.0', port=listen_port)