"""Small Flask service: fetch remote pages with rotating User-Agents and host uploads.

Endpoints:
    GET  /                  -- JSON description of the API.
    POST /api/get           -- fetch a URL, optionally trying every User-Agent.
    POST /api/upload        -- store an uploaded file in ``UPLOAD_DIR``.
    GET  /api/files         -- list the files stored in ``UPLOAD_DIR``.
    GET  /uploads/<name>    -- serve a stored upload.
    GET  /files/<name>      -- serve a file from ``PUBLIC_DIR``.
"""

import base64
import os
import random
import time
from datetime import datetime

import requests
from flask import Flask, request, jsonify, send_from_directory, make_response
from werkzeug.utils import secure_filename

app = Flask(__name__)

PUBLIC_DIR = 'public'
UPLOAD_DIR = 'uploads'

# Both directories are created at import time so the serving routes never hit
# a missing directory.
os.makedirs(PUBLIC_DIR, exist_ok=True)
os.makedirs(UPLOAD_DIR, exist_ok=True)

# SECURITY NOTE(review): 'html' (and 'xml') uploads are served back from this
# same origin by /uploads/, which allows stored script content -- confirm this
# is intended before exposing the service publicly.
ALLOWED_EXTENSIONS = {'txt', 'pdf', 'png', 'jpg', 'jpeg', 'gif', 'html', 'json', 'xml', 'csv'}

USER_AGENTS = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Edge/120.0.0.0',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15',
    'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:121.0) Gecko/20100101 Firefox/121.0',
    'Mozilla/5.0 (iPhone; CPU iPhone OS 17_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Mobile/15E148 Safari/604.1',
    'Mozilla/5.0 (iPad; CPU OS 17_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Mobile/15E148 Safari/604.1',
    'Mozilla/5.0 (Linux; Android 13; SM-S918B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Mobile Safari/537.36',
    'Mozilla/5.0 (Linux; Android 13; Pixel 7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Mobile Safari/537.36',
]

# Substrings whose presence in a response body marks it as a Cloudflare
# challenge page rather than real content.
_CLOUDFLARE_MARKERS = ('Just a moment', 'Verifying you are human', 'cf-chl-widget')


def allowed_file(filename):
    """Return True if *filename* has an extension listed in ``ALLOWED_EXTENSIONS``.

    The extension is the text after the last dot, compared case-insensitively.
    """
    return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS


def _browser_headers(user_agent):
    """Build the browser-like request headers sent with every outbound fetch."""
    return {
        'User-Agent': user_agent,
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.9',
        # NOTE(review): 'br' is advertised here; presumably a brotli decoder is
        # installed alongside requests -- confirm, otherwise brotli-encoded
        # bodies would not be decoded.
        'Accept-Encoding': 'gzip, deflate, br',
        'DNT': '1',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
    }


def _encode_html(html):
    """Return *html* as a base64 string of its UTF-8 bytes."""
    return base64.b64encode(html.encode('utf-8')).decode('utf-8')


def _upload_url(filename):
    """Return the absolute URL under which an uploaded file is served."""
    return f"{request.host_url}uploads/{filename}"


def _size_mb(size_bytes):
    """Convert a byte count to mebibytes, rounded to two decimals."""
    return round(size_bytes / (1024 * 1024), 2)


@app.route('/')
def index():
    """Describe the API and its endpoints as JSON."""
    return jsonify({
        'message': 'Scraper API with Smart Cloudflare Bypass',
        'endpoints': {
            'POST /api/get': 'Fetch URL (auto-try all UAs until success)',
            'POST /api/upload': 'Upload file',
            'GET /api/files': 'List files',
        },
    })


@app.route('/api/get', methods=['POST'])
def get_url():
    """Fetch a remote URL on behalf of the caller.

    Reads a JSON object from the request body with the keys:
        url:       target URL (required).
        html_only: when truthy, respond with the body base64-encoded only.
        try_all:   when truthy (the default), try each entry of
                   ``USER_AGENTS`` in order until one is neither answered
                   with HTTP 403 nor with a Cloudflare challenge page; when
                   falsy, make a single attempt with a random User-Agent.

    Returns:
        A JSON response. 400 when no usable ``url`` is supplied, 403 when
        every User-Agent failed, otherwise the result of the first
        successful attempt.
    """
    # SECURITY NOTE(review): the URL is caller-controlled and fetched from this
    # host without restriction (internal addresses included) -- confirm the
    # service is only reachable by trusted clients.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        # Missing, malformed, or non-object JSON is treated as an empty
        # payload so the caller gets the 400 below instead of a server error.
        data = {}

    url = data.get('url')
    html_only = data.get('html_only', False)
    try_all = data.get('try_all', True)

    if not url or not isinstance(url, str):
        return jsonify({'success': False, 'error': 'URL required'}), 400

    if not try_all:
        return fetch_with_ua(url, random.choice(USER_AGENTS), html_only)

    failed_attempts = []
    for user_agent in USER_AGENTS:
        try:
            response = requests.get(
                url,
                headers=_browser_headers(user_agent),
                timeout=15,
                allow_redirects=True,
            )
        except requests.exceptions.Timeout:
            failed_attempts.append({
                'user_agent': user_agent,
                'status': 'timeout',
                'reason': 'Request timeout',
            })
            continue
        except Exception as e:
            # Deliberately broad: any failure of one attempt is recorded and
            # the next User-Agent is tried.
            failed_attempts.append({
                'user_agent': user_agent,
                'status': 'error',
                'reason': str(e),
            })
            continue

        if response.status_code == 403:
            failed_attempts.append({
                'user_agent': user_agent,
                'status': 403,
                'reason': 'Forbidden',
            })
            continue

        html = response.text
        if any(marker in html for marker in _CLOUDFLARE_MARKERS):
            failed_attempts.append({
                'user_agent': user_agent,
                'status': response.status_code,
                'reason': 'Cloudflare challenge detected',
            })
            continue

        attempts = len(failed_attempts) + 1
        if html_only:
            return jsonify({
                'success': True,
                'html_base64': _encode_html(html),
                'user_agent': user_agent,
                'attempts': attempts,
            })

        return jsonify({
            'success': True,
            'data': {
                'html': html,
                'status_code': response.status_code,
                'url': str(response.url),
                'user_agent': user_agent,
                'attempts': attempts,
                'failed_attempts': failed_attempts,
                'timestamp': datetime.now().isoformat(),
            },
        })

    return jsonify({
        'success': False,
        'error': 'All user agents failed',
        'total_attempts': len(USER_AGENTS),
        'failed_attempts': failed_attempts,
    }), 403


def fetch_with_ua(url, user_agent, html_only):
    """Fetch *url* once with the given User-Agent and wrap the result as JSON.

    Unlike the retry loop in ``get_url`` this performs no status or
    Cloudflare checks: whatever body comes back is returned.

    Args:
        url: target URL.
        user_agent: value for the ``User-Agent`` request header.
        html_only: when truthy, return only the base64-encoded body and the
            User-Agent used.

    Returns:
        A JSON response, or a ``(response, 500)`` tuple if the request raised.
    """
    try:
        response = requests.get(
            url,
            headers=_browser_headers(user_agent),
            timeout=30,
            allow_redirects=True,
        )
        html = response.text
    except Exception as e:
        return jsonify({'success': False, 'error': str(e)}), 500

    if html_only:
        return jsonify({
            'success': True,
            'html_base64': _encode_html(html),
            'user_agent': user_agent,
        })

    return jsonify({
        'success': True,
        'data': {
            'html': html,
            'status_code': response.status_code,
            'url': str(response.url),
            'user_agent': user_agent,
            'timestamp': datetime.now().isoformat(),
        },
    })


@app.route('/api/upload', methods=['POST'])
def upload_file():
    """Store the multipart file sent under the ``file`` field in ``UPLOAD_DIR``.

    The stored name is ``<unix-seconds>_<sanitized original name>``.

    Returns:
        A JSON response describing the stored file; 400 when the file is
        missing, unnamed, or has a disallowed extension; 500 on any other
        failure.
    """
    try:
        if 'file' not in request.files:
            return jsonify({'success': False, 'error': 'No file'}), 400

        file = request.files['file']

        # `not` also covers a filename of None, not just the empty string.
        if not file.filename:
            return jsonify({'success': False, 'error': 'No file selected'}), 400

        if not allowed_file(file.filename):
            return jsonify({'success': False, 'error': 'File type not allowed'}), 400

        filename = secure_filename(file.filename)
        if not allowed_file(filename):
            # secure_filename drops non-ASCII characters and leading dots, so a
            # name such as "文.txt" collapses to "txt" and loses its extension.
            # Re-attach the already-validated extension to a placeholder stem.
            extension = file.filename.rsplit('.', 1)[1].lower()
            filename = f"upload.{extension}"

        # NOTE: one-second resolution -- two uploads of the same name within
        # the same second share a path and the later one overwrites the first.
        timestamp = int(time.time())
        unique_filename = f"{timestamp}_{filename}"
        filepath = os.path.join(UPLOAD_DIR, unique_filename)

        file.save(filepath)
        file_size = os.path.getsize(filepath)

        return jsonify({
            'success': True,
            'data': {
                'filename': unique_filename,
                'original_filename': filename,
                'size': file_size,
                'size_mb': _size_mb(file_size),
                'url': _upload_url(unique_filename),
                'timestamp': datetime.now().isoformat(),
            },
        })
    except Exception as e:
        return jsonify({'success': False, 'error': str(e)}), 500


@app.route('/api/files', methods=['GET'])
def list_files():
    """List the regular files in ``UPLOAD_DIR`` with size, URL and mtime.

    Entries are returned sorted by filename so the listing is deterministic.
    """
    try:
        files = []
        for filename in sorted(os.listdir(UPLOAD_DIR)):
            filepath = os.path.join(UPLOAD_DIR, filename)
            if not os.path.isfile(filepath):
                continue
            file_stat = os.stat(filepath)
            files.append({
                'filename': filename,
                'size': file_stat.st_size,
                'size_mb': _size_mb(file_stat.st_size),
                'url': _upload_url(filename),
                'modified': datetime.fromtimestamp(file_stat.st_mtime).isoformat(),
            })

        return jsonify({
            'success': True,
            'data': {
                'total': len(files),
                'files': files,
            },
        })
    except Exception as e:
        return jsonify({'success': False, 'error': str(e)}), 500


# The URL rule must declare the `filename` variable; without it Flask calls the
# view with no arguments and the request fails.
@app.route('/uploads/<path:filename>')
def serve_upload(filename):
    """Serve *filename* from ``UPLOAD_DIR``."""
    return send_from_directory(UPLOAD_DIR, filename)


@app.route('/files/<path:filename>')
def serve_file(filename):
    """Serve *filename* from ``PUBLIC_DIR``."""
    return send_from_directory(PUBLIC_DIR, filename)


if __name__ == '__main__':
    app.run(host='0.0.0.0', port=int(os.environ.get('PORT', 7860)))