Spaces:
Sleeping
Sleeping
File size: 9,363 Bytes
dcd5b56 9d8ea9a af75712 0c0135d 9d8ea9a 075e397 af75712 9d8ea9a dcd5b56 9d8ea9a 075e397 9d8ea9a 075e397 af75712 e0b0ec0 af75712 e0b0ec0 af75712 e0b0ec0 af75712 075e397 9d8ea9a af75712 9d8ea9a af75712 622f53a 9d8ea9a af75712 e89dbd6 af75712 e89dbd6 622f53a e89dbd6 af75712 622f53a af75712 622f53a af75712 622f53a af75712 622f53a c39dbb7 af75712 622f53a 0c0135d 622f53a af75712 622f53a af75712 622f53a af75712 622f53a af75712 622f53a af75712 ceb6c8b 0c0135d ceb6c8b af75712 ceb6c8b e89dbd6 075e397 622f53a 075e397 622f53a 075e397 622f53a 075e397 622f53a 075e397 622f53a 075e397 9d8ea9a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 |
from flask import Flask, request, jsonify, send_from_directory, make_response
import os
import time
import random
import base64
from datetime import datetime
from werkzeug.utils import secure_filename
import requests
# Flask application object; the route decorators in this file register on it.
app = Flask(__name__)
# Directory whose contents are served under /files/<path>.
PUBLIC_DIR = 'public'
# Directory where uploads are written and from which /uploads/<path> is served.
UPLOAD_DIR = 'uploads'
# Create both directories at import time so the routes can rely on them existing.
os.makedirs(PUBLIC_DIR, exist_ok=True)
os.makedirs(UPLOAD_DIR, exist_ok=True)
# Lower-case file extensions (without the dot) accepted by allowed_file().
ALLOWED_EXTENSIONS = {'txt', 'pdf', 'png', 'jpg', 'jpeg', 'gif', 'html', 'json', 'xml', 'csv'}
# Browser User-Agent strings (desktop and mobile). get_url() tries them in list
# order, or picks one at random when the caller disables try_all.
USER_AGENTS = [
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Edge/120.0.0.0',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15',
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:121.0) Gecko/20100101 Firefox/121.0',
'Mozilla/5.0 (iPhone; CPU iPhone OS 17_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Mobile/15E148 Safari/604.1',
'Mozilla/5.0 (iPad; CPU OS 17_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Mobile/15E148 Safari/604.1',
'Mozilla/5.0 (Linux; Android 13; SM-S918B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Mobile Safari/537.36',
'Mozilla/5.0 (Linux; Android 13; Pixel 7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Mobile Safari/537.36'
]
def allowed_file(filename):
    """Return True when *filename* ends in an extension from ALLOWED_EXTENSIONS.

    The extension is whatever follows the last dot, compared
    case-insensitively. A name containing no dot is rejected.
    """
    if '.' not in filename:
        return False
    _, extension = filename.rsplit('.', 1)
    return extension.lower() in ALLOWED_EXTENSIONS
@app.route('/')
def index():
    """Return a JSON description of the service and its endpoints."""
    endpoint_summary = {
        'POST /api/get': 'Fetch URL (auto-try all UAs until success)',
        'POST /api/upload': 'Upload file',
        'GET /api/files': 'List files',
    }
    description = {
        'message': 'Scraper API with Smart Cloudflare Bypass',
        'endpoints': endpoint_summary,
    }
    return jsonify(description)
def _browser_headers(user_agent):
    """Build browser-like request headers carrying the given User-Agent."""
    return {
        'User-Agent': user_agent,
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.9',
        'Accept-Encoding': 'gzip, deflate, br',
        'DNT': '1',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
    }


def _is_cloudflare_challenge(html):
    """Return True when the page text contains a Cloudflare challenge marker."""
    markers = ('Just a moment', 'Verifying you are human', 'cf-chl-widget')
    return any(marker in html for marker in markers)


@app.route('/api/get', methods=['POST'])
def get_url():
    """Fetch a URL, trying each entry of USER_AGENTS until one is not blocked.

    Expects a JSON object body with:
        url: the address to fetch (required, string).
        html_only: when true, respond with only the base64-encoded HTML.
            Defaults to False.
        try_all: when false, make a single attempt with a random user agent
            via fetch_with_ua(). Defaults to True.

    Returns:
        200 with the fetched page on the first attempt that is neither a
        403 nor a detected Cloudflare challenge page.
        400 when the body is not a JSON object or ``url`` is missing/invalid.
        403 with the list of failed attempts when every user agent failed.
    """
    # silent=True returns None for a missing or malformed JSON body instead of
    # aborting, so the client gets this API's own JSON error shape rather than
    # an AttributeError-driven 500 on ``data.get``.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'success': False, 'error': 'JSON object body required'}), 400

    url = data.get('url')
    html_only = data.get('html_only', False)
    try_all = data.get('try_all', True)

    if not url or not isinstance(url, str):
        return jsonify({'success': False, 'error': 'URL required'}), 400

    # NOTE(review): the URL is fetched server-side with no restriction on the
    # target host (possible SSRF) -- confirm this service is only exposed to
    # trusted callers.
    if not try_all:
        return fetch_with_ua(url, random.choice(USER_AGENTS), html_only)

    failed_attempts = []
    for user_agent in USER_AGENTS:
        try:
            response = requests.get(
                url,
                headers=_browser_headers(user_agent),
                timeout=15,
                allow_redirects=True,
            )
            html = response.text
        except requests.exceptions.Timeout:
            failed_attempts.append({
                'user_agent': user_agent,
                'status': 'timeout',
                'reason': 'Request timeout',
            })
            continue
        except Exception as e:
            # Deliberately broad: any failure of one attempt is recorded and
            # the next user agent is tried.
            failed_attempts.append({
                'user_agent': user_agent,
                'status': 'error',
                'reason': str(e),
            })
            continue

        if response.status_code == 403:
            failed_attempts.append({
                'user_agent': user_agent,
                'status': 403,
                'reason': 'Forbidden',
            })
            continue

        if _is_cloudflare_challenge(html):
            failed_attempts.append({
                'user_agent': user_agent,
                'status': response.status_code,
                'reason': 'Cloudflare challenge detected',
            })
            continue

        # This attempt succeeded; it counts as one more than the failures.
        attempts = len(failed_attempts) + 1
        if html_only:
            html_b64 = base64.b64encode(html.encode('utf-8')).decode('utf-8')
            return jsonify({
                'success': True,
                'html_base64': html_b64,
                'user_agent': user_agent,
                'attempts': attempts,
            })
        return jsonify({
            'success': True,
            'data': {
                'html': html,
                'status_code': response.status_code,
                'url': str(response.url),
                'user_agent': user_agent,
                'attempts': attempts,
                'failed_attempts': failed_attempts,
                'timestamp': datetime.now().isoformat(),
            },
        })

    return jsonify({
        'success': False,
        'error': 'All user agents failed',
        'total_attempts': len(USER_AGENTS),
        'failed_attempts': failed_attempts,
    }), 403
def fetch_with_ua(url, user_agent, html_only):
    """Fetch *url* once with the given user agent and wrap the result as JSON.

    When *html_only* is truthy the response carries only the base64-encoded
    page text and the user agent; otherwise it carries the page text, status
    code, final URL, user agent and a timestamp. Any exception is reported
    as a JSON error with HTTP status 500.
    """
    request_headers = {
        'User-Agent': user_agent,
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.9',
        'Accept-Encoding': 'gzip, deflate, br',
        'DNT': '1',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
    }
    try:
        reply = requests.get(url, headers=request_headers, timeout=30, allow_redirects=True)
        page_text = reply.text
        if html_only:
            encoded_page = base64.b64encode(page_text.encode('utf-8'))
            payload = {
                'success': True,
                'html_base64': encoded_page.decode('utf-8'),
                'user_agent': user_agent,
            }
        else:
            payload = {
                'success': True,
                'data': {
                    'html': page_text,
                    'status_code': reply.status_code,
                    'url': str(reply.url),
                    'user_agent': user_agent,
                    'timestamp': datetime.now().isoformat(),
                },
            }
        return jsonify(payload)
    except Exception as exc:
        return jsonify({'success': False, 'error': str(exc)}), 500
@app.route('/api/upload', methods=['POST'])
def upload_file():
    """Store an uploaded file in UPLOAD_DIR and return its metadata as JSON.

    The file is read from the multipart form field ``file``. Its extension
    must pass allowed_file(). The stored name is the sanitized file name
    prefixed with the current Unix timestamp in seconds.

    Returns:
        200 with name, size and download URL on success.
        400 when the field is missing, no file was selected, or the
        extension is not allowed.
        500 with the error text on any unexpected exception.
    """
    try:
        if 'file' not in request.files:
            return jsonify({'success': False, 'error': 'No file'}), 400

        upload = request.files['file']
        # The filename can be empty or None when no file was selected; a None
        # value would otherwise make allowed_file() raise and return a 500.
        if not upload.filename:
            return jsonify({'success': False, 'error': 'No file selected'}), 400
        if not allowed_file(upload.filename):
            return jsonify({'success': False, 'error': 'File type not allowed'}), 400

        filename = secure_filename(upload.filename)
        if not allowed_file(filename):
            # secure_filename() can strip the whole stem (e.g. a non-ASCII
            # name collapses to just "txt"), which would store the file
            # without its extension. Rebuild a safe name from the extension
            # that was already validated above.
            extension = upload.filename.rsplit('.', 1)[1].lower()
            filename = f"upload.{extension}"

        timestamp = int(time.time())
        unique_filename = f"{timestamp}_{filename}"
        filepath = os.path.join(UPLOAD_DIR, unique_filename)
        upload.save(filepath)
        file_size = os.path.getsize(filepath)

        return jsonify({
            'success': True,
            'data': {
                'filename': unique_filename,
                'original_filename': filename,
                'size': file_size,
                'size_mb': round(file_size / (1024 * 1024), 2),
                'url': f"{request.host_url}uploads/{unique_filename}",
                'timestamp': datetime.now().isoformat(),
            },
        })
    except Exception as e:
        return jsonify({'success': False, 'error': str(e)}), 500
@app.route('/api/files', methods=['GET'])
def list_files():
    """Return a JSON listing of the regular files found in UPLOAD_DIR.

    Each entry reports the file name, its size in bytes and in megabytes,
    its download URL and its modification time. Any exception is reported
    as a JSON error with HTTP status 500.
    """
    try:
        entries = []
        for name in os.listdir(UPLOAD_DIR):
            path = os.path.join(UPLOAD_DIR, name)
            if not os.path.isfile(path):
                continue
            info = os.stat(path)
            size_bytes = info.st_size
            modified_at = datetime.fromtimestamp(info.st_mtime)
            entries.append({
                'filename': name,
                'size': size_bytes,
                'size_mb': round(size_bytes / (1024 * 1024), 2),
                'url': f"{request.host_url}uploads/{name}",
                'modified': modified_at.isoformat(),
            })
        listing = {'total': len(entries), 'files': entries}
        return jsonify({'success': True, 'data': listing})
    except Exception as exc:
        return jsonify({'success': False, 'error': str(exc)}), 500
@app.route('/uploads/<path:filename>')
def serve_upload(filename):
    """Serve the requested file from UPLOAD_DIR."""
    source_directory = UPLOAD_DIR
    return send_from_directory(source_directory, filename)
@app.route('/files/<path:filename>')
def serve_file(filename):
    """Serve the requested file from PUBLIC_DIR."""
    source_directory = PUBLIC_DIR
    return send_from_directory(source_directory, filename)
if __name__ == '__main__':
    # Listen on all interfaces. The port comes from the PORT environment
    # variable and falls back to 7860 when it is unset.
    app.run(host='0.0.0.0', port=int(os.environ.get('PORT', 7860)))