Create main.py
Browse files
main.py
ADDED
|
@@ -0,0 +1,139 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from flask import Flask, request, Response
|
| 2 |
+
import requests
|
| 3 |
+
import logging
|
| 4 |
+
from urllib.parse import urljoin
|
| 5 |
+
from bs4 import BeautifulSoup
|
| 6 |
+
import re
|
| 7 |
+
|
| 8 |
+
# Module-wide logger; the root logger is configured to emit INFO and above.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

# Flask application object that the route handlers in this file register on.
app = Flask(import_name=__name__)

# Origin of the upstream site that proxied requests are forwarded to.
TARGET_BASE_URL = "https://superetka.com"
|
| 16 |
+
|
| 17 |
+
@app.route('/proxy_image')
def proxy_image():
    """Fetch the image named by the ``url`` query parameter and relay it.

    Returns:
        The upstream body together with the upstream status code and
        Content-Type; ``400`` when ``url`` is missing or is not an
        http(s) URL; ``500`` when the upstream request itself fails.
    """
    image_url = request.args.get('url')
    if not image_url:
        return 'No URL provided', 400

    # Anything that is not a plain web URL is a client mistake, so answer
    # 400 instead of letting requests raise and reporting a server error.
    if not image_url.lower().startswith(('http://', 'https://')):
        return 'Unsupported URL scheme', 400

    # NOTE(review): this endpoint still fetches any http(s) host the caller
    # names (SSRF risk). Consider restricting it to the hosts that the
    # proxied pages actually load images from.
    try:
        resp = requests.get(image_url, timeout=30)
    except requests.RequestException as e:
        logger.error(f"Error proxying image: {str(e)}")
        return str(e), 500

    # Relay the upstream status (a 404 must stay a 404) and tolerate a
    # missing Content-Type header instead of failing with KeyError.
    return Response(
        resp.content,
        status=resp.status_code,
        content_type=resp.headers.get('Content-Type', 'application/octet-stream'),
    )
|
| 30 |
+
|
| 31 |
+
# Upstream response headers that must not be copied onto the Flask response:
# the first three describe a body framing/encoding that no longer applies
# once requests has decoded the payload, the last two are hop-by-hop headers
# that a WSGI application is not allowed to emit.
_EXCLUDED_RESPONSE_HEADERS = frozenset({
    'transfer-encoding',
    'content-encoding',
    'content-length',
    'connection',
    'keep-alive',
})


def _rewrite_html(html_content):
    """Return *html_content* with the site-specific filters applied, as UTF-8 bytes.

    The filters are:
      * text nodes matching "Полная версия ETKA" or "README" (any case)
        are emptied;
      * links in the second cell of a table row whose text looks like a
        part number are pointed at a Google search for that number;
      * ``<img>`` tags are replaced with a button that opens the image
        through the ``/proxy_image`` endpoint.
    """
    from urllib.parse import quote, quote_plus

    soup = BeautifulSoup(html_content, 'html.parser')

    unwanted_text = (
        re.compile('Полная версия ETKA'),
        re.compile('README', re.IGNORECASE),
    )
    for pattern in unwanted_text:
        for element in soup.find_all(string=pattern):
            element.replace_with('')

    for link in soup.select('td:nth-child(2) a'):
        part_number = link.text.strip()
        # Part numbers are upper-case alphanumerics, possibly space-separated.
        if re.match(r'^[A-Z0-9 ]+$', part_number):
            # Encode the query so spaces and other characters survive as
            # part of a valid URL.
            query = quote_plus(f"{part_number} avto.pro")
            link['href'] = f"https://www.google.com/search?q={query}"

    for img in soup.find_all('img'):
        img_src = img.get('src')
        if not img_src:
            # Nothing to link to; leave the tag alone rather than fail the
            # whole page with a KeyError.
            continue
        if not img_src.startswith('http'):
            img_src = urljoin(TARGET_BASE_URL, img_src)
        # Percent-encode the whole URL: it is nested inside a query string
        # and inside a JavaScript string literal, so '&', '?', and quotes
        # in it must not be interpreted by either.
        proxy_url = f"/proxy_image?url={quote(img_src, safe='')}"
        button = soup.new_tag('button')
        button['onclick'] = f'window.open("{proxy_url}", "_blank")'
        button['style'] = 'margin: 5px;'
        button.string = 'Показать изображение'
        img.replace_with(button)

    return soup.encode()


@app.route('/', defaults={'path': ''}, methods=['GET', 'POST', 'PUT', 'DELETE', 'PATCH', 'OPTIONS'])
@app.route('/<path:path>', methods=['GET', 'POST', 'PUT', 'DELETE', 'PATCH', 'OPTIONS'])
def proxy(path):
    """Forward the incoming request to ``TARGET_BASE_URL`` and relay the reply.

    Args:
        path: Request path below the application root; when empty or blank
            the request is sent to ``/etka/wap.php`` on the target.

    Returns:
        A Flask ``Response`` carrying the upstream status and headers.
        HTML bodies are passed through ``_rewrite_html`` first; other
        bodies are relayed unchanged. A ``500`` response is returned when
        the upstream request raises ``requests.RequestException``.
    """
    if path and path.strip():
        url = f"{TARGET_BASE_URL}/{path}"
    else:
        # Default to wap.php if no path is provided.
        url = f"{TARGET_BASE_URL}/etka/wap.php"

    # The query string is appended to the URL directly, so it is not passed
    # to requests again via ``params``.
    if request.query_string:
        url = f"{url}?{request.query_string.decode('utf-8')}"

    logger.info(f"Constructed URL: {url}")
    logger.info(f"Received request: {request.method} {request.url}")
    logger.info(f"Forwarding to: {url}")

    # Host must name the target, not this proxy. Accept-Encoding is dropped
    # so requests advertises only encodings it can decode itself; otherwise
    # a still-compressed body would be relayed after Content-Encoding has
    # been stripped from the response headers.
    skipped_request_headers = ('host', 'accept-encoding')
    headers = {
        key: value
        for key, value in request.headers
        if key.lower() not in skipped_request_headers
    }

    try:
        resp = requests.request(
            method=request.method,
            url=url,
            headers=headers,
            data=request.get_data(),
            cookies=request.cookies,
            allow_redirects=False,
            timeout=30,
        )
    except requests.RequestException as e:
        logger.error(f"Error forwarding request: {str(e)}")
        return Response(f"Error forwarding request: {str(e)}", status=500)

    logger.info(f"Received response from target: {resp.status_code}")

    content_type = resp.headers.get('Content-Type', '')
    is_html = 'text/html' in content_type
    if is_html:
        html_content = resp.content.decode('utf-8', errors='ignore')
        body = _rewrite_html(html_content)
    else:
        body = resp.content

    response = Response(body, status=resp.status_code)

    for key, value in resp.headers.items():
        if key.lower() not in _EXCLUDED_RESPONSE_HEADERS:
            response.headers[key] = value

    if is_html:
        # The rewritten document is always serialized as UTF-8, so the
        # declared charset has to say so regardless of what upstream sent.
        response.headers['Content-Type'] = 'text/html; charset=utf-8'

    return response
|
| 137 |
+
|
| 138 |
+
if __name__ == '__main__':
    import os

    # The Werkzeug interactive debugger lets anyone who can reach the server
    # execute arbitrary Python, so it must not be enabled by default while
    # listening on all interfaces. Opt in with FLASK_DEBUG=1 for local work.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in (
        '1', 'true', 'yes', 'on',
    )
    app.run(host='0.0.0.0', port=5000, debug=debug_enabled)
|