Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -21,7 +21,6 @@ warnings.filterwarnings("ignore")
|
|
| 21 |
BASE = "https://www.croxyproxy.com"
|
| 22 |
app = Flask(__name__)
|
| 23 |
|
| 24 |
-
# ── Headers à garder dans la réponse (tout le reste = poubelle) ──
|
| 25 |
KEEP_HEADERS = {
|
| 26 |
"content-type", "content-length", "content-encoding",
|
| 27 |
"server", "date", "connection",
|
|
@@ -32,7 +31,6 @@ KEEP_HEADERS = {
|
|
| 32 |
"x-request-id", "location", "retry-after",
|
| 33 |
}
|
| 34 |
|
| 35 |
-
# ── Headers toujours exclus (bruit du proxy) ──
|
| 36 |
DROP_HEADERS = {
|
| 37 |
"set-cookie", "__cph", "__cpc",
|
| 38 |
"content-security-policy", "strict-transport-security",
|
|
@@ -59,10 +57,8 @@ def dec(e):
|
|
| 59 |
|
| 60 |
|
| 61 |
def filter_headers(raw_headers, include_all=False):
|
| 62 |
-
"""Filtre les headers : garde uniquement les utiles."""
|
| 63 |
if include_all:
|
| 64 |
return dict(raw_headers)
|
| 65 |
-
|
| 66 |
cleaned = {}
|
| 67 |
for k, v in raw_headers.items():
|
| 68 |
kl = k.lower()
|
|
@@ -74,54 +70,42 @@ def filter_headers(raw_headers, include_all=False):
|
|
| 74 |
|
| 75 |
|
| 76 |
def parse_body(text, content_type=""):
|
| 77 |
-
"""Parse le body en JSON si possible, sinon tronque le texte."""
|
| 78 |
if not text:
|
| 79 |
return None
|
| 80 |
-
|
| 81 |
-
# Tente JSON
|
| 82 |
if "json" in content_type.lower() or text.strip().startswith(("{", "[")):
|
| 83 |
try:
|
| 84 |
return json.loads(text)
|
| 85 |
except (json.JSONDecodeError, ValueError):
|
| 86 |
pass
|
| 87 |
-
|
| 88 |
-
# HTML → tronqué
|
| 89 |
if "html" in content_type.lower() or text.strip().startswith("<"):
|
| 90 |
return {
|
| 91 |
"_type": "html",
|
| 92 |
"_length": len(text),
|
| 93 |
"_preview": text[:300].strip() + ("..." if len(text) > 300 else ""),
|
| 94 |
}
|
| 95 |
-
|
| 96 |
-
# Texte brut → tronqué si long
|
| 97 |
if len(text) > 2000:
|
| 98 |
return {
|
| 99 |
"_type": "text",
|
| 100 |
"_length": len(text),
|
| 101 |
"_preview": text[:500].strip() + "...",
|
| 102 |
}
|
| 103 |
-
|
| 104 |
return text
|
| 105 |
|
| 106 |
|
| 107 |
def extract_ip(url_str):
|
| 108 |
-
"""Extrait l'IP d'une URL de serveur proxy."""
|
| 109 |
return (url_str or "").replace("https://", "").replace("http://", "").split("/")[0]
|
| 110 |
|
| 111 |
|
| 112 |
def format_result(raw, include_raw_headers=False):
|
| 113 |
-
"""Formate proprement le résultat d'un fetch."""
|
| 114 |
if not raw.get("success"):
|
| 115 |
return {
|
| 116 |
"success": False,
|
| 117 |
"error": raw.get("error"),
|
| 118 |
"server": raw.get("server"),
|
| 119 |
}
|
| 120 |
-
|
| 121 |
ct = ""
|
| 122 |
if raw.get("headers"):
|
| 123 |
ct = raw["headers"].get("Content-Type", raw["headers"].get("content-type", ""))
|
| 124 |
-
|
| 125 |
result = {
|
| 126 |
"success": True,
|
| 127 |
"status": raw.get("status"),
|
|
@@ -130,35 +114,28 @@ def format_result(raw, include_raw_headers=False):
|
|
| 130 |
"proxy": raw.get("proxy"),
|
| 131 |
"servers_available": raw.get("servers_available"),
|
| 132 |
}
|
| 133 |
-
|
| 134 |
-
# Headers filtrés
|
| 135 |
if raw.get("headers"):
|
| 136 |
result["headers"] = filter_headers(raw["headers"], include_all=include_raw_headers)
|
| 137 |
-
|
| 138 |
return result
|
| 139 |
|
| 140 |
|
| 141 |
def fetch_raw(url, sid=None):
|
| 142 |
-
"""Fetch via CroxyProxy — retourne les données brutes."""
|
| 143 |
sc = cloudscraper.create_scraper(
|
| 144 |
browser={"browser": "chrome", "platform": "windows", "desktop": True}
|
| 145 |
)
|
| 146 |
S.stats["req"] += 1
|
| 147 |
-
|
| 148 |
try:
|
| 149 |
-
# 1. GET / → csrf
|
| 150 |
r1 = sc.get(BASE, timeout=30)
|
| 151 |
if r1.status_code != 200:
|
| 152 |
S.stats["fail"] += 1
|
| 153 |
return {"success": False, "error": f"Homepage {r1.status_code}"}
|
| 154 |
-
|
| 155 |
s1 = BeautifulSoup(r1.text, "lxml")
|
| 156 |
ci = s1.find("input", {"name": "csrf"})
|
| 157 |
if not ci:
|
| 158 |
S.stats["fail"] += 1
|
| 159 |
return {"success": False, "error": "No CSRF"}
|
| 160 |
-
|
| 161 |
-
# 2. POST /servers → selector page
|
| 162 |
r2 = sc.post(
|
| 163 |
f"{BASE}/servers",
|
| 164 |
data={
|
|
@@ -174,34 +151,31 @@ def fetch_raw(url, sid=None):
|
|
| 174 |
allow_redirects=True,
|
| 175 |
timeout=30,
|
| 176 |
)
|
| 177 |
-
|
| 178 |
if r2.status_code != 200:
|
| 179 |
S.stats["fail"] += 1
|
| 180 |
return {"success": False, "error": f"Servers {r2.status_code}"}
|
| 181 |
-
|
| 182 |
s2 = BeautifulSoup(r2.text, "lxml")
|
| 183 |
sel = s2.find("script", {"id": "serverSelectorScript"})
|
| 184 |
if not sel:
|
| 185 |
S.stats["fail"] += 1
|
| 186 |
return {"success": False, "error": "No selector"}
|
| 187 |
-
|
| 188 |
-
# 3. Parse servers + csrf2
|
| 189 |
ss = [
|
| 190 |
x for x in (dec(i) for i in json.loads(unescape(sel.get("data-ss", ""))))
|
| 191 |
if x and x.get("id")
|
| 192 |
]
|
| 193 |
csrf2 = unescape(sel.get("data-csrf", "")).strip('"')
|
| 194 |
fo = unescape(sel.get("data-fo", "")).strip('"')
|
| 195 |
-
|
| 196 |
if not ss:
|
| 197 |
S.stats["fail"] += 1
|
| 198 |
return {"success": False, "error": "No servers"}
|
| 199 |
-
|
| 200 |
-
# Mettre à jour le cache
|
| 201 |
S.servers = ss
|
| 202 |
S.last = datetime.now(timezone.utc).isoformat()
|
| 203 |
-
|
| 204 |
-
# Choisir le serveur
|
| 205 |
ch = None
|
| 206 |
if sid:
|
| 207 |
ch = next((x for x in ss if x["id"] == sid), None)
|
|
@@ -209,8 +183,7 @@ def fetch_raw(url, sid=None):
|
|
| 209 |
with S.lock:
|
| 210 |
ch = ss[S.idx % len(ss)]
|
| 211 |
S.idx += 1
|
| 212 |
-
|
| 213 |
-
# 4. POST /requests → 302
|
| 214 |
r3 = sc.post(
|
| 215 |
f"{BASE}/requests?fso=",
|
| 216 |
data={
|
|
@@ -225,7 +198,7 @@ def fetch_raw(url, sid=None):
|
|
| 225 |
allow_redirects=False,
|
| 226 |
timeout=30,
|
| 227 |
)
|
| 228 |
-
|
| 229 |
loc = r3.headers.get("Location") or r3.headers.get("location")
|
| 230 |
if not loc:
|
| 231 |
S.stats["fail"] += 1
|
|
@@ -234,18 +207,16 @@ def fetch_raw(url, sid=None):
|
|
| 234 |
"error": f"No redirect ({r3.status_code})",
|
| 235 |
"server": ch.get("name"),
|
| 236 |
}
|
| 237 |
-
|
| 238 |
-
# 5. GET redirect → data-r
|
| 239 |
r4 = sc.get(loc, timeout=30, allow_redirects=True)
|
| 240 |
dr = re.search(r'data-r="([^"]+)"', r4.text)
|
| 241 |
if not dr:
|
| 242 |
S.stats["fail"] += 1
|
| 243 |
return {"success": False, "error": "No data-r", "server": ch.get("name")}
|
| 244 |
-
|
| 245 |
-
# 6. GET final
|
| 246 |
final = base64.b64decode(dr.group(1)).decode()
|
| 247 |
r5 = sc.get(final, timeout=30, allow_redirects=True)
|
| 248 |
-
|
| 249 |
S.stats["ok"] += 1
|
| 250 |
return {
|
| 251 |
"success": True,
|
|
@@ -265,6 +236,25 @@ def fetch_raw(url, sid=None):
|
|
| 265 |
return {"success": False, "error": str(e)}
|
| 266 |
|
| 267 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 268 |
# ═══════════════════════════════════════════════
|
| 269 |
# ROUTES
|
| 270 |
# ═══════════════════════════════════════════════
|
|
@@ -318,7 +308,6 @@ def proxy_fetch():
|
|
| 318 |
d = request.get_json() or {}
|
| 319 |
if not d.get("url"):
|
| 320 |
return jsonify({"error": "url required"}), 400
|
| 321 |
-
|
| 322 |
raw = fetch_raw(d["url"], d.get("server_id"))
|
| 323 |
return jsonify(format_result(raw, include_raw_headers=d.get("raw_headers", False)))
|
| 324 |
|
|
@@ -328,7 +317,6 @@ def proxy_random():
|
|
| 328 |
d = request.get_json() or {}
|
| 329 |
if not d.get("url"):
|
| 330 |
return jsonify({"error": "url required"}), 400
|
| 331 |
-
|
| 332 |
sid = random.choice(S.servers)["id"] if S.servers else None
|
| 333 |
raw = fetch_raw(d["url"], sid)
|
| 334 |
return jsonify(format_result(raw, include_raw_headers=d.get("raw_headers", False)))
|
|
@@ -340,14 +328,12 @@ def proxy_batch():
|
|
| 340 |
urls = d.get("urls", [])
|
| 341 |
if not urls:
|
| 342 |
return jsonify({"error": "urls required"}), 400
|
| 343 |
-
|
| 344 |
include_raw = d.get("raw_headers", False)
|
| 345 |
results = []
|
| 346 |
for u in urls:
|
| 347 |
raw = fetch_raw(u)
|
| 348 |
results.append(format_result(raw, include_raw_headers=include_raw))
|
| 349 |
time.sleep(0.5)
|
| 350 |
-
|
| 351 |
return jsonify({
|
| 352 |
"count": len(results),
|
| 353 |
"success_count": sum(1 for r in results if r.get("success")),
|
|
@@ -356,4 +342,4 @@ def proxy_batch():
|
|
| 356 |
|
| 357 |
|
| 358 |
if __name__ == "__main__":
|
| 359 |
-
app.run(host="0.0.0.0", port=7860)
|
|
|
|
| 21 |
BASE = "https://www.croxyproxy.com"
|
| 22 |
app = Flask(__name__)
|
| 23 |
|
|
|
|
| 24 |
KEEP_HEADERS = {
|
| 25 |
"content-type", "content-length", "content-encoding",
|
| 26 |
"server", "date", "connection",
|
|
|
|
| 31 |
"x-request-id", "location", "retry-after",
|
| 32 |
}
|
| 33 |
|
|
|
|
| 34 |
DROP_HEADERS = {
|
| 35 |
"set-cookie", "__cph", "__cpc",
|
| 36 |
"content-security-policy", "strict-transport-security",
|
|
|
|
| 57 |
|
| 58 |
|
| 59 |
def filter_headers(raw_headers, include_all=False):
|
|
|
|
| 60 |
if include_all:
|
| 61 |
return dict(raw_headers)
|
|
|
|
| 62 |
cleaned = {}
|
| 63 |
for k, v in raw_headers.items():
|
| 64 |
kl = k.lower()
|
|
|
|
| 70 |
|
| 71 |
|
| 72 |
def parse_body(text, content_type=""):
|
|
|
|
| 73 |
if not text:
|
| 74 |
return None
|
|
|
|
|
|
|
| 75 |
if "json" in content_type.lower() or text.strip().startswith(("{", "[")):
|
| 76 |
try:
|
| 77 |
return json.loads(text)
|
| 78 |
except (json.JSONDecodeError, ValueError):
|
| 79 |
pass
|
|
|
|
|
|
|
| 80 |
if "html" in content_type.lower() or text.strip().startswith("<"):
|
| 81 |
return {
|
| 82 |
"_type": "html",
|
| 83 |
"_length": len(text),
|
| 84 |
"_preview": text[:300].strip() + ("..." if len(text) > 300 else ""),
|
| 85 |
}
|
|
|
|
|
|
|
| 86 |
if len(text) > 2000:
|
| 87 |
return {
|
| 88 |
"_type": "text",
|
| 89 |
"_length": len(text),
|
| 90 |
"_preview": text[:500].strip() + "...",
|
| 91 |
}
|
|
|
|
| 92 |
return text
|
| 93 |
|
| 94 |
|
| 95 |
def extract_ip(url_str):
|
|
|
|
| 96 |
return (url_str or "").replace("https://", "").replace("http://", "").split("/")[0]
|
| 97 |
|
| 98 |
|
| 99 |
def format_result(raw, include_raw_headers=False):
|
|
|
|
| 100 |
if not raw.get("success"):
|
| 101 |
return {
|
| 102 |
"success": False,
|
| 103 |
"error": raw.get("error"),
|
| 104 |
"server": raw.get("server"),
|
| 105 |
}
|
|
|
|
| 106 |
ct = ""
|
| 107 |
if raw.get("headers"):
|
| 108 |
ct = raw["headers"].get("Content-Type", raw["headers"].get("content-type", ""))
|
|
|
|
| 109 |
result = {
|
| 110 |
"success": True,
|
| 111 |
"status": raw.get("status"),
|
|
|
|
| 114 |
"proxy": raw.get("proxy"),
|
| 115 |
"servers_available": raw.get("servers_available"),
|
| 116 |
}
|
|
|
|
|
|
|
| 117 |
if raw.get("headers"):
|
| 118 |
result["headers"] = filter_headers(raw["headers"], include_all=include_raw_headers)
|
|
|
|
| 119 |
return result
|
| 120 |
|
| 121 |
|
| 122 |
def fetch_raw(url, sid=None):
|
|
|
|
| 123 |
sc = cloudscraper.create_scraper(
|
| 124 |
browser={"browser": "chrome", "platform": "windows", "desktop": True}
|
| 125 |
)
|
| 126 |
S.stats["req"] += 1
|
|
|
|
| 127 |
try:
|
|
|
|
| 128 |
r1 = sc.get(BASE, timeout=30)
|
| 129 |
if r1.status_code != 200:
|
| 130 |
S.stats["fail"] += 1
|
| 131 |
return {"success": False, "error": f"Homepage {r1.status_code}"}
|
| 132 |
+
|
| 133 |
s1 = BeautifulSoup(r1.text, "lxml")
|
| 134 |
ci = s1.find("input", {"name": "csrf"})
|
| 135 |
if not ci:
|
| 136 |
S.stats["fail"] += 1
|
| 137 |
return {"success": False, "error": "No CSRF"}
|
| 138 |
+
|
|
|
|
| 139 |
r2 = sc.post(
|
| 140 |
f"{BASE}/servers",
|
| 141 |
data={
|
|
|
|
| 151 |
allow_redirects=True,
|
| 152 |
timeout=30,
|
| 153 |
)
|
| 154 |
+
|
| 155 |
if r2.status_code != 200:
|
| 156 |
S.stats["fail"] += 1
|
| 157 |
return {"success": False, "error": f"Servers {r2.status_code}"}
|
| 158 |
+
|
| 159 |
s2 = BeautifulSoup(r2.text, "lxml")
|
| 160 |
sel = s2.find("script", {"id": "serverSelectorScript"})
|
| 161 |
if not sel:
|
| 162 |
S.stats["fail"] += 1
|
| 163 |
return {"success": False, "error": "No selector"}
|
| 164 |
+
|
|
|
|
| 165 |
ss = [
|
| 166 |
x for x in (dec(i) for i in json.loads(unescape(sel.get("data-ss", ""))))
|
| 167 |
if x and x.get("id")
|
| 168 |
]
|
| 169 |
csrf2 = unescape(sel.get("data-csrf", "")).strip('"')
|
| 170 |
fo = unescape(sel.get("data-fo", "")).strip('"')
|
| 171 |
+
|
| 172 |
if not ss:
|
| 173 |
S.stats["fail"] += 1
|
| 174 |
return {"success": False, "error": "No servers"}
|
| 175 |
+
|
|
|
|
| 176 |
S.servers = ss
|
| 177 |
S.last = datetime.now(timezone.utc).isoformat()
|
| 178 |
+
|
|
|
|
| 179 |
ch = None
|
| 180 |
if sid:
|
| 181 |
ch = next((x for x in ss if x["id"] == sid), None)
|
|
|
|
| 183 |
with S.lock:
|
| 184 |
ch = ss[S.idx % len(ss)]
|
| 185 |
S.idx += 1
|
| 186 |
+
|
|
|
|
| 187 |
r3 = sc.post(
|
| 188 |
f"{BASE}/requests?fso=",
|
| 189 |
data={
|
|
|
|
| 198 |
allow_redirects=False,
|
| 199 |
timeout=30,
|
| 200 |
)
|
| 201 |
+
|
| 202 |
loc = r3.headers.get("Location") or r3.headers.get("location")
|
| 203 |
if not loc:
|
| 204 |
S.stats["fail"] += 1
|
|
|
|
| 207 |
"error": f"No redirect ({r3.status_code})",
|
| 208 |
"server": ch.get("name"),
|
| 209 |
}
|
| 210 |
+
|
|
|
|
| 211 |
r4 = sc.get(loc, timeout=30, allow_redirects=True)
|
| 212 |
dr = re.search(r'data-r="([^"]+)"', r4.text)
|
| 213 |
if not dr:
|
| 214 |
S.stats["fail"] += 1
|
| 215 |
return {"success": False, "error": "No data-r", "server": ch.get("name")}
|
| 216 |
+
|
|
|
|
| 217 |
final = base64.b64decode(dr.group(1)).decode()
|
| 218 |
r5 = sc.get(final, timeout=30, allow_redirects=True)
|
| 219 |
+
|
| 220 |
S.stats["ok"] += 1
|
| 221 |
return {
|
| 222 |
"success": True,
|
|
|
|
| 236 |
return {"success": False, "error": str(e)}
|
| 237 |
|
| 238 |
|
| 239 |
+
def warmup():
|
| 240 |
+
"""Populate server list on startup."""
|
| 241 |
+
print("Warming up β populating server list...")
|
| 242 |
+
result = fetch_raw("https://httpbin.org/ip")
|
| 243 |
+
if result.get("success"):
|
| 244 |
+
print(f"β {len(S.servers)} servers loaded")
|
| 245 |
+
else:
|
| 246 |
+
print(f"β Warm-up failed: {result.get('error')}")
|
| 247 |
+
|
| 248 |
+
|
| 249 |
+
# Gunicorn hook — runs in each worker after fork
|
| 250 |
+
def post_fork(server, worker):
|
| 251 |
+
warmup()
|
| 252 |
+
|
| 253 |
+
|
| 254 |
+
# Runs at import time — works for both gunicorn and direct python
|
| 255 |
+
warmup()
|
| 256 |
+
|
| 257 |
+
|
| 258 |
# ═══════════════════════════════════════════════
|
| 259 |
# ROUTES
|
| 260 |
# ═══════════════════════════════════════════════
|
|
|
|
| 308 |
d = request.get_json() or {}
|
| 309 |
if not d.get("url"):
|
| 310 |
return jsonify({"error": "url required"}), 400
|
|
|
|
| 311 |
raw = fetch_raw(d["url"], d.get("server_id"))
|
| 312 |
return jsonify(format_result(raw, include_raw_headers=d.get("raw_headers", False)))
|
| 313 |
|
|
|
|
| 317 |
d = request.get_json() or {}
|
| 318 |
if not d.get("url"):
|
| 319 |
return jsonify({"error": "url required"}), 400
|
|
|
|
| 320 |
sid = random.choice(S.servers)["id"] if S.servers else None
|
| 321 |
raw = fetch_raw(d["url"], sid)
|
| 322 |
return jsonify(format_result(raw, include_raw_headers=d.get("raw_headers", False)))
|
|
|
|
| 328 |
urls = d.get("urls", [])
|
| 329 |
if not urls:
|
| 330 |
return jsonify({"error": "urls required"}), 400
|
|
|
|
| 331 |
include_raw = d.get("raw_headers", False)
|
| 332 |
results = []
|
| 333 |
for u in urls:
|
| 334 |
raw = fetch_raw(u)
|
| 335 |
results.append(format_result(raw, include_raw_headers=include_raw))
|
| 336 |
time.sleep(0.5)
|
|
|
|
| 337 |
return jsonify({
|
| 338 |
"count": len(results),
|
| 339 |
"success_count": sum(1 for r in results if r.get("success")),
|
|
|
|
| 342 |
|
| 343 |
|
| 344 |
if __name__ == "__main__":
|
| 345 |
+
app.run(host="0.0.0.0", port=7860)
|