CroxyProxyAPI2

Sleeping

App Files Files Community

MB-IDK committed on Apr 12

Commit

f5e3b14

verified ·

1 Parent(s): c1ec4b6

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -47

app.py CHANGED Viewed

@@ -21,7 +21,6 @@ warnings.filterwarnings("ignore")
 BASE = "https://www.croxyproxy.com"
 app = Flask(__name__)
-# ── Headers à garder dans la réponse (tout le reste = poubelle) ──
 KEEP_HEADERS = {
     "content-type", "content-length", "content-encoding",
     "server", "date", "connection",
@@ -32,7 +31,6 @@ KEEP_HEADERS = {
     "x-request-id", "location", "retry-after",
 }
-# ── Headers toujours exclus (bruit du proxy) ──
 DROP_HEADERS = {
     "set-cookie", "__cph", "__cpc",
     "content-security-policy", "strict-transport-security",
@@ -59,10 +57,8 @@ def dec(e):
 def filter_headers(raw_headers, include_all=False):
-    """Filtre les headers : garde uniquement les utiles."""
     if include_all:
         return dict(raw_headers)
     cleaned = {}
     for k, v in raw_headers.items():
         kl = k.lower()
@@ -74,54 +70,42 @@ def filter_headers(raw_headers, include_all=False):
 def parse_body(text, content_type=""):
-    """Parse le body en JSON si possible, sinon tronque le texte."""
     if not text:
         return None
-    # Tente JSON
     if "json" in content_type.lower() or text.strip().startswith(("{", "[")):
         try:
             return json.loads(text)
         except (json.JSONDecodeError, ValueError):
             pass
-    # HTML → tronqué
     if "html" in content_type.lower() or text.strip().startswith("<"):
         return {
             "_type": "html",
             "_length": len(text),
             "_preview": text[:300].strip() + ("..." if len(text) > 300 else ""),
         }
-    # Texte brut → tronqué si long
     if len(text) > 2000:
         return {
             "_type": "text",
             "_length": len(text),
             "_preview": text[:500].strip() + "...",
         }
     return text
 def extract_ip(url_str):
-    """Extrait l'IP d'une URL de serveur proxy."""
     return (url_str or "").replace("https://", "").replace("http://", "").split("/")[0]
 def format_result(raw, include_raw_headers=False):
-    """Formate proprement le résultat d'un fetch."""
     if not raw.get("success"):
         return {
             "success": False,
             "error": raw.get("error"),
             "server": raw.get("server"),
         }
     ct = ""
     if raw.get("headers"):
         ct = raw["headers"].get("Content-Type", raw["headers"].get("content-type", ""))
     result = {
         "success": True,
         "status": raw.get("status"),
@@ -130,35 +114,28 @@ def format_result(raw, include_raw_headers=False):
         "proxy": raw.get("proxy"),
         "servers_available": raw.get("servers_available"),
     }
-    # Headers filtrés
     if raw.get("headers"):
         result["headers"] = filter_headers(raw["headers"], include_all=include_raw_headers)
     return result
 def fetch_raw(url, sid=None):
-    """Fetch via CroxyProxy — retourne les données brutes."""
     sc = cloudscraper.create_scraper(
         browser={"browser": "chrome", "platform": "windows", "desktop": True}
     )
     S.stats["req"] += 1
     try:
-        # 1. GET / → csrf
         r1 = sc.get(BASE, timeout=30)
         if r1.status_code != 200:
             S.stats["fail"] += 1
             return {"success": False, "error": f"Homepage {r1.status_code}"}
         s1 = BeautifulSoup(r1.text, "lxml")
         ci = s1.find("input", {"name": "csrf"})
         if not ci:
             S.stats["fail"] += 1
             return {"success": False, "error": "No CSRF"}
-        # 2. POST /servers → selector page
         r2 = sc.post(
             f"{BASE}/servers",
             data={
@@ -174,34 +151,31 @@ def fetch_raw(url, sid=None):
             allow_redirects=True,
             timeout=30,
         )
         if r2.status_code != 200:
             S.stats["fail"] += 1
             return {"success": False, "error": f"Servers {r2.status_code}"}
         s2 = BeautifulSoup(r2.text, "lxml")
         sel = s2.find("script", {"id": "serverSelectorScript"})
         if not sel:
             S.stats["fail"] += 1
             return {"success": False, "error": "No selector"}
-        # 3. Parse servers + csrf2
         ss = [
             x for x in (dec(i) for i in json.loads(unescape(sel.get("data-ss", ""))))
             if x and x.get("id")
         ]
         csrf2 = unescape(sel.get("data-csrf", "")).strip('"')
         fo = unescape(sel.get("data-fo", "")).strip('"')
         if not ss:
             S.stats["fail"] += 1
             return {"success": False, "error": "No servers"}
-        # Mettre à jour le cache
         S.servers = ss
         S.last = datetime.now(timezone.utc).isoformat()
-        # Choisir le serveur
         ch = None
         if sid:
             ch = next((x for x in ss if x["id"] == sid), None)
@@ -209,8 +183,7 @@ def fetch_raw(url, sid=None):
             with S.lock:
                 ch = ss[S.idx % len(ss)]
                 S.idx += 1
-        # 4. POST /requests → 302
         r3 = sc.post(
             f"{BASE}/requests?fso=",
             data={
@@ -225,7 +198,7 @@ def fetch_raw(url, sid=None):
             allow_redirects=False,
             timeout=30,
         )
         loc = r3.headers.get("Location") or r3.headers.get("location")
         if not loc:
             S.stats["fail"] += 1
@@ -234,18 +207,16 @@ def fetch_raw(url, sid=None):
                 "error": f"No redirect ({r3.status_code})",
                 "server": ch.get("name"),
             }
-        # 5. GET redirect → data-r
         r4 = sc.get(loc, timeout=30, allow_redirects=True)
         dr = re.search(r'data-r="([^"]+)"', r4.text)
         if not dr:
             S.stats["fail"] += 1
             return {"success": False, "error": "No data-r", "server": ch.get("name")}
-        # 6. GET final
         final = base64.b64decode(dr.group(1)).decode()
         r5 = sc.get(final, timeout=30, allow_redirects=True)
         S.stats["ok"] += 1
         return {
             "success": True,
@@ -265,6 +236,25 @@ def fetch_raw(url, sid=None):
         return {"success": False, "error": str(e)}
 # ═══════════════════════════════════════════════
 #  ROUTES
 # ═══════════════════════════════════════════════
@@ -318,7 +308,6 @@ def proxy_fetch():
     d = request.get_json() or {}
     if not d.get("url"):
         return jsonify({"error": "url required"}), 400
     raw = fetch_raw(d["url"], d.get("server_id"))
     return jsonify(format_result(raw, include_raw_headers=d.get("raw_headers", False)))
@@ -328,7 +317,6 @@ def proxy_random():
     d = request.get_json() or {}
     if not d.get("url"):
         return jsonify({"error": "url required"}), 400
     sid = random.choice(S.servers)["id"] if S.servers else None
     raw = fetch_raw(d["url"], sid)
     return jsonify(format_result(raw, include_raw_headers=d.get("raw_headers", False)))
@@ -340,14 +328,12 @@ def proxy_batch():
     urls = d.get("urls", [])
     if not urls:
         return jsonify({"error": "urls required"}), 400
     include_raw = d.get("raw_headers", False)
     results = []
     for u in urls:
         raw = fetch_raw(u)
         results.append(format_result(raw, include_raw_headers=include_raw))
         time.sleep(0.5)
     return jsonify({
         "count": len(results),
         "success_count": sum(1 for r in results if r.get("success")),
@@ -356,4 +342,4 @@ def proxy_batch():
 if __name__ == "__main__":
-    app.run(host="0.0.0.0", port=7860)

 BASE = "https://www.croxyproxy.com"
 app = Flask(__name__)
 KEEP_HEADERS = {
     "content-type", "content-length", "content-encoding",
     "server", "date", "connection",
     "x-request-id", "location", "retry-after",
 }
 DROP_HEADERS = {
     "set-cookie", "__cph", "__cpc",
     "content-security-policy", "strict-transport-security",
 def filter_headers(raw_headers, include_all=False):
     if include_all:
         return dict(raw_headers)
     cleaned = {}
     for k, v in raw_headers.items():
         kl = k.lower()
 def parse_body(text, content_type=""):
     """Convert a response body into a compact, JSON-friendly value.

     Returns None for an empty body, the decoded object when the body parses
     as JSON, a preview dict for HTML (first 300 characters) or for other text
     longer than 2000 characters (first 500 characters), and the text itself
     in every remaining case.
     """
     if not text:
         return None
     kind = content_type.lower()
     size = len(text)
     def begins_with(prefixes):
         # Evaluated lazily so the content-type check can short-circuit first.
         return text.strip().startswith(prefixes)
     # JSON is attempted when the content type says so or the body looks like it.
     if "json" in kind or begins_with(("{", "[")):
         try:
             return json.loads(text)
         except (json.JSONDecodeError, ValueError):
             # Not valid JSON after all; fall through to the other formats.
             pass
     # HTML is never returned whole, only summarised.
     if "html" in kind or begins_with("<"):
         ellipsis = "..." if size > 300 else ""
         return {
             "_type": "html",
             "_length": size,
             "_preview": text[:300].strip() + ellipsis,
         }
     # Short plain text passes through untouched.
     if size <= 2000:
         return text
     return {
         "_type": "text",
         "_length": size,
         "_preview": text[:500].strip() + "...",
     }
 def extract_ip(url_str):
     """Return the host part (with port, if any) of a proxy server URL.

     Every "https://" and "http://" occurrence is dropped and whatever precedes
     the first "/" is kept; a falsy argument yields an empty string.
     """
     host = url_str or ""
     for scheme in ("https://", "http://"):
         host = host.replace(scheme, "")
     return host.partition("/")[0]
 def format_result(raw, include_raw_headers=False):
     if not raw.get("success"):
         return {
             "success": False,
             "error": raw.get("error"),
             "server": raw.get("server"),
         }
     ct = ""
     if raw.get("headers"):
         ct = raw["headers"].get("Content-Type", raw["headers"].get("content-type", ""))
     result = {
         "success": True,
         "status": raw.get("status"),
         "proxy": raw.get("proxy"),
         "servers_available": raw.get("servers_available"),
     }
     if raw.get("headers"):
         result["headers"] = filter_headers(raw["headers"], include_all=include_raw_headers)
     return result
 def fetch_raw(url, sid=None):
     sc = cloudscraper.create_scraper(
         browser={"browser": "chrome", "platform": "windows", "desktop": True}
     )
     S.stats["req"] += 1
     try:
         r1 = sc.get(BASE, timeout=30)
         if r1.status_code != 200:
             S.stats["fail"] += 1
             return {"success": False, "error": f"Homepage {r1.status_code}"}
         s1 = BeautifulSoup(r1.text, "lxml")
         ci = s1.find("input", {"name": "csrf"})
         if not ci:
             S.stats["fail"] += 1
             return {"success": False, "error": "No CSRF"}
         r2 = sc.post(
             f"{BASE}/servers",
             data={
             allow_redirects=True,
             timeout=30,
         )
         if r2.status_code != 200:
             S.stats["fail"] += 1
             return {"success": False, "error": f"Servers {r2.status_code}"}
         s2 = BeautifulSoup(r2.text, "lxml")
         sel = s2.find("script", {"id": "serverSelectorScript"})
         if not sel:
             S.stats["fail"] += 1
             return {"success": False, "error": "No selector"}
         ss = [
             x for x in (dec(i) for i in json.loads(unescape(sel.get("data-ss", ""))))
             if x and x.get("id")
         ]
         csrf2 = unescape(sel.get("data-csrf", "")).strip('"')
         fo = unescape(sel.get("data-fo", "")).strip('"')
         if not ss:
             S.stats["fail"] += 1
             return {"success": False, "error": "No servers"}
         S.servers = ss
         S.last = datetime.now(timezone.utc).isoformat()
         ch = None
         if sid:
             ch = next((x for x in ss if x["id"] == sid), None)
             with S.lock:
                 ch = ss[S.idx % len(ss)]
                 S.idx += 1
         r3 = sc.post(
             f"{BASE}/requests?fso=",
             data={
             allow_redirects=False,
             timeout=30,
         )
         loc = r3.headers.get("Location") or r3.headers.get("location")
         if not loc:
             S.stats["fail"] += 1
                 "error": f"No redirect ({r3.status_code})",
                 "server": ch.get("name"),
             }
         r4 = sc.get(loc, timeout=30, allow_redirects=True)
         dr = re.search(r'data-r="([^"]+)"', r4.text)
         if not dr:
             S.stats["fail"] += 1
             return {"success": False, "error": "No data-r", "server": ch.get("name")}
         final = base64.b64decode(dr.group(1)).decode()
         r5 = sc.get(final, timeout=30, allow_redirects=True)
         S.stats["ok"] += 1
         return {
             "success": True,
         return {"success": False, "error": str(e)}
+def warmup():
+    """Populate server list on startup."""
+    print("Warming up — populating server list...")
+    result = fetch_raw("https://httpbin.org/ip")
+    if result.get("success"):
+        print(f"✓ {len(S.servers)} servers loaded")
+    else:
+        print(f"✗ Warm-up failed: {result.get('error')}")
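+# Gunicorn hook — runs in each worker after fork (NOTE: gunicorn reads post_fork from its config file; confirm it is registered there, otherwise only the import-time call below runs)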
+def post_fork(server, worker):
+    warmup()
+# Runs at import time — works for both gunicorn and direct python
+warmup()
 # ═══════════════════════════════════════════════
 #  ROUTES
 # ═══════════════════════════════════════════════
     d = request.get_json() or {}
     if not d.get("url"):
         return jsonify({"error": "url required"}), 400
     raw = fetch_raw(d["url"], d.get("server_id"))
     return jsonify(format_result(raw, include_raw_headers=d.get("raw_headers", False)))
     d = request.get_json() or {}
     if not d.get("url"):
         return jsonify({"error": "url required"}), 400
     sid = random.choice(S.servers)["id"] if S.servers else None
     raw = fetch_raw(d["url"], sid)
     return jsonify(format_result(raw, include_raw_headers=d.get("raw_headers", False)))
     urls = d.get("urls", [])
     if not urls:
         return jsonify({"error": "urls required"}), 400
     include_raw = d.get("raw_headers", False)
     results = []
     for u in urls:
         raw = fetch_raw(u)
         results.append(format_result(raw, include_raw_headers=include_raw))
         time.sleep(0.5)
     return jsonify({
         "count": len(results),
         "success_count": sum(1 for r in results if r.get("success")),
 if __name__ == "__main__":
+    app.run(host="0.0.0.0", port=7860)