MB-IDK committed on
Commit
f5e3b14
·
verified ·
1 Parent(s): c1ec4b6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -47
app.py CHANGED
@@ -21,7 +21,6 @@ warnings.filterwarnings("ignore")
21
  BASE = "https://www.croxyproxy.com"
22
  app = Flask(__name__)
23
 
24
- # ── Headers à garder dans la réponse (tout le reste = poubelle) ──
25
  KEEP_HEADERS = {
26
  "content-type", "content-length", "content-encoding",
27
  "server", "date", "connection",
@@ -32,7 +31,6 @@ KEEP_HEADERS = {
32
  "x-request-id", "location", "retry-after",
33
  }
34
 
35
- # ── Headers toujours exclus (bruit du proxy) ──
36
  DROP_HEADERS = {
37
  "set-cookie", "__cph", "__cpc",
38
  "content-security-policy", "strict-transport-security",
@@ -59,10 +57,8 @@ def dec(e):
59
 
60
 
61
  def filter_headers(raw_headers, include_all=False):
62
- """Filtre les headers : garde uniquement les utiles."""
63
  if include_all:
64
  return dict(raw_headers)
65
-
66
  cleaned = {}
67
  for k, v in raw_headers.items():
68
  kl = k.lower()
@@ -74,54 +70,42 @@ def filter_headers(raw_headers, include_all=False):
74
 
75
 
76
  def parse_body(text, content_type=""):
77
- """Parse le body en JSON si possible, sinon tronque le texte."""
78
  if not text:
79
  return None
80
-
81
- # Tente JSON
82
  if "json" in content_type.lower() or text.strip().startswith(("{", "[")):
83
  try:
84
  return json.loads(text)
85
  except (json.JSONDecodeError, ValueError):
86
  pass
87
-
88
- # HTML → tronqué
89
  if "html" in content_type.lower() or text.strip().startswith("<"):
90
  return {
91
  "_type": "html",
92
  "_length": len(text),
93
  "_preview": text[:300].strip() + ("..." if len(text) > 300 else ""),
94
  }
95
-
96
- # Texte brut → tronqué si long
97
  if len(text) > 2000:
98
  return {
99
  "_type": "text",
100
  "_length": len(text),
101
  "_preview": text[:500].strip() + "...",
102
  }
103
-
104
  return text
105
 
106
 
107
  def extract_ip(url_str):
108
- """Extrait l'IP d'une URL de serveur proxy."""
109
  return (url_str or "").replace("https://", "").replace("http://", "").split("/")[0]
110
 
111
 
112
  def format_result(raw, include_raw_headers=False):
113
- """Formate proprement le résultat d'un fetch."""
114
  if not raw.get("success"):
115
  return {
116
  "success": False,
117
  "error": raw.get("error"),
118
  "server": raw.get("server"),
119
  }
120
-
121
  ct = ""
122
  if raw.get("headers"):
123
  ct = raw["headers"].get("Content-Type", raw["headers"].get("content-type", ""))
124
-
125
  result = {
126
  "success": True,
127
  "status": raw.get("status"),
@@ -130,35 +114,28 @@ def format_result(raw, include_raw_headers=False):
130
  "proxy": raw.get("proxy"),
131
  "servers_available": raw.get("servers_available"),
132
  }
133
-
134
- # Headers filtrés
135
  if raw.get("headers"):
136
  result["headers"] = filter_headers(raw["headers"], include_all=include_raw_headers)
137
-
138
  return result
139
 
140
 
141
  def fetch_raw(url, sid=None):
142
- """Fetch via CroxyProxy — retourne les données brutes."""
143
  sc = cloudscraper.create_scraper(
144
  browser={"browser": "chrome", "platform": "windows", "desktop": True}
145
  )
146
  S.stats["req"] += 1
147
-
148
  try:
149
- # 1. GET / → csrf
150
  r1 = sc.get(BASE, timeout=30)
151
  if r1.status_code != 200:
152
  S.stats["fail"] += 1
153
  return {"success": False, "error": f"Homepage {r1.status_code}"}
154
-
155
  s1 = BeautifulSoup(r1.text, "lxml")
156
  ci = s1.find("input", {"name": "csrf"})
157
  if not ci:
158
  S.stats["fail"] += 1
159
  return {"success": False, "error": "No CSRF"}
160
-
161
- # 2. POST /servers → selector page
162
  r2 = sc.post(
163
  f"{BASE}/servers",
164
  data={
@@ -174,34 +151,31 @@ def fetch_raw(url, sid=None):
174
  allow_redirects=True,
175
  timeout=30,
176
  )
177
-
178
  if r2.status_code != 200:
179
  S.stats["fail"] += 1
180
  return {"success": False, "error": f"Servers {r2.status_code}"}
181
-
182
  s2 = BeautifulSoup(r2.text, "lxml")
183
  sel = s2.find("script", {"id": "serverSelectorScript"})
184
  if not sel:
185
  S.stats["fail"] += 1
186
  return {"success": False, "error": "No selector"}
187
-
188
- # 3. Parse servers + csrf2
189
  ss = [
190
  x for x in (dec(i) for i in json.loads(unescape(sel.get("data-ss", ""))))
191
  if x and x.get("id")
192
  ]
193
  csrf2 = unescape(sel.get("data-csrf", "")).strip('"')
194
  fo = unescape(sel.get("data-fo", "")).strip('"')
195
-
196
  if not ss:
197
  S.stats["fail"] += 1
198
  return {"success": False, "error": "No servers"}
199
-
200
- # Mettre à jour le cache
201
  S.servers = ss
202
  S.last = datetime.now(timezone.utc).isoformat()
203
-
204
- # Choisir le serveur
205
  ch = None
206
  if sid:
207
  ch = next((x for x in ss if x["id"] == sid), None)
@@ -209,8 +183,7 @@ def fetch_raw(url, sid=None):
209
  with S.lock:
210
  ch = ss[S.idx % len(ss)]
211
  S.idx += 1
212
-
213
- # 4. POST /requests → 302
214
  r3 = sc.post(
215
  f"{BASE}/requests?fso=",
216
  data={
@@ -225,7 +198,7 @@ def fetch_raw(url, sid=None):
225
  allow_redirects=False,
226
  timeout=30,
227
  )
228
-
229
  loc = r3.headers.get("Location") or r3.headers.get("location")
230
  if not loc:
231
  S.stats["fail"] += 1
@@ -234,18 +207,16 @@ def fetch_raw(url, sid=None):
234
  "error": f"No redirect ({r3.status_code})",
235
  "server": ch.get("name"),
236
  }
237
-
238
- # 5. GET redirect → data-r
239
  r4 = sc.get(loc, timeout=30, allow_redirects=True)
240
  dr = re.search(r'data-r="([^"]+)"', r4.text)
241
  if not dr:
242
  S.stats["fail"] += 1
243
  return {"success": False, "error": "No data-r", "server": ch.get("name")}
244
-
245
- # 6. GET final
246
  final = base64.b64decode(dr.group(1)).decode()
247
  r5 = sc.get(final, timeout=30, allow_redirects=True)
248
-
249
  S.stats["ok"] += 1
250
  return {
251
  "success": True,
@@ -265,6 +236,25 @@ def fetch_raw(url, sid=None):
265
  return {"success": False, "error": str(e)}
266
 
267
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
268
  # ═══════════════════════════════════════════════
269
  # ROUTES
270
  # ═══════════════════════════════════════════════
@@ -318,7 +308,6 @@ def proxy_fetch():
318
  d = request.get_json() or {}
319
  if not d.get("url"):
320
  return jsonify({"error": "url required"}), 400
321
-
322
  raw = fetch_raw(d["url"], d.get("server_id"))
323
  return jsonify(format_result(raw, include_raw_headers=d.get("raw_headers", False)))
324
 
@@ -328,7 +317,6 @@ def proxy_random():
328
  d = request.get_json() or {}
329
  if not d.get("url"):
330
  return jsonify({"error": "url required"}), 400
331
-
332
  sid = random.choice(S.servers)["id"] if S.servers else None
333
  raw = fetch_raw(d["url"], sid)
334
  return jsonify(format_result(raw, include_raw_headers=d.get("raw_headers", False)))
@@ -340,14 +328,12 @@ def proxy_batch():
340
  urls = d.get("urls", [])
341
  if not urls:
342
  return jsonify({"error": "urls required"}), 400
343
-
344
  include_raw = d.get("raw_headers", False)
345
  results = []
346
  for u in urls:
347
  raw = fetch_raw(u)
348
  results.append(format_result(raw, include_raw_headers=include_raw))
349
  time.sleep(0.5)
350
-
351
  return jsonify({
352
  "count": len(results),
353
  "success_count": sum(1 for r in results if r.get("success")),
@@ -356,4 +342,4 @@ def proxy_batch():
356
 
357
 
358
  if __name__ == "__main__":
359
- app.run(host="0.0.0.0", port=7860)
 
21
  BASE = "https://www.croxyproxy.com"
22
  app = Flask(__name__)
23
 
 
24
  KEEP_HEADERS = {
25
  "content-type", "content-length", "content-encoding",
26
  "server", "date", "connection",
 
31
  "x-request-id", "location", "retry-after",
32
  }
33
 
 
34
  DROP_HEADERS = {
35
  "set-cookie", "__cph", "__cpc",
36
  "content-security-policy", "strict-transport-security",
 
57
 
58
 
59
  def filter_headers(raw_headers, include_all=False):
 
60
  if include_all:
61
  return dict(raw_headers)
 
62
  cleaned = {}
63
  for k, v in raw_headers.items():
64
  kl = k.lower()
 
70
 
71
 
72
  def parse_body(text, content_type=""):
 
73
  if not text:
74
  return None
 
 
75
  if "json" in content_type.lower() or text.strip().startswith(("{", "[")):
76
  try:
77
  return json.loads(text)
78
  except (json.JSONDecodeError, ValueError):
79
  pass
 
 
80
  if "html" in content_type.lower() or text.strip().startswith("<"):
81
  return {
82
  "_type": "html",
83
  "_length": len(text),
84
  "_preview": text[:300].strip() + ("..." if len(text) > 300 else ""),
85
  }
 
 
86
  if len(text) > 2000:
87
  return {
88
  "_type": "text",
89
  "_length": len(text),
90
  "_preview": text[:500].strip() + "...",
91
  }
 
92
  return text
93
 
94
 
95
  def extract_ip(url_str):
 
96
  return (url_str or "").replace("https://", "").replace("http://", "").split("/")[0]
97
 
98
 
99
  def format_result(raw, include_raw_headers=False):
 
100
  if not raw.get("success"):
101
  return {
102
  "success": False,
103
  "error": raw.get("error"),
104
  "server": raw.get("server"),
105
  }
 
106
  ct = ""
107
  if raw.get("headers"):
108
  ct = raw["headers"].get("Content-Type", raw["headers"].get("content-type", ""))
 
109
  result = {
110
  "success": True,
111
  "status": raw.get("status"),
 
114
  "proxy": raw.get("proxy"),
115
  "servers_available": raw.get("servers_available"),
116
  }
 
 
117
  if raw.get("headers"):
118
  result["headers"] = filter_headers(raw["headers"], include_all=include_raw_headers)
 
119
  return result
120
 
121
 
122
  def fetch_raw(url, sid=None):
 
123
  sc = cloudscraper.create_scraper(
124
  browser={"browser": "chrome", "platform": "windows", "desktop": True}
125
  )
126
  S.stats["req"] += 1
 
127
  try:
 
128
  r1 = sc.get(BASE, timeout=30)
129
  if r1.status_code != 200:
130
  S.stats["fail"] += 1
131
  return {"success": False, "error": f"Homepage {r1.status_code}"}
132
+
133
  s1 = BeautifulSoup(r1.text, "lxml")
134
  ci = s1.find("input", {"name": "csrf"})
135
  if not ci:
136
  S.stats["fail"] += 1
137
  return {"success": False, "error": "No CSRF"}
138
+
 
139
  r2 = sc.post(
140
  f"{BASE}/servers",
141
  data={
 
151
  allow_redirects=True,
152
  timeout=30,
153
  )
154
+
155
  if r2.status_code != 200:
156
  S.stats["fail"] += 1
157
  return {"success": False, "error": f"Servers {r2.status_code}"}
158
+
159
  s2 = BeautifulSoup(r2.text, "lxml")
160
  sel = s2.find("script", {"id": "serverSelectorScript"})
161
  if not sel:
162
  S.stats["fail"] += 1
163
  return {"success": False, "error": "No selector"}
164
+
 
165
  ss = [
166
  x for x in (dec(i) for i in json.loads(unescape(sel.get("data-ss", ""))))
167
  if x and x.get("id")
168
  ]
169
  csrf2 = unescape(sel.get("data-csrf", "")).strip('"')
170
  fo = unescape(sel.get("data-fo", "")).strip('"')
171
+
172
  if not ss:
173
  S.stats["fail"] += 1
174
  return {"success": False, "error": "No servers"}
175
+
 
176
  S.servers = ss
177
  S.last = datetime.now(timezone.utc).isoformat()
178
+
 
179
  ch = None
180
  if sid:
181
  ch = next((x for x in ss if x["id"] == sid), None)
 
183
  with S.lock:
184
  ch = ss[S.idx % len(ss)]
185
  S.idx += 1
186
+
 
187
  r3 = sc.post(
188
  f"{BASE}/requests?fso=",
189
  data={
 
198
  allow_redirects=False,
199
  timeout=30,
200
  )
201
+
202
  loc = r3.headers.get("Location") or r3.headers.get("location")
203
  if not loc:
204
  S.stats["fail"] += 1
 
207
  "error": f"No redirect ({r3.status_code})",
208
  "server": ch.get("name"),
209
  }
210
+
 
211
  r4 = sc.get(loc, timeout=30, allow_redirects=True)
212
  dr = re.search(r'data-r="([^"]+)"', r4.text)
213
  if not dr:
214
  S.stats["fail"] += 1
215
  return {"success": False, "error": "No data-r", "server": ch.get("name")}
216
+
 
217
  final = base64.b64decode(dr.group(1)).decode()
218
  r5 = sc.get(final, timeout=30, allow_redirects=True)
219
+
220
  S.stats["ok"] += 1
221
  return {
222
  "success": True,
 
236
  return {"success": False, "error": str(e)}
237
 
238
 
239
+ def warmup():
240
+ """Populate server list on startup."""
241
+ print("Warming up — populating server list...")
242
+ result = fetch_raw("https://httpbin.org/ip")
243
+ if result.get("success"):
244
+ print(f"✓ {len(S.servers)} servers loaded")
245
+ else:
246
+ print(f"✗ Warm-up failed: {result.get('error')}")
247
+
248
+
249
+ # Gunicorn hook — runs in each worker after fork
250
+ def post_fork(server, worker):
251
+ warmup()
252
+
253
+
254
+ # Runs at import time — works for both gunicorn and direct python
255
+ warmup()
256
+
257
+
258
  # ═══════════════════════════════════════════════
259
  # ROUTES
260
  # ═══════════════════════════════════════════════
 
308
  d = request.get_json() or {}
309
  if not d.get("url"):
310
  return jsonify({"error": "url required"}), 400
 
311
  raw = fetch_raw(d["url"], d.get("server_id"))
312
  return jsonify(format_result(raw, include_raw_headers=d.get("raw_headers", False)))
313
 
 
317
  d = request.get_json() or {}
318
  if not d.get("url"):
319
  return jsonify({"error": "url required"}), 400
 
320
  sid = random.choice(S.servers)["id"] if S.servers else None
321
  raw = fetch_raw(d["url"], sid)
322
  return jsonify(format_result(raw, include_raw_headers=d.get("raw_headers", False)))
 
328
  urls = d.get("urls", [])
329
  if not urls:
330
  return jsonify({"error": "urls required"}), 400
 
331
  include_raw = d.get("raw_headers", False)
332
  results = []
333
  for u in urls:
334
  raw = fetch_raw(u)
335
  results.append(format_result(raw, include_raw_headers=include_raw))
336
  time.sleep(0.5)
 
337
  return jsonify({
338
  "count": len(results),
339
  "success_count": sum(1 for r in results if r.get("success")),
 
342
 
343
 
344
  if __name__ == "__main__":
345
+ app.run(host="0.0.0.0", port=7860)