Spaces:
Paused
Paused
github-actions[bot]
committed on
Commit
·
be63dad
1
Parent(s):
799a782
Update from GitHub Actions
Browse files
main.py
CHANGED
|
@@ -6,6 +6,8 @@ from fastapi.responses import HTMLResponse, StreamingResponse
|
|
| 6 |
from typing import Optional
|
| 7 |
import uvicorn
|
| 8 |
import asyncio
|
|
|
|
|
|
|
| 9 |
|
| 10 |
app = FastAPI(
|
| 11 |
title="ScraperProxy",
|
|
@@ -391,11 +393,37 @@ async def proxy(request: Request):
|
|
| 391 |
'https': proxy
|
| 392 |
}
|
| 393 |
# 测试代理是否生效
|
| 394 |
-
response = scraper.get('https://httpbin.org/ip')
|
| 395 |
-
print(response.text)
|
| 396 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 397 |
|
| 398 |
-
|
| 399 |
# 获取请求体
|
| 400 |
body = await request.body()
|
| 401 |
|
|
@@ -427,10 +455,11 @@ async def proxy(request: Request):
|
|
| 427 |
headers.pop("content-length", None)
|
| 428 |
headers.pop("user-agent", None)
|
| 429 |
print(f"{headers}")
|
|
|
|
| 430 |
# 构建请求参数
|
| 431 |
request_kwargs = {
|
| 432 |
"url": target_url,
|
| 433 |
-
"headers":
|
| 434 |
"params": params,
|
| 435 |
"stream": stream_request # 设置stream参数
|
| 436 |
}
|
|
|
|
| 6 |
from typing import Optional
|
| 7 |
import uvicorn
|
| 8 |
import asyncio
|
| 9 |
+
from urllib.parse import urlparse
|
| 10 |
+
import time
|
| 11 |
|
| 12 |
app = FastAPI(
|
| 13 |
title="ScraperProxy",
|
|
|
|
| 393 |
'https': proxy
|
| 394 |
}
|
| 395 |
# 测试代理是否生效
|
| 396 |
+
# response = scraper.get('https://httpbin.org/ip')
|
| 397 |
+
# print(response.text)
|
| 398 |
|
| 399 |
+
# 获取home_url
|
| 400 |
+
home_url = request.query_params.get("home")
|
| 401 |
+
if not home_url:
|
| 402 |
+
# 从target_url中提取home_url
|
| 403 |
+
parsed_url = urlparse(target_url)
|
| 404 |
+
home_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
|
| 405 |
+
|
| 406 |
+
# 重试获取主页响应
|
| 407 |
+
max_retries = 5
|
| 408 |
+
retry_delay = 1 # 重试间隔秒数
|
| 409 |
+
home_response = None
|
| 410 |
+
|
| 411 |
+
for attempt in range(max_retries):
|
| 412 |
+
try:
|
| 413 |
+
home_response = scraper.get(home_url, headers={"sec-fetch-dest": "document"})
|
| 414 |
+
print(f"主页响应 (尝试 {attempt + 1}): {home_response.status_code}")
|
| 415 |
+
|
| 416 |
+
if home_response.status_code == 200:
|
| 417 |
+
break
|
| 418 |
+
|
| 419 |
+
if attempt < max_retries - 1: # 如果不是最后一次尝试
|
| 420 |
+
time.sleep(retry_delay)
|
| 421 |
+
|
| 422 |
+
except Exception as e:
|
| 423 |
+
print(f"主页请求失败 (尝试 {attempt + 1}): {str(e)}")
|
| 424 |
+
if attempt < max_retries - 1:
|
| 425 |
+
time.sleep(retry_delay)
|
| 426 |
|
|
|
|
| 427 |
# 获取请求体
|
| 428 |
body = await request.body()
|
| 429 |
|
|
|
|
| 455 |
headers.pop("content-length", None)
|
| 456 |
headers.pop("user-agent", None)
|
| 457 |
print(f"{headers}")
|
| 458 |
+
|
| 459 |
# 构建请求参数
|
| 460 |
request_kwargs = {
|
| 461 |
"url": target_url,
|
| 462 |
+
"headers": {"sec-fetch-dest": "document"},
|
| 463 |
"params": params,
|
| 464 |
"stream": stream_request # 设置stream参数
|
| 465 |
}
|