Update app.py

app.py CHANGED

@@ -13,28 +13,11 @@ from cachetools import TTLCache
 cache = TTLCache(maxsize=1000, ttl=3600)
 
 async def fetch_url(url, session, max_retries=3, timeout=180):
-    total_content = b""
     for attempt in range(max_retries):
         try:
             async with session.get(url, timeout=ClientTimeout(total=timeout)) as response:
                 response.raise_for_status()
-
-                chunk = await response.content.read(8192)  # read 8 KB at a time
-                if not chunk:
-                    break
-                total_content += chunk
-
-                if response.content_length is not None and len(total_content) < response.content_length:
-                    print(f"Warning: Received content length ({len(total_content)}) is less than expected ({response.content_length})")
-                    if attempt == max_retries - 1:
-                        print("This was the last attempt. Returning partial content.")
-                        return total_content.decode('utf-8', errors='ignore')
-                    else:
-                        print("Retrying...")
-                        await asyncio.sleep(5)
-                        continue
-
-                return total_content.decode('utf-8', errors='ignore')
+                return await response.text()
         except asyncio.TimeoutError:
             print(f"Attempt {attempt + 1} timed out after {timeout} seconds", flush=True)
         except aiohttp.ClientError as e:
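Note on this hunk: the removed code issued only a single response.content.read(8192) per attempt, so any body larger than 8 KB was truncated and pushed into the partial-content retry path; response.text() instead buffers the whole body and decodes it with the charset the server reports (falling back to UTF-8). A minimal sketch of the function as it reads after the change — the ClientError handler body and the final fall-through return are outside the hunk, so both are assumptions here:

import asyncio
import aiohttp
from aiohttp import ClientTimeout

async def fetch_url(url, session, max_retries=3, timeout=180):
    for attempt in range(max_retries):
        try:
            async with session.get(url, timeout=ClientTimeout(total=timeout)) as response:
                response.raise_for_status()
                # Reads the complete body (no 8 KB cap) and decodes it
                # using the response charset, with a UTF-8 fallback.
                return await response.text()
        except asyncio.TimeoutError:
            print(f"Attempt {attempt + 1} timed out after {timeout} seconds", flush=True)
        except aiohttp.ClientError as e:
            # Assumed handler body: the hunk ends at the except line.
            print(f"Attempt {attempt + 1} failed: {e}", flush=True)
    return None  # assumed: callers treat None as "fetch failed"

Buffering via text() trades the streaming behavior for simplicity, which is reasonable for subscription files of this size.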
@@ -55,8 +38,6 @@ async def extract_and_transform_proxies(input_text):
         data = yaml.safe_load(input_text)
         if isinstance(data, dict) and 'proxies' in data:
             proxies_list = data['proxies']
-        elif isinstance(data, list):
-            proxies_list = data
         else:
             # If the input is not in the expected format, try to extract the proxies section
             proxies_match = re.search(r'proxies:\s*\n((?:[-\s]*{.*\n?)*)', input_text, re.MULTILINE | re.DOTALL)
@@ -64,10 +45,7 @@ async def extract_and_transform_proxies(input_text):
                 proxies_text = proxies_match.group(1)
                 proxies_list = yaml.safe_load(proxies_text)
             else:
-
-                proxy_pattern = r'{[^}]+}'
-                possible_proxies = re.findall(proxy_pattern, input_text)
-                proxies_list = [yaml.safe_load(proxy) for proxy in possible_proxies]
+                return "No valid proxy configuration found"
     except yaml.YAMLError as e:
         return f"YAML parse error: {str(e)}"
 
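Taken together, the two hunks above tighten extract_and_transform_proxies: a bare YAML list at the top level is no longer accepted, the brace-scraping fallback is gone, and input matching neither the proxies: mapping nor the regex now fails fast with an error string. A condensed, self-contained sketch of the resulting parse step — resolve_proxies_list is an illustrative name, and the if proxies_match: branch sits in context the hunks do not show, so its exact form is assumed:

import re
import yaml

def resolve_proxies_list(input_text):
    # Illustrative standalone version; the real logic lives inside
    # extract_and_transform_proxies.
    try:
        data = yaml.safe_load(input_text)
        if isinstance(data, dict) and 'proxies' in data:
            return data['proxies']
        # If the input is not in the expected format, try to extract the proxies section
        proxies_match = re.search(r'proxies:\s*\n((?:[-\s]*{.*\n?)*)', input_text, re.MULTILINE | re.DOTALL)
        if proxies_match:  # assumed context line between the two hunks
            return yaml.safe_load(proxies_match.group(1))
        return "No valid proxy configuration found"
    except yaml.YAMLError as e:
        return f"YAML parse error: {str(e)}"

Returning an error string from the same channel as the transformed result means callers must distinguish the two by content, a design choice the surrounding handler presumably relies on.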
@@ -167,6 +145,9 @@ async def handle_request(request):
             print(f"URL content length: {len(input_text)}", flush=True)
             result = await extract_and_transform_proxies(input_text)
             print(f"Transformed result length: {len(result)}", flush=True)
+            print("First 1000 characters of transformed result:")
+            print(result[:1000])
+            print("------------------------")
 
             # Store the result in the cache
             cache[url] = result
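The three added prints are debug logging only: they dump the first 1000 characters of the transformed output so a malformed upstream payload shows up in the logs. The result is then cached per URL in the module-level TTLCache from the first hunk, so repeat requests within an hour skip the fetch entirely. A small sketch of those cache semantics, assuming no other code touches the cache:

from cachetools import TTLCache

cache = TTLCache(maxsize=1000, ttl=3600)  # at most 1000 URLs; each entry lives one hour

cache["https://example.com/sub"] = "transformed result"  # stored after a successful transform
hit = cache.get("https://example.com/sub")               # same URL within the hour: served from cache
# Once ttl seconds elapse the entry silently expires and .get() returns None,
# so the handler falls through to fetch_url again.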