Update app.py
app.py CHANGED
@@ -1,7 +1,8 @@
-from asyncio import Queue, create_task
+from asyncio import Queue, create_task, gather, sleep
 from contextlib import asynccontextmanager
 from json import dumps, loads
 from pathlib import Path
+from typing import Literal
 
 from apscheduler.schedulers.asyncio import AsyncIOScheduler
 from fastapi import FastAPI, HTTPException
@@ -15,11 +16,17 @@ try:
 except:
     workdir = Path.cwd().parent
 
+is_first_run = True
+
+http_collected_json = workdir / 'http_proxies.json'
+https_collected_json = workdir / 'https_proxies.json'
+socks5_collected_json = workdir / 'socks5_proxies.json'
 collected_json = workdir / 'proxies.json'
+
 countries_list = ['US', 'CA', 'FR', 'FI', 'HR', 'ME', 'CH', 'SE', 'EE', 'DE', 'GB', 'IT', 'NL', 'PL', 'CZ', 'RS', 'RO', 'MD', 'AT', 'BE', 'BG', 'HU', 'DK', 'IS', 'KZ', 'LV', 'LT', 'LU', 'NO', 'PT', 'SK', 'SI']
 
 
-def create_json_from_proxies(proxy_lines, filename):
+def create_json_from_proxies(proxy_lines: list[str], filename: Path):
     countries = set()
     proxies = []
 
@@ -45,7 +52,7 @@ def create_json_from_proxies(proxy_lines, filename):
     return filename
 
 
-async def collect_proxies(proxies_queue):
+async def collect_proxies(proxies_queue: Queue):
     proxies_list = []
     while True:
         proxy = await proxies_queue.get()
@@ -56,13 +63,58 @@ async def collect_proxies(proxies_queue):
     return proxies_list
 
 
-async def
-
+async def sort_proxies_and_merge(files: list[Path], output_file: Path):
+    all_countries = set()
+    proxies_by_type = {}
+    for file in files:
+        if file.is_file() and file.stat().st_size > 0:
+            data = loads(file.read_text(encoding='utf-8'))
+            proxies = data.get('proxies')
+            if proxies:
+                first_proxy = proxies[0] if proxies else None
+                proxy_type = first_proxy.get('protocol').lower() if first_proxy and first_proxy.get('protocol') else None
+                if proxy_type:
+                    sorted_proxies = sorted(proxies, key=lambda x: x.get('ping'))
+                    proxies_by_type[proxy_type] = {
+                        'countries': list(set(proxy.get('country') for proxy in sorted_proxies if proxy.get('country'))),
+                        'proxies': sorted_proxies
+                    }
+                    all_countries.update(proxies_by_type[proxy_type]["countries"])
+    all_countries = sorted(all_countries)
+    merged_data = {'countries': all_countries, 'proxies_by_type': proxies_by_type}
+    output_file.write_text(dumps(merged_data, indent=4))
+    return output_file
+
+
+async def stop_broker_after_timeout(broker: Broker, timeout_minutes: int):
+    await sleep(timeout_minutes * 60)
+    try:
+        broker.stop()
+    except:
+        pass
+
+
+async def find_proxies_by_type(proxy_type: Literal['HTTP', 'HTTPS', 'SOCKS5'], output_json_file: Path, timeout_minutes: int = 50):
+    output_json_file.write_text(dumps({'countries': None, 'proxies': []}, indent=4))
     proxies_queue = Queue()
-    broker = Broker(proxies_queue, timeout=
-
+    broker = Broker(proxies_queue, timeout=1, max_conn=200, max_tries=2, verify_ssl=True if proxy_type != 'HTTPS' else False)
+    stop_task = create_task(stop_broker_after_timeout(broker, timeout_minutes))
+    await broker.find(types=[proxy_type], countries=countries_list, limit=0)
+    await stop_task
     proxies_list = await collect_proxies(proxies_queue)
-    return create_json_from_proxies(proxies_list,
+    return create_json_from_proxies(proxies_list, output_json_file)
+
+
+async def find_proxies():
+    global is_first_run
+    timeout_minutes = 5 if is_first_run else 50
+    results = await gather(
+        find_proxies_by_type('HTTP', http_collected_json, timeout_minutes),
+        find_proxies_by_type('HTTPS', https_collected_json, timeout_minutes),
+        find_proxies_by_type('SOCKS5', socks5_collected_json, timeout_minutes)
+    )
+    await sort_proxies_and_merge(list(results), collected_json)
+    is_first_run = False
 
 
 scheduler.add_job(find_proxies, 'interval', max_instances=1, minutes=60)
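The hunk above is the core of the change: find_proxies_by_type() starts a side task that calls broker.stop() after a deadline so that await broker.find(...) returns, and find_proxies() runs the three collectors concurrently with gather(). Below is a minimal, self-contained sketch of that stop-after-timeout pattern, with a dummy producer standing in for proxybroker's Broker; every name in the sketch is illustrative, and the sentinel-based shutdown is an assumption about how collect_proxies() terminates.

import asyncio

async def producer(queue: asyncio.Queue, stop: asyncio.Event):
    # Stands in for Broker.find(): emit items until told to stop,
    # then push a sentinel so the consumer knows the stream ended.
    n = 0
    while not stop.is_set():
        await queue.put(f'proxy-{n}')
        n += 1
        await asyncio.sleep(0.1)
    await queue.put(None)

async def stop_after(stop: asyncio.Event, seconds: float):
    # Plays the role of stop_broker_after_timeout() + broker.stop().
    await asyncio.sleep(seconds)
    stop.set()

async def drain(queue: asyncio.Queue) -> list[str]:
    # Mirrors collect_proxies() under the sentinel assumption:
    # read until the sentinel arrives, then return what was gathered.
    items = []
    while (item := await queue.get()) is not None:
        items.append(item)
    return items

async def main():
    queue: asyncio.Queue = asyncio.Queue()
    stop = asyncio.Event()
    stop_task = asyncio.create_task(stop_after(stop, 0.5))
    await producer(queue, stop)   # returns once the timer fires
    await stop_task
    print(await drain(queue))     # everything gathered before the deadline

asyncio.run(main())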
@@ -80,17 +132,45 @@ async def app_lifespan(app: FastAPI):
 app = FastAPI(lifespan=app_lifespan)
 
 
+def not_redy_yet():
+    return JSONResponse({"error": "not ready yet, collecting and checking the proxies takes about an hour"}, status_code=204)
+
+
 @app.post('*')
 async def read_root():
     return HTTPException(405)
 
 
-@app.get('/
+@app.get('/all/')
 async def get_proxies():
     if collected_json.exists():
         return loads(collected_json.read_text())
     else:
-        return
+        return not_redy_yet()
+
+
+@app.get('/http/')
+async def get_http_proxies():
+    if http_collected_json.exists():
+        return loads(http_collected_json.read_text())
+    else:
+        return not_redy_yet()
+
+
+@app.get('/https/')
+async def get_https_proxies():
+    if https_collected_json.exists():
+        return loads(https_collected_json.read_text())
+    else:
+        return not_redy_yet()
+
+
+@app.get('/socks5/')
+async def get_socks5_proxies():
+    if socks5_collected_json.exists():
+        return loads(socks5_collected_json.read_text())
+    else:
+        return not_redy_yet()
 
 
 @app.get('/')
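For reference, sort_proxies_and_merge() writes a two-level document: a merged country list plus one block per protocol, each block sorted by ping. Here is a sketch of the resulting proxies.json with invented entries; only the keys protocol, country and ping come from the code above, and real entries presumably carry more fields (host, port, and so on) produced by create_json_from_proxies().

from json import dumps

merged = {
    'countries': ['DE', 'US'],
    'proxies_by_type': {
        'http': {
            'countries': ['US'],
            'proxies': [{'protocol': 'HTTP', 'country': 'US', 'ping': 0.12}],
        },
        'socks5': {
            'countries': ['DE'],
            'proxies': [{'protocol': 'SOCKS5', 'country': 'DE', 'ping': 0.34}],
        },
    },
}
print(dumps(merged, indent=4))  # same shape that the /all/ route returns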
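A quick way to exercise the new routes once the app is serving; the base URL is an assumption about a local run, and only a 200 response carries a proxy list (the not_redy_yet() responses have no usable body).

import httpx

# Hypothetical client for the routes added above.
for route in ('/all/', '/http/', '/https/', '/socks5/'):
    resp = httpx.get('http://127.0.0.1:8000' + route)
    if resp.status_code == 200:
        print(route, 'countries:', resp.json().get('countries'))
    else:
        print(route, 'not ready yet, status', resp.status_code)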