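"""FastAPI proxy scraper.

Fetches proxy lists from a set of public sources, verifies them in the
background, and serves the verified proxies as an HTML list and as a
downloadable proxy.txt file.
"""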

import json
import logging
import os
import re
from concurrent.futures import ThreadPoolExecutor, as_completed
from threading import Lock
from typing import List

import fake_useragent
import requests
from faker import Faker
from fastapi import FastAPI, HTTPException, BackgroundTasks
from fastapi.responses import HTMLResponse, FileResponse

# Initialize the FastAPI app
app = FastAPI()

# Configure logging
logging.basicConfig(level=logging.INFO)

# Create instances of Faker and fake_useragent
fake = Faker()
user_agent_generator = fake_useragent.UserAgent()
# Global dictionaries to store valid and invalid proxies
global_proxies = {
"valid": set(),
"invalid": set()
}
# Lock to synchronize access to global proxies
proxies_lock = Lock()


def get_random_user_agent():
    """Generate a random fake user agent."""
    return user_agent_generator.random


def get_random_ip():
    """Generate a random fake IPv4 address."""
    return fake.ipv4()


def fetch_response(url, headers=None):
    """Fetch the URL and return the raw response text, or None on failure."""
    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()  # Raise HTTPError for bad responses
        return response.text
    except requests.RequestException as e:
        logging.error(f"Request failed for URL {url}: {e}")
        return None


def verify_proxy(proxy: str):
    """Verify that a proxy works by making a test request through it.

    Performs blocking I/O, so it is a plain function intended to run in a
    thread pool rather than an async coroutine.
    """
    with proxies_lock:
        if proxy in global_proxies["invalid"]:
            # Skip proxies already known to be invalid
            return proxy, False
    test_url = "http://httpbin.org/ip"  # Simple endpoint to test the proxy
    proxy_dict = {
        "http": f"http://{proxy}",
        "https": f"http://{proxy}",
    }
    try:
        response = requests.get(test_url, proxies=proxy_dict, timeout=5)
        response.raise_for_status()
        with proxies_lock:
            global_proxies["valid"].add(proxy)
        return proxy, True
    except requests.RequestException:
        with proxies_lock:
            global_proxies["invalid"].add(proxy)
        return proxy, False
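
# Example (a sketch): using a verified proxy from global_proxies["valid"]
# directly with requests. The proxy address below is hypothetical.
#
#   proxy = "203.0.113.10:8080"
#   resp = requests.get(
#       "http://httpbin.org/ip",
#       proxies={"http": f"http://{proxy}", "https": f"http://{proxy}"},
#       timeout=5,
#   )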


def background_verification(proxies_to_verify: List[str]):
    """Background task that verifies proxies concurrently."""
    with ThreadPoolExecutor() as executor:
        futures = [executor.submit(verify_proxy, proxy) for proxy in proxies_to_verify]
        for future in as_completed(futures):
            proxy, is_valid = future.result()
            if is_valid:
                logging.info(f"Proxy {proxy} is valid.")
            else:
                logging.info(f"Proxy {proxy} is invalid and will be excluded.")
@app.get("/", response_class=HTMLResponse)
async def rotate_ip(background_tasks: BackgroundTasks):
proxies = set() # Use a set to avoid duplicate proxies
# URLs for fetching proxies
proxy_urls = [
os.getenv("PROXY_API_URL", "http://pubproxy.com/api/proxy?format=txt&level=anonymous,elite&type=http,socks4,socks5&last_check=60&speed=25&limit=1&post=true&user_agent=true&cookies=true&referer=true"),
'https://raw.githubusercontent.com/clarketm/proxy-list/master/proxy-list-raw.txt',
'https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/http.txt',
'https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/socks4.txt',
'https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/socks5.txt',
'https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/proxy.txt',
'https://raw.githubusercontent.com/sunny9577/proxy-scraper/master/proxies.txt',
"https://storage.googleapis.com/river-treat-249913.appspot.com/p.txt",
"https://storage.googleapis.com/river-treat-249913.appspot.com/proxy.txt",
"https://storage.googleapis.com/river-treat-249913.appspot.com/ultimate.txt",
# Additional proxy sources
'https://raw.githubusercontent.com/proxylist/proxylist/master/proxy.txt',
'https://raw.githubusercontent.com/scrapfly/proxy-list/main/proxies.txt',
'https://raw.githubusercontent.com/roosterkid/openproxylist/main/HTTP.txt',
'https://raw.githubusercontent.com/roosterkid/openproxylist/main/SOCKS4.txt',
'https://raw.githubusercontent.com/roosterkid/openproxylist/main/SOCKS5.txt',
'https://raw.githubusercontent.com/roosterkid/openproxylist/main/HTTPS.txt',
'https://raw.githubusercontent.com/roosterkid/openproxylist/main/ALL.txt',
'https://raw.githubusercontent.com/proxylist/proxylist/master/https.txt',
'https://raw.githubusercontent.com/proxylist/proxylist/master/socks4.txt',
'https://raw.githubusercontent.com/proxylist/proxylist/master/socks5.txt',
'https://raw.githubusercontent.com/proxylist/proxylist/master/http.txt',
'https://raw.githubusercontent.com/proxylist/proxylist/master/all.txt',
'https://raw.githubusercontent.com/jetlore/proxies/master/proxy-list.txt',
'https://raw.githubusercontent.com/hookzof/proxy-list/main/proxy.txt',
'https://raw.githubusercontent.com/zzlol123/proxy-list/main/proxies.txt',
'https://raw.githubusercontent.com/sqSfg/Proxy-List/master/http.txt',
'https://raw.githubusercontent.com/sqSfg/Proxy-List/master/https.txt',
'https://raw.githubusercontent.com/sqSfg/Proxy-List/master/socks4.txt',
'https://raw.githubusercontent.com/sqSfg/Proxy-List/master/socks5.txt',
'https://raw.githubusercontent.com/sqSfg/Proxy-List/master/all.txt',
'https://www.proxy-list.download/api/v1/get?type=https',
'https://www.proxy-list.download/api/v1/get?type=http',
'https://www.proxy-list.download/api/v1/get?type=socks4',
'https://www.proxy-list.download/api/v1/get?type=socks5',
'https://www.proxy-list.download/api/v1/get?type=all',
'https://www.sslproxies.org/',
'https://www.us-proxy.org/',
'https://free-proxy-list.net/',
'https://www.proxy-list.download/',
'https://www.proxyscan.io/api/proxies?type=http',
        # New additional proxy sources
        'https://www.proxy-list.org/eng/proxylist.txt',
        'https://www.proxynova.com/proxy-server-list/',
    ]

    # Fetch proxies from all URLs concurrently using ThreadPoolExecutor
    with ThreadPoolExecutor(max_workers=None) as executor:
        futures = [
            executor.submit(fetch_response, url, headers={
                "User-Agent": get_random_user_agent(),
                "X-Forwarded-For": get_random_ip(),
                "Client-IP": get_random_ip(),
                "X-Real-IP": get_random_ip(),
            })
            for url in proxy_urls
        ]
        for future in as_completed(futures):
            response_text = future.result()
            if response_text is None:
                continue  # Skip URLs that failed
            # Determine the response format and process accordingly
            text = response_text.strip()
            if text.startswith('{') and text.endswith('}'):
                # Try to parse a JSON response
                try:
                    data = json.loads(text)
                    if isinstance(data, dict) and 'data' in data:
                        # Extract only IP and port from the JSON data
                        new_proxies = {
                            f"{proxy_data['ip']}:{proxy_data['port']}"
                            for proxy_data in data['data']
                            if 'ip' in proxy_data and 'port' in proxy_data
                        }
                        proxies.update(new_proxies)
                    else:
                        raise ValueError("Expected 'data' key in JSON response")
                except ValueError as e:
                    logging.error(f"Could not parse JSON response: {e}")
                    continue
            else:
                # Treat the response as plain text and keep only lines that
                # look like IP:port pairs (some sources return HTML pages,
                # which would otherwise add noise)
                lines = response_text.splitlines()
                new_proxies = {
                    line.strip() for line in lines
                    if re.match(r"\d{1,3}(?:\.\d{1,3}){3}:\d+$", line.strip())
                }
                proxies.update(new_proxies)

    # Convert to a list and schedule background verification
    proxies_list = list(proxies)
    background_tasks.add_task(background_verification, proxies_list)

    # Write the proxies verified so far to a text file. Verification runs in
    # the background, so on a first request this set may still be empty.
    with proxies_lock:
        valid_snapshot = sorted(global_proxies["valid"])
    with open("proxy.txt", "w") as file:
        file.write("\n".join(valid_snapshot))

    # Generate HTML content to display the verified proxies
    html_content = "<ul>" + "".join(f"<li>{proxy}</li>" for proxy in valid_snapshot) + "</ul>"
    html_content += '<p><a href="/proxy-file">Download proxy.txt</a></p>'
    return HTMLResponse(content=html_content, status_code=200)
@app.get("/proxy-file")
def get_proxy_file():
"""Endpoint to download the proxy.txt file."""
file_path = "proxy.txt"
if os.path.exists(file_path):
return FileResponse(path=file_path, filename=file_path, media_type='text/plain')
else:
raise HTTPException(status_code=404, detail="File not found")


if __name__ == "__main__":
    # Proxy verification is scheduled per request via BackgroundTasks,
    # so no separate verification thread is started here.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)
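
# Example usage (a sketch; assumes this file is saved as main.py and the
# server is running locally on port 7860):
#
#   uvicorn main:app --host 0.0.0.0 --port 7860
#   curl http://localhost:7860/                  # scrape sources, schedule verification
#   curl -O http://localhost:7860/proxy-file     # download the verified proxies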