#!/usr/bin/env python3
"""
Chinese proxy manager.

Fetches free Chinese proxies from several public sources, tests them and
returns the ones that work.
"""

import re
import json
import time
from typing import Any, List, Optional
from dataclasses import dataclass
import urllib.request
import urllib.error

try:
    from bs4 import BeautifulSoup
except ImportError:
    # bs4 is only needed by ChinaProxyFinder.fetch_proxylist; the other
    # sources keep working without it.
    BeautifulSoup = None


# "IP <non-digits> PORT": loose pattern used on the spys.one markup.
_IP_THEN_PORT_RE = re.compile(r'(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\D+(\d{2,5})')
# "IP:PORT" at the start of a table cell (89ip).
_IP_COLON_PORT_RE = re.compile(r'(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}):(\d+)')


def _is_valid_endpoint(host: str, port: int) -> bool:
    """Return True when *host* is a dotted quad (octets <= 255) and *port* is 1-65535."""
    octets = host.split(".")
    if len(octets) != 4:
        return False
    if not all(o.isdigit() and int(o) <= 255 for o in octets):
        return False
    return 1 <= port <= 65535


@dataclass
class Proxy:
    """A single proxy endpoint and the source it was scraped from."""

    host: str
    port: int
    protocol: str  # http, https, socks5
    source: str

    @property
    def url(self) -> str:
        """Proxy URL in ``protocol://host:port`` form."""
        return f"{self.protocol}://{self.host}:{self.port}"

    @property
    def dict(self) -> dict:
        """Scheme -> proxy URL mapping routing both http and https traffic through this proxy."""
        return {
            "http": self.url,
            "https": self.url,
        }


def decode_body(content: bytes) -> str:
    """Decode a response body as UTF-8, falling back to GBK (common on Chinese sites).

    The GBK fallback ignores undecodable bytes, so this never raises.
    """
    try:
        return content.decode('utf-8')
    except UnicodeDecodeError:
        return content.decode('gbk', errors='ignore')


def fetch_url(url: str, timeout: int = 30) -> Optional[str]:
    """Fetch *url* with urllib and return the decoded body.

    Args:
        url: Address to download.
        timeout: Socket timeout in seconds.

    Returns:
        The body as text, or None when the request fails for any reason.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
    }
    req = urllib.request.Request(url, headers=headers)
    try:
        with urllib.request.urlopen(req, timeout=timeout) as response:
            return decode_body(response.read())
    except Exception:
        # Deliberate best-effort: callers treat None as "source unavailable".
        return None


class ChinaProxyFinder:
    """Collects free Chinese proxies from several public sources."""

    def __init__(self):
        self.proxies: List[Proxy] = []

    @staticmethod
    def _parse_geonode(data: str, limit: int = 30) -> List[Proxy]:
        """Turn a Geonode JSON payload into at most *limit* proxies.

        Accepts both the ``{"data": [...]}`` envelope and a bare JSON list.
        Records without a usable ``ip``/``port`` are skipped.
        """
        result: Any = json.loads(data)
        if isinstance(result, dict):
            result = result.get("data")
        if not isinstance(result, list):
            return []

        found: List[Proxy] = []
        for item in result:
            if len(found) >= limit:
                break
            if not isinstance(item, dict) or 'ip' not in item or 'port' not in item:
                continue
            try:
                port = int(item['port'])
            except (TypeError, ValueError):
                continue  # one malformed record must not discard the rest
            host = str(item['ip'])
            if _is_valid_endpoint(host, port):
                found.append(Proxy(host=host, port=port, protocol="http", source="geonode"))
        return found

    @staticmethod
    def _parse_spys_html(html: str, limit: int = 30) -> List[Proxy]:
        """Extract at most *limit* plausible IP/port pairs from spys.one markup.

        The pattern is intentionally loose, so out-of-range octets and ports
        are filtered out here.
        """
        found: List[Proxy] = []
        for ip, port_text in _IP_THEN_PORT_RE.findall(html):
            if len(found) >= limit:
                break
            port = int(port_text)
            if _is_valid_endpoint(ip, port):
                found.append(Proxy(host=ip, port=port, protocol="http", source="spys.one"))
        return found

    def fetch_geonode_api(self) -> List[Proxy]:
        """Fetch proxies from the Geonode JSON API."""
        proxies: List[Proxy] = []
        try:
            # NOTE(review): query parameter names are kept as in the original;
            # confirm them against the Geonode API documentation.
            url = "https://proxylist.geonode.com/api/proxy-list?limit=50&country=cn&protocol=http&format=json"
            data = fetch_url(url)
            if data:
                proxies = self._parse_geonode(data)
        except Exception as e:
            print(f"Erro geonode: {e}")
        return proxies

    def fetch_spys_one(self) -> List[Proxy]:
        """Fetch proxies from spys.one using urllib."""
        proxies: List[Proxy] = []
        try:
            html = fetch_url("https://spys.one/free-proxy-list/CN/")
            if html:
                # spys.one builds its table with scripts, so just look for
                # IP/port-like patterns in the raw markup.
                proxies = self._parse_spys_html(html)
        except Exception as e:
            print(f"Erro spys.one: {e}")
        return proxies

    def fetch_proxylist(self) -> List[Proxy]:
        """Fetch proxies from 89ip.cn (requires BeautifulSoup)."""
        proxies: List[Proxy] = []
        if BeautifulSoup is None:
            print("Erro proxylist: beautifulsoup4 não instalado")
            return proxies
        try:
            url = "http://www.89ip.cn/tiqv.html?id=1"  # Chinese proxy list
            html = fetch_url(url)
            if html:
                soup = BeautifulSoup(html, 'lxml')
                # Look for "ip:port" text inside the first table cells.
                for td in soup.find_all('td')[:100]:
                    match = _IP_COLON_PORT_RE.match(td.get_text(strip=True))
                    if not match:
                        continue
                    host, port = match.group(1), int(match.group(2))
                    if not _is_valid_endpoint(host, port):
                        continue
                    proxies.append(Proxy(host=host, port=port, protocol="http", source="89ip"))
                    if len(proxies) >= 20:
                        break
        except Exception as e:
            print(f"Erro proxylist: {e}")
        return proxies

    def find_all(self) -> List[Proxy]:
        """Query every source sequentially and return the de-duplicated proxies.

        The result is also stored in ``self.proxies``.
        """
        print("🔍 Buscando proxies chineses...")
        all_proxies: List[Proxy] = []

        # Geonode API
        print(" - Geonode API...")
        all_proxies.extend(self.fetch_geonode_api())

        # Spys.one
        print(" - Spys.one...")
        all_proxies.extend(self.fetch_spys_one())

        # Other sources
        print(" - Outras fontes...")
        all_proxies.extend(self.fetch_proxylist())

        # Drop duplicates, keeping the first occurrence of each host:port.
        seen = set()
        unique: List[Proxy] = []
        for p in all_proxies:
            key = (p.host, p.port)
            if key not in seen:
                seen.add(key)
                unique.append(p)

        self.proxies = unique
        print(f"✅ Encontrados {len(self.proxies)} proxies únicos")
        return self.proxies


class ProxyTester:
    """Checks whether proxies actually relay traffic."""

    @staticmethod
    def _build_opener(proxy: Proxy) -> urllib.request.OpenerDirector:
        """Build an opener that sends both http and https requests via *proxy*.

        ProxyHandler keys are the scheme of the *target* URL, so mapping only
        ``proxy.protocol`` would let https requests bypass an http proxy.
        """
        return urllib.request.build_opener(urllib.request.ProxyHandler(proxy.dict))

    def test_proxy(self, proxy: Proxy, timeout: int = 10) -> bool:
        """Return True when httpbin.org answers 200 through *proxy*."""
        try:
            opener = self._build_opener(proxy)
            req = urllib.request.Request(
                "http://httpbin.org/ip",
                headers={"User-Agent": "Mozilla/5.0"}
            )
            with opener.open(req, timeout=timeout) as response:
                return response.status == 200
        except Exception:
            # Free proxies fail in many ways; any failure means "not working".
            return False

    def test_for_taobao(self, proxy: Proxy, timeout: int = 15) -> bool:
        """Return True when a Taobao search page loads through *proxy* without a block page."""
        try:
            opener = self._build_opener(proxy)
            req = urllib.request.Request(
                "https://s.taobao.com/search?q=test",
                headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"}
            )
            with opener.open(req, timeout=timeout) as response:
                text = response.read().decode('utf-8', errors='ignore')
            # Accept only a large page that does not contain the verification marker.
            return "验证" not in text and len(text) > 10000
        except Exception:
            # Any failure means the proxy is unusable for Taobao.
            return False

    def find_working(self, proxies: List[Proxy], count: int = 3, test_taobao: bool = False,
                     max_tests: int = 30) -> List[Proxy]:
        """Test proxies in order until *count* working ones are found.

        Args:
            proxies: Candidates to test.
            count: Number of working proxies wanted.
            test_taobao: Test against Taobao instead of httpbin.org.
            max_tests: Upper bound on how many candidates are tried.

        Returns:
            The working proxies found (possibly fewer than *count*).
        """
        print(f"🧪 Testando {len(proxies)} proxies...")
        working: List[Proxy] = []

        for i, proxy in enumerate(proxies[:max_tests]):
            if len(working) >= count:
                break

            print(f" [{i+1}] {proxy.host}:{proxy.port}... ", end="", flush=True)

            if test_taobao:
                success = self.test_for_taobao(proxy, timeout=10)
            else:
                success = self.test_proxy(proxy, timeout=5)

            if success:
                print("✅")
                working.append(proxy)
            else:
                print("❌")

        return working


def main() -> Optional[Proxy]:
    """Find working proxies, save them under /tmp and return the first one.

    Returns:
        The first working proxy, or None when nothing was found.
    """
    finder = ChinaProxyFinder()
    tester = ProxyTester()

    # Collect candidates
    proxies = finder.find_all()
    if not proxies:
        print("❌ Nenhum proxy encontrado")
        return None

    # Test candidates
    working = tester.find_working(proxies, count=5)

    if working:
        print(f"\n✅ {len(working)} proxies funcionais:")
        for p in working:
            print(f" {p.url}")

        # Plain-text list, one URL per line
        with open("/tmp/taobao_proxies.txt", "w", encoding="utf-8") as f:
            f.writelines(f"{p.url}\n" for p in working)

        # JSON copy as well
        with open("/tmp/taobao_proxies.json", "w", encoding="utf-8") as f:
            json.dump([{"url": p.url, "host": p.host, "port": p.port} for p in working], f, indent=2)

        print("\n💾 Salvo em /tmp/taobao_proxies.txt e /tmp/taobao_proxies.json")
        return working[0]

    print("\n❌ Nenhum proxy funcional encontrado")
    print("\n💡 Sugestões:")
    print(" 1. Use proxy pago (mais confiável)")
    print(" 2. Tente novamente em alguns minutos")
    print(" 3. Use VPN chinês")
    return None


if __name__ == "__main__":
    result = main()
    if result:
        print(f"\n🎯 Use este proxy: {result.url}")