dan92 committed on
Commit
1e77c72
·
verified ·
1 Parent(s): 9ed1a6b

Upload proxy_pool.py

Browse files
Files changed (1) hide show
  1. proxy_pool.py +108 -63
proxy_pool.py CHANGED
@@ -2,7 +2,7 @@ import requests
2
  import time
3
  import logging
4
  import random
5
- from threading import Lock
6
  from collections import defaultdict
7
  from datetime import datetime, timedelta
8
 
@@ -11,61 +11,82 @@ logger = logging.getLogger(__name__)
11
  class ProxyPool:
12
  def __init__(self):
13
  self.proxies = []
14
- self.proxy_usage = defaultdict(list) # 记录每个IP的使用时间
15
- self.lock = Lock()
16
  self.last_fetch_time = datetime.min
17
- self.fetch_interval = 300 # 5分钟更新一次代理池
 
 
18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  def _parse_proxy_list(self, text):
20
  """解析代理列表文本"""
21
  proxies = []
22
- for line in text.strip().split(','):
23
- line = line.strip()
24
- if not line:
25
- continue
26
 
27
- try:
28
- if line.startswith('http://'):
29
- proxy_type = 'http'
30
- proxy = line[7:]
31
- elif line.startswith('socks4://'):
32
- proxy_type = 'socks4'
33
- proxy = line[9:]
34
- elif line.startswith('socks5://'):
35
- proxy_type = 'socks5'
36
- proxy = line[9:]
37
- else:
 
 
 
 
 
 
 
 
 
 
 
 
38
  continue
39
-
40
- if ':' in proxy:
41
- ip, port = proxy.split(':')
42
- proxies.append({
43
- 'type': proxy_type,
44
- 'ip': ip,
45
- 'port': int(port)
46
- })
47
- except Exception as e:
48
- logger.error(f"解析代理失败: {line}, 错误: {e}")
49
- continue
50
-
51
  return proxies
52
 
53
  def _fetch_proxies(self):
54
  """从API获取代理列表"""
55
  try:
56
- # 获取混合代理池(HTTP/HTTPS各5个)
57
  urls = [
58
  "https://269900.xyz/fetch_http?count=5",
59
- "https://269900.xyz/fetch_https?count=5"
 
60
  ]
61
 
62
  new_proxies = []
63
  for url in urls:
64
- response = requests.get(url, timeout=10)
65
- if response.status_code == 200:
66
- proxies = self._parse_proxy_list(response.text)
67
- new_proxies.extend(proxies)
68
- logger.info(f"成功获取 {len(proxies)} 个代理从 {url}")
 
 
 
 
69
 
70
  return new_proxies
71
  except Exception as e:
@@ -74,57 +95,81 @@ class ProxyPool:
74
 
75
  def update_proxy_pool(self):
76
  """更新代理池"""
 
 
 
77
  with self.lock:
78
- current_time = datetime.now()
79
  if (current_time - self.last_fetch_time).total_seconds() >= self.fetch_interval:
80
- new_proxies = self._fetch_proxies()
81
- if new_proxies:
 
 
 
 
 
82
  self.proxies = new_proxies
83
- self.last_fetch_time = current_time
84
  logger.info(f"代理池更新成功,当前代理数量: {len(self.proxies)}")
85
 
86
  def can_use_proxy(self, proxy_ip):
87
- """检查代理IP是否可以使用(限制每秒最多5次请求)"""
88
  current_time = time.time()
 
89
  with self.lock:
90
- # 清理旧的使用记录
91
- self.proxy_usage[proxy_ip] = [t for t in self.proxy_usage[proxy_ip]
92
- if current_time - t < 1.0]
93
 
94
- # 检查是否超过限制
95
- if len(self.proxy_usage[proxy_ip]) >= 5:
96
  return False
97
 
98
- # 添加新的使用记录
99
  self.proxy_usage[proxy_ip].append(current_time)
100
  return True
101
 
102
  def get_proxy(self):
103
  """获取一个可用的代理"""
 
104
  self.update_proxy_pool()
105
 
106
- with self.lock:
107
- if not self.proxies:
108
- return None
109
-
110
- # 随机尝试最多5个代理
111
- for _ in range(min(5, len(self.proxies))):
112
- proxy = random.choice(self.proxies)
113
- proxy_ip = proxy.get('ip')
114
 
115
- if proxy_ip and self.can_use_proxy(proxy_ip):
 
 
 
 
 
 
 
116
  proxy_type = proxy.get('type', 'http').lower()
117
- proxy_str = f"{proxy_type}://{proxy_ip}:{proxy.get('port')}"
118
  return {
119
  'http': proxy_str,
120
  'https': proxy_str if proxy_type == 'http' else None
121
  }
122
 
123
- return None
 
 
124
 
125
  def remove_proxy(self, proxy):
126
  """移除无效的代理"""
 
 
 
127
  with self.lock:
128
- if proxy in self.proxies:
129
- self.proxies.remove(proxy)
130
- logger.info(f"移除无效代理: {proxy}")
 
 
 
 
 
 
 
 
 
 
 
2
  import time
3
  import logging
4
  import random
5
+ from threading import Lock, RLock
6
  from collections import defaultdict
7
  from datetime import datetime, timedelta
8
 
 
11
  class ProxyPool:
12
  def __init__(self):
13
  self.proxies = []
14
+ self.proxy_usage = defaultdict(list)
15
+ self.lock = RLock()
16
  self.last_fetch_time = datetime.min
17
+ self.fetch_interval = 300
18
+ self.last_cleanup_time = time.time()
19
+ self.cleanup_interval = 60
20
 
21
+ def _cleanup_usage_records(self):
22
+ """清理过期的使用记录"""
23
+ current_time = time.time()
24
+ if current_time - self.last_cleanup_time < self.cleanup_interval:
25
+ return
26
+
27
+ with self.lock:
28
+ for ip in list(self.proxy_usage.keys()):
29
+ self.proxy_usage[ip] = [t for t in self.proxy_usage[ip]
30
+ if current_time - t < 1.0]
31
+ if not self.proxy_usage[ip]:
32
+ del self.proxy_usage[ip]
33
+ self.last_cleanup_time = current_time
34
+
35
  def _parse_proxy_list(self, text):
36
  """解析代理列表文本"""
37
  proxies = []
38
+ try:
39
+ lines = [line.strip() for line in text.strip().split(',') if line.strip()]
 
 
40
 
41
+ for line in lines:
42
+ try:
43
+ if line.startswith('http://'):
44
+ proxy_type = 'http'
45
+ proxy = line[7:]
46
+ elif line.startswith('socks4://'):
47
+ proxy_type = 'socks4'
48
+ proxy = line[9:]
49
+ elif line.startswith('socks5://'):
50
+ proxy_type = 'socks5'
51
+ proxy = line[9:]
52
+ else:
53
+ continue
54
+
55
+ if ':' in proxy:
56
+ ip, port = proxy.split(':')
57
+ proxies.append({
58
+ 'type': proxy_type,
59
+ 'ip': ip.strip(),
60
+ 'port': int(port.strip())
61
+ })
62
+ except Exception as e:
63
+ logger.error(f"解析代理失败: {line}, 错误: {e}")
64
  continue
65
+
66
+ except Exception as e:
67
+ logger.error(f"解析代理列表失败: {e}")
 
 
 
 
 
 
 
 
 
68
  return proxies
69
 
70
  def _fetch_proxies(self):
71
  """从API获取代理列表"""
72
  try:
 
73
  urls = [
74
  "https://269900.xyz/fetch_http?count=5",
75
+ "https://269900.xyz/fetch_https?count=5",
76
+ "https://269900.xyz/fetch_socks5?count=5"
77
  ]
78
 
79
  new_proxies = []
80
  for url in urls:
81
+ try:
82
+ response = requests.get(url, timeout=5)
83
+ if response.status_code == 200:
84
+ proxies = self._parse_proxy_list(response.text)
85
+ new_proxies.extend(proxies)
86
+ logger.info(f"成功获取 {len(proxies)} 个代理从 {url}")
87
+ except requests.RequestException as e:
88
+ logger.error(f"获取代理失败 ({url}): {e}")
89
+ continue
90
 
91
  return new_proxies
92
  except Exception as e:
 
95
 
96
  def update_proxy_pool(self):
97
  """更新代理池"""
98
+ current_time = datetime.now()
99
+ should_update = False
100
+
101
  with self.lock:
 
102
  if (current_time - self.last_fetch_time).total_seconds() >= self.fetch_interval:
103
+ should_update = True
104
+ self.last_fetch_time = current_time
105
+
106
+ if should_update:
107
+ new_proxies = self._fetch_proxies()
108
+ if new_proxies:
109
+ with self.lock:
110
  self.proxies = new_proxies
 
111
  logger.info(f"代理池更新成功,当前代理数量: {len(self.proxies)}")
112
 
113
  def can_use_proxy(self, proxy_ip):
114
+ """检查代理IP是否可以使用"""
115
  current_time = time.time()
116
+
117
  with self.lock:
118
+ usage_times = self.proxy_usage[proxy_ip]
119
+ usage_times = [t for t in usage_times if current_time - t < 1.0]
120
+ self.proxy_usage[proxy_ip] = usage_times
121
 
122
+ if len(usage_times) >= 5:
 
123
  return False
124
 
 
125
  self.proxy_usage[proxy_ip].append(current_time)
126
  return True
127
 
128
  def get_proxy(self):
129
  """获取一个可用的代理"""
130
+ self._cleanup_usage_records()
131
  self.update_proxy_pool()
132
 
133
+ for _ in range(3):
134
+ with self.lock:
135
+ if not self.proxies:
136
+ return None
 
 
 
 
137
 
138
+ available_proxies = []
139
+ for proxy in self.proxies:
140
+ proxy_ip = proxy.get('ip')
141
+ if proxy_ip and self.can_use_proxy(proxy_ip):
142
+ available_proxies.append(proxy)
143
+
144
+ if available_proxies:
145
+ proxy = random.choice(available_proxies)
146
  proxy_type = proxy.get('type', 'http').lower()
147
+ proxy_str = f"{proxy_type}://{proxy['ip']}:{proxy['port']}"
148
  return {
149
  'http': proxy_str,
150
  'https': proxy_str if proxy_type == 'http' else None
151
  }
152
 
153
+ time.sleep(0.1)
154
+
155
+ return None
156
 
157
  def remove_proxy(self, proxy):
158
  """移除无效的代理"""
159
+ if not proxy:
160
+ return
161
+
162
  with self.lock:
163
+ try:
164
+ proxy_str = proxy.get('http') or proxy.get('https')
165
+ if not proxy_str:
166
+ return
167
+
168
+ for p in self.proxies[:]:
169
+ current_proxy_str = f"{p['type']}://{p['ip']}:{p['port']}"
170
+ if current_proxy_str == proxy_str:
171
+ self.proxies.remove(p)
172
+ logger.info(f"移除无效代理: {proxy_str}")
173
+ break
174
+ except Exception as e:
175
+ logger.error(f"移除代理时出错: {e}")