paijo77 committed on
Commit
d22bb02
·
verified ·
1 Parent(s): 8e2aea8

update app/grabber/base.py

Browse files
Files changed (1) hide show
  1. app/grabber/base.py +87 -0
app/grabber/base.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from abc import ABC, abstractmethod
2
+ from typing import List
3
+ import asyncio
4
+ from app.models.proxy import Proxy
5
+ from app.models.source import SourceConfig, SourceType
6
+ from app.grabber.patterns import ProxyPatterns
7
+ from app.grabber.parsers import VMessParser, VLESSParser, TrojanParser, SSParser
8
+ from app.utils.base64_decoder import SubscriptionDecoder
9
+
10
+
11
class BaseGrabber(ABC):
    """Abstract base class for proxy grabbers.

    Subclasses implement ``fetch_content`` for a specific transport
    (HTTP page, API endpoint, subscription link, ...); this base class
    holds retry/timeout configuration and turns fetched text into
    ``Proxy`` objects via pattern extraction and per-scheme parsers.
    """

    def __init__(
        self, max_retries: int = 3, retry_delay: float = 1.0, timeout: int = 30
    ):
        """
        Args:
            max_retries: number of fetch attempts before giving up.
            retry_delay: seconds to wait between retry attempts.
            timeout: per-request timeout in seconds.

        NOTE(review): these settings are stored here for subclasses'
        fetch implementations; this base class itself does not retry.
        """
        self.max_retries = max_retries
        self.retry_delay = retry_delay
        self.timeout = timeout

    @abstractmethod
    async def fetch_content(self, source: SourceConfig) -> str:
        """Return the raw text content of *source* (transport-specific)."""

    async def extract_proxies(self, source: SourceConfig) -> List[Proxy]:
        """Fetch *source* and return every proxy found in its content.

        If the source defines a selector, selector-based extraction is
        tried first; the generic pattern-based parse is the fallback
        (also used when the selector yields nothing).
        """
        content = await self.fetch_content(source)

        if source.selector:
            proxies = await self._try_exact_selector(content, source)
            if proxies:
                return proxies

        return await self.parse_content(content, source.type)

    async def _try_exact_selector(
        self, content: str, source: SourceConfig
    ) -> List[Proxy]:
        """Hook for selector-based extraction; base implementation finds nothing."""
        return []

    async def parse_content(self, content: str, source_type: SourceType) -> List[Proxy]:
        """Parse *content* and return all recognizable proxies.

        Handles plain ``ip:port`` HTTP proxies plus vmess/vless/trojan/ss
        URIs. Malformed individual entries are skipped rather than
        aborting the whole parse.
        """
        proxies: List[Proxy] = []

        if source_type == SourceType.SUBSCRIPTION_BASE64:
            try:
                content = SubscriptionDecoder.decode(content)
            except ValueError:
                # Not valid base64 after all — fall back to parsing the
                # raw text as-is instead of failing the whole source.
                pass

        for ip, port in ProxyPatterns.extract_http_proxies(content):
            proxies.append(
                Proxy(ip=ip, port=int(port), protocol="http", source=str(source_type))
            )

        # Every URI scheme follows the same extract-then-parse shape, and
        # each parser raises ValueError on malformed input; drive them
        # from one table instead of four copy-pasted loops. Order matters
        # for callers that rely on append order: vmess, vless, trojan, ss.
        scheme_handlers = (
            (ProxyPatterns.extract_vmess_urls, VMessParser),
            (ProxyPatterns.extract_vless_urls, VLESSParser),
            (ProxyPatterns.extract_trojan_urls, TrojanParser),
            (ProxyPatterns.extract_ss_urls, SSParser),
        )
        for extract, parser in scheme_handlers:
            for url in extract(content):
                try:
                    proxies.append(parser.parse(url))
                except ValueError:
                    # Skip a single malformed entry; keep the rest.
                    continue

        return proxies