paijo77 committed on
Commit
0e9cd60
·
verified ·
1 Parent(s): 31c7681

update app/hunter/strategies/github.py

Browse files
Files changed (1) hide show
  1. app/hunter/strategies/github.py +92 -0
app/hunter/strategies/github.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import aiohttp
3
+ import logging
4
+ from typing import List
5
+ from datetime import datetime, timedelta
6
+ from app.hunter.strategy import BaseStrategy
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+
11
class GitHubStrategy(BaseStrategy):
    """Discover candidate proxy-list files through the GitHub code-search API.

    Each configured search query is sent to ``BASE_URL``; the ``html_url`` of
    every returned item is converted to its ``raw.githubusercontent.com``
    equivalent so the file content can be fetched directly.
    """

    BASE_URL = "https://api.github.com/search/code"

    def __init__(self):
        # Optional API token; when unset, requests are sent without an
        # Authorization header.
        self.token = os.getenv("GITHUB_TOKEN")

    @property
    def name(self) -> str:
        """Return the identifier of this strategy (``"github"``)."""
        return "github"

    @staticmethod
    def _to_raw_url(html_url: str) -> str:
        """Convert a GitHub blob page URL into its raw-content URL.

        Blob: https://github.com/user/repo/blob/main/file.txt
        Raw:  https://raw.githubusercontent.com/user/repo/main/file.txt

        Only the host and the ``blob`` segment that directly follows
        ``owner/repo`` are rewritten, so a file path that itself contains
        ``/blob/`` or ``github.com`` is left intact.
        """
        prefix = "https://github.com/"
        if html_url.startswith(prefix):
            # owner / repo / "blob" / "<ref>/<path...>"
            parts = html_url[len(prefix):].split("/", 3)
            if len(parts) == 4 and parts[2] == "blob":
                owner, repo, _, ref_and_path = parts
                return (
                    f"https://raw.githubusercontent.com/{owner}/{repo}/{ref_and_path}"
                )
        # Unexpected shape: fall back to the plain textual substitution.
        return html_url.replace("github.com", "raw.githubusercontent.com").replace(
            "/blob/", "/"
        )

    async def discover(self) -> List[str]:
        """Search GitHub for recently updated proxy files.

        Returns:
            Raw-content URLs for the matched files, de-duplicated and kept in
            the order they were first returned by the API.

        Errors are logged rather than raised: an exception while processing
        one query is recorded and the next query is attempted. A 403/429
        response, or a 401 received when no token is being sent, stops the
        remaining queries.
        """
        # Function-scope import: ``timezone`` is not among the module imports.
        from datetime import timezone

        urls: List[str] = []
        # Search queries to try
        queries = [
            "filename:proxy.txt",
            "filename:proxies.txt",
            "extension:yaml proxies",
            "extension:txt vmess://",
        ]

        # Date for the "pushed:>" filter (last 24h), computed in UTC.
        # NOTE(review): GitHub's code-search documentation does not list
        # `pushed:` among the supported qualifiers - confirm it has an effect.
        yesterday = (datetime.now(timezone.utc) - timedelta(hours=24)).strftime(
            "%Y-%m-%d"
        )

        headers = {"Accept": "application/vnd.github.v3+json"}
        if self.token:
            headers["Authorization"] = f"token {self.token}"

        async with aiohttp.ClientSession() as session:
            for q in queries:
                try:
                    params = {
                        "q": f"{q} pushed:>{yesterday}",
                        "sort": "indexed",
                        "order": "desc",
                        "per_page": 10,  # Limit to top 10 per query to save quota
                    }

                    async with session.get(
                        self.BASE_URL, params=params, headers=headers
                    ) as resp:
                        if resp.status == 401:
                            logger.warning(
                                "GitHub API authentication failed (401). "
                                "Ensure GITHUB_TOKEN is valid or unset it to use public rate limits."
                            )
                            # A rejected token is dropped so later queries go
                            # out unauthenticated; a 401 without any token
                            # cannot be recovered from, so stop.
                            if "Authorization" in headers:
                                del headers["Authorization"]
                                continue
                            break

                        # GitHub reports rate limiting with 403 or 429.
                        if resp.status in (403, 429):
                            logger.warning("GitHub API rate limit exceeded")
                            break

                        if resp.status != 200:
                            logger.error("GitHub Search failed: %s", resp.status)
                            continue

                        data = await resp.json()

                    # `or []` also covers an explicit null "items" value.
                    for item in data.get("items") or []:
                        html_url = item.get("html_url", "")
                        if html_url:
                            urls.append(self._to_raw_url(html_url))

                except Exception as e:
                    # Best-effort discovery: log and move on to the next query.
                    logger.error("Error in GitHub strategy: %s", e)

        # Deduplicate while preserving first-seen order.
        return list(dict.fromkeys(urls))