Jack698 committed on
Commit
2133ad1
·
verified ·
1 Parent(s): 98aef54

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. dorker.py +57 -26
dorker.py CHANGED
@@ -1,58 +1,89 @@
1
  import asyncio
2
  import httpx
 
 
3
  from itertools import cycle
4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  GITHUB_API_URL = "https://api.github.com/search/code"
6
 
7
  def urlencode(s: str) -> str:
8
- """A simple URL encoder for query strings."""
9
  return s.replace(':', '%3A').replace('"', '%22').replace(' ', '+')
10
 
11
  async def search(tokens: list, query: str, dorks: list):
12
- """
13
- Async generator to search GitHub for dorks and stream results in real-time.
14
- """
15
  if not tokens:
16
- yield "[ERROR] No GitHub tokens provided in GHA_TOKENS secret."
17
  return
18
 
19
  token_cycler = cycle(tokens)
20
  headers = {"Accept": "application/vnd.github.v3+json"}
21
 
22
- async with httpx.AsyncClient() as client:
23
  for i, dork in enumerate(dorks):
24
  full_query = f"{query} {dork}"
25
- url = f"{GITHUB_API_URL}?q={urlencode(full_query)}"
26
 
27
- current_token = next(token_cycler)
28
- headers["Authorization"] = f"token {current_token}"
29
-
30
  yield f"[INFO] [{i+1}/{len(dorks)}] Searching with dork: {dork}"
31
 
32
  try:
33
- res = await client.get(url, headers=headers)
34
-
35
- if res.status_code == 403 and 'rate limit' in res.text.lower():
36
- yield "[WARN] Rate limit hit. Sleeping for 60 seconds..."
 
 
37
  await asyncio.sleep(60)
38
- res = await client.get(url, headers=headers) # Retry
 
39
 
40
- res.raise_for_status()
41
-
42
- data = res.json()
43
- count = data.get("total_count", 0)
44
-
45
- github_search_url = f"https://github.com/search?q={urlencode(full_query)}&type=Code"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
- if count > 0:
48
- yield f"[+] FOUND ({count} results): {dork} -> {github_search_url}"
49
- else:
50
- yield f"[-] Not Found: {dork}"
 
 
 
 
51
 
52
  except httpx.HTTPStatusError as e:
53
  yield f"[ERROR] HTTP Error for '{dork}': {e.response.status_code}"
54
  except Exception as e:
55
  yield f"[ERROR] Unexpected error for '{dork}': {str(e)}"
56
 
57
- # Sleep to stay within the 30 requests/minute limit
58
  await asyncio.sleep(2.1)
 
1
  import asyncio
2
  import httpx
3
import base64
import re
from itertools import cycle

# Regex patterns used to detect leaked credentials in fetched file contents.
# NOTE(review): the "Heroku API Key" pattern matches ANY UUID, so it will
# flag plenty of non-secret identifiers — confirm this is intended.
SECRET_PATTERNS = {
    "GitHub Token": r'ghp_[0-9a-zA-Z]{36}',
    "GitHub App Token": r'ghu_[0-9a-zA-Z]{36}',
    "GitHub Refresh Token": r'ghr_[0-9a-zA-Z]{76}',
    "AWS Access Key": r'AKIA[0-9A-Z]{16}',
    "Google API Key": r'AIza[0-9A-Za-z\-_]{35}',
    "Heroku API Key": r'[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}',
    "Stripe API Key": r'sk_live_[0-9a-zA-Z]{24}',
    "Slack Token": r'xox[baprs]-[0-9a-zA-Z]{10,48}',
    "SSH Private Key": r'-----BEGIN (RSA|OPENSSH|EC) PRIVATE KEY-----',
    "Clash Subscription": r'https?://[\w.-]+(?:\:[0-9]+)?(?:/.*)?(?:clash|sub|v2ray|trojan|shadowsocks)(?:/.*)?',
}

# GitHub REST endpoint for code search.
GITHUB_API_URL = "https://api.github.com/search/code"
22
 
23
def urlencode(s: str) -> str:
    """Encode *s* for safe use in a URL query string.

    Delegates to ``urllib.parse.quote_plus``, which produces the same
    output as the previous hand-rolled encoder for ``:`` (``%3A``),
    ``"`` (``%22``) and space (``+``), and additionally percent-encodes
    every other reserved character the old version silently passed through.
    """
    # Local import keeps the module's top-level dependency block unchanged.
    from urllib.parse import quote_plus
    return quote_plus(s)
25
 
26
async def search(tokens: list, query: str, dorks: list):
    """Async generator: dork-based GitHub code search with secret scanning.

    For each dork, runs a GitHub code search (top 5 results), fetches the
    content of each matching file, scans it against ``SECRET_PATTERNS`` and
    yields human-readable status/result strings in real time.

    Args:
        tokens: GitHub API tokens, rotated round-robin (one per dork).
        query: Base search query prepended to every dork.
        dorks: Search fragments, each combined with *query*.

    Yields:
        str: ``[INFO]``/``[WARN]``/``[ERROR]``/``[+]``/``[-]`` status lines.
    """
    if not tokens:
        yield "[ERROR] No GitHub tokens provided."
        return

    token_cycler = cycle(tokens)
    base_headers = {"Accept": "application/vnd.github.v3+json"}

    async with httpx.AsyncClient(timeout=30.0) as client:
        for i, dork in enumerate(dorks):
            full_query = f"{query} {dork}"
            # per_page=5: limit to the top 5 results per dork to keep API usage low.
            search_url = f"{GITHUB_API_URL}?q={urlencode(full_query)}&per_page=5"

            yield f"[INFO] [{i+1}/{len(dorks)}] Searching with dork: {dork}"

            try:
                # Rotate tokens per dork to spread rate-limit consumption.
                # A fresh dict per iteration avoids mutating shared state.
                headers = dict(base_headers)
                headers["Authorization"] = f"token {next(token_cycler)}"

                search_res = await client.get(search_url, headers=headers)
                if search_res.status_code == 403:  # treated as a rate limit
                    yield "[WARN] Rate limit hit. Sleeping for 60s..."
                    await asyncio.sleep(60)
                    search_res = await client.get(search_url, headers=headers)  # retry once
                search_res.raise_for_status()

                search_data = search_res.json()
                if not search_data.get("items"):
                    yield f"[-] No files found for: {dork}"
                    await asyncio.sleep(2.1)  # still respect the search rate limit
                    continue

                # Stage 2: fetch and scan the content of each found file.
                for item in search_data["items"]:
                    file_api_url = item.get("url")
                    if not file_api_url:
                        continue

                    file_res = await client.get(file_api_url, headers=headers)
                    if file_res.status_code != 200:
                        continue  # skip files we cannot fetch

                    content_b64 = file_res.json().get("content")
                    if not content_b64:
                        continue

                    try:
                        decoded_content = base64.b64decode(content_b64).decode("utf-8")
                    except Exception:
                        continue  # skip binary / non-UTF-8 files

                    # Guard against a missing html_url instead of raising KeyError.
                    location = item.get("html_url", file_api_url)
                    for secret_type, pattern in SECRET_PATTERNS.items():
                        # No MULTILINE flag needed: no pattern uses ^/$ anchors.
                        for match in re.finditer(pattern, decoded_content):
                            # Report only the first line of the match as a snippet.
                            snippet = match.group(0).strip().split('\n')[0]
                            yield f"[+] FOUND [{secret_type}] in {location}: {snippet}..."

            except httpx.HTTPStatusError as e:
                yield f"[ERROR] HTTP Error for '{dork}': {e.response.status_code}"
            except Exception as e:
                yield f"[ERROR] Unexpected error for '{dork}': {str(e)}"

            # Main sleep to respect the search API rate limit (30/min)
            await asyncio.sleep(2.1)