Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files
dorker.py
CHANGED
|
@@ -1,58 +1,89 @@
|
|
| 1 |
import asyncio
|
| 2 |
import httpx
|
|
|
|
|
|
|
| 3 |
from itertools import cycle
|
| 4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
GITHUB_API_URL = "https://api.github.com/search/code"
|
| 6 |
|
| 7 |
def urlencode(s: str) -> str:
|
| 8 |
-
"""A simple URL encoder for query strings."""
|
| 9 |
return s.replace(':', '%3A').replace('"', '%22').replace(' ', '+')
|
| 10 |
|
| 11 |
async def search(tokens: list, query: str, dorks: list):
|
| 12 |
-
"""
|
| 13 |
-
Async generator to search GitHub for dorks and stream results in real-time.
|
| 14 |
-
"""
|
| 15 |
if not tokens:
|
| 16 |
-
yield "[ERROR] No GitHub tokens provided
|
| 17 |
return
|
| 18 |
|
| 19 |
token_cycler = cycle(tokens)
|
| 20 |
headers = {"Accept": "application/vnd.github.v3+json"}
|
| 21 |
|
| 22 |
-
async with httpx.AsyncClient() as client:
|
| 23 |
for i, dork in enumerate(dorks):
|
| 24 |
full_query = f"{query} {dork}"
|
| 25 |
-
|
| 26 |
|
| 27 |
-
current_token = next(token_cycler)
|
| 28 |
-
headers["Authorization"] = f"token {current_token}"
|
| 29 |
-
|
| 30 |
yield f"[INFO] [{i+1}/{len(dorks)}] Searching with dork: {dork}"
|
| 31 |
|
| 32 |
try:
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
|
|
|
|
|
|
| 37 |
await asyncio.sleep(60)
|
| 38 |
-
|
|
|
|
| 39 |
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
|
| 52 |
except httpx.HTTPStatusError as e:
|
| 53 |
yield f"[ERROR] HTTP Error for '{dork}': {e.response.status_code}"
|
| 54 |
except Exception as e:
|
| 55 |
yield f"[ERROR] Unexpected error for '{dork}': {str(e)}"
|
| 56 |
|
| 57 |
-
#
|
| 58 |
await asyncio.sleep(2.1)
|
|
|
|
| 1 |
import asyncio
|
| 2 |
import httpx
|
| 3 |
+
import base64
|
| 4 |
+
import re
|
| 5 |
from itertools import cycle
|
| 6 |
|
# Regex patterns used to flag likely secrets in fetched file contents.
# Keys are human-readable labels emitted in "[+] FOUND [...]" result lines.
SECRET_PATTERNS = {
    "GitHub Token": r'ghp_[0-9a-zA-Z]{36}',
    "GitHub App Token": r'ghu_[0-9a-zA-Z]{36}',
    "GitHub Refresh Token": r'ghr_[0-9a-zA-Z]{76}',
    "AWS Access Key": r'AKIA[0-9A-Z]{16}',
    "Google API Key": r'AIza[0-9A-Za-z\-_]{35}',
    # NOTE(review): this is just the generic UUID shape — it will also match
    # non-Heroku UUIDs, so expect false positives for this label.
    "Heroku API Key": r'[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}',
    "Stripe API Key": r'sk_live_[0-9a-zA-Z]{24}',
    "Slack Token": r'xox[baprs]-[0-9a-zA-Z]{10,48}',
    # Matches only the PEM header line, not the whole key body.
    "SSH Private Key": r'-----BEGIN (RSA|OPENSSH|EC) PRIVATE KEY-----',
    "Clash Subscription": r'https?://[\w.-]+(?:\:[0-9]+)?(?:/.*)?(?:clash|sub|v2ray|trojan|shadowsocks)(?:/.*)?',
}

# GitHub REST code-search endpoint queried by search().
GITHUB_API_URL = "https://api.github.com/search/code"
| 22 |
|
| 23 |
def urlencode(s: str) -> str:
    """Percent-encode *s* for safe use in a URL query string.

    The previous hand-rolled version only replaced ``:``, ``"`` and space,
    so any other reserved character in a dork (``&``, ``=``, ``+``, ``#``)
    passed through unencoded and silently corrupted the search query.
    ``quote_plus`` encodes all reserved characters and turns spaces into
    ``+``, matching the old behavior for the characters it did handle.
    """
    # Local import keeps this fix self-contained within the function.
    from urllib.parse import quote_plus
    return quote_plus(s)
|
| 25 |
|
| 26 |
async def search(tokens: list, query: str, dorks: list, per_page: int = 5):
    """Async generator: run GitHub code-search dorks and stream findings.

    For each dork, queries GitHub's code-search API with ``query + dork``,
    fetches the top ``per_page`` matching files, scans each file's decoded
    content against SECRET_PATTERNS, and yields human-readable status,
    warning, error, and "[+] FOUND" lines as they happen.

    Args:
        tokens: GitHub API tokens, rotated round-robin across requests.
        query: Base search query prepended to every dork.
        dorks: Search fragments, one code-search request each.
        per_page: Max files fetched per dork (default 5, as before).

    Yields:
        str: progress / result / error messages, in real time.
    """
    if not tokens:
        yield "[ERROR] No GitHub tokens provided."
        return

    token_cycler = cycle(tokens)
    headers = {"Accept": "application/vnd.github.v3+json"}

    # Compile every pattern once per call instead of re-scanning the dict
    # and recompiling on every file.  NOTE: re.MULTILINE only changes ^/$
    # anchoring — it does NOT let a match span lines; none of these
    # patterns need to cross newlines, so it is kept for compatibility.
    compiled_patterns = {
        label: re.compile(pattern, re.MULTILINE)
        for label, pattern in SECRET_PATTERNS.items()
    }

    async with httpx.AsyncClient(timeout=30.0) as client:
        for i, dork in enumerate(dorks):
            full_query = f"{query} {dork}"
            search_url = f"{GITHUB_API_URL}?q={urlencode(full_query)}&per_page={per_page}"

            yield f"[INFO] [{i+1}/{len(dorks)}] Searching with dork: {dork}"

            try:
                # Rotate tokens so no single token absorbs the whole rate limit.
                headers["Authorization"] = f"token {next(token_cycler)}"

                search_res = await client.get(search_url, headers=headers)
                if search_res.status_code == 403:  # rate limit (per GitHub API)
                    yield "[WARN] Rate limit hit. Sleeping for 60s..."
                    await asyncio.sleep(60)
                    # Single retry; a second 403 falls through to
                    # raise_for_status() and is reported as an error below.
                    search_res = await client.get(search_url, headers=headers)
                search_res.raise_for_status()

                search_data = search_res.json()
                if not search_data.get("items"):
                    yield f"[-] No files found for: {dork}"
                    await asyncio.sleep(2.1)  # still respect the search rate limit
                    continue

                # Stage 2: fetch and scan the content of each found file.
                for item in search_data["items"]:
                    file_api_url = item.get("url")
                    if not file_api_url:
                        continue

                    file_res = await client.get(file_api_url, headers=headers)
                    if file_res.status_code != 200:
                        continue  # skip files we cannot fetch

                    content_b64 = file_res.json().get("content")
                    if not content_b64:
                        continue

                    try:
                        decoded_content = base64.b64decode(content_b64).decode("utf-8")
                    except Exception:
                        continue  # skip binary / non-UTF-8 / malformed-base64 files

                    for secret_type, pattern in compiled_patterns.items():
                        for match in pattern.finditer(decoded_content):
                            # Report only the first line of the match for readability.
                            found_secret_snippet = match.group(0).strip().split('\n')[0]
                            yield f"[+] FOUND [{secret_type}] in {item['html_url']}: {found_secret_snippet}..."

            except httpx.HTTPStatusError as e:
                yield f"[ERROR] HTTP Error for '{dork}': {e.response.status_code}"
            except Exception as e:
                yield f"[ERROR] Unexpected error for '{dork}': {str(e)}"

            # Main sleep to respect the code-search API rate limit (30/min).
            await asyncio.sleep(2.1)
|