HuB committed on
Commit ·
559d8c0
1
Parent(s): 7061bd5
Enhance blacklist check with PhishTank and implement robust logging system
Browse files
- app.py +5 -0
- checkers/blacklist_check.py +23 -0
- config/logging.py +28 -0
app.py
CHANGED
|
@@ -5,20 +5,25 @@ from fastapi.staticfiles import StaticFiles
|
|
| 5 |
from fastapi.responses import FileResponse
|
| 6 |
from contextlib import asynccontextmanager
|
| 7 |
|
|
|
|
| 8 |
from storage.db import init_db
|
| 9 |
from scheduler.runner import start_scheduler
|
| 10 |
from api.routes import router
|
| 11 |
from config.settings import HOST, PORT
|
| 12 |
|
|
|
|
| 13 |
|
| 14 |
@asynccontextmanager
|
| 15 |
async def lifespan(app: FastAPI):
|
| 16 |
# Startup
|
|
|
|
| 17 |
init_db()
|
| 18 |
start_scheduler()
|
| 19 |
os.makedirs("screenshots", exist_ok=True)
|
| 20 |
os.makedirs("recordings", exist_ok=True)
|
|
|
|
| 21 |
yield
|
|
|
|
| 22 |
# Shutdown (nothing needed)
|
| 23 |
|
| 24 |
|
|
|
|
| 5 |
from fastapi.responses import FileResponse
|
| 6 |
from contextlib import asynccontextmanager
|
| 7 |
|
| 8 |
+
from config.logging import get_logger
|
| 9 |
from storage.db import init_db
|
| 10 |
from scheduler.runner import start_scheduler
|
| 11 |
from api.routes import router
|
| 12 |
from config.settings import HOST, PORT
|
| 13 |
|
| 14 |
+
logger = get_logger("main")
|
| 15 |
|
| 16 |
@asynccontextmanager
|
| 17 |
async def lifespan(app: FastAPI):
|
| 18 |
# Startup
|
| 19 |
+
logger.info("Starting WebGuard application...")
|
| 20 |
init_db()
|
| 21 |
start_scheduler()
|
| 22 |
os.makedirs("screenshots", exist_ok=True)
|
| 23 |
os.makedirs("recordings", exist_ok=True)
|
| 24 |
+
logger.info("WebGuard startup complete.")
|
| 25 |
yield
|
| 26 |
+
logger.info("Shutting down WebGuard...")
|
| 27 |
# Shutdown (nothing needed)
|
| 28 |
|
| 29 |
|
checkers/blacklist_check.py
CHANGED
|
@@ -1,7 +1,9 @@
|
|
| 1 |
import httpx
|
| 2 |
import json
|
| 3 |
from config.settings import GOOGLE_API_KEY
|
|
|
|
| 4 |
|
|
|
|
| 5 |
|
| 6 |
KNOWN_BLACKLISTS = [
|
| 7 |
"https://raw.githubusercontent.com/nickspaargaren/no-google/master/categories/malware.txt",
|
|
@@ -9,14 +11,34 @@ KNOWN_BLACKLISTS = [
|
|
| 9 |
|
| 10 |
|
| 11 |
async def run(url: str) -> dict:
|
|
|
|
| 12 |
if not url.startswith("http"):
|
| 13 |
url = "https://" + url
|
| 14 |
|
| 15 |
results = {}
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
# ── Google Safe Browsing ─────────────────────────────────────────────────
|
| 18 |
if GOOGLE_API_KEY:
|
| 19 |
try:
|
|
|
|
| 20 |
payload = {
|
| 21 |
"client": {"clientId": "webguard", "clientVersion": "1.0"},
|
| 22 |
"threatInfo": {
|
|
@@ -39,6 +61,7 @@ async def run(url: str) -> dict:
|
|
| 39 |
"note": "Threats detected!" if matches else "Clean",
|
| 40 |
}
|
| 41 |
except Exception as e:
|
|
|
|
| 42 |
results["google_safe_browsing"] = {"status": "info", "error": str(e)}
|
| 43 |
else:
|
| 44 |
results["google_safe_browsing"] = {
|
|
|
|
| 1 |
import httpx
|
| 2 |
import json
|
| 3 |
from config.settings import GOOGLE_API_KEY
|
| 4 |
+
from config.logging import get_logger
|
| 5 |
|
| 6 |
+
logger = get_logger("checkers.blacklist")
|
| 7 |
|
| 8 |
KNOWN_BLACKLISTS = [
|
| 9 |
"https://raw.githubusercontent.com/nickspaargaren/no-google/master/categories/malware.txt",
|
|
|
|
| 11 |
|
| 12 |
|
| 13 |
async def run(url: str) -> dict:
|
| 14 |
+
logger.info(f"Running blacklist check for: {url}")
|
| 15 |
if not url.startswith("http"):
|
| 16 |
url = "https://" + url
|
| 17 |
|
| 18 |
results = {}
|
| 19 |
|
| 20 |
+
# ── PhishTank (Open Source / Community) ──────────────────────────────────
|
| 21 |
+
try:
|
| 22 |
+
logger.debug("Checking PhishTank...")
|
| 23 |
+
async with httpx.AsyncClient(timeout=10) as client:
|
| 24 |
+
# Note: This is a simplified check. Real usage might need an API key for higher limits.
|
| 25 |
+
p_res = await client.post("https://checkurl.phishtank.com/checkurl/", data={"url": url, "format": "json"})
|
| 26 |
+
if p_res.status_code == 200:
|
| 27 |
+
p_data = p_res.json()
|
| 28 |
+
in_database = p_data.get("results", {}).get("in_database", False)
|
| 29 |
+
results["phishtank"] = {
|
| 30 |
+
"status": "error" if in_database else "ok",
|
| 31 |
+
"in_database": in_database,
|
| 32 |
+
"note": "Verified Phish detected" if in_database else "Not found in PhishTank",
|
| 33 |
+
}
|
| 34 |
+
except Exception as e:
|
| 35 |
+
logger.error(f"PhishTank check failed: {e}")
|
| 36 |
+
results["phishtank"] = {"status": "info", "error": str(e)}
|
| 37 |
+
|
| 38 |
# ── Google Safe Browsing ─────────────────────────────────────────────────
|
| 39 |
if GOOGLE_API_KEY:
|
| 40 |
try:
|
| 41 |
+
logger.debug("Checking Google Safe Browsing...")
|
| 42 |
payload = {
|
| 43 |
"client": {"clientId": "webguard", "clientVersion": "1.0"},
|
| 44 |
"threatInfo": {
|
|
|
|
| 61 |
"note": "Threats detected!" if matches else "Clean",
|
| 62 |
}
|
| 63 |
except Exception as e:
|
| 64 |
+
logger.error(f"Google Safe Browsing failed: {e}")
|
| 65 |
results["google_safe_browsing"] = {"status": "info", "error": str(e)}
|
| 66 |
else:
|
| 67 |
results["google_safe_browsing"] = {
|
config/logging.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
import sys
|
| 3 |
+
import os
|
| 4 |
+
|
| 5 |
+
# Create a custom logger
|
| 6 |
+
logger = logging.getLogger("webguard")
|
| 7 |
+
logger.setLevel(logging.DEBUG)
|
| 8 |
+
|
| 9 |
+
# Create handlers
|
| 10 |
+
c_handler = logging.StreamHandler(sys.stdout)
|
| 11 |
+
f_handler = logging.FileHandler("webguard.log", encoding="utf-8")
|
| 12 |
+
|
| 13 |
+
c_handler.setLevel(logging.INFO)
|
| 14 |
+
f_handler.setLevel(logging.DEBUG)
|
| 15 |
+
|
| 16 |
+
# Create formatters and add it to handlers
|
| 17 |
+
c_format = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
|
| 18 |
+
f_format = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s')
|
| 19 |
+
|
| 20 |
+
c_handler.setFormatter(c_format)
|
| 21 |
+
f_handler.setFormatter(f_format)
|
| 22 |
+
|
| 23 |
+
# Add handlers to the logger
|
| 24 |
+
logger.addHandler(c_handler)
|
| 25 |
+
logger.addHandler(f_handler)
|
| 26 |
+
|
| 27 |
+
def get_logger(module_name):
|
| 28 |
+
return logging.getLogger(f"webguard.{module_name}")
|