HuB committed on
Commit
559d8c0
·
1 Parent(s): 7061bd5

Enhance blacklist check with PhishTank and implement robust logging system

Browse files
Files changed (3) hide show
  1. app.py +5 -0
  2. checkers/blacklist_check.py +23 -0
  3. config/logging.py +28 -0
app.py CHANGED
@@ -5,20 +5,25 @@ from fastapi.staticfiles import StaticFiles
5
  from fastapi.responses import FileResponse
6
  from contextlib import asynccontextmanager
7
 
 
8
  from storage.db import init_db
9
  from scheduler.runner import start_scheduler
10
  from api.routes import router
11
  from config.settings import HOST, PORT
12
 
 
13
 
14
  @asynccontextmanager
15
  async def lifespan(app: FastAPI):
16
  # Startup
 
17
  init_db()
18
  start_scheduler()
19
  os.makedirs("screenshots", exist_ok=True)
20
  os.makedirs("recordings", exist_ok=True)
 
21
  yield
 
22
  # Shutdown (nothing needed)
23
 
24
 
 
5
  from fastapi.responses import FileResponse
6
  from contextlib import asynccontextmanager
7
 
8
+ from config.logging import get_logger
9
  from storage.db import init_db
10
  from scheduler.runner import start_scheduler
11
  from api.routes import router
12
  from config.settings import HOST, PORT
13
 
14
+ logger = get_logger("main")
15
 
16
  @asynccontextmanager
17
  async def lifespan(app: FastAPI):
18
  # Startup
19
+ logger.info("Starting WebGuard application...")
20
  init_db()
21
  start_scheduler()
22
  os.makedirs("screenshots", exist_ok=True)
23
  os.makedirs("recordings", exist_ok=True)
24
+ logger.info("WebGuard startup complete.")
25
  yield
26
+ logger.info("Shutting down WebGuard...")
27
  # Shutdown (nothing needed)
28
 
29
 
checkers/blacklist_check.py CHANGED
@@ -1,7 +1,9 @@
1
  import httpx
2
  import json
3
  from config.settings import GOOGLE_API_KEY
 
4
 
 
5
 
6
  KNOWN_BLACKLISTS = [
7
  "https://raw.githubusercontent.com/nickspaargaren/no-google/master/categories/malware.txt",
@@ -9,14 +11,34 @@ KNOWN_BLACKLISTS = [
9
 
10
 
11
  async def run(url: str) -> dict:
 
12
  if not url.startswith("http"):
13
  url = "https://" + url
14
 
15
  results = {}
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  # ── Google Safe Browsing ─────────────────────────────────────────────────
18
  if GOOGLE_API_KEY:
19
  try:
 
20
  payload = {
21
  "client": {"clientId": "webguard", "clientVersion": "1.0"},
22
  "threatInfo": {
@@ -39,6 +61,7 @@ async def run(url: str) -> dict:
39
  "note": "Threats detected!" if matches else "Clean",
40
  }
41
  except Exception as e:
 
42
  results["google_safe_browsing"] = {"status": "info", "error": str(e)}
43
  else:
44
  results["google_safe_browsing"] = {
 
1
  import httpx
2
  import json
3
  from config.settings import GOOGLE_API_KEY
4
+ from config.logging import get_logger
5
 
6
+ logger = get_logger("checkers.blacklist")
7
 
8
  KNOWN_BLACKLISTS = [
9
  "https://raw.githubusercontent.com/nickspaargaren/no-google/master/categories/malware.txt",
 
11
 
12
 
13
  async def run(url: str) -> dict:
14
+ logger.info(f"Running blacklist check for: {url}")
15
  if not url.startswith("http"):
16
  url = "https://" + url
17
 
18
  results = {}
19
 
20
+ # ── PhishTank (Open Source / Community) ──────────────────────────────────
21
+ try:
22
+ logger.debug("Checking PhishTank...")
23
+ async with httpx.AsyncClient(timeout=10) as client:
24
+ # Note: This is a simplified check. Real usage might need an API key for higher limits.
25
+ p_res = await client.post("https://checkurl.phishtank.com/checkurl/", data={"url": url, "format": "json"})
26
+ if p_res.status_code == 200:
27
+ p_data = p_res.json()
28
+ in_database = p_data.get("results", {}).get("in_database", False)
29
+ results["phishtank"] = {
30
+ "status": "error" if in_database else "ok",
31
+ "in_database": in_database,
32
+ "note": "Verified Phish detected" if in_database else "Not found in PhishTank",
33
+ }
34
+ except Exception as e:
35
+ logger.error(f"PhishTank check failed: {e}")
36
+ results["phishtank"] = {"status": "info", "error": str(e)}
37
+
38
  # ── Google Safe Browsing ─────────────────────────────────────────────────
39
  if GOOGLE_API_KEY:
40
  try:
41
+ logger.debug("Checking Google Safe Browsing...")
42
  payload = {
43
  "client": {"clientId": "webguard", "clientVersion": "1.0"},
44
  "threatInfo": {
 
61
  "note": "Threats detected!" if matches else "Clean",
62
  }
63
  except Exception as e:
64
+ logger.error(f"Google Safe Browsing failed: {e}")
65
  results["google_safe_browsing"] = {"status": "info", "error": str(e)}
66
  else:
67
  results["google_safe_browsing"] = {
config/logging.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Central logging configuration for WebGuard.

Importing this module configures the ``"webguard"`` base logger with two
handlers:

* a console handler writing INFO and above to ``sys.stdout``;
* a file handler writing DEBUG and above to ``webguard.log`` (UTF-8,
  path relative to the current working directory), including the source
  file name and line number of each record.

Other modules obtain a child of that logger through :func:`get_logger`.
"""
import logging
import sys
import os  # not used here; retained so the module's import surface is unchanged

# Base logger. Loggers returned by get_logger() are named "webguard.<name>",
# so their records propagate up to the handlers attached here.
logger = logging.getLogger("webguard")
logger.setLevel(logging.DEBUG)

# Attach the handlers only once. logging.getLogger() returns the same
# process-wide object every time, so if this module body runs again (for
# example via importlib.reload, or when the module is loaded under a second
# import path) an unguarded addHandler() would emit every record twice and
# open a second handle on the log file.
if not logger.handlers:
    # Create handlers
    c_handler = logging.StreamHandler(sys.stdout)
    f_handler = logging.FileHandler("webguard.log", encoding="utf-8")

    # Console shows INFO and above; the file additionally keeps DEBUG records.
    c_handler.setLevel(logging.INFO)
    f_handler.setLevel(logging.DEBUG)

    # Create formatters and add them to the handlers
    c_format = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    f_format = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s')

    c_handler.setFormatter(c_format)
    f_handler.setFormatter(f_format)

    # Add handlers to the logger
    logger.addHandler(c_handler)
    logger.addHandler(f_handler)


def get_logger(module_name=None):
    """Return the logger named ``webguard.<module_name>``.

    Args:
        module_name: Dotted suffix identifying the caller, e.g. ``"main"``
            or ``"checkers.blacklist"``. When empty or ``None`` the base
            ``"webguard"`` logger is returned instead of creating a logger
            with the malformed name ``"webguard."``.

    Returns:
        The ``logging.Logger`` for the requested name.
    """
    if not module_name:
        return logger
    return logging.getLogger(f"webguard.{module_name}")