bharatgraph / api /dependencies.py
abinazebinoy's picture
fix(H-06): add lockless pre-check in get_driver() -- prevents lock contention serializing all requests under load
45f51d2
Raw
History Blame Contribute Delete
3.71 kB
"""
BharatGraph API Dependencies
BUG-20 FIX: verify_connectivity() was called on EVERY single API request.
With 50 concurrent cold-start requests this creates a connection storm where
50 simultaneous verify_connectivity() calls hit Neo4j AuraDB, which rate-limits
verification and causes cascading 503 errors.
Fix: TTL-based health cache -- only re-verify if more than 30 seconds have
elapsed since the last successful verification. A threading.Lock ensures
only one reconnect attempt runs at a time.
"""
import os
import time
import threading
from dotenv import load_dotenv
from loguru import logger
from neo4j import GraphDatabase
from neo4j.exceptions import ServiceUnavailable, AuthError
load_dotenv()
# Process-wide Neo4j driver shared by every request. None until get_driver()
# connects successfully; reset to None on connection failure or close_driver().
_driver = None
# time.monotonic() reading taken right after the last successful
# verify_connectivity() call; compared against _VERIFY_TTL in get_driver().
_last_verified_at = 0.0
_VERIFY_TTL = 30.0 # seconds a successful verification is trusted before re-checking
# Held by get_driver() while verifying/reconnecting and by close_driver()
# while tearing the driver down.
_driver_lock = threading.Lock()
def get_driver():
    """Return the shared Neo4j driver, connecting or re-verifying as needed.

    Connection settings are read from the environment on every call:
    ``NEO4J_URI`` (required), ``NEO4J_USER`` (default ``"neo4j"``) and
    ``NEO4J_PASSWORD``.

    A successful ``verify_connectivity()`` is trusted for ``_VERIFY_TTL``
    seconds. Within that window the cached driver is returned without taking
    ``_driver_lock``; once it expires, one thread at a time re-verifies the
    cached driver and, if that fails, replaces it with a new one.

    Returns:
        The verified driver, or ``None`` when ``NEO4J_URI`` is unset or the
        database cannot be reached / authenticated against. Connection errors
        are logged, never raised.
    """
    global _driver, _last_verified_at

    uri = os.getenv("NEO4J_URI", "")
    user = os.getenv("NEO4J_USER", "neo4j")
    pwd = os.getenv("NEO4J_PASSWORD", "")

    if not uri:
        logger.warning("[API] NEO4J_URI not set -- running without database")
        return None

    # H-06 FIX: lockless fast path -- if the driver is valid and the TTL is
    # fresh, return immediately without acquiring the lock. The lock is only
    # needed for (rare) re-verification / reconnection.
    # The global is read exactly once into a local so that a concurrent
    # close_driver() cannot turn it into None between the check and the return.
    cached = _driver
    if cached is not None and (time.monotonic() - _last_verified_at) < _VERIFY_TTL:
        return cached

    with _driver_lock:
        # Re-check inside the lock in case another thread just reconnected.
        if _driver is not None and (time.monotonic() - _last_verified_at) < _VERIFY_TTL:
            return _driver

        # TTL expired: re-verify the cached driver before throwing it away.
        if _driver is not None:
            try:
                _driver.verify_connectivity()
            except Exception as e:
                logger.warning(
                    f"[API] Cached Neo4j driver dead ({type(e).__name__}), reconnecting..."
                )
                # Unpublish first, then close, so no reader is handed a
                # driver that is in the middle of being closed.
                dead = _driver
                _driver = None
                try:
                    dead.close()
                except Exception as close_err:
                    # Best effort: the driver is being discarded anyway.
                    logger.debug(
                        f"[API] Ignoring error while closing dead Neo4j driver: {close_err}"
                    )
            else:
                _last_verified_at = time.monotonic()
                return _driver

        # First call, or the cached driver was dead: build a replacement.
        # It is kept in a local until verified so that lockless readers never
        # observe an unverified driver through the module global.
        candidate = None
        try:
            candidate = GraphDatabase.driver(uri, auth=(user, pwd))
            candidate.verify_connectivity()
        except AuthError as e:
            logger.error(f"[API] Neo4j auth failed -- check NEO4J_USER/NEO4J_PASSWORD: {e}")
        except ServiceUnavailable as e:
            logger.error(f"[API] Neo4j service unavailable: {e}")
        except Exception as e:
            logger.error(f"[API] Neo4j connection failed: {type(e).__name__}: {e}")
        else:
            _last_verified_at = time.monotonic()
            _driver = candidate
            logger.success(f"[API] Neo4j connected: {uri[:30]}...")
            return _driver

        # Connection attempt failed. Release the half-initialised driver
        # instead of just dropping the reference, otherwise every failed
        # attempt leaks its connection pool.
        if candidate is not None:
            try:
                candidate.close()
            except Exception as close_err:
                logger.debug(
                    f"[API] Ignoring error while closing unverified Neo4j driver: {close_err}"
                )
        _driver = None
        return None
def close_driver():
    """Close and forget the shared Neo4j driver, if one is open.

    Runs under ``_driver_lock`` so it cannot interleave with a reconnect in
    ``get_driver()``. Errors raised by ``close()`` are logged and otherwise
    ignored; the cached driver is always cleared. Calling this when no driver
    is cached is a no-op.
    """
    global _driver, _last_verified_at
    with _driver_lock:
        stale = _driver
        _driver = None
        # Reset the verification stamp to its initial value so the TTL check
        # in get_driver() cannot treat a later, not-yet-verified driver as
        # freshly verified just because this one was.
        _last_verified_at = 0.0
        if stale is None:
            return
        try:
            stale.close()
        except Exception as e:
            # Best effort: shutdown must not fail because close() did.
            logger.warning(
                f"[API] Error while closing Neo4j driver ({type(e).__name__}): {e}"
            )
def get_db():
    """FastAPI dependency: hand back the shared driver or fail with HTTP 503.

    Returns:
        The driver obtained from ``get_driver()``.

    Raises:
        fastapi.HTTPException: status 503 when ``get_driver()`` returns
            ``None`` (no database configured or reachable).
    """
    # Imported lazily, before the driver lookup, exactly as the module did.
    from fastapi import HTTPException

    unavailable_detail = (
        "Graph database unavailable. "
        "Check NEO4J_URI and NEO4J_PASSWORD in environment secrets."
    )

    live_driver = get_driver()
    if live_driver is not None:
        return live_driver

    raise HTTPException(status_code=503, detail=unavailable_detail)