""" setup_db.py ----------- Run once to create all MongoDB collections, validators, and indexes for the FactCheck AI project on Atlas. Usage: python setup_db.py """ import os import sys from dotenv import load_dotenv from pymongo import MongoClient, ASCENDING, DESCENDING from pymongo.errors import CollectionInvalid load_dotenv() MONGO_URI = os.getenv("MONGO_URI") MONGO_DB = os.getenv("MONGO_DB_NAME", "factcheck") if not MONGO_URI: sys.exit("[ERROR] MONGO_URI not found in .env") print(f"[DB] Connecting to Atlas … db={MONGO_DB}") client = MongoClient(MONGO_URI, serverSelectionTimeoutMS=8000) db = client[MONGO_DB] # ── helper ────────────────────────────────────────────────────────────────────── def create_or_update(name: str, validator: dict): existing = db.list_collection_names() if name not in existing: db.create_collection(name, validator=validator) print(f" [+] created '{name}'") else: db.command("collMod", name, validator=validator) print(f" [~] updated '{name}' validator") # ════════════════════════════════════════════════════════════════════════════════ # 1. users # ════════════════════════════════════════════════════════════════════════════════ create_or_update("users", { "$jsonSchema": { "bsonType": "object", "required": ["username", "email", "password_hash", "created_at"], "properties": { "username": {"bsonType": "string", "minLength": 1, "maxLength": 80}, "email": {"bsonType": "string", "minLength": 3, "maxLength": 254}, "password_hash": {"bsonType": "string"}, "created_at": {"bsonType": "date"}, } } }) db.users.create_index([("email", ASCENDING)], unique=True, background=True, name="email_unique") db.users.create_index([("username", ASCENDING)], background=True, name="username_idx") print(" indexes: email (unique), username") # ════════════════════════════════════════════════════════════════════════════════ # 2. history # ════════════════════════════════════════════════════════════════════════════════ create_or_update("history", { "$jsonSchema": { "bsonType": "object", "required": ["user_id", "claim", "verdict", "confidence", "evidence_count", "created_at"], "properties": { "user_id": {"bsonType": "string"}, "claim": {"bsonType": "string", "minLength": 1}, "verdict": {"bsonType": "string"}, "confidence": {"bsonType": "double", "minimum": 0, "maximum": 1}, "evidence_count": {"bsonType": "int", "minimum": 0}, "created_at": {"bsonType": "date"}, } } }) db.history.create_index( [("user_id", ASCENDING), ("created_at", DESCENDING)], background=True, name="user_history_idx" ) print(" indexes: user_id + created_at") # ════════════════════════════════════════════════════════════════════════════════ # 3. evidence (TTL: auto-delete after 30 days) # ════════════════════════════════════════════════════════════════════════════════ create_or_update("evidence", { "$jsonSchema": { "bsonType": "object", "required": ["text", "source", "created_at"], "properties": { "text": {"bsonType": "string", "minLength": 1}, "source": {"bsonType": "string"}, "created_at": {"bsonType": "date"}, } } }) db.evidence.create_index( [("created_at", ASCENDING)], expireAfterSeconds=30 * 24 * 3600, background=True, name="evidence_ttl" ) db.evidence.create_index([("source", ASCENDING)], background=True, name="source_idx") print(" indexes: created_at (TTL 30d), source") # ════════════════════════════════════════════════════════════════════════════════ # 4. revoked_tokens (JWT blocklist, TTL = token exp) # ════════════════════════════════════════════════════════════════════════════════ create_or_update("revoked_tokens", { "$jsonSchema": { "bsonType": "object", "required": ["jti", "exp", "revoked_at"], "properties": { "jti": {"bsonType": "string"}, "exp": {"bsonType": "date"}, "revoked_at": {"bsonType": "date"}, } } }) db.revoked_tokens.create_index( [("exp", ASCENDING)], expireAfterSeconds=0, # removed the moment exp is reached background=True, name="token_ttl" ) db.revoked_tokens.create_index([("jti", ASCENDING)], unique=True, background=True, name="jti_unique") print(" indexes: exp (TTL), jti (unique)") # ════════════════════════════════════════════════════════════════════════════════ print("\n[DB] All collections and indexes are ready on Atlas! OK") client.close()