Spaces:
Running
Running
| """ | |
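setup_db.py
-----------
Run once to create all MongoDB collections, validators, and indexes
for the FactCheck AI project on Atlas. Running it again updates the
validator of every collection that already exists.

Configuration is read from the environment (loaded from .env):
    MONGO_URI      connection string (required)
    MONGO_DB_NAME  database name (optional, defaults to "factcheck")

Usage:
    python setup_db.py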
| """ | |
import os
import sys

from dotenv import load_dotenv
from pymongo import MongoClient, ASCENDING, DESCENDING
from pymongo.errors import CollectionInvalid, PyMongoError
# Load variables from a local .env file into the process environment.
load_dotenv()

MONGO_URI = os.getenv("MONGO_URI")
# `or` (rather than a getenv default) also covers an empty `MONGO_DB_NAME=`
# entry, which would otherwise produce an invalid, empty database name.
MONGO_DB = os.getenv("MONGO_DB_NAME") or "factcheck"

if not MONGO_URI:
    sys.exit("[ERROR] MONGO_URI not found in .env")

print(f"[DB] Connecting to Atlas … db={MONGO_DB}")
client = MongoClient(MONGO_URI, serverSelectionTimeoutMS=8000)

# MongoClient connects lazily: constructing it never fails on an unreachable
# cluster or bad credentials. Force one cheap round trip here so that such
# problems are reported up front with a readable message instead of a
# traceback from the first collection operation.
try:
    client.admin.command("ping")
except PyMongoError as exc:
    client.close()
    sys.exit(f"[ERROR] Could not connect to MongoDB: {exc}")

db = client[MONGO_DB]
# ── helper ────────────────────────────────────────────────────────────────────
def create_or_update(name: str, validator: dict) -> None:
    """Create collection *name* with *validator*, or update it if it exists.

    Args:
        name: Name of the collection inside the module-level ``db``.
        validator: Validation document to attach to the collection
            (the callers in this file pass ``{"$jsonSchema": ...}``).

    A one-line status message is printed for whichever path was taken.
    Errors raised by the server (other than "already exists") propagate.
    """
    try:
        # create_collection performs its own existence check and raises
        # CollectionInvalid when the collection is already present, so a
        # separate list_collection_names() call beforehand is redundant.
        db.create_collection(name, validator=validator)
    except CollectionInvalid:
        # Collection already exists: replace its validator in place.
        db.command("collMod", name, validator=validator)
        print(f" [~] updated '{name}' validator")
    else:
        print(f" [+] created '{name}'")
# ──────────────────────────────────────────────────────────────────────────────
# 1. users
# ──────────────────────────────────────────────────────────────────────────────
# Schema for account documents: all four fields are mandatory, and the
# username / email strings are length-bounded.
users_schema = {
    "bsonType": "object",
    "required": ["username", "email", "password_hash", "created_at"],
    "properties": {
        "username": {"bsonType": "string", "minLength": 1, "maxLength": 80},
        "email": {"bsonType": "string", "minLength": 3, "maxLength": 254},
        "password_hash": {"bsonType": "string"},
        "created_at": {"bsonType": "date"},
    },
}
create_or_update("users", {"$jsonSchema": users_schema})

users = db["users"]
# The unique index rejects a second document with the same email value.
users.create_index(
    [("email", ASCENDING)],
    name="email_unique",
    unique=True,
    background=True,
)
users.create_index(
    [("username", ASCENDING)],
    name="username_idx",
    background=True,
)
print(" indexes: email (unique), username")
# ──────────────────────────────────────────────────────────────────────────────
# 2. history
# ──────────────────────────────────────────────────────────────────────────────
# Schema for one fact-check result stored per user.
# NOTE(review): "confidence" accepts only BSON doubles and "evidence_count"
# only 32-bit ints, so writers must store e.g. 1.0 rather than 1 for
# confidence — confirm the inserting code does.
history_schema = {
    "bsonType": "object",
    "required": [
        "user_id",
        "claim",
        "verdict",
        "confidence",
        "evidence_count",
        "created_at",
    ],
    "properties": {
        "user_id": {"bsonType": "string"},
        "claim": {"bsonType": "string", "minLength": 1},
        "verdict": {"bsonType": "string"},
        "confidence": {"bsonType": "double", "minimum": 0, "maximum": 1},
        "evidence_count": {"bsonType": "int", "minimum": 0},
        "created_at": {"bsonType": "date"},
    },
}
create_or_update("history", {"$jsonSchema": history_schema})

# Compound index: entries for one user, newest first.
user_history_keys = [("user_id", ASCENDING), ("created_at", DESCENDING)]
db["history"].create_index(
    user_history_keys,
    name="user_history_idx",
    background=True,
)
print(" indexes: user_id + created_at")
# ──────────────────────────────────────────────────────────────────────────────
# 3. evidence (TTL: auto-delete after 30 days)
# ──────────────────────────────────────────────────────────────────────────────
# Schema for stored evidence snippets: non-empty text, its source, and the
# insertion timestamp that drives the TTL index below.
evidence_schema = {
    "bsonType": "object",
    "required": ["text", "source", "created_at"],
    "properties": {
        "text": {"bsonType": "string", "minLength": 1},
        "source": {"bsonType": "string"},
        "created_at": {"bsonType": "date"},
    },
}
create_or_update("evidence", {"$jsonSchema": evidence_schema})

evidence = db["evidence"]
# TTL index: documents become eligible for deletion 30 days after created_at.
evidence_ttl_seconds = 30 * 24 * 3600
evidence.create_index(
    [("created_at", ASCENDING)],
    name="evidence_ttl",
    expireAfterSeconds=evidence_ttl_seconds,
    background=True,
)
evidence.create_index(
    [("source", ASCENDING)],
    name="source_idx",
    background=True,
)
print(" indexes: created_at (TTL 30d), source")
# ──────────────────────────────────────────────────────────────────────────────
# 4. revoked_tokens (JWT blocklist, TTL = token exp)
# ──────────────────────────────────────────────────────────────────────────────
# Schema for blocklisted tokens: the token id, its expiry, and when it was
# revoked. Both timestamps must be BSON dates.
revoked_tokens_schema = {
    "bsonType": "object",
    "required": ["jti", "exp", "revoked_at"],
    "properties": {
        "jti": {"bsonType": "string"},
        "exp": {"bsonType": "date"},
        "revoked_at": {"bsonType": "date"},
    },
}
create_or_update("revoked_tokens", {"$jsonSchema": revoked_tokens_schema})

revoked_tokens = db["revoked_tokens"]
# expireAfterSeconds=0 makes each document expire at its own `exp` value;
# the server's TTL task removes it shortly afterwards, not instantly.
revoked_tokens.create_index(
    [("exp", ASCENDING)],
    name="token_ttl",
    expireAfterSeconds=0,
    background=True,
)
# One blocklist entry per token id.
revoked_tokens.create_index(
    [("jti", ASCENDING)],
    name="jti_unique",
    unique=True,
    background=True,
)
print(" indexes: exp (TTL), jti (unique)")
# ──────────────────────────────────────────────────────────────────────────────
# All four collections are configured: report success, then release the
# client's connections. This point is only reached when no step above raised.
print("\n[DB] All collections and indexes are ready on Atlas! OK")
client.close()