# QuerySearcher / test/db_debug.py
# LiamKhoaLe's picture
# Upd pg search method with hardcode url technique
# 0d7e5cb
"""
Quick CLI helper to peek at MongoDB + GridFS.

Usage:
    python test/db_debug.py                      # list every document
    python test/db_debug.py --doc <document_id>  # inspect a single document
"""
import argparse, os, json, pprint
from pymongo import MongoClient
from gridfs import GridFSBucket
# Connection settings come from the environment. MONGODB_URI has no fallback,
# so MONGO_URI is None when the variable is unset.
# NOTE(review): MongoClient presumably falls back to its own default host in
# that case -- confirm against the PyMongo docs.
MONGO_URI = os.getenv("MONGODB_URI")
# Database name; falls back to "querysearcher" when MONGODB_DB is unset.
MONGO_DB_NAME = os.getenv("MONGODB_DB", "querysearcher")
# Module-level handles shared by the helpers below; they are created as soon
# as this file is imported or run.
client = MongoClient(MONGO_URI)
db = client[MONGO_DB_NAME]
# GridFS bucket on the same database; doc_detail uses it to look for PDFs.
fs = GridFSBucket(db)
def list_documents():
    """Print one summary line per record in the ``documents`` collection.

    Each line has the form ``<_id> | <title padded to 60 columns> | <status>``.
    Records that lack a ``title`` or ``status`` field, or that store a
    non-string title, are still listed (with a placeholder or the value's
    ``str()`` form) instead of aborting the listing with ``KeyError`` or
    ``TypeError``.
    """
    for doc in db.documents.find():
        # str() keeps the "<60" padding valid for None / non-string titles;
        # .get() keeps a debugging listing alive when a field is absent.
        title = str(doc.get("title", "<no title>"))
        status = doc.get("status", "<no status>")
        print(f"{doc['_id']} | {title:<60} | {status}")
def doc_detail(doc_id):
    """Print a detailed report for a single document.

    The report contains the pretty-printed ``documents`` record whose ``_id``
    equals *doc_id*, the number of ``embeddings`` records whose
    ``document_id`` equals *doc_id*, and whether GridFS holds a file named
    ``<doc_id>.pdf``.

    Args:
        doc_id: Identifier to look up. The CLI passes the ``--doc`` string
            through unchanged.
    """
    # Local import: NoFile is only needed for the GridFS check below.
    from gridfs.errors import NoFile

    # NOTE(review): doc_id is matched as-is. If this collection stores
    # ObjectId ``_id`` values, a plain string from the CLI will never match --
    # confirm how ids are stored.
    doc = db.documents.find_one({"_id": doc_id})
    if not doc:
        print("No such doc.")
        return
    pprint.pp(doc)

    # Number of embedding chunks stored for this document.
    cnt = db.embeddings.count_documents({"document_id": doc_id})
    print(f"Embeddings chunks: {cnt}")

    # Confirm the PDF exists by opening (and immediately closing) a download
    # stream for it.
    try:
        stream = fs.open_download_stream_by_name(f"{doc_id}.pdf")
    except NoFile:
        print("⚠️ PDF missing in GridFS")
    except Exception as exc:
        # Any other failure (network, auth, ...) is not evidence that the
        # file is absent, so report it separately instead of as "missing".
        # KeyboardInterrupt / SystemExit are deliberately not caught.
        print(f"⚠️ Could not check GridFS for PDF: {exc!r}")
    else:
        stream.close()
        print("✅ PDF present in GridFS")
if __name__ == "__main__":
    # Script entry point: with --doc, show that one document's details;
    # without it, list every document.
    cli = argparse.ArgumentParser()
    cli.add_argument("--doc", help="document_id to inspect")
    requested_id = cli.parse_args().doc
    if not requested_id:
        list_documents()
    else:
        doc_detail(requested_id)