Spaces:
Sleeping
Sleeping
File size: 1,294 Bytes
0d7e5cb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
"""
Quick CLI helper to peek at MongoDB + GridFS.
Usage:
python test/db_debug.py
or python test/db_debug.py --doc <document_id>
"""
import argparse, os, json, pprint
from pymongo import MongoClient
from gridfs import GridFSBucket
# Connection settings are read from the environment. MONGODB_URI has no
# fallback here, so MONGO_URI is None when the variable is unset.
# NOTE(review): MongoClient(None) presumably falls back to the driver's
# default host — confirm that is the intended behaviour.
MONGO_URI = os.getenv("MONGODB_URI")
# Database name; falls back to "querysearcher" when MONGODB_DB is unset.
MONGO_DB_NAME = os.getenv("MONGODB_DB", "querysearcher")
# Module-level handles shared by list_documents() and doc_detail().
client = MongoClient(MONGO_URI)
db = client[MONGO_DB_NAME]
# GridFS bucket on the same database, constructed with default arguments.
fs = GridFSBucket(db)
def list_documents():
    """Print one summary line per document in the ``documents`` collection.

    Each line has the form ``<_id> | <title, left-aligned to 60 columns> |
    <status>``. A document that lacks ``title`` or ``status`` is still listed,
    with a placeholder in the missing column, instead of aborting the whole
    listing with a ``KeyError``.
    """
    for doc in db.documents.find():
        # str() keeps the "<60" width spec valid even when the stored title is
        # None or another type that rejects string format specs.
        title = str(doc.get("title", "<no title>"))
        status = doc.get("status", "<no status>")
        print(f"{doc['_id']} | {title:<60} | {status}")
def doc_detail(doc_id):
    """Print a detailed report for a single document.

    The report consists of the stored document itself, the number of entries
    in the ``embeddings`` collection whose ``document_id`` equals *doc_id*,
    and whether a GridFS file named ``"<doc_id>.pdf"`` can be opened.

    Args:
        doc_id: Identifier matched verbatim against ``_id`` in ``documents``.
            NOTE(review): the CLI passes a plain string, so this only matches
            if ids are stored as strings — confirm against the writer side.

    Returns:
        None. Prints "No such doc." and returns early when nothing matches.

    Raises:
        Any driver error other than a missing GridFS file propagates to the
        caller instead of being reported as a missing PDF.
    """
    # Local import keeps this function self-contained; gridfs is already a
    # dependency of this file.
    from gridfs.errors import NoFile

    doc = db.documents.find_one({"_id": doc_id})
    if not doc:
        print("No such doc.")
        return
    pprint.pp(doc)

    # Number of embedding chunks stored for this document.
    cnt = db.embeddings.count_documents({"document_id": doc_id})
    print(f"Embeddings chunks: {cnt}")

    # Confirm the PDF exists by opening (and immediately closing) a download
    # stream. Only NoFile means "missing"; a bare except would also hide
    # connection failures and swallow KeyboardInterrupt.
    try:
        stream = fs.open_download_stream_by_name(f"{doc_id}.pdf")
    except NoFile:
        print("⚠️ PDF missing in GridFS")
    else:
        stream.close()
        print("✅ PDF present in GridFS")
if __name__ == "__main__":
    # Script entry point: with --doc, show that document's details;
    # without it (or with an empty value), list every document.
    cli = argparse.ArgumentParser()
    cli.add_argument("--doc", help="document_id to inspect")
    requested_id = cli.parse_args().doc
    if not requested_id:
        list_documents()
    else:
        doc_detail(requested_id)
|