File size: 1,294 Bytes
0d7e5cb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
"""
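Quick CLI helper to peek at MongoDB + GridFS.

Connection settings come from the MONGODB_URI and MONGODB_DB environment
variables (MONGODB_DB defaults to "querysearcher").

Usage:
    python test/db_debug.py                      # list every document
    python test/db_debug.py --doc <document_id>  # inspect a single document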
"""

import argparse, os, json, pprint
from pymongo import MongoClient
from gridfs import GridFSBucket

# Connection settings are read from the environment. MONGODB_URI has no
# fallback here, so MONGO_URI is None when that variable is unset.
MONGO_URI = os.environ.get("MONGODB_URI")
MONGO_DB_NAME = os.environ.get("MONGODB_DB", "querysearcher")

# Module-level handles shared by the helper functions in this script:
# the client, the selected database, and a GridFS bucket on that database.
client = MongoClient(host=MONGO_URI)
db = client.get_database(MONGO_DB_NAME)
fs = GridFSBucket(db)

def list_documents():
    """Print a one-line summary of every record in the ``documents`` collection.

    Each line shows the record's ``_id``, its ``title`` (left-aligned and
    padded to 60 characters) and its ``status``, separated by ``  |  ``.

    ``title`` and ``status`` are read with ``dict.get`` so that a record
    lacking either field is printed with a placeholder instead of aborting
    the whole listing with ``KeyError``.
    """
    for doc in db.documents.find():
        # str() protects the ``:<60`` format spec, which raises TypeError
        # for None and other values that do not support alignment.
        title = str(doc.get("title", "<no title>"))
        status = doc.get("status", "<no status>")
        print(f"{doc['_id']}  |  {title:<60}  |  {status}")
        
def doc_detail(doc_id):
    """Print one document record, its embedding-chunk count and its PDF status.

    Args:
        doc_id: Value matched against ``_id`` in the ``documents`` collection
            and against ``document_id`` in the ``embeddings`` collection. It
            is also used to build the GridFS filename ``"<doc_id>.pdf"``.

    Prints ``No such doc.`` and returns early when no record matches.
    A failed GridFS lookup is reported on stdout rather than raised, so the
    record and embedding count printed before it are never lost.
    """
    # Local import keeps this function self-contained; ``gridfs`` is already
    # a dependency of this script.
    from gridfs.errors import NoFile

    # NOTE(review): from the CLI ``doc_id`` is always a str; presumably
    # ``_id`` values are stored as strings too. Confirm, since ObjectId keys
    # would never match this filter.
    doc = db.documents.find_one({"_id": doc_id})
    if not doc:
        print("No such doc.")
        return
    pprint.pp(doc)

    # Number of embedding chunks that reference this document.
    cnt = db.embeddings.count_documents({"document_id": doc_id})
    print(f"Embeddings chunks: {cnt}")

    # Confirm the PDF exists by opening (and immediately closing) a
    # download stream for it.
    try:
        stream = fs.open_download_stream_by_name(f"{doc_id}.pdf")
    except NoFile:
        # The only outcome that really means "the file is not there".
        print("⚠️  PDF missing in GridFS")
    except Exception as exc:
        # Stay best-effort for a debug tool, but do not mislabel connection
        # or server errors as a missing file. Unlike a bare ``except:``,
        # this lets KeyboardInterrupt and SystemExit propagate.
        print(f"⚠️  Could not check GridFS for PDF: {exc}")
    else:
        stream.close()
        print("✅ PDF present in GridFS")

if __name__ == "__main__":
    # Single optional flag: with --doc, inspect that document; without it
    # (or with an empty value), list every document.
    cli = argparse.ArgumentParser()
    cli.add_argument("--doc", help="document_id to inspect")
    requested_id = cli.parse_args().doc

    if not requested_id:
        list_documents()
    else:
        doc_detail(requested_id)