Spaces:
Sleeping
Sleeping
| """ | |
Quick CLI helper to peek at MongoDB + GridFS.

Usage:
    python test/db_debug.py                      # list every document
    python test/db_debug.py --doc <document_id>  # inspect a single document
| """ | |
| import argparse, os, json, pprint | |
| from pymongo import MongoClient | |
| from gridfs import GridFSBucket | |
# Connection settings are read from the environment. MONGODB_URI has no
# fallback here, so None is handed to MongoClient when it is unset;
# MONGODB_DB falls back to "querysearcher".
MONGO_URI = os.environ.get("MONGODB_URI")
MONGO_DB_NAME = os.environ.get("MONGODB_DB", "querysearcher")

# Module-level handles shared by the functions in this script.
client = MongoClient(MONGO_URI)
db = client.get_database(MONGO_DB_NAME)
fs = GridFSBucket(db)
def list_documents():
    """Print one summary line per record in the ``documents`` collection.

    Each line has the form ``<_id> | <title padded to 60 columns> | <status>``.
    A record without a ``title`` or ``status`` field is shown with a
    placeholder instead of aborting the whole listing.
    """
    for doc in db.documents.find():
        # Plain indexing raises KeyError on a missing field, and the ``<60``
        # format spec raises TypeError for None, so fetch with a default and
        # coerce the title to str before padding.
        title = str(doc.get("title", "<no title>"))
        status = doc.get("status", "<no status>")
        print(f"{doc['_id']} | {title:<60} | {status}")
def doc_detail(doc_id):
    """Print one document record along with related sanity checks.

    Looks up ``doc_id`` in the ``documents`` collection. If found, pretty-prints
    the record, prints how many ``embeddings`` records reference it, and
    reports whether a GridFS file named ``<doc_id>.pdf`` exists.

    Args:
        doc_id: Value matched against ``_id`` in ``documents`` and against
            ``document_id`` in ``embeddings``.
            NOTE(review): the value is used as-is (a string when it comes from
            the CLI); if ``_id`` is stored as an ObjectId the lookup will not
            match -- confirm against the writer side.
    """
    # Function-scope import so the GridFS check can catch only "file not
    # found" rather than every exception.
    from gridfs.errors import NoFile

    doc = db.documents.find_one({"_id": doc_id})
    if not doc:
        print("No such doc.")
        return
    pprint.pp(doc)

    # Number of embedding chunks stored for this document.
    cnt = db.embeddings.count_documents({"document_id": doc_id})
    print(f"Embeddings chunks: {cnt}")

    # Confirm the PDF exists by opening (and immediately closing) a download
    # stream. Only NoFile means "missing"; other errors (connection failures,
    # Ctrl-C) propagate instead of being misreported as a missing file.
    try:
        fs.open_download_stream_by_name(f"{doc_id}.pdf").close()
    except NoFile:
        print("⚠️ PDF missing in GridFS")
    else:
        print("✅ PDF present in GridFS")
if __name__ == "__main__":
    # With --doc, show the details of that one document; with no arguments,
    # list every document.
    cli = argparse.ArgumentParser()
    cli.add_argument("--doc", help="document_id to inspect")
    options = cli.parse_args()

    if not options.doc:
        list_documents()
    else:
        doc_detail(options.doc)