Spaces:
Sleeping
Sleeping
| import os | |
| import gdown | |
| import sqlite3 | |
| import faiss | |
| import numpy as np | |
| import gradio as gr | |
| #----------------------------------------------------------------------------------------------- | |
| #----------------------------------------------------------------------------------------------- | |
| #----------------------------------------------------------------------------------------------- | |
| #--------------------------------------SOURCE FILE DOWNLOADS------------------------------------ | |
| #----------------------------------------------------------------------------------------------- | |
| #----------------------------------------------------------------------------------------------- | |
| #----------------------------------------------------------------------------------------------- | |
# Point GDOWN_CACHE at a directory under the working tree (writable inside the
# Space) and make sure that directory exists.
# NOTE(review): presumably this variable is consumed by gdown -- confirm.
os.environ.update({"GDOWN_CACHE": "./cache/gdown"})
os.makedirs(os.environ.get("GDOWN_CACHE"), exist_ok=True)
def download_file(file_id, output_name):
    """
    Download a file from Google Drive if it doesn't already exist locally.

    Args:
        file_id: Google Drive file ID, interpolated into the download URL.
        output_name: Local path the file is stored at. An existing path is
            treated as a cache hit and nothing is downloaded.

    Returns:
        True if ``output_name`` exists when the call finishes (cache hit or
        successful download), False if the download did not produce the file.
        Exceptions raised by ``gdown.download`` itself are not caught.
    """
    if os.path.exists(output_name):
        print(f"✅ Found cached file: {output_name}")
        return True

    # Make sure the target directory exists so the download has somewhere to
    # land even when the caller has not created it beforehand.
    parent_dir = os.path.dirname(output_name)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    print(f"⬇️ Downloading {output_name} from Google Drive...")
    url = f"https://drive.google.com/uc?id={file_id}"
    result = gdown.download(url, output_name, quiet=False, use_cookies=False)

    # NOTE(review): some gdown releases signal failure by returning None
    # rather than raising -- confirm against the pinned version. Checking the
    # file on disk covers both behaviours.
    if result is None or not os.path.exists(output_name):
        print(f"❌ Download failed: {output_name}")
        return False
    return True
# Local directory that holds every downloaded FAISS index / SQLite database.
DATA_DIR = "./data"
os.makedirs(DATA_DIR, exist_ok=True)

# Maps each local file path (under DATA_DIR) to its Google Drive file ID.
# Insertion order follows the listing below.
GDRIVE_FILES = {
    os.path.join(DATA_DIR, filename): drive_id
    for filename, drive_id in (
        # Root documents
        ("Document_root_faiss_index.bin", "16FV9zE0T7Fvzs9XS1y0e27iPcORWQ1nv"),
        ("Document_root_faiss_metadata.db", "14u0u8UpUKFyC4mNmuj6tWe5sANe31fkW"),
        # Heading-level SQLite databases
        ("bnf_drug_information_heading_level_faiss_metadata.db", "1JSZAtKIMlA8hmMurA-DWu5zw4sUz3sui"),
        ("bnf_drug_interactions_heading_level_faiss_metadata.db", "10daMvJZRcPn6yyyyo7P_gKrxqRLvH2Ku"),
        ("bnf_treatment_summaries_heading_level_faiss_metadata.db", "1yk7c9H9TNvxmRRzOUNqkYQP7Cl47gBCI"),
        ("bnfc_drug_information_heading_level_faiss_metadata.db", "1EiEpU2DhIeulzrKaALKva8Kx4D3FLSrR"),
        ("bnfc_drug_interactions_heading_level_faiss_metadata.db", "1GE-HW-MHl9MYy0xd25xn0HVPX8sfSp2z"),
        ("bnfc_treatment_summaries_heading_level_faiss_metadata.db", "1VwoQoub5edU5RmCpBNuN7NwR3jZXvYp5"),
        ("medusa_heading_level_faiss_metadata.db", "1o8jNXATW2v2-Cr2FyGh1CXraoSyfdhvP"),
        ("nice_CKS_heading_level_faiss_metadata.db", "1aggkKqbowmtI-nZPPwDNj0mC5_AuBfMa"),
        ("NICE_Guidance_heading_level_faiss_metadata.db", "1j76yPQ3CtbNAGvIDrBqDqcp8hZUaPdcK"),
        # Heading-level FAISS indexes
        ("bnf_drug_information_heading_level_faiss_index.bin", "1acaXaIeqM2b8_5-RurWSZaqepXAKcJ19"),
        ("bnf_drug_interactions_heading_level_faiss_index.bin", "1nVo4Cm_-VXkY5kn65Jm4ip1XjMPO2t9h"),
        ("bnf_treatment_summaries_heading_level_faiss_index.bin", "133NCRoKgrkO2poNf_8Mzg9FurRGEq4T-"),
        ("bnfc_drug_information_heading_level_faiss_index.bin", "1K4U7QOwf3srG5deKfNZhUJ9oNROZHPkI"),
        ("bnfc_drug_interactions_heading_level_faiss_index.bin", "1RytTc9SPkEe16mBZmVyZ3C8do8hylfM7"),
        ("bnfc_treatment_summaries_heading_level_faiss_index.bin", "1oUtfVZWu3cGTLfGKQ_nM5ZFy3AeA2_aA"),
        ("medusa_heading_level_faiss_index.bin", "1irWOdN02gbX5pDxo2BbMkavAKIFlzoN3"),
        ("nice_CKS_heading_level_faiss_index.bin", "16VoNX-xe2kqvsj3LnPs73SQREc7rn_1U"),
        ("NICE_Guidance_heading_level_faiss_index.bin", "1oovBWuo5gaZoc5DpDeiDykdcE2q067kE"),
        # Leaf-level SQLite databases
        ("nice_CKS_leaf_level_faiss_metadata.db", "1JkIc4RCULDxkXPWXnN7JL6Qn7bfrjZL4"),
        ("NICE_Guidance_leaf_level_faiss_metadata.db", "1kaBH5wT0yjpsa-7Ey1UDU84oHVolg6lH"),
        ("bnf_drug_information_leaf_level_faiss_metadata.db", "1UWWqbf0UUTUo9JV9wh91jShP3wypiSq9"),
        ("bnf_drug_interactions_leaf_level_faiss_metadata.db", "1BI4VESFtpdBEZKzTCaVjei9ay0nfginG"),
        ("bnf_treatment_summaries_leaf_level_faiss_metadata.db", "1C_pBwIOKxJHaI4OTqri0W9dpWdKCf69U"),
        ("bnfc_drug_information_leaf_level_faiss_metadata.db", "1ResNxOLAX5Ug50-S_kr2p-TEq76LtK1m"),
        ("bnfc_drug_interactions_leaf_level_faiss_metadata.db", "1jcFLByDHNMjdcGu0S3oAXSkM0lQC_tMG"),
        ("bnfc_treatment_summaries_leaf_level_faiss_metadata.db", "1J19w3RbFwiZzzJK9khPLNa-3Kw6YGvLX"),
        ("medusa_leaf_level_faiss_metadata.db", "1m7wf1sG5d22Xxa1dEIFGCBp_Plvp3pu0"),
    )
}
# Fetch every file listed in GDRIVE_FILES. This runs at module import time,
# before any of the path constants below are used.
for local_path, file_id in GDRIVE_FILES.items():
    download_file(file_id=file_id, output_name=local_path)
# Root-level FAISS index and its SQLite metadata database.
ROOT_INDEX = os.path.join(DATA_DIR, "Document_root_faiss_index.bin")
ROOT_SQLITE = os.path.join(DATA_DIR, "Document_root_faiss_metadata.db")

# Heading-level SQLite metadata databases, keyed by source label.
# Entry order is significant: run_dummy_checks samples the first entry.
HEADING_DB_PATHS = {
    label: os.path.join(DATA_DIR, filename)
    for label, filename in (
        ("BNF DRUG INFORMATION", "bnf_drug_information_heading_level_faiss_metadata.db"),
        ("BNF DRUG INTERACTIONS", "bnf_drug_interactions_heading_level_faiss_metadata.db"),
        ("BNF TREATMENT SUMMARIES", "bnf_treatment_summaries_heading_level_faiss_metadata.db"),
        ("BNFC DRUG INFORMATION", "bnfc_drug_information_heading_level_faiss_metadata.db"),
        ("BNFC DRUG INTERACTIONS", "bnfc_drug_interactions_heading_level_faiss_metadata.db"),
        ("BNFC TREATMENT SUMMARIES", "bnfc_treatment_summaries_heading_level_faiss_metadata.db"),
        ("MEDUSA", "medusa_heading_level_faiss_metadata.db"),
        ("NICE CKS", "nice_CKS_heading_level_faiss_metadata.db"),
        ("NICE GUIDANCE", "NICE_Guidance_heading_level_faiss_metadata.db"),
    )
}

# Heading-level FAISS indexes, keyed by the same source labels.
HEADING_FAISS_PATHS = {
    label: os.path.join(DATA_DIR, filename)
    for label, filename in (
        ("BNF DRUG INFORMATION", "bnf_drug_information_heading_level_faiss_index.bin"),
        ("BNF DRUG INTERACTIONS", "bnf_drug_interactions_heading_level_faiss_index.bin"),
        ("BNF TREATMENT SUMMARIES", "bnf_treatment_summaries_heading_level_faiss_index.bin"),
        ("BNFC DRUG INFORMATION", "bnfc_drug_information_heading_level_faiss_index.bin"),
        ("BNFC DRUG INTERACTIONS", "bnfc_drug_interactions_heading_level_faiss_index.bin"),
        ("BNFC TREATMENT SUMMARIES", "bnfc_treatment_summaries_heading_level_faiss_index.bin"),
        ("MEDUSA", "medusa_heading_level_faiss_index.bin"),
        ("NICE CKS", "nice_CKS_heading_level_faiss_index.bin"),
        ("NICE GUIDANCE", "NICE_Guidance_heading_level_faiss_index.bin"),
    )
}

# Leaf-level SQLite metadata databases. Note the different entry order
# (NICE sources first), which is preserved from the original listing.
LEAF_DB_PATHS = {
    label: os.path.join(DATA_DIR, filename)
    for label, filename in (
        ("NICE CKS", "nice_CKS_leaf_level_faiss_metadata.db"),
        ("NICE GUIDANCE", "NICE_Guidance_leaf_level_faiss_metadata.db"),
        ("BNF DRUG INFORMATION", "bnf_drug_information_leaf_level_faiss_metadata.db"),
        ("BNF DRUG INTERACTIONS", "bnf_drug_interactions_leaf_level_faiss_metadata.db"),
        ("BNF TREATMENT SUMMARIES", "bnf_treatment_summaries_leaf_level_faiss_metadata.db"),
        ("BNFC DRUG INFORMATION", "bnfc_drug_information_leaf_level_faiss_metadata.db"),
        ("BNFC DRUG INTERACTIONS", "bnfc_drug_interactions_leaf_level_faiss_metadata.db"),
        ("BNFC TREATMENT SUMMARIES", "bnfc_treatment_summaries_leaf_level_faiss_metadata.db"),
        ("MEDUSA", "medusa_leaf_level_faiss_metadata.db"),
    )
}
| #----------------------------------------------------------------------------------------------- | |
| #--------------------------------------DUMMY CHECK FUNCTION------------------------------------- | |
| #----------------------------------------------------------------------------------------------- | |
def _describe_metadata_db(label, db_path):
    """Return two log lines: the schema and up to five rows of the ``metadata`` table."""
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        cursor.execute("PRAGMA table_info(metadata);")
        schema = cursor.fetchall()
        cursor.execute("SELECT * FROM metadata LIMIT 5;")
        sample_rows = cursor.fetchall()
    finally:
        # Release the connection even when a query raises, so a broken
        # database file does not leak an open handle.
        conn.close()
    return [
        f"{label} DB schema: {schema}",
        f"{label} DB sample rows: {sample_rows}",
    ]


def run_dummy_checks():
    """
    Run smoke checks against the downloaded FAISS indexes and SQLite databases.

    Inspects the root index/database pair, plus the first entry of
    HEADING_DB_PATHS, HEADING_FAISS_PATHS and LEAF_DB_PATHS. A file that is
    absent (or a mapping that is empty) is reported in the log rather than
    raising.

    Returns:
        A newline-joined report string suitable for display in a textbox.

    Raises:
        Errors from sqlite3 or faiss while reading an existing file are not
        caught and propagate to the caller.
    """
    logs = ["--- Running Dummy Checks ---"]

    # Root FAISS + SQLite: both files must be present to run this check.
    if os.path.exists(ROOT_INDEX) and os.path.exists(ROOT_SQLITE):
        root_index = faiss.read_index(ROOT_INDEX)
        logs.append(f"ROOT FAISS: {root_index.ntotal} vectors")
        logs.extend(_describe_metadata_db("ROOT", ROOT_SQLITE))
    else:
        logs.append("❌ Root files missing")

    # Heading-level DB (first configured entry only).
    sample_heading_db = next(iter(HEADING_DB_PATHS.values()), None)
    if sample_heading_db and os.path.exists(sample_heading_db):
        logs.extend(_describe_metadata_db("HEADING", sample_heading_db))
    else:
        logs.append("❌ Heading DB missing")

    # Heading-level FAISS (first configured entry only).
    sample_heading_faiss = next(iter(HEADING_FAISS_PATHS.values()), None)
    if sample_heading_faiss and os.path.exists(sample_heading_faiss):
        heading_index = faiss.read_index(sample_heading_faiss)
        logs.append(f"HEADING FAISS vectors: {heading_index.ntotal}")
        if heading_index.ntotal > 0:
            # Random query vector sized to the index dimension; the result is
            # only meant to show that a search call completes.
            query = np.random.rand(heading_index.d).astype("float32").reshape(1, -1)
            distances, ids = heading_index.search(query, 1)
            logs.append(
                f"HEADING FAISS dummy query -> ID {ids[0][0]}, Dist {distances[0][0]}"
            )
    else:
        logs.append("❌ Heading FAISS missing")

    # Leaf-level DB (first configured entry only).
    sample_leaf_db = next(iter(LEAF_DB_PATHS.values()), None)
    if sample_leaf_db and os.path.exists(sample_leaf_db):
        logs.extend(_describe_metadata_db("LEAF", sample_leaf_db))
    else:
        logs.append("❌ Leaf DB missing")

    logs.append("--- Checks finished ---")
    return "\n".join(logs)
| #----------------------------------------------------------------------------------------------- | |
| #--------------------------------------GRADIO APP---------------------------------------------- | |
| #----------------------------------------------------------------------------------------------- | |
# Single-page UI: two header texts, a results textbox, and a button whose
# click handler writes the run_dummy_checks() report into the textbox.
with gr.Blocks() as demo:
    gr.Markdown(value="# 🔍 Dummy File Checks")
    gr.Markdown(
        value="Click the button to validate downloads and run test operations on FAISS + SQLite files."
    )

    output = gr.Textbox(lines=20, label="Results")
    run_btn = gr.Button(value="Run Dummy Checks")
    # The handler takes no inputs; its return value fills the textbox.
    run_btn.click(run_dummy_checks, inputs=None, outputs=output)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()