# Download_test / app.py
# HIB-Automedica's picture
# Update app.py
# a0d3017 verified
import os
import gdown
import sqlite3
import faiss
import numpy as np
import gradio as gr
#-----------------------------------------------------------------------------------------------
#-----------------------------------------------------------------------------------------------
#-----------------------------------------------------------------------------------------------
#--------------------------------------SOURCE FILE DOWNLOADS------------------------------------
#-----------------------------------------------------------------------------------------------
#-----------------------------------------------------------------------------------------------
#-----------------------------------------------------------------------------------------------
# Export GDOWN_CACHE pointing at a writable directory inside the Space and
# make sure that directory exists before anything is downloaded.
_gdown_cache_dir = "./cache/gdown"
os.environ["GDOWN_CACHE"] = _gdown_cache_dir
os.makedirs(_gdown_cache_dir, exist_ok=True)
def download_file(file_id, output_name):
    """
    Download a file from Google Drive if it doesn't already exist locally.

    Args:
        file_id: Google Drive file id, inserted into the ``uc?id=`` URL.
        output_name: Local path the file is stored at.

    Returns:
        ``output_name`` when the file is present after the call (either it
        was already there or the download produced it), otherwise ``None``.
    """
    if os.path.exists(output_name):
        print(f"✅ Found cached file: {output_name}")
        return output_name

    # Make sure the destination directory exists so the download does not
    # fail just because the folder has not been created yet.
    parent_dir = os.path.dirname(output_name)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    print(f"⬇️ Downloading {output_name} from Google Drive...")
    url = f"https://drive.google.com/uc?id={file_id}"
    saved_path = gdown.download(url, output_name, quiet=False, use_cookies=False)

    # NOTE(review): gdown.download may return None instead of raising when the
    # fetch fails (behaviour varies by gdown version — confirm), so verify the
    # file really landed on disk rather than assuming success.
    if saved_path is None or not os.path.exists(output_name):
        print(f"❌ Download failed for {output_name} (file id: {file_id})")
        return None
    return output_name
# Directory that holds every downloaded FAISS index / SQLite metadata file.
DATA_DIR = "./data"
os.makedirs(DATA_DIR, exist_ok=True)

# (file name, Google Drive file id) pairs, listed in download order.
_DRIVE_FILE_IDS = (
    # ROOT DOCUMENTS
    ("Document_root_faiss_index.bin", "16FV9zE0T7Fvzs9XS1y0e27iPcORWQ1nv"),
    ("Document_root_faiss_metadata.db", "14u0u8UpUKFyC4mNmuj6tWe5sANe31fkW"),
    # HEADING LEVEL DB DOCUMENTS
    ("bnf_drug_information_heading_level_faiss_metadata.db", "1JSZAtKIMlA8hmMurA-DWu5zw4sUz3sui"),
    ("bnf_drug_interactions_heading_level_faiss_metadata.db", "10daMvJZRcPn6yyyyo7P_gKrxqRLvH2Ku"),
    ("bnf_treatment_summaries_heading_level_faiss_metadata.db", "1yk7c9H9TNvxmRRzOUNqkYQP7Cl47gBCI"),
    ("bnfc_drug_information_heading_level_faiss_metadata.db", "1EiEpU2DhIeulzrKaALKva8Kx4D3FLSrR"),
    ("bnfc_drug_interactions_heading_level_faiss_metadata.db", "1GE-HW-MHl9MYy0xd25xn0HVPX8sfSp2z"),
    ("bnfc_treatment_summaries_heading_level_faiss_metadata.db", "1VwoQoub5edU5RmCpBNuN7NwR3jZXvYp5"),
    ("medusa_heading_level_faiss_metadata.db", "1o8jNXATW2v2-Cr2FyGh1CXraoSyfdhvP"),
    ("nice_CKS_heading_level_faiss_metadata.db", "1aggkKqbowmtI-nZPPwDNj0mC5_AuBfMa"),
    ("NICE_Guidance_heading_level_faiss_metadata.db", "1j76yPQ3CtbNAGvIDrBqDqcp8hZUaPdcK"),
    # HEADING LEVEL FAISS DOCUMENTS
    ("bnf_drug_information_heading_level_faiss_index.bin", "1acaXaIeqM2b8_5-RurWSZaqepXAKcJ19"),
    ("bnf_drug_interactions_heading_level_faiss_index.bin", "1nVo4Cm_-VXkY5kn65Jm4ip1XjMPO2t9h"),
    ("bnf_treatment_summaries_heading_level_faiss_index.bin", "133NCRoKgrkO2poNf_8Mzg9FurRGEq4T-"),
    ("bnfc_drug_information_heading_level_faiss_index.bin", "1K4U7QOwf3srG5deKfNZhUJ9oNROZHPkI"),
    ("bnfc_drug_interactions_heading_level_faiss_index.bin", "1RytTc9SPkEe16mBZmVyZ3C8do8hylfM7"),
    ("bnfc_treatment_summaries_heading_level_faiss_index.bin", "1oUtfVZWu3cGTLfGKQ_nM5ZFy3AeA2_aA"),
    ("medusa_heading_level_faiss_index.bin", "1irWOdN02gbX5pDxo2BbMkavAKIFlzoN3"),
    ("nice_CKS_heading_level_faiss_index.bin", "16VoNX-xe2kqvsj3LnPs73SQREc7rn_1U"),
    ("NICE_Guidance_heading_level_faiss_index.bin", "1oovBWuo5gaZoc5DpDeiDykdcE2q067kE"),
    # LEAF LEVEL DOCUMENTS
    ("nice_CKS_leaf_level_faiss_metadata.db", "1JkIc4RCULDxkXPWXnN7JL6Qn7bfrjZL4"),
    ("NICE_Guidance_leaf_level_faiss_metadata.db", "1kaBH5wT0yjpsa-7Ey1UDU84oHVolg6lH"),
    ("bnf_drug_information_leaf_level_faiss_metadata.db", "1UWWqbf0UUTUo9JV9wh91jShP3wypiSq9"),
    ("bnf_drug_interactions_leaf_level_faiss_metadata.db", "1BI4VESFtpdBEZKzTCaVjei9ay0nfginG"),
    ("bnf_treatment_summaries_leaf_level_faiss_metadata.db", "1C_pBwIOKxJHaI4OTqri0W9dpWdKCf69U"),
    ("bnfc_drug_information_leaf_level_faiss_metadata.db", "1ResNxOLAX5Ug50-S_kr2p-TEq76LtK1m"),
    ("bnfc_drug_interactions_leaf_level_faiss_metadata.db", "1jcFLByDHNMjdcGu0S3oAXSkM0lQC_tMG"),
    ("bnfc_treatment_summaries_leaf_level_faiss_metadata.db", "1J19w3RbFwiZzzJK9khPLNa-3Kw6YGvLX"),
    ("medusa_leaf_level_faiss_metadata.db", "1m7wf1sG5d22Xxa1dEIFGCBp_Plvp3pu0"),
)

# Maps each local path under DATA_DIR to the Google Drive id it is fetched from.
GDRIVE_FILES = {
    os.path.join(DATA_DIR, file_name): drive_id
    for file_name, drive_id in _DRIVE_FILE_IDS
}
# Download and assignment: runs at module execution time and calls
# download_file once per GDRIVE_FILES entry, in the mapping's insertion order.
for local_path, file_id in GDRIVE_FILES.items():
    # GDRIVE_FILES maps local path -> Drive id; download_file takes (id, path).
    download_file(file_id, local_path)
# ROOT ASSIGNMENTS
ROOT_INDEX = os.path.join(DATA_DIR, "Document_root_faiss_index.bin")
ROOT_SQLITE = os.path.join(DATA_DIR, "Document_root_faiss_metadata.db")

# Display label -> file-name prefix for every source, in heading-level order.
_SOURCE_PREFIXES = {
    "BNF DRUG INFORMATION": "bnf_drug_information",
    "BNF DRUG INTERACTIONS": "bnf_drug_interactions",
    "BNF TREATMENT SUMMARIES": "bnf_treatment_summaries",
    "BNFC DRUG INFORMATION": "bnfc_drug_information",
    "BNFC DRUG INTERACTIONS": "bnfc_drug_interactions",
    "BNFC TREATMENT SUMMARIES": "bnfc_treatment_summaries",
    "MEDUSA": "medusa",
    "NICE CKS": "nice_CKS",
    "NICE GUIDANCE": "NICE_Guidance",
}

# The leaf-level mapping lists the NICE sources first; the order is kept
# because run_dummy_checks samples the first value of each mapping.
_LEAF_LABEL_ORDER = (
    "NICE CKS",
    "NICE GUIDANCE",
    "BNF DRUG INFORMATION",
    "BNF DRUG INTERACTIONS",
    "BNF TREATMENT SUMMARIES",
    "BNFC DRUG INFORMATION",
    "BNFC DRUG INTERACTIONS",
    "BNFC TREATMENT SUMMARIES",
    "MEDUSA",
)


def _paths_by_label(labels, suffix):
    """Map each label to ``DATA_DIR/<prefix>_<suffix>``, keeping label order."""
    return {
        label: os.path.join(DATA_DIR, f"{_SOURCE_PREFIXES[label]}_{suffix}")
        for label in labels
    }


# HEADING-LEVEL DB ASSIGNMENTS
HEADING_DB_PATHS = _paths_by_label(_SOURCE_PREFIXES, "heading_level_faiss_metadata.db")

# HEADING-LEVEL FAISS ASSIGNMENTS
HEADING_FAISS_PATHS = _paths_by_label(_SOURCE_PREFIXES, "heading_level_faiss_index.bin")

# LEAF-LEVEL ASSIGNMENTS
LEAF_DB_PATHS = _paths_by_label(_LEAF_LABEL_ORDER, "leaf_level_faiss_metadata.db")
#-----------------------------------------------------------------------------------------------
#--------------------------------------DUMMY CHECK FUNCTION-------------------------------------
#-----------------------------------------------------------------------------------------------
def _describe_metadata_db(db_path, label):
    """Return log lines with the schema and up to five rows of ``metadata``.

    The connection is always closed, and any SQLite error is reported as a
    log line instead of aborting the remaining checks.
    """
    lines = []
    try:
        conn = sqlite3.connect(db_path)
        try:
            cursor = conn.cursor()
            # show table schema
            cursor.execute("PRAGMA table_info(metadata);")
            lines.append(f"{label} DB schema: {cursor.fetchall()}")
            # show a few rows
            cursor.execute("SELECT * FROM metadata LIMIT 5;")
            lines.append(f"{label} DB sample rows: {cursor.fetchall()}")
        finally:
            conn.close()
    except sqlite3.Error as exc:
        lines.append(f"❌ {label} DB check failed: {exc}")
    return lines


def run_dummy_checks():
    """Run smoke tests on the downloaded FAISS and SQLite files.

    Checks the root index and database, then the first entry of
    HEADING_DB_PATHS, HEADING_FAISS_PATHS and LEAF_DB_PATHS. A file that does
    not exist (or an empty mapping) is reported as missing.

    Returns:
        All log lines joined with newlines.
    """
    logs = ["--- Running Dummy Checks ---"]

    # Root FAISS + SQLite
    if os.path.exists(ROOT_INDEX) and os.path.exists(ROOT_SQLITE):
        index = faiss.read_index(ROOT_INDEX)
        logs.append(f"ROOT FAISS: {index.ntotal} vectors")
        logs.extend(_describe_metadata_db(ROOT_SQLITE, "ROOT"))
    else:
        logs.append("❌ Root files missing")

    # Heading-level DB (first configured entry only)
    sample_heading_db = next(iter(HEADING_DB_PATHS.values()), None)
    if sample_heading_db is not None and os.path.exists(sample_heading_db):
        logs.extend(_describe_metadata_db(sample_heading_db, "HEADING"))
    else:
        logs.append("❌ Heading DB missing")

    # Heading-level FAISS (first configured entry only)
    sample_heading_faiss = next(iter(HEADING_FAISS_PATHS.values()), None)
    if sample_heading_faiss is not None and os.path.exists(sample_heading_faiss):
        index = faiss.read_index(sample_heading_faiss)
        logs.append(f"HEADING FAISS vectors: {index.ntotal}")
        if index.ntotal > 0:
            # One random float32 query of the index's dimensionality, top-1 hit.
            query = np.random.rand(index.d).astype("float32").reshape(1, -1)
            distances, ids = index.search(query, 1)
            logs.append(
                f"HEADING FAISS dummy query -> ID {ids[0][0]}, Dist {distances[0][0]}"
            )
    else:
        logs.append("❌ Heading FAISS missing")

    # Leaf-level DB (first configured entry only)
    sample_leaf_db = next(iter(LEAF_DB_PATHS.values()), None)
    if sample_leaf_db is not None and os.path.exists(sample_leaf_db):
        logs.extend(_describe_metadata_db(sample_leaf_db, "LEAF"))
    else:
        logs.append("❌ Leaf DB missing")

    logs.append("--- Checks finished ---")
    return "\n".join(logs)
#-----------------------------------------------------------------------------------------------
#--------------------------------------GRADIO APP----------------------------------------------
#-----------------------------------------------------------------------------------------------
# Build the UI: two Markdown headers, a results textbox and a button that
# runs run_dummy_checks and writes its returned text into the textbox.
with gr.Blocks() as demo:
    gr.Markdown("# 🔍 Dummy File Checks")
    gr.Markdown("Click the button to validate downloads and run test operations on FAISS + SQLite files.")
    # Receives the string returned by run_dummy_checks.
    output = gr.Textbox(label="Results", lines=20)
    run_btn = gr.Button("Run Dummy Checks")
    # No inputs are wired; only the output component is passed to the handler.
    run_btn.click(fn=run_dummy_checks, outputs=output)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()