import os import urllib.request import zipfile import subprocess def setup(): os.makedirs("data", exist_ok=True) os.makedirs("artifacts", exist_ok=True) if not os.path.exists("data/medquad.json"): print("Downloading MedQuAD dataset...") url = "https://github.com/abachaa/MedQuAD/archive/refs/heads/master.zip" urllib.request.urlretrieve(url, "medquad.zip") print("Extracting...") with zipfile.ZipFile("medquad.zip", "r") as z: z.extractall(".") os.remove("medquad.zip") # Rename MedQuAD-master to MedQuAD if os.path.exists("MedQuAD-master"): os.rename("MedQuAD-master", "MedQuAD") print("Renamed MedQuAD-master to MedQuAD") print("Parsing XML files...") result = subprocess.run(["python", "parse_dataset.py"], capture_output=True, text=True) print(result.stdout) if result.returncode != 0: print("Parse error:", result.stderr) print("Dataset ready!") else: print("Dataset already exists, skipping download") if not os.path.exists("artifacts/retriever.pkl"): print("Building retriever...") result = subprocess.run(["python", "retriever.py"], capture_output=True, text=True) print(result.stdout) if result.returncode != 0: print("Retriever error:", result.stderr) print("Retriever ready!") else: print("Retriever already exists, skipping build") if __name__ == "__main__": setup()