# medimind-api / startup.py
# Author: Manikantaperla
# Commit message: fix startup script MedQuAD folder name
# Commit: 39b7eaf
import os
import shutil
import subprocess
import sys
import urllib.request
import zipfile
def setup():
os.makedirs("data", exist_ok=True)
os.makedirs("artifacts", exist_ok=True)
if not os.path.exists("data/medquad.json"):
print("Downloading MedQuAD dataset...")
url = "https://github.com/abachaa/MedQuAD/archive/refs/heads/master.zip"
urllib.request.urlretrieve(url, "medquad.zip")
print("Extracting...")
with zipfile.ZipFile("medquad.zip", "r") as z:
z.extractall(".")
os.remove("medquad.zip")
# Rename MedQuAD-master to MedQuAD
if os.path.exists("MedQuAD-master"):
os.rename("MedQuAD-master", "MedQuAD")
print("Renamed MedQuAD-master to MedQuAD")
print("Parsing XML files...")
result = subprocess.run(["python", "parse_dataset.py"],
capture_output=True, text=True)
print(result.stdout)
if result.returncode != 0:
print("Parse error:", result.stderr)
print("Dataset ready!")
else:
print("Dataset already exists, skipping download")
if not os.path.exists("artifacts/retriever.pkl"):
print("Building retriever...")
result = subprocess.run(["python", "retriever.py"],
capture_output=True, text=True)
print(result.stdout)
if result.returncode != 0:
print("Retriever error:", result.stderr)
print("Retriever ready!")
else:
print("Retriever already exists, skipping build")
if __name__ == "__main__":
    # Only prepare the dataset and retriever when this file is executed as a
    # script; importing the module has no side effects.
    setup()