# Spaces: Running
import os
import subprocess
import sys
import urllib.request
import zipfile
| def setup(): | |
| os.makedirs("data", exist_ok=True) | |
| os.makedirs("artifacts", exist_ok=True) | |
| if not os.path.exists("data/medquad.json"): | |
| print("Downloading MedQuAD dataset...") | |
| url = "https://github.com/abachaa/MedQuAD/archive/refs/heads/master.zip" | |
| urllib.request.urlretrieve(url, "medquad.zip") | |
| print("Extracting...") | |
| with zipfile.ZipFile("medquad.zip", "r") as z: | |
| z.extractall(".") | |
| os.remove("medquad.zip") | |
| # Rename MedQuAD-master to MedQuAD | |
| if os.path.exists("MedQuAD-master"): | |
| os.rename("MedQuAD-master", "MedQuAD") | |
| print("Renamed MedQuAD-master to MedQuAD") | |
| print("Parsing XML files...") | |
| result = subprocess.run(["python", "parse_dataset.py"], | |
| capture_output=True, text=True) | |
| print(result.stdout) | |
| if result.returncode != 0: | |
| print("Parse error:", result.stderr) | |
| print("Dataset ready!") | |
| else: | |
| print("Dataset already exists, skipping download") | |
| if not os.path.exists("artifacts/retriever.pkl"): | |
| print("Building retriever...") | |
| result = subprocess.run(["python", "retriever.py"], | |
| capture_output=True, text=True) | |
| print(result.stdout) | |
| if result.returncode != 0: | |
| print("Retriever error:", result.stderr) | |
| print("Retriever ready!") | |
| else: | |
| print("Retriever already exists, skipping build") | |
# Run the setup only when this file is executed as a script, so importing
# the module has no side effects.
if __name__ == "__main__":
    setup()