"""
MediRAG Backend - FastAPI only (No Gradio).

The React frontend is hosted on Vercel; this module is just the API backend.

Importing this module has side effects, in this order:

1. Hugging Face / Torch cache directories are pointed at ``/tmp``.
2. ``src`` (next to this file) is prepended to ``sys.path``.
3. The scispaCy model is loaded, or installed at runtime if missing
   (best effort - the server starts without it).
4. Index and vocabulary files are downloaded from a Hugging Face dataset
   repository when they are not already present on disk (best effort).
5. The FastAPI ``app`` object is imported from ``src.api.main``.

Running the module as a script additionally serves ``app`` with uvicorn.
"""
import importlib
import logging
import os
import subprocess
import sys

import requests  # NOTE(review): not referenced in this module as shown; retained.

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Set cache directories for Hugging Face.
# These are assigned before the Hugging Face libraries are imported below;
# presumably so the libraries see them when they initialise - keep this order.
os.environ["TRANSFORMERS_CACHE"] = "/tmp/transformers_cache"
os.environ["HF_HOME"] = "/tmp/hf_home"
os.environ["TORCH_HOME"] = "/tmp/torch_cache"

# Add src to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "src"))

SPACY_MODEL_NAME = "en_core_sci_lg"
SPACY_MODEL_URL = (
    "https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.4/"
    "en_core_sci_lg-0.5.4.tar.gz"
)


def _ensure_spacy_model():
    """Load the scispaCy model, installing it with pip if it is missing.

    This is best effort: every failure is logged as a warning and swallowed so
    that the server can still start without the model.
    """
    try:
        import spacy

        try:
            spacy.load(SPACY_MODEL_NAME)
            logger.info("spaCy model %s loaded.", SPACY_MODEL_NAME)
        except OSError:
            # spacy.load raised OSError, which is treated here as "model not
            # installed". Try installing the model at runtime.
            try:
                logger.info(
                    "Attempting to install scispacy model %s...",
                    SPACY_MODEL_NAME,
                )
                subprocess.run(
                    [
                        sys.executable,
                        "-m",
                        "pip",
                        "install",
                        "--quiet",
                        SPACY_MODEL_URL,
                    ],
                    check=True,
                    timeout=300,
                )
                # The package was installed after this interpreter started;
                # drop stale finder caches so the import system can see it.
                importlib.invalidate_caches()
                spacy.load(SPACY_MODEL_NAME)
                logger.info("spaCy model installed and loaded.")
            except Exception as model_err:  # deliberate best-effort boundary
                logger.warning(
                    "Could not install spaCy model: %s. "
                    "NER features will be limited.",
                    model_err,
                )
    except ImportError:
        logger.warning(
            "spacy/scispacy not installed. "
            "NER features will be limited but server will still start."
        )


# Install spaCy model if not present (optional - server starts without it)
_ensure_spacy_model()

# Download datasets using huggingface_hub
from huggingface_hub import hf_hub_download

# Check and download index and data files
data_dir = os.path.join(os.path.dirname(__file__), "data")
index_dir = os.path.join(data_dir, "index")
os.makedirs(index_dir, exist_ok=True)

faiss_path = os.path.join(index_dir, "faiss.index")
metadata_path = os.path.join(index_dir, "metadata_store.pkl")
bm25_path = os.path.join(index_dir, "bm25_cache.pkl")
vocab_path = os.path.join(data_dir, "drugbank vocabulary.csv")
rxnorm_path = os.path.join(data_dir, "rxnorm_cache.csv")

# (filename inside the dataset repo, local path whose existence skips the download)
_DATASET_FILES = (
    ("index/faiss.index", faiss_path),
    ("index/metadata_store.pkl", metadata_path),
    ("index/bm25_cache.pkl", bm25_path),
    ("drugbank vocabulary.csv", vocab_path),
    ("rxnorm_cache.csv", rxnorm_path),
)


def download_dataset_files():
    """Download FAISS index and other core data from Hugging Face Dataset.

    Each file listed in ``_DATASET_FILES`` is fetched into ``data_dir`` only
    when its local path does not exist yet. The access token is read from the
    ``HF_TOKEN`` environment variable; a warning is logged when it is unset.

    Failures are handled per file: a failed download is logged and the
    remaining files are still attempted. Nothing is raised; a single warning
    is logged at the end if any file could not be downloaded.
    """
    repo_id = "joytheslothh/MediRAG-Index-Data"
    token = os.environ.get("HF_TOKEN")

    if not token:
        logger.warning(
            "HF_TOKEN environment variable is not set. "
            "Dataset download might fail if repo is private."
        )

    failed = []
    for remote_name, local_path in _DATASET_FILES:
        if os.path.exists(local_path):
            continue

        logger.info(
            "Downloading %s from HF dataset...", os.path.basename(remote_name)
        )
        try:
            hf_hub_download(
                repo_id=repo_id,
                filename=remote_name,
                local_dir=data_dir,
                repo_type="dataset",
                token=token,
            )
        except Exception as err:  # deliberate best-effort boundary
            # Keep going: one missing file must not block the others.
            logger.error("Failed to download dataset files: %s: %s", remote_name, err)
            failed.append(remote_name)

    if failed:
        logger.warning("Backend may not start correctly or queries may fail.")


# Trigger download at startup
download_dataset_files()

# Import FastAPI app - this is the main backend for React frontend
from src.api.main import app

if __name__ == "__main__":
    import uvicorn

    # Get port from environment (Hugging Face uses 7860)
    port = int(os.environ.get("PORT", 7860))

    logger.info("Starting FastAPI backend on port %s", port)
    uvicorn.run(app, host="0.0.0.0", port=port)