MediRAG-API / app.py
joytheslothh's picture
deploy: clean build
b6f9fa8
"""
MediRAG Backend - FastAPI only (No Gradio)
React frontend on Vercel, this is just the API backend
"""
import os
import sys
import subprocess
import logging
import requests
# Root logging at INFO; everything below reports through this module logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Redirect the Hugging Face / Torch caches to directories under /tmp.
os.environ.update(
    {
        "TRANSFORMERS_CACHE": "/tmp/transformers_cache",
        "HF_HOME": "/tmp/hf_home",
        "TORCH_HOME": "/tmp/torch_cache",
    }
)

# Put the sibling "src" directory first on the import path.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "src"))
# Make sure the scispaCy model "en_core_sci_lg" is available.
# This whole step is optional: every failure path only logs a warning, so the
# server still starts without NER support.
try:
    import spacy

    try:
        spacy.load("en_core_sci_lg")
        logger.info("spaCy model en_core_sci_lg loaded.")
    except OSError:
        # spacy.load raised OSError, so try installing the model at runtime.
        try:
            logger.info("Attempting to install scispacy model en_core_sci_lg...")
            subprocess.run(
                [
                    sys.executable, "-m", "pip", "install", "--quiet",
                    "https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.4/en_core_sci_lg-0.5.4.tar.gz",
                ],
                check=True,
                timeout=300,
            )
            # The package was installed by a child process while this
            # interpreter was already running; refresh the import finders so
            # the retry below is guaranteed to see it.
            import importlib

            importlib.invalidate_caches()
            spacy.load("en_core_sci_lg")
            logger.info("spaCy model installed and loaded.")
        except Exception as model_err:
            # Best effort: pip failure, timeout, or a failed reload all end here.
            logger.warning(
                "Could not install spaCy model: %s. NER features will be limited.",
                model_err,
            )
except ImportError:
    logger.warning("spacy/scispacy not installed. NER features will be limited but server will still start.")
# Download datasets using huggingface_hub
from huggingface_hub import hf_hub_download
# Locations of the index and data files, all relative to this file's directory.
data_dir = os.path.join(os.path.dirname(__file__), "data")
index_dir = os.path.join(data_dir, "index")
os.makedirs(index_dir, exist_ok=True)

# Index artifacts live in data/index.
faiss_path, metadata_path, bm25_path = (
    os.path.join(index_dir, index_file)
    for index_file in ("faiss.index", "metadata_store.pkl", "bm25_cache.pkl")
)

# Vocabulary / cache CSVs live directly in data/.
vocab_path, rxnorm_path = (
    os.path.join(data_dir, csv_file)
    for csv_file in ("drugbank vocabulary.csv", "rxnorm_cache.csv")
)
def download_dataset_files():
    """Download FAISS index and other core data from Hugging Face Dataset.

    Each required file is fetched from the ``joytheslothh/MediRAG-Index-Data``
    dataset repo into ``data_dir`` only when its local path does not exist yet.
    The access token is read from the ``HF_TOKEN`` environment variable; a
    warning is logged when it is unset.

    Failures are handled per file: an error is logged and the remaining files
    are still attempted, so one failed download does not skip the others.
    Nothing is raised to the caller; a single summary warning is logged if any
    download failed.
    """
    repo_id = "joytheslothh/MediRAG-Index-Data"
    token = os.environ.get("HF_TOKEN")
    if not token:
        logger.warning("HF_TOKEN environment variable is not set. Dataset download might fail if repo is private.")

    # (expected local path, file name inside the dataset repo)
    required_files = (
        (faiss_path, "index/faiss.index"),
        (metadata_path, "index/metadata_store.pkl"),
        (bm25_path, "index/bm25_cache.pkl"),
        (vocab_path, "drugbank vocabulary.csv"),
        (rxnorm_path, "rxnorm_cache.csv"),
    )

    any_failed = False
    for local_path, repo_filename in required_files:
        if os.path.exists(local_path):
            continue  # already present, nothing to fetch

        display_name = os.path.basename(local_path)
        logger.info("Downloading %s from HF dataset...", display_name)
        try:
            hf_hub_download(
                repo_id=repo_id,
                filename=repo_filename,
                local_dir=data_dir,
                repo_type="dataset",
                token=token,
            )
        except Exception as e:
            # Deliberately best-effort: log and move on to the next file.
            logger.error("Failed to download %s from HF dataset: %s", display_name, e)
            any_failed = True

    if any_failed:
        logger.warning("Backend may not start correctly or queries may fail.")
# Trigger download at startup
# This runs at module import time, before the API module below is imported.
# download_dataset_files() logs download errors instead of raising them, so
# execution continues to the import even when a file could not be fetched.
download_dataset_files()
# Import FastAPI app - this is the main backend for React frontend
# NOTE(review): presumably src.api.main expects the files under data/ to exist
# by the time it is imported, hence the ordering — confirm before moving this.
from src.api.main import app
if __name__ == "__main__":
    # Only imported when the file is run directly as a script.
    import uvicorn

    # PORT from the environment wins; 7860 is the fallback (Hugging Face uses 7860).
    port = int(os.getenv("PORT", 7860))
    logger.info("Starting FastAPI backend on port {}".format(port))

    # Bind on all interfaces.
    uvicorn.run(app, port=port, host="0.0.0.0")