"""
MediRAG Backend - FastAPI only (no Gradio).

The React frontend is hosted on Vercel; this module is only the API backend.
"""
import os
import sys
import subprocess
import logging
import requests
# Emit INFO-level log records and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Point the Hugging Face / PyTorch cache locations at directories under /tmp.
os.environ.update(
    {
        "TRANSFORMERS_CACHE": "/tmp/transformers_cache",
        "HF_HOME": "/tmp/hf_home",
        "TORCH_HOME": "/tmp/torch_cache",
    }
)

# Put the sibling "src" directory first on the import path.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "src"))
# Make sure the scispaCy model "en_core_sci_lg" is available. This step is
# optional: every failure handled below is logged and startup continues.
try:
    import spacy

    try:
        spacy.load("en_core_sci_lg")
    except OSError:
        # Loading failed with OSError, so try a runtime pip install of the
        # pinned release tarball and load the model again.
        _model_install_cmd = [
            sys.executable,
            "-m",
            "pip",
            "install",
            "--quiet",
            "https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.4/en_core_sci_lg-0.5.4.tar.gz",
        ]
        try:
            logger.info("Attempting to install scispacy model en_core_sci_lg...")
            # check=True raises on a non-zero pip exit; give up after 300 s.
            subprocess.run(_model_install_cmd, check=True, timeout=300)
            spacy.load("en_core_sci_lg")
            logger.info("spaCy model installed and loaded.")
        except Exception as install_error:
            logger.warning(f"Could not install spaCy model: {install_error}. NER features will be limited.")
    else:
        logger.info("spaCy model en_core_sci_lg loaded.")
except ImportError:
    logger.warning("spacy/scispacy not installed. NER features will be limited but server will still start.")
# Download datasets using huggingface_hub
from huggingface_hub import hf_hub_download
import yaml
from pathlib import Path
# Choose the config file: the MEDIRAG_CONFIG environment variable wins;
# otherwise use config_local.yaml when it exists in the working directory,
# falling back to config.yaml.
config_path = os.environ.get(
    "MEDIRAG_CONFIG",
    "config_local.yaml" if Path("config_local.yaml").exists() else "config.yaml",
)
try:
    with open(config_path, "r", encoding="utf-8") as f:
        config_data = yaml.safe_load(f)
except Exception as config_err:
    # Best effort: a missing or malformed config must not block startup, but
    # say so instead of silently falling back to defaults.
    logger.warning(f"Could not read config file {config_path}: {config_err}. Using defaults.")
    config_data = {}

# yaml.safe_load returns None for an empty document and can return a
# non-mapping (list, scalar); normalise so the lookups below cannot raise.
if not isinstance(config_data, dict):
    config_data = {}

# A bare "retrieval:" key parses to None, so guard the nested section too.
retrieval_config = config_data.get("retrieval")
if not isinstance(retrieval_config, dict):
    retrieval_config = {}

# Local mode is enabled by a truthy retrieval.use_local_dataset in the config
# or by USE_LOCAL_DATASET=true (case-insensitive) in the environment.
use_local_dataset = bool(retrieval_config.get("use_local_dataset", False)) or (
    os.environ.get("USE_LOCAL_DATASET", "false").lower() == "true"
)
# Locations of the index and data files, all under "data" next to this file.
data_dir = os.path.join(os.path.dirname(__file__), "data")
index_dir = os.path.join(data_dir, "index")
os.makedirs(index_dir, exist_ok=True)

# Files stored in data/index/.
faiss_path, metadata_path, bm25_path = (
    os.path.join(index_dir, artifact)
    for artifact in ("faiss.index", "metadata_store.pkl", "bm25_cache.pkl")
)

# Files stored directly in data/.
vocab_path, rxnorm_path = (
    os.path.join(data_dir, table)
    for table in ("drugbank vocabulary.csv", "rxnorm_cache.csv")
)
def download_dataset_files():
    """Download any missing index/data files from the Hugging Face dataset.

    Returns immediately when the module-level ``use_local_dataset`` flag is
    truthy. Otherwise every expected file that does not yet exist on disk is
    fetched with ``hf_hub_download`` into ``data_dir``, authenticated with the
    ``HF_TOKEN`` environment variable when it is set.

    Each file is downloaded independently: a failure is logged and the
    remaining files are still attempted, so one bad download cannot prevent
    the others. Download errors are never raised to the caller.

    Returns:
        None.
    """
    if use_local_dataset:
        logger.info("[LOCAL MODE] Bypassing Hugging Face repository download. Relying on local datasets in data/index/.")
        return

    repo_id = "joytheslothh/MediRAG-Index-Data"
    token = os.environ.get("HF_TOKEN")
    if not token:
        logger.warning("HF_TOKEN environment variable is not set. Dataset download might fail if repo is private.")

    # (expected local path, path of the file inside the dataset repo)
    required_files = (
        (faiss_path, "index/faiss.index"),
        (metadata_path, "index/metadata_store.pkl"),
        (bm25_path, "index/bm25_cache.pkl"),
        (vocab_path, "drugbank vocabulary.csv"),
        (rxnorm_path, "rxnorm_cache.csv"),
    )

    any_failed = False
    for local_path, repo_filename in required_files:
        if os.path.exists(local_path):
            continue  # already present; nothing to fetch

        display_name = os.path.basename(local_path)
        logger.info(f"Downloading {display_name} from HF dataset...")
        try:
            hf_hub_download(
                repo_id=repo_id,
                filename=repo_filename,
                local_dir=data_dir,
                repo_type="dataset",
                token=token,
            )
        except Exception as e:
            # Keep going: the other files may still download successfully.
            any_failed = True
            logger.error(f"Failed to download {display_name}: {e}")

    if any_failed:
        logger.warning("Backend may not start correctly or queries may fail.")
# Fetch any missing dataset files before the API module is imported.
download_dataset_files()

# The FastAPI application object; this is the backend the React frontend calls.
from src.api.main import app

if __name__ == "__main__":
    import uvicorn

    # Hugging Face uses port 7860, so that is the default when PORT is unset.
    server_port = int(os.environ.get("PORT", 7860))
    logger.info(f"Starting FastAPI backend on port {server_port}")
    uvicorn.run(app, host="0.0.0.0", port=server_port)
|