"""HTTP service that exposes sentence-embedding endpoints via FastAPI.

A single SentenceTransformer model is loaded when this module is imported
and is shared by every request handler.
"""

import threading
from typing import List

from fastapi import FastAPI
from fastapi.concurrency import run_in_threadpool
from pydantic import BaseModel
from sentence_transformers import SentenceTransformer
import uvicorn  # kept from the original import list; not referenced in the code visible here

app = FastAPI(title="Medical Embedding Service")

# Load model ONCE at startup
print("Loading Medical RAG Model... this may take a moment.")
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
print("Model loaded successfully!")

# Encoding is executed in worker threads (see the handlers below). This lock
# keeps at most one encode call using the shared model at any time, which is
# the same degree of serialization the handlers had when they ran the model
# inline on the event loop.
_model_lock = threading.Lock()


class QueryRequest(BaseModel):
    """Request body for ``POST /embed_query``: one text to embed."""

    text: str


class DocumentRequest(BaseModel):
    """Request body for ``POST /embed_docs``: a list of texts to embed."""

    texts: List[str]


def _encode_query(text: str) -> list:
    """Encode one query string with the shared model and return it as a list."""
    with _model_lock:
        return model.encode_query(text).tolist()


def _encode_documents(texts: List[str]) -> list:
    """Encode a list of document strings with the shared model.

    Returns the result of ``.tolist()`` on the model output (presumably one
    vector per input text — confirm against the library documentation).
    """
    with _model_lock:
        return model.encode_document(texts).tolist()


@app.post("/embed_query")
async def embed_query(request: QueryRequest):
    """Return the embedding of a single query text as ``{"embedding": [...]}``."""
    # Uses specialized encode_query for IR tasks.
    # The encode call is synchronous and compute-heavy; running it directly in
    # this coroutine would stall the event loop (and every other request)
    # until inference finishes, so it is dispatched to the thread pool.
    embedding = await run_in_threadpool(_encode_query, request.text)
    return {"embedding": embedding}


@app.post("/embed_docs")
async def embed_docs(request: DocumentRequest):
    """Return embeddings for a list of texts as ``{"embeddings": [...]}``."""
    # Uses specialized encode_document for IR tasks.
    # Dispatched to the thread pool for the same reason as in embed_query:
    # keep the event loop free while the model runs.
    embeddings = await run_in_threadpool(_encode_documents, request.texts)
    return {"embeddings": embeddings}