""" SELFIES-TED Embeddings API for ParetoMol. Wraps ibm-research/materials.selfies-ted via sentence-transformers. Accepts SMILES strings, returns embedding vectors for cosine similarity. Reference: Srinivasan et al. arXiv:2410.12348 """ from fastapi import FastAPI, HTTPException from fastapi.middleware.cors import CORSMiddleware from pydantic import BaseModel import uvicorn, logging, os logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) app = FastAPI(title="SELFIES-TED Embeddings API", version="1.0.0") app.add_middleware( CORSMiddleware, allow_origins=["*"], allow_credentials=False, allow_methods=["GET", "POST", "OPTIONS"], allow_headers=["*"], ) _model = None def get_model(): global _model if _model is None: logger.info("Loading SELFIES-TED model (ibm-research/materials.selfies-ted)...") from sentence_transformers import SentenceTransformer _model = SentenceTransformer("ibm-research/materials.selfies-ted") logger.info("SELFIES-TED model loaded.") return _model class EmbedRequest(BaseModel): smiles: list[str] @app.get("/health") def health(): return {"status": "ok", "model_loaded": _model is not None} @app.post("/embeddings") def embeddings(req: EmbedRequest): if not req.smiles: raise HTTPException(400, "smiles list is empty") if len(req.smiles) > 200: raise HTTPException(400, "Maximum 200 SMILES per request") import selfies as sf selfies_strings = [] for smi in req.smiles: try: selfies_strings.append(sf.encoder(smi)) except Exception: selfies_strings.append(smi) # fallback: use SMILES as-is model = get_model() vecs = model.encode(selfies_strings, batch_size=32, show_progress_bar=False) return {"embeddings": vecs.tolist()} if __name__ == "__main__": uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", 7860)))