# selfies-ted-api / app.py
# Ilkham Yabbarov
# init: SELFIES-TED FastAPI Space for ParetoMol
# 538e8d5
"""
SELFIES-TED Embeddings API for ParetoMol.
Wraps ibm-research/materials.selfies-ted via sentence-transformers.
Accepts SMILES strings, returns embedding vectors for cosine similarity.
Reference: Srinivasan et al. arXiv:2410.12348
"""
import logging
import os
import threading

import uvicorn
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
# INFO level so the model-loading messages emitted by this module are shown.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI(title="SELFIES-TED Embeddings API", version="1.0.0")

# CORS: any origin may call the API with GET/POST/OPTIONS and arbitrary
# headers; credentialed requests are not allowed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["GET", "POST", "OPTIONS"],
    allow_headers=["*"],
)
# Process-wide model instance; stays None until the first get_model() call.
_model = None
# Guards the one-time load. FastAPI executes plain `def` endpoints in a thread
# pool, so without it concurrent first requests could each load the model.
_model_lock = threading.Lock()


def get_model():
    """Return the shared SELFIES-TED model, loading it on first use.

    The first call imports sentence-transformers and instantiates
    ``ibm-research/materials.selfies-ted``; later calls return the cached
    instance. Loading is serialized so the model is created at most once
    per process.

    Returns:
        The cached ``SentenceTransformer`` instance.
    """
    global _model
    if _model is None:
        with _model_lock:
            # Re-check under the lock: another thread may have finished
            # loading while this one was waiting.
            if _model is None:
                logger.info("Loading SELFIES-TED model (ibm-research/materials.selfies-ted)...")
                # Imported here rather than at module top, so importing this
                # module does not require sentence-transformers to be loaded.
                from sentence_transformers import SentenceTransformer

                _model = SentenceTransformer("ibm-research/materials.selfies-ted")
                logger.info("SELFIES-TED model loaded.")
    return _model
class EmbedRequest(BaseModel):
    """Request body for ``POST /embeddings``."""

    # SMILES strings to embed; the endpoint rejects an empty list and
    # lists longer than 200 entries.
    smiles: list[str]
@app.get("/health")
def health():
    """Report service status and whether the embedding model is loaded yet."""
    # Reads the module-level cache directly so this probe never triggers a load.
    model_ready = _model is not None
    return {"status": "ok", "model_loaded": model_ready}
# Largest number of SMILES accepted in one request; more yields HTTP 400.
_MAX_SMILES_PER_REQUEST = 200
# Batch size passed to the model's encode() call.
_ENCODE_BATCH_SIZE = 32


def _smiles_to_selfies(smiles):
    """Convert each SMILES string to SELFIES, keeping the raw SMILES on failure.

    The output has the same length and order as the input. Failures are
    logged instead of being dropped silently, so embeddings computed from
    un-translated SMILES can be traced afterwards.
    """
    # Imported at call time, so the module loads without importing selfies.
    import selfies as sf

    converted = []
    for smi in smiles:
        try:
            encoded = sf.encoder(smi)
        except Exception as exc:  # deliberate best-effort: never fail the request
            logger.warning(
                "SELFIES encoding failed for %r (%s); using raw SMILES", smi, exc
            )
            encoded = None
        else:
            # NOTE(review): older selfies releases are believed to return None
            # instead of raising on failure - confirm against the pinned version.
            if encoded is None:
                logger.warning(
                    "SELFIES encoder returned None for %r; using raw SMILES", smi
                )
        converted.append(smi if encoded is None else encoded)
    return converted


@app.post("/embeddings")
def embeddings(req: EmbedRequest):
    """Embed a batch of SMILES strings with the SELFIES-TED model.

    Each SMILES is translated to SELFIES first; entries that cannot be
    translated are embedded from the raw SMILES text instead.

    Args:
        req: Request body carrying the ``smiles`` list.

    Returns:
        ``{"embeddings": [...]}`` with one vector per input, in input order.

    Raises:
        HTTPException: 400 if the list is empty or has more than 200 entries.
    """
    if not req.smiles:
        raise HTTPException(400, "smiles list is empty")
    if len(req.smiles) > _MAX_SMILES_PER_REQUEST:
        raise HTTPException(
            400, f"Maximum {_MAX_SMILES_PER_REQUEST} SMILES per request"
        )

    selfies_strings = _smiles_to_selfies(req.smiles)
    model = get_model()
    vecs = model.encode(
        selfies_strings, batch_size=_ENCODE_BATCH_SIZE, show_progress_bar=False
    )
    return {"embeddings": vecs.tolist()}
if __name__ == "__main__":
    # Bind on all interfaces; the PORT environment variable overrides the
    # 7860 default.
    listen_port = int(os.environ.get("PORT", 7860))
    uvicorn.run(app, host="0.0.0.0", port=listen_port)