Spaces:

ilkhamfy
/

selfies-ted-api

Sleeping

App Files Files Community

Ilkham Yabbarov committed on Apr 7

Commit

538e8d5

1 Parent(s): 36ebbe8

init: SELFIES-TED FastAPI Space for ParetoMol

Browse files

Files changed (4) hide show

Dockerfile +12 -0
README.md +22 -5
app.py +69 -0
requirements.txt +7 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,12 @@

+FROM python:3.11-slim
+# Hugging Face Docker Spaces run the container as UID 1000. Create that user
+# with a real, writable home directory so the model download performed at
+# runtime can write its cache without permission errors.
+RUN useradd -m -u 1000 user
+USER user
+# pip installs console scripts (e.g. uvicorn) into ~/.local/bin for a
+# non-root user, so that directory has to be on PATH.
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH
+WORKDIR $HOME/app
+COPY --chown=user requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+COPY --chown=user app.py .
+# 7860 is the port the Space is expected to serve on.
+EXPOSE 7860
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

README.md CHANGED Viewed

@@ -1,10 +1,27 @@
 ---
-title: Selfies Ted Api
-emoji: 🌖
-colorFrom: yellow
-colorTo: red
 sdk: docker
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: SELFIES-TED Embeddings API
+emoji: 🧪
+colorFrom: green
+colorTo: blue
 sdk: docker
 pinned: false
 ---
+# SELFIES-TED Embeddings API
+CORS-enabled REST API wrapping [ibm-research/materials.selfies-ted](https://huggingface.co/ibm-research/materials.selfies-ted) for [ParetoMol](https://paretomol.com).
+## Endpoint
+**POST /embeddings**
+```json
+{ "smiles": ["CCO", "c1ccccc1"] }
+```
+Returns:
+```json
+{ "embeddings": [[...], [...]] }
+```
+**GET /health** — returns the service status and whether the model has been loaded yet.
+This API is used by ParetoMol for the SELFIES-TED similarity metric in the Similarity Matrix and Activity Cliffs views.

app.py ADDED Viewed

	@@ -0,0 +1,69 @@

+"""
+SELFIES-TED Embeddings API for ParetoMol.
+Wraps ibm-research/materials.selfies-ted via sentence-transformers.
+Accepts SMILES strings, returns embedding vectors for cosine similarity.
+Reference: Srinivasan et al. arXiv:2410.12348
+"""
+from fastapi import FastAPI, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel
+import uvicorn, logging, os
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+app = FastAPI(title="SELFIES-TED Embeddings API", version="1.0.0")
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=False,
+    allow_methods=["GET", "POST", "OPTIONS"],
+    allow_headers=["*"],
+)
+_model = None
+def get_model():
+    global _model
+    if _model is None:
+        logger.info("Loading SELFIES-TED model (ibm-research/materials.selfies-ted)...")
+        from sentence_transformers import SentenceTransformer
+        _model = SentenceTransformer("ibm-research/materials.selfies-ted")
+        logger.info("SELFIES-TED model loaded.")
+    return _model
+class EmbedRequest(BaseModel):
+    """Request body for POST /embeddings."""
+    # SMILES strings to embed; the endpoint rejects an empty list and
+    # lists longer than 200 entries.
+    smiles: list[str]
+@app.get("/health")
+def health():
+    return {"status": "ok", "model_loaded": _model is not None}
+@app.post("/embeddings")
+def embeddings(req: EmbedRequest):
+    if not req.smiles:
+        raise HTTPException(400, "smiles list is empty")
+    if len(req.smiles) > 200:
+        raise HTTPException(400, "Maximum 200 SMILES per request")
+    import selfies as sf
+    selfies_strings = []
+    for smi in req.smiles:
+        try:
+            selfies_strings.append(sf.encoder(smi))
+        except Exception:
+            selfies_strings.append(smi)  # fallback: use SMILES as-is
+    model = get_model()
+    vecs = model.encode(selfies_strings, batch_size=32, show_progress_bar=False)
+    return {"embeddings": vecs.tolist()}
+if __name__ == "__main__":
+    uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", 7860)))

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+fastapi
+uvicorn
+sentence-transformers
+selfies
+torch
+numpy
+pydantic