Ilkham Yabbarov committed on
Commit
538e8d5
·
1 Parent(s): 36ebbe8

init: SELFIES-TED FastAPI Space for ParetoMol

Browse files
Files changed (4) hide show
  1. Dockerfile +12 -0
  2. README.md +22 -5
  3. app.py +69 -0
  4. requirements.txt +7 -0
Dockerfile ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.11-slim

# Hugging Face Docker Spaces run the container as UID 1000. Create a matching
# user with a real home directory so the model download at runtime has a
# writable cache location (~/.cache) instead of failing on a read-only path.
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

WORKDIR $HOME/app

# Install dependencies first so this layer is cached across app.py changes.
COPY --chown=user requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY --chown=user app.py .

# Port served by uvicorn below.
EXPOSE 7860

CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,10 +1,27 @@
1
  ---
2
- title: Selfies Ted Api
3
- emoji: 🌖
4
- colorFrom: yellow
5
- colorTo: red
6
  sdk: docker
7
  pinned: false
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: SELFIES-TED Embeddings API
3
+ emoji: 🧪
4
+ colorFrom: green
5
+ colorTo: blue
6
  sdk: docker
7
  pinned: false
8
  ---
9
 
10
+ # SELFIES-TED Embeddings API
11
+
12
+ CORS-enabled REST API wrapping [ibm-research/materials.selfies-ted](https://huggingface.co/ibm-research/materials.selfies-ted) for [ParetoMol](https://paretomol.com).
13
+
14
+ ## Endpoint
15
+
16
+ **POST /embeddings**
17
+ ```json
18
+ { "smiles": ["CCO", "c1ccccc1"] }
19
+ ```
20
+ Returns:
21
+ ```json
22
+ { "embeddings": [[...], [...]] }
23
+ ```
24
+
25
+ **GET /health**
26
+
27
+ Used by ParetoMol for the SELFIES-TED similarity metric in the Similarity Matrix and Activity Cliffs views.
app.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ SELFIES-TED Embeddings API for ParetoMol.
3
+
4
+ Wraps ibm-research/materials.selfies-ted via sentence-transformers.
5
+ Accepts SMILES strings, returns embedding vectors for cosine similarity.
6
+
7
+ Reference: Srinivasan et al. arXiv:2410.12348
8
+ """
9
+ from fastapi import FastAPI, HTTPException
10
+ from fastapi.middleware.cors import CORSMiddleware
11
+ from pydantic import BaseModel
12
+ import uvicorn, logging, os
13
+
14
# Configure root logging at INFO and log under this module's name.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI(title="SELFIES-TED Embeddings API", version="1.0.0")

# CORS policy: any origin may call the API, credentials are not allowed,
# and only the methods the API actually serves (plus preflight) are listed.
_cors_settings = {
    "allow_origins": ["*"],
    "allow_credentials": False,
    "allow_methods": ["GET", "POST", "OPTIONS"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_cors_settings)

# Cached model instance; stays None until get_model() loads it.
_model = None
28
+
29
def get_model():
    """Return the shared SELFIES-TED encoder, loading it on first call.

    The loaded instance is stored in the module-level ``_model`` so later
    calls return it without loading again.
    """
    global _model

    # Fast path: the model has already been loaded.
    if _model is not None:
        return _model

    logger.info("Loading SELFIES-TED model (ibm-research/materials.selfies-ted)...")
    # Imported here rather than at module top so importing this module does
    # not itself import sentence_transformers.
    from sentence_transformers import SentenceTransformer

    encoder = SentenceTransformer("ibm-research/materials.selfies-ted")
    _model = encoder
    logger.info("SELFIES-TED model loaded.")
    return _model
37
+
38
+
39
# Request body for POST /embeddings.
class EmbedRequest(BaseModel):
    # SMILES strings to embed, one embedding per entry. The endpoint itself
    # rejects an empty list and lists longer than 200 entries.
    smiles: list[str]
41
+
42
+
43
# Liveness probe. Reports whether the model has been loaded yet; it never
# triggers the load itself.
@app.get("/health")
def health():
    model_ready = _model is not None
    return {"status": "ok", "model_loaded": model_ready}
46
+
47
+
48
def _smiles_to_selfies(smi):
    """Convert one SMILES string into the text that is fed to the model.

    On success the SELFIES string is returned with its tokens separated by
    spaces (``"[C][O]"`` -> ``"[C] [O]"``), which is the input format shown
    in the SELFIES-TED model card. If encoding fails, the original SMILES
    string is returned unchanged so one bad molecule does not fail the
    whole batch.
    """
    import selfies as sf

    try:
        encoded = sf.encoder(smi)
    except Exception:
        # Deliberate best-effort fallback, but no longer silent: the caller
        # still gets a vector, and the log records that it is not a real
        # SELFIES embedding.
        logger.warning("SELFIES encoding failed for %r; using SMILES as-is", smi)
        return smi

    if not encoded:
        # NOTE(review): some selfies releases appear to signal failure by
        # returning None instead of raising; treat that like a failure.
        return smi

    return encoded.replace("][", "] [")


@app.post("/embeddings")
def embeddings(req: EmbedRequest):
    """Embed a batch of SMILES strings with SELFIES-TED.

    Returns ``{"embeddings": [...]}`` with one vector per input string, in
    input order. Responds with HTTP 400 when the list is empty or contains
    more than 200 entries.
    """
    if not req.smiles:
        raise HTTPException(400, "smiles list is empty")
    if len(req.smiles) > 200:
        raise HTTPException(400, "Maximum 200 SMILES per request")

    selfies_strings = [_smiles_to_selfies(smi) for smi in req.smiles]

    model = get_model()
    vecs = model.encode(selfies_strings, batch_size=32, show_progress_bar=False)
    return {"embeddings": vecs.tolist()}
66
+
67
+
68
if __name__ == "__main__":
    # Direct-run entry point: listen on all interfaces, on $PORT if set,
    # otherwise on 7860.
    port = int(os.environ.get("PORT", 7860))
    uvicorn.run(app, host="0.0.0.0", port=port)
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ sentence-transformers
4
+ selfies
5
+ torch
6
+ numpy
7
+ pydantic