Thiagoc.machado committed on
Commit
bbb7cd2
·
1 Parent(s): 22abf22

Subindo API Docker com modelo de embeddings

Browse files
Files changed (3) hide show
  1. README.md +2 -2
  2. app.py +31 -0
  3. dockerfile +9 -0
README.md CHANGED
@@ -1,7 +1,7 @@
1
  ---
2
  title: Bible Embeddings Api
3
- emoji: 🐠
4
- colorFrom: indigo
5
  colorTo: purple
6
  sdk: docker
7
  pinned: false
 
1
  ---
2
  title: Bible Embeddings Api
3
+ emoji: 📚
4
+ colorFrom: red
5
  colorTo: purple
6
  sdk: docker
7
  pinned: false
app.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# FastAPI service exposing a sentence-embedding endpoint backed by a
# Portuguese paraphrase model ("thiagocmach/paraphrase-pt-bible") from the
# Hugging Face Hub.
from fastapi import FastAPI, Request
from transformers import AutoTokenizer, AutoModel
import torch

app = FastAPI()

# Load the tokenizer and encoder once at startup (module import time).
# .eval() switches off dropout so inference is deterministic.
tokenizer = AutoTokenizer.from_pretrained("thiagocmach/paraphrase-pt-bible")
model = AutoModel.from_pretrained("thiagocmach/paraphrase-pt-bible").eval()
10
@app.get("/")
def read_root():
    """Health-check endpoint: confirms the API is up and responding."""
    status_payload = {"message": "API ativa 🙌"}
    return status_payload
13
+
14
@app.post("/embed")
async def embed_text(request: Request):
    """Return a mean-pooled sentence embedding for the posted text.

    Expects a JSON body with a ``texto`` field (a string; the tokenizer
    call would also accept a list of strings — padding/truncation are
    enabled). Responds with ``{"embedding": [...]}`` on success or
    ``{"error": ...}`` on bad input.
    """
    try:
        data = await request.json()
    except ValueError:
        # A missing or malformed JSON body would otherwise raise and
        # surface to the client as an unhandled 500.
        return {"error": "Corpo JSON inválido"}

    texto = data.get('texto')
    if not texto:
        return {"error": "Campo 'texto' obrigatório"}

    inputs = tokenizer(texto, return_tensors='pt', truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    # Mean pooling over token embeddings, ignoring padding positions.
    embeddings = outputs.last_hidden_state
    # Cast the mask to float explicitly: attention_mask is int64, and the
    # later clamp(min=1e-9) otherwise relies on scalar type promotion.
    mask = inputs['attention_mask'].unsqueeze(-1).expand(embeddings.size()).float()
    masked_embeddings = embeddings * mask
    summed = torch.sum(masked_embeddings, dim=1)
    # clamp avoids division by zero for an all-padding (empty) sequence.
    counted = torch.clamp(mask.sum(1), min=1e-9)
    mean_pooled = (summed / counted).squeeze().tolist()

    return {"embedding": mean_pooled}
dockerfile ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10-slim

WORKDIR /app

# Copy only the dependency manifest first so the pip-install layer is
# cached and not rebuilt every time application source changes.
COPY requirements.txt /app/
RUN pip install --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt

# Now copy the rest of the application source.
COPY . /app

# Hugging Face Spaces (sdk: docker) expects the server on port 7860.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]