Spaces:
Sleeping
Sleeping
Felipe Maya Muniz
committed on
Commit
·
4a6cfc9
1
Parent(s):
5985d55
Deploy FastAPI decoder with model
Browse files
- .gitattributes +1 -0
- Dockerfile +20 -0
- app.py +45 -0
- decoder_model.h5 +3 -0
- requirements.txt +3 -0
- tokenizer_utils.py +12 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
*.keras filter=lfs diff=lfs merge=lfs -text
|
Dockerfile
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Lightweight base image with Python 3.10
FROM python:3.10-slim

# Hugging Face Docker Spaces run the container as UID 1000. Create a matching
# user with a home directory so libraries that write per-user files can do so.
RUN useradd -m -u 1000 user

# Working directory inside the container
WORKDIR /app

# Copy the dependency manifest first so the install layer can be cached
COPY requirements.txt .

# Install dependencies system-wide while still running as root
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the code, owned by the runtime user
COPY --chown=user . .

# Drop root privileges for the running application
USER user

# Expose the default port used by Spaces
EXPOSE 7860

# Command that runs the app with Uvicorn
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
|
app.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI
|
| 2 |
+
from pydantic import BaseModel
|
| 3 |
+
import tensorflow as tf
|
| 4 |
+
import numpy as np
|
| 5 |
+
from tokenizer_utils import load_index_to_word
|
| 6 |
+
|
| 7 |
+
# Location of the serialized decoder, relative to the process working directory.
MODEL_PATH = "decoder_model.h5"

# ASGI application object that the route decorators attach to.
app = FastAPI()

# Load the model a single time, at import, rather than on every request.
decoder_model = tf.keras.models.load_model(MODEL_PATH)
|
| 11 |
+
|
| 12 |
+
# Pydantic model for the request body
class EmbeddingRequest(BaseModel):
    """Request payload for ``POST /decode``: one embedding vector."""

    # The embedding to decode, as a flat list of floats.
    embedding: list[float]
|
| 15 |
+
|
| 16 |
+
@app.post("/decode")
async def decode(req: EmbeddingRequest):
    """Decode one embedding vector into text with the loaded decoder model.

    The embedding is wrapped into a batch of one and passed to
    ``decoder_model.predict``. The arg-max index along the last axis of the
    prediction is then looked up in the index-to-word mapping, and the
    resulting words are joined with single spaces.

    Args:
        req: Request body carrying the ``embedding`` list of floats.

    Returns:
        ``{"decoded": <text>}`` on success, or ``{"error": <message>}`` when
        any step raises. The failure is also logged with its traceback.
    """
    import logging

    try:
        # Batch of one: shape (1, len(req.embedding)).
        input_tensor = np.array([req.embedding], dtype=np.float32)

        # NOTE(review): predict() is a blocking call inside an async handler;
        # consider off-loading it if concurrent requests matter.
        prediction = decoder_model.predict(input_tensor)

        # Index with the highest score along the last axis.
        token_ids = np.asarray(prediction.argmax(axis=-1))

        # Only one embedding was submitted, so keep the first batch entry.
        if token_ids.ndim > 1:
            token_ids = token_ids[0]

        # Flatten to a plain list of ints. This covers a 0-d scalar result as
        # well as results of rank > 2, which would otherwise yield nested
        # lists that cannot be used as dictionary keys.
        token_ids = [int(i) for i in token_ids.reshape(-1)]

        # Index -> word dictionary
        index_to_word = load_index_to_word()

        # Unknown ids map to the empty string, exactly as before.
        decoded = " ".join(index_to_word.get(i, "") for i in token_ids).strip()

        return {"decoded": decoded}

    except Exception as e:
        # Keep the existing error payload for clients, but leave a
        # server-side trace instead of swallowing the failure silently.
        logging.getLogger(__name__).exception("decode request failed")
        return {"error": str(e)}
|
decoder_model.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cac6aa6c5cef86e36ff0d8a5e7aeafc7d417bf8d960467b2eb7141b77bc8f8ec
|
| 3 |
+
size 27866032
|
requirements.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi
|
| 2 |
+
uvicorn
|
| 3 |
+
tensorflow
|
tokenizer_utils.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
### 📄 tokenizer_utils.py
|
| 2 |
+
def load_index_to_word() -> dict[int, str]:
    """Return the token-index to word lookup table used for decoding.

    This is a placeholder vocabulary; replace it with real loading from
    your JSON or pickle file.

    Returns:
        A newly built dict on every call, mapping integer token ids to
        their word strings.
    """
    # Example mock. Replace with real loading from your JSON or pickle.
    return {
        0: "<PAD>",
        1: "hello",
        2: "world",
        3: "sage",
        4: "is",
        5: "thinking",
    }
|