Felipe Maya Muniz committed on
Commit 4a6cfc9
1 Parent(s): 5985d55

Deploy FastAPI decoder with model

Files changed (6)
  1. .gitattributes +1 -0
  2. Dockerfile +20 -0
  3. app.py +45 -0
  4. decoder_model.h5 +3 -0
  5. requirements.txt +3 -0
  6. tokenizer_utils.py +12 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.keras filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,20 @@
+ # Use a lightweight Python 3.10 image
+ FROM python:3.10-slim
+
+ # Set the working directory inside the container
+ WORKDIR /app
+
+ # Copy the dependency file
+ COPY requirements.txt .
+
+ # Install dependencies
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Copy the rest of the code
+ COPY . .
+
+ # Expose the default port used by Spaces
+ EXPOSE 7860
+
+ # Command to run the app with Uvicorn
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,45 @@
+ from fastapi import FastAPI
+ from pydantic import BaseModel
+ import tensorflow as tf
+ import numpy as np
+ from tokenizer_utils import load_index_to_word
+
+ app = FastAPI()
+
+ # Load the model only once
+ decoder_model = tf.keras.models.load_model("decoder_model.h5")
+
+ # Pydantic model for the request body
+ class EmbeddingRequest(BaseModel):
+     embedding: list[float]
+
+ @app.post("/decode")
+ async def decode(req: EmbeddingRequest):
+     try:
+         # Build a tensor with shape (1, embedding_dim)
+         input_tensor = np.array([req.embedding], dtype=np.float32)
+
+         # Run the prediction
+         prediction = decoder_model.predict(input_tensor)
+
+         # Take the indices with the highest probability
+         token_ids = prediction.argmax(axis=-1)
+
+         # Normalize to a list of integers
+         if isinstance(token_ids, np.ndarray):
+             token_ids = token_ids[0] if token_ids.ndim > 1 else token_ids
+             token_ids = token_ids.tolist()
+
+         if isinstance(token_ids, (np.integer, int)):
+             token_ids = [int(token_ids)]
+
+         # Load the index → word dictionary
+         index_to_word = load_index_to_word()
+
+         # Decode the token ids into words
+         decoded = " ".join(index_to_word.get(i, "") for i in token_ids).strip()
+
+         return {"decoded": decoded}
+
+     except Exception as e:
+         return {"error": str(e)}
decoder_model.h5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cac6aa6c5cef86e36ff0d8a5e7aeafc7d417bf8d960467b2eb7141b77bc8f8ec
+ size 27866032
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ fastapi
+ uvicorn
+ tensorflow
tokenizer_utils.py ADDED
@@ -0,0 +1,12 @@
+ ### 📄 tokenizer_utils.py
+ def load_index_to_word():
+     # Example mock. Replace with real loading from your JSON or pickle file.
+     return {
+         0: "<PAD>",
+         1: "hello",
+         2: "world",
+         3: "sage",
+         4: "is",
+         5: "thinking",
+
+     }
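Since the function above is only a mock, here is one possible sketch of the real loader it is meant to be replaced by. It assumes the tokenizer's index→word mapping was exported to a JSON file named index_to_word.json (a hypothetical filename, e.g. produced with json.dump(tokenizer.index_word, f)); JSON keys come back as strings and need casting to int for the lookup in app.py.

import json

def load_index_to_word(path: str = "index_to_word.json"):
    # Hypothetical JSON export of the tokenizer's index->word mapping.
    with open(path, "r", encoding="utf-8") as f:
        raw = json.load(f)
    # JSON object keys are strings; the decoder lookup expects integer token ids.
    return {int(index): word for index, word in raw.items()}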