Spaces:
Sleeping
Sleeping
import os
import datetime

# Configure the Hugging Face cache BEFORE importing transformers: the library
# resolves its cache directory at import time, so setting this variable after
# `from transformers import ...` (as the original code did) had no effect.
# NOTE(review): TRANSFORMERS_CACHE is deprecated in recent transformers
# releases in favor of HF_HOME — confirm against the pinned library version.
os.environ['TRANSFORMERS_CACHE'] = '/code/cache'

from fastapi import FastAPI, Request, HTTPException, Header
from transformers import AutoTokenizer, AutoModel
from dotenv import load_dotenv
import torch

# Load variables from .env (provides API_TOKEN for request authorization)
load_dotenv()
API_TOKEN = os.getenv('API_TOKEN')

app = FastAPI()

print('🔄 Carregando modelo e5-large-v2 do Hugging Face...')
tokenizer = AutoTokenizer.from_pretrained("intfloat/e5-large-v2")
# .eval() switches off dropout etc. for deterministic inference
model = AutoModel.from_pretrained("intfloat/e5-large-v2").eval()
def read_root():
    """Health-check endpoint body: report that the API is up.

    NOTE(review): no route decorator (e.g. ``@app.get("/")``) is visible in
    this source — confirm the route registration was not lost in extraction.
    """
    status_payload = {"message": "API ativa 🙌"}
    return status_payload
async def embed_text(request: Request, authorization: str = Header(None)):
    """Handler for POST /embed: return a mean-pooled e5-large-v2 embedding.

    Expects a JSON body ``{"texto": "..."}`` and an
    ``Authorization: Bearer <token>`` header matching ``API_TOKEN``.

    Returns ``{"embedding": [...]}`` on success, or ``{"error": ...}``
    (HTTP 200) when the ``texto`` field is missing — kept as-is for
    backward compatibility with existing callers.

    Raises:
        HTTPException: 401 when the bearer token does not match.
    """
    import hmac  # local import: used only for the constant-time comparison

    print(f'{datetime.datetime.now()} - Requisição recebida para /embed')

    # Constant-time comparison prevents timing attacks on the bearer token;
    # the original plain `!=` leaked how long a matching prefix was.
    expected = f'Bearer {API_TOKEN}'
    if authorization is None or not hmac.compare_digest(authorization, expected):
        raise HTTPException(status_code=401, detail="Não autorizado")

    data = await request.json()
    texto = data.get('texto')
    if not texto:
        return {"error": "Campo 'texto' obrigatório"}

    # e5 models require the 'query: ' prefix for query texts
    # (documents would use 'passage: ' instead).
    texto = 'query: ' + texto.strip()
    print(f'{datetime.datetime.now()} - 🔍 Texto recebido para embedding: {texto}')

    inputs = tokenizer(texto, return_tensors='pt', truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)

    # Mean pooling over token embeddings, ignoring padding positions.
    embeddings = outputs.last_hidden_state
    mask = inputs['attention_mask'].unsqueeze(-1).expand(embeddings.size())
    masked_embeddings = embeddings * mask
    summed = torch.sum(masked_embeddings, dim=1)
    # clamp avoids division by zero for an all-padding (empty) sequence
    counted = torch.clamp(mask.sum(1), min=1e-9)
    mean_pooled = (summed / counted).squeeze().tolist()
    return {"embedding": mean_pooled}