import datetime
import os

import torch
from dotenv import load_dotenv
from fastapi import FastAPI, Header, HTTPException, Request
from transformers import AutoModel, AutoTokenizer

# Load variables from .env (expects API_TOKEN to be defined there).
load_dotenv()
API_TOKEN = os.getenv('API_TOKEN')

# Configure the Hugging Face cache directory; must be set before the
# first from_pretrained() call so downloads land in /code/cache.
os.environ['TRANSFORMERS_CACHE'] = '/code/cache'

app = FastAPI()

# Model and tokenizer are loaded once at startup and shared by all requests.
# eval() disables dropout so embeddings are deterministic.
print('🔄 Carregando modelo e5-large-v2 do Hugging Face...')
tokenizer = AutoTokenizer.from_pretrained("intfloat/e5-large-v2")
model = AutoModel.from_pretrained("intfloat/e5-large-v2").eval()


@app.get("/")
def read_root():
    """Health-check endpoint confirming the API is up."""
    return {"message": "API ativa 🙌"}


@app.post("/embed")
async def embed_text(request: Request, authorization: str = Header(None)):
    """Return a mean-pooled e5-large-v2 embedding for the JSON field 'texto'.

    Expects a Bearer token in the Authorization header matching API_TOKEN.
    Request body: {"texto": "<string>"}.
    Response: {"embedding": [float, ...]} (1024-dim list for e5-large-v2),
    or {"error": ...} when 'texto' is missing/invalid.
    Raises HTTPException 401 on missing/wrong token.
    """
    print(f'{datetime.datetime.now()} - Requisição recebida para /embed')

    # Fail closed: if API_TOKEN is not configured, os.getenv returned None
    # and the original comparison against f'Bearer {API_TOKEN}' would have
    # accepted the literal header "Bearer None". Reject everything instead.
    if not API_TOKEN or authorization != f'Bearer {API_TOKEN}':
        raise HTTPException(status_code=401, detail="Não autorizado")

    data = await request.json()
    texto = data.get('texto')
    # Also reject non-string payloads: the original crashed with a 500
    # (AttributeError on .strip()) when 'texto' was e.g. a number or list.
    if not texto or not isinstance(texto, str):
        return {"error": "Campo 'texto' obrigatório"}

    # e5 models require the 'query: ' prefix for query-side texts
    # (use 'passage: ' when embedding documents instead).
    texto = 'query: ' + texto.strip()
    # texto = 'passage: ' + texto.strip()
    print(f'{datetime.datetime.now()} - 🔍 Texto recebido para embedding: {texto}')

    inputs = tokenizer(texto, return_tensors='pt', truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)

    # Mean pooling over token embeddings, ignoring padding positions:
    # zero out padded tokens via the attention mask, then divide the sum
    # by the (clamped) number of real tokens.
    embeddings = outputs.last_hidden_state
    mask = inputs['attention_mask'].unsqueeze(-1).expand(embeddings.size())
    masked_embeddings = embeddings * mask
    summed = torch.sum(masked_embeddings, dim=1)
    counted = torch.clamp(mask.sum(1), min=1e-9)
    mean_pooled = (summed / counted).squeeze().tolist()

    return {"embedding": mean_pooled}