Spaces:
Running
Running
| from fastapi import FastAPI, Request | |
| from pydantic import BaseModel | |
| from transformers import AutoTokenizer, AutoModel | |
| import torch | |
| import time | |
| import logging | |
| from datetime import datetime | |
| from concurrent.futures import ThreadPoolExecutor | |
# Configure logging: timestamp, level and message, at INFO and above.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)

# Limit the pool to a single worker thread so the free-tier HFS
# (presumably Hugging Face Spaces) CPU is not overloaded.
executor = ThreadPoolExecutor(max_workers=1)

app = FastAPI()

# Load the tokenizer and model once, at import time.
model_name = "AITeamVN/Vietnamese_Embedding_v2"
logging.info(f"Loading model: {model_name}")
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
model.eval()  # switch the module to evaluation mode
torch.set_num_threads(1)  # one intra-op CPU thread, matching the single-worker pool
logging.info("Model loaded successfully.")
# Request body schema used by the embedding handler.
# (Documented with comments rather than a docstring so the generated
# schema description is left unchanged.)
class InputText(BaseModel):
    # Raw text to embed.
    text: str
# Registered on GET "/" (the path the handler's own log line names);
# without a route decorator the function is never reachable through `app`.
@app.get("/")
def root():
    """Health-check handler.

    Logs the current local time and returns a fixed status payload.

    Returns:
        dict: ``{"message": "Vietnamese Embedding API is running."}``.
    """
    now = datetime.now().isoformat()
    logging.info(f"[GET /] Health check at {now}")
    return {"message": "Vietnamese Embedding API is running."}
# Embedding computation, kept separate from the request handler.
def compute_embedding(text: str):
    """Compute an embedding vector for a single text.

    The text is tokenized (with truncation), run through the module-level
    ``model`` without gradient tracking, and the last-layer hidden state at
    sequence position 0 is returned as a plain Python list.

    Args:
        text: The input string to embed.

    Returns:
        A list of floats: the position-0 vector of ``last_hidden_state``.
    """
    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # (or go negative) if the system clock is adjusted mid-request.
    started = time.perf_counter()
    start_ts = datetime.now().isoformat()

    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    token_count = inputs["input_ids"].shape[1]
    logging.info(f"[EMBED] Start: {start_ts} | Input: '{text[:50]}'... | Tokens: {token_count}")

    with torch.no_grad():
        outputs = model(**inputs)

    # Take the first token's vector (presumably the CLS-style token — confirm
    # against the model card) and drop only the batch axis.
    embedding = outputs.last_hidden_state[:, 0, :].squeeze(0).tolist()

    end_ts = datetime.now().isoformat()
    duration_ms = (time.perf_counter() - started) * 1000
    logging.info(f"[EMBED] Done: {end_ts} | Embedding size: {len(embedding)} | Time: {duration_ms:.2f} ms")
    return embedding
# NOTE(review): no route decorator is visible on this handler; confirm it is
# registered with `app` (the method and path are not shown here).
def get_embedding(data: InputText):
    """Return the embedding for ``data.text``.

    The computation is submitted to the module-level thread pool and this
    call blocks until the result is ready.

    Args:
        data: Request payload carrying the text to embed.

    Returns:
        dict: ``{"embedding": <result of compute_embedding>}``.
    """
    # Hand the work to the thread pool, then wait for it to finish.
    pending = executor.submit(compute_embedding, data.text)
    return {"embedding": pending.result()}