Spaces:
Running
Running
File size: 1,956 Bytes
b5c6b08 a692f28 c09c72a a692f28 b5c6b08 c09c72a b5c6b08 a692f28 b5c6b08 c09c72a a692f28 b5c6b08 a692f28 c09c72a b5c6b08 c09c72a a692f28 c09c72a a692f28 b5c6b08 a692f28 c09c72a a692f28 c09c72a b5c6b08 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
from fastapi import FastAPI, Request
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModel
import torch
import time
import logging
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor
# Configure logging: every record is printed as
# "<timestamp> - <level> - <message>", and records below INFO are dropped.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO
)
# Limit the pool to a single worker thread so the free-tier HFS CPU
# (presumably Hugging Face Spaces) is not overloaded; submitted jobs
# therefore run one at a time.
executor = ThreadPoolExecutor(max_workers=1)
# Application object on which the HTTP routes are registered.
app = FastAPI()
# Load the tokenizer and model once, when the module is imported, so every
# request reuses the same instances.
model_name = "AITeamVN/Vietnamese_Embedding_v2"
logging.info(f"Loading model: {model_name}")
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
# Put the model in evaluation (inference) mode.
model.eval()
# Restrict PyTorch to one CPU thread — presumably for the same reason the
# executor is limited to a single worker.
torch.set_num_threads(1)
logging.info("Model loaded successfully.")
class InputText(BaseModel):
    # Request body for the embedding endpoint.
    # (Documented with comments rather than a docstring so the generated
    # schema is left untouched.)

    # The text to embed.
    text: str
@app.get("/")
def root():
    # Health-check endpoint: logs the time of the call and returns a fixed
    # status message. Comments are used instead of a docstring so the
    # route's published description does not change.
    timestamp = datetime.now().isoformat()
    logging.info(f"[GET /] Health check at {timestamp}")
    status = {"message": "Vietnamese Embedding API is running."}
    return status
# Embedding computation, kept separate from the HTTP handler.
def compute_embedding(text: str):
    """Compute the embedding vector for a single piece of text.

    The text is tokenized with truncation enabled, passed through the model
    with gradient tracking disabled, and the last-layer hidden state of the
    first token is used as the embedding. The start and the end of the
    computation are logged, including the first 50 characters of the input,
    the token count, the vector length and the elapsed time.

    Args:
        text: The text to embed.

    Returns:
        The embedding as a flat list of floats.
    """
    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # (or go negative) if the system clock is adjusted mid-request.
    start_time = time.perf_counter()
    start_ts = datetime.now().isoformat()

    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    token_count = inputs["input_ids"].shape[1]
    logging.info(f"[EMBED] Start: {start_ts} | Input: '{text[:50]}'... | Tokens: {token_count}")

    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = model(**inputs)

    # Take the first token's hidden state and drop only the batch dimension
    # (a bare squeeze() would also collapse any other size-1 dimension).
    embedding = outputs.last_hidden_state[:, 0, :].squeeze(0).tolist()

    end_ts = datetime.now().isoformat()
    duration_ms = (time.perf_counter() - start_time) * 1000
    logging.info(f"[EMBED] Done: {end_ts} | Embedding size: {len(embedding)} | Time: {duration_ms:.2f} ms")
    return embedding
@app.post("/embed")
def get_embedding(data: InputText):
    # Hand the work to the thread pool, then block until it has finished.
    # (No docstring here, so the route's published description is unchanged.)
    pending = executor.submit(compute_embedding, data.text)
    vector = pending.result()
    return {"embedding": vector}
|