Spaces:

VietCat
/

VietnameseEmbeddingV2

Running

File size: 1,956 Bytes

b5c6b08
 
 
 
a692f28
 
 
c09c72a
a692f28
 
 
 
 
 
b5c6b08
c09c72a
 
 
b5c6b08
 
 
 
a692f28
b5c6b08
 
c09c72a
 
a692f28
b5c6b08
 
 
 
 
 
a692f28
c09c72a
 
b5c6b08
c09c72a
 
a692f28
 
 
c09c72a
 
 
 
a692f28
b5c6b08
 
 
a692f28
 
 
c09c72a
a692f28
c09c72a
 
 
 
 
 
b5c6b08

from fastapi import FastAPI, Request
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModel
import torch
import time
import logging
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor

# Configure logging: timestamped, level-tagged messages at INFO and above.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO
)

# Cap the pool at a single worker thread so the CPU of the free hosting tier
# ("HFS free" in the original note; presumably Hugging Face Spaces, to be
# confirmed) is not overloaded. Jobs submitted to it therefore run one at a
# time.
executor = ThreadPoolExecutor(max_workers=1)

app = FastAPI()

# Load the tokenizer and model once, at import time, and log around the load.
model_name = "AITeamVN/Vietnamese_Embedding_v2"
logging.info(f"Loading model: {model_name}")
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
model.eval()  # put the model in evaluation mode
torch.set_num_threads(1)  # keep torch's intra-op CPU parallelism at one thread
logging.info("Model loaded successfully.")

# Request body accepted by the POST /embed endpoint.
class InputText(BaseModel):
    # Raw text to compute an embedding for.
    text: str

@app.get("/")
def root():
    # Health-check endpoint: record when the probe arrived, then answer with
    # a fixed status payload.
    probe_time = datetime.now().isoformat()
    logging.info(f"[GET /] Health check at {probe_time}")
    status = {"message": "Vietnamese Embedding API is running."}
    return status

# Embedding computation, kept separate from the HTTP handler.
def compute_embedding(text: str):
    """Embed *text* using the module-level tokenizer and model.

    The text is tokenized (with truncation enabled), passed through the model
    under ``torch.no_grad()``, and the hidden state at sequence position 0 is
    extracted. Start and end events are logged together with the token count,
    the embedding length and the elapsed time in milliseconds.

    Args:
        text: The string to embed. Only its first 50 characters are logged.

    Returns:
        The position-0 slice of ``last_hidden_state`` after
        ``.squeeze().tolist()``: for a single input string, a flat list of
        Python floats.
    """
    # Elapsed time is measured with a monotonic, high-resolution clock so a
    # system clock adjustment during inference cannot skew the reported
    # duration or make it negative. The ISO timestamps below stay wall-clock
    # because they are only printed.
    start_time = time.perf_counter()
    start_ts = datetime.now().isoformat()

    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    # Axis 1 of input_ids is the sequence length, i.e. the number of tokens.
    token_count = inputs["input_ids"].shape[1]

    logging.info(f"[EMBED] Start: {start_ts} | Input: '{text[:50]}'... | Tokens: {token_count}")

    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = model(**inputs)
        # Take the vector at sequence position 0 (presumably the CLS-style
        # token used for sentence pooling; confirm against the model card)
        # and drop the size-1 batch axis.
        embedding = outputs.last_hidden_state[:, 0, :].squeeze().tolist()

    end_ts = datetime.now().isoformat()
    duration_ms = (time.perf_counter() - start_time) * 1000
    logging.info(f"[EMBED] Done: {end_ts} | Embedding size: {len(embedding)} | Time: {duration_ms:.2f} ms")

    return embedding

@app.post("/embed")
def get_embedding(data: InputText):
    # Hand the job to the shared thread pool and block this handler until the
    # result is available.
    pending = executor.submit(compute_embedding, data.text)
    vector = pending.result()
    return {"embedding": vector}