VietCat committed on
Commit
a692f28
·
1 Parent(s): b5c6b08

add more log

Browse files
Files changed (1) hide show
  1. app.py +26 -1
app.py CHANGED
@@ -2,26 +2,51 @@ from fastapi import FastAPI, Request
2
  from pydantic import BaseModel
3
  from transformers import AutoTokenizer, AutoModel
4
  import torch
 
 
 
 
 
 
 
 
 
5
 
6
  app = FastAPI()
7
 
8
  # Load model
9
  model_name = "AITeamVN/Vietnamese_Embedding_v2"
 
10
  tokenizer = AutoTokenizer.from_pretrained(model_name)
11
  model = AutoModel.from_pretrained(model_name)
 
12
 
13
  class InputText(BaseModel):
14
  text: str
15
 
16
  @app.get("/")
17
  def root():
 
 
18
  return {"message": "AITeamVN/Vietnamese_Embedding_v2 embedding API is running."}
19
 
20
  @app.post("/embed")
21
  def get_embedding(data: InputText):
 
 
 
 
22
  inputs = tokenizer(data.text, return_tensors="pt", padding=True, truncation=True)
 
 
 
 
23
  with torch.no_grad():
24
  outputs = model(**inputs)
25
- # Get CLS token or use pooling method
26
  embedding = outputs.last_hidden_state[:, 0, :].squeeze().tolist()
 
 
 
 
 
27
  return {"embedding": embedding}
 
2
  from pydantic import BaseModel
3
  from transformers import AutoTokenizer, AutoModel
4
  import torch
5
+ import time
6
+ import logging
7
+ from datetime import datetime
8
+
9
+ # Cấu hình logging
10
+ logging.basicConfig(
11
+ format="%(asctime)s - %(levelname)s - %(message)s",
12
+ level=logging.INFO
13
+ )
14
 
15
  app = FastAPI()
16
 
17
  # Load model
18
  model_name = "AITeamVN/Vietnamese_Embedding_v2"
19
+ logging.info(f"Loading model: {model_name}")
20
  tokenizer = AutoTokenizer.from_pretrained(model_name)
21
  model = AutoModel.from_pretrained(model_name)
22
+ logging.info("Model loaded successfully.")
23
 
24
  class InputText(BaseModel):
25
  text: str
26
 
27
  @app.get("/")
28
  def root():
29
+ now = datetime.now().isoformat()
30
+ logging.info(f"[GET /] Received health check at {now}")
31
  return {"message": "AITeamVN/Vietnamese_Embedding_v2 embedding API is running."}
32
 
33
  @app.post("/embed")
34
  def get_embedding(data: InputText):
35
+ start_time = time.time()
36
+ start_ts = datetime.now().isoformat()
37
+
38
+ # Tokenize input
39
  inputs = tokenizer(data.text, return_tensors="pt", padding=True, truncation=True)
40
+ input_token_count = inputs["input_ids"].shape[1]
41
+ logging.info(f"[POST /embed] Start at {start_ts} | Input text: '{data.text[:50]}'... | Tokens: {input_token_count}")
42
+
43
+ # Run model inference
44
  with torch.no_grad():
45
  outputs = model(**inputs)
 
46
  embedding = outputs.last_hidden_state[:, 0, :].squeeze().tolist()
47
+
48
+ end_ts = datetime.now().isoformat()
49
+ duration_ms = (time.time() - start_time) * 1000
50
+ logging.info(f"[POST /embed] Done at {end_ts} | Embedding size: {len(embedding)} | Time: {duration_ms:.2f} ms")
51
+
52
  return {"embedding": embedding}