ex510 committed on
Commit
27b8745
·
verified ·
1 Parent(s): d3399d8

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +17 -7
main.py CHANGED
@@ -6,11 +6,7 @@ import asyncio
6
  from concurrent.futures import ThreadPoolExecutor
7
  from typing import List
8
  import numpy as np
9
-
10
- app = FastAPI(title="Text Embedding API (Qwen/Qwen3-Embedding-0.6B)")
11
-
12
- class TextRequest(BaseModel):
13
- text: str = Field(..., min_length=1, description="Text to embed")
14
 
15
  # Globals
16
  model = None
@@ -19,13 +15,27 @@ model_id = 'Qwen/Qwen3-Embedding-0.6B'
19
  executor = ThreadPoolExecutor(max_workers=4)
20
  MAX_TOKENS = 512
21
 
22
- @app.on_event("startup")
23
- async def load_model():
 
24
  global model, tokenizer
25
  print(f"Loading model: {model_id}...")
26
  model = SentenceTransformer(model_id)
27
  tokenizer = model.tokenizer
28
  print("Model loaded successfully")
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
  @app.get("/")
31
  def home():
 
6
  from concurrent.futures import ThreadPoolExecutor
7
  from typing import List
8
  import numpy as np
9
+ from contextlib import asynccontextmanager
 
 
 
 
10
 
11
  # Globals
12
  model = None
 
15
  executor = ThreadPoolExecutor(max_workers=4)
16
  MAX_TOKENS = 512
17
 
18
+ @asynccontextmanager
19
+ async def lifespan(app: FastAPI):
20
+ # Load the model and tokenizer at startup
21
  global model, tokenizer
22
  print(f"Loading model: {model_id}...")
23
  model = SentenceTransformer(model_id)
24
  tokenizer = model.tokenizer
25
  print("Model loaded successfully")
26
+ yield
27
+ # (Optional) Clean up resources at shutdown
28
+ print("Cleaning up resources...")
29
+ model = None
30
+ tokenizer = None
31
+
32
+ app = FastAPI(
33
+ title="Text Embedding API (Qwen/Qwen3-Embedding-0.6B)",
34
+ lifespan=lifespan
35
+ )
36
+
37
+ class TextRequest(BaseModel):
38
+ text: str = Field(..., min_length=1, description="Text to embed")
39
 
40
  @app.get("/")
41
  def home():