quan3s committed on
Commit
d3d6770
·
verified ·
1 Parent(s): ddc9788

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -9
app.py CHANGED
@@ -21,8 +21,8 @@ logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(me
21
  logger = logging.getLogger(__name__)
22
 
23
  # ─── Config ────────────────────────────────────────────────────────────────────
24
- BEARER_TOKEN = os.environ.get("BEARER_TOKEN", "")
25
- MODEL_PATH = os.environ.get("MODEL_PATH", "/app/models/model.gguf")
26
  MODEL_NAME = os.environ.get("MODEL_NAME", "qwen2.5-coder-7b-instruct")
27
  N_CTX = int(os.environ.get("N_CTX", "4096"))
28
  N_THREADS = int(os.environ.get("N_THREADS", "4"))
@@ -35,11 +35,38 @@ if not BEARER_TOKEN:
35
  # ─── Global model holder ────────────────────────────────────────────────────────
36
  llm = None
37
 
38
- # ─── Lifespan: load model once at startup ──────────────────────────────────────
39
  @asynccontextmanager
40
  async def lifespan(app: FastAPI):
41
- global llm
42
- logger.info(f"πŸ”„ Loading model from: {MODEL_PATH}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  try:
44
  from llama_cpp import Llama
45
  llm = Llama(
@@ -48,14 +75,14 @@ async def lifespan(app: FastAPI):
48
  n_threads=N_THREADS,
49
  n_gpu_layers=N_GPU_LAYERS,
50
  verbose=False,
51
- chat_format="chatml", # works for Qwen2.5 / most instruct models
52
  )
53
- logger.info("βœ… Model loaded successfully.")
54
  except Exception as e:
55
- logger.error(f"❌ Failed to load model: {e}")
56
  raise RuntimeError(f"Model load failed: {e}")
57
  yield
58
- logger.info("πŸ›‘ Shutting down.")
59
 
60
  # ─── App ────────────────────────────────────────────────────────────────────────
61
  app = FastAPI(
 
21
  logger = logging.getLogger(__name__)
22
 
23
  # ─── Config ────────────────────────────────────────────────────────────────────
24
+ BEARER_TOKEN = os.environ.get("quan11082012, "")
25
+ MODEL_PATH = os.environ.get("MODEL_PATH", "/app/models/qwen2.5-coder-7b-instruct-q4_k_m.gguf")
26
  MODEL_NAME = os.environ.get("MODEL_NAME", "qwen2.5-coder-7b-instruct")
27
  N_CTX = int(os.environ.get("N_CTX", "4096"))
28
  N_THREADS = int(os.environ.get("N_THREADS", "4"))
 
35
  # ─── Global model holder ────────────────────────────────────────────────────────
36
  llm = None
37
 
38
+ # ─── Lifespan: download and load model once at startup ─────────────────────────
39
  @asynccontextmanager
40
  async def lifespan(app: FastAPI):
41
+ global llm, MODEL_PATH
42
+
43
+ # 1. Tự động kiểm tra và tải model nếu chưa tồn tại ở Runtime
44
+ if not os.path.exists(MODEL_PATH):
45
+ logger.info("🚚 Model khΓ΄ng tΓ¬m thαΊ₯y tαΊ‘i local. Đang tiαΊΏn hΓ nh tαΊ£i tα»« Hugging Face Hub...")
46
+ try:
47
+ from huggingface_hub import hf_hub_download
48
+
49
+ model_dir = os.path.dirname(MODEL_PATH)
50
+ model_filename = os.path.basename(MODEL_PATH)
51
+
52
+ # Đảm bảo thư mục đích tồn tại
53
+ os.makedirs(model_dir, exist_ok=True)
54
+
55
+ # Thực hiện tải file (sử dụng mạng runtime cực kỳ ổn định của HF)
56
+ downloaded_path = hf_hub_download(
57
+ repo_id="Qwen/Qwen2.5-Coder-7B-Instruct-GGUF",
58
+ filename=model_filename,
59
+ local_dir=model_dir,
60
+ local_dir_use_symlinks=False
61
+ )
62
+ MODEL_PATH = downloaded_path
63
+ logger.info(f"βœ… TαΊ£i model thΓ nh cΓ΄ng! Đường dαΊ«n thα»±c tαΊΏ: {MODEL_PATH}")
64
+ except Exception as e:
65
+ logger.error(f"❌ Lα»—i tαΊ£i model tα»« Hugging Face Hub: {e}")
66
+ raise RuntimeError(f"Model download failed: {e}")
67
+
68
+ # 2. Khởi tạo và nạp model vào bộ nhớ RAM
69
+ logger.info(f"πŸ”„ Đang nαΊ‘p model tα»«: {MODEL_PATH}")
70
  try:
71
  from llama_cpp import Llama
72
  llm = Llama(
 
75
  n_threads=N_THREADS,
76
  n_gpu_layers=N_GPU_LAYERS,
77
  verbose=False,
78
+ chat_format="chatml", # PhΓΉ hợp cho cαΊ₯u trΓΊc Qwen2.5 Instruct
79
  )
80
+ logger.info("βœ… Model Δ‘Γ£ được nαΊ‘p thΓ nh cΓ΄ng vΓ  sαΊ΅n sΓ ng xα»­ lΓ½.")
81
  except Exception as e:
82
+ logger.error(f"❌ KhΓ΄ng thể nαΊ‘p cαΊ₯u hΓ¬nh model LLM: {e}")
83
  raise RuntimeError(f"Model load failed: {e}")
84
  yield
85
+ logger.info("πŸ›‘ Đang tαΊ―t α»©ng dα»₯ng Backend.")
86
 
87
  # ─── App ────────────────────────────────────────────────────────────────────────
88
  app = FastAPI(