Update app.py
Browse files
app.py
CHANGED
|
@@ -21,8 +21,8 @@ logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(me
|
|
| 21 |
logger = logging.getLogger(__name__)
|
| 22 |
|
| 23 |
# ─── Config ───────────────────────────────────────────────────────────────────
|
| 24 |
-
BEARER_TOKEN = os.environ.get("
|
| 25 |
-
MODEL_PATH = os.environ.get("MODEL_PATH", "/app/models/
|
| 26 |
MODEL_NAME = os.environ.get("MODEL_NAME", "qwen2.5-coder-7b-instruct")
|
| 27 |
N_CTX = int(os.environ.get("N_CTX", "4096"))
|
| 28 |
N_THREADS = int(os.environ.get("N_THREADS", "4"))
|
|
@@ -35,11 +35,38 @@ if not BEARER_TOKEN:
|
|
| 35 |
# ─── Global model holder ──────────────────────────────────────────────────────
|
| 36 |
llm = None
|
| 37 |
|
| 38 |
-
# ─── Lifespan: load model once at startup ────────────────────────
|
| 39 |
@asynccontextmanager
|
| 40 |
async def lifespan(app: FastAPI):
|
| 41 |
-
global llm
|
| 42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
try:
|
| 44 |
from llama_cpp import Llama
|
| 45 |
llm = Llama(
|
|
@@ -48,14 +75,14 @@ async def lifespan(app: FastAPI):
|
|
| 48 |
n_threads=N_THREADS,
|
| 49 |
n_gpu_layers=N_GPU_LAYERS,
|
| 50 |
verbose=False,
|
| 51 |
-
chat_format="chatml", #
|
| 52 |
)
|
| 53 |
-
logger.info("β
Model
|
| 54 |
except Exception as e:
|
| 55 |
-
logger.error(f"β
|
| 56 |
raise RuntimeError(f"Model load failed: {e}")
|
| 57 |
yield
|
| 58 |
-
logger.info("π
|
| 59 |
|
| 60 |
# ─── App ──────────────────────────────────────────────────────────────────────
|
| 61 |
app = FastAPI(
|
|
|
|
| 21 |
logger = logging.getLogger(__name__)
|
| 22 |
|
| 23 |
# ─── Config ───────────────────────────────────────────────────────────────────
|
| 24 |
+
# API bearer token, read from the BEARER_TOKEN environment variable so the
# secret is never hard-coded in the source. The empty-string default keeps the
# value falsy when the variable is unset, which is what the `if not
# BEARER_TOKEN:` check further down in this file tests for.
BEARER_TOKEN = os.environ.get("BEARER_TOKEN", "")
|
| 25 |
+
MODEL_PATH = os.environ.get("MODEL_PATH", "/app/models/qwen2.5-coder-7b-instruct-q4_k_m.gguf")
|
| 26 |
MODEL_NAME = os.environ.get("MODEL_NAME", "qwen2.5-coder-7b-instruct")
|
| 27 |
N_CTX = int(os.environ.get("N_CTX", "4096"))
|
| 28 |
N_THREADS = int(os.environ.get("N_THREADS", "4"))
|
|
|
|
| 35 |
# ─── Global model holder ──────────────────────────────────────────────────────
|
| 36 |
llm = None
|
| 37 |
|
| 38 |
+
# ─── Lifespan: download and load model once at startup ────────────────────────
|
| 39 |
@asynccontextmanager
|
| 40 |
async def lifespan(app: FastAPI):
|
| 41 |
+
global llm, MODEL_PATH
|
| 42 |
+
|
| 43 |
+
# 1. Tự động kiểm tra và tải model nếu chưa tồn tại ở Runtime
|
| 44 |
+
if not os.path.exists(MODEL_PATH):
|
| 45 |
+
logger.info("π Model khΓ΄ng tΓ¬m thαΊ₯y tαΊ‘i local. Δang tiαΊΏn hΓ nh tαΊ£i tα»« Hugging Face Hub...")
|
| 46 |
+
try:
|
| 47 |
+
from huggingface_hub import hf_hub_download
|
| 48 |
+
|
| 49 |
+
model_dir = os.path.dirname(MODEL_PATH)
|
| 50 |
+
model_filename = os.path.basename(MODEL_PATH)
|
| 51 |
+
|
| 52 |
+
# Đảm bảo thư mục đích tồn tại
|
| 53 |
+
os.makedirs(model_dir, exist_ok=True)
|
| 54 |
+
|
| 55 |
+
# Thực hiện tải file (sử dụng mạng runtime cực kỳ ổn định của HF)
|
| 56 |
+
downloaded_path = hf_hub_download(
|
| 57 |
+
repo_id="Qwen/Qwen2.5-Coder-7B-Instruct-GGUF",
|
| 58 |
+
filename=model_filename,
|
| 59 |
+
local_dir=model_dir,
|
| 60 |
+
local_dir_use_symlinks=False
|
| 61 |
+
)
|
| 62 |
+
MODEL_PATH = downloaded_path
|
| 63 |
+
logger.info(f"β
TαΊ£i model thΓ nh cΓ΄ng! ΔΖ°α»ng dαΊ«n thα»±c tαΊΏ: {MODEL_PATH}")
|
| 64 |
+
except Exception as e:
|
| 65 |
+
logger.error(f"β Lα»i tαΊ£i model tα»« Hugging Face Hub: {e}")
|
| 66 |
+
raise RuntimeError(f"Model download failed: {e}")
|
| 67 |
+
|
| 68 |
+
# 2. Khởi tạo và nạp model vào bộ nhớ RAM
|
| 69 |
+
logger.info(f"π Δang nαΊ‘p model tα»«: {MODEL_PATH}")
|
| 70 |
try:
|
| 71 |
from llama_cpp import Llama
|
| 72 |
llm = Llama(
|
|
|
|
| 75 |
n_threads=N_THREADS,
|
| 76 |
n_gpu_layers=N_GPU_LAYERS,
|
| 77 |
verbose=False,
|
| 78 |
+
chat_format="chatml", # PhΓΉ hợp cho cαΊ₯u trΓΊc Qwen2.5 Instruct
|
| 79 |
)
|
| 80 |
+
logger.info("β
Model ΔΓ£ Δược nαΊ‘p thΓ nh cΓ΄ng vΓ sαΊ΅n sΓ ng xα» lΓ½.")
|
| 81 |
except Exception as e:
|
| 82 |
+
logger.error(f"β KhΓ΄ng thα» nαΊ‘p cαΊ₯u hΓ¬nh model LLM: {e}")
|
| 83 |
raise RuntimeError(f"Model load failed: {e}")
|
| 84 |
yield
|
| 85 |
+
logger.info("π Δang tαΊ―t α»©ng dα»₯ng Backend.")
|
| 86 |
|
| 87 |
# ─── App ──────────────────────────────────────────────────────────────────────
|
| 88 |
app = FastAPI(
|