davidtran999 committed on
Commit
4509701
·
verified ·
1 Parent(s): 5150cc5

Upload backend/hue_portal/preload_models.py with huggingface_hub

Browse files
backend/hue_portal/preload_models.py CHANGED
@@ -1,62 +1 @@
"""
Preload all models when worker process starts.
This module is imported to ensure models are loaded before first request.
"""
import os


def preload_all_models() -> None:
    """Preload embedding, LLM, and reranker models in the worker process."""
    print("[PRELOAD] 🔄 Starting model preload in worker process...", flush=True)
    try:
        # 1) Embedding model (BGE-M3). Failure is non-fatal: the model can
        # still be loaded lazily on the first request.
        try:
            print("[PRELOAD] 📦 Preloading embedding model (BGE-M3)...", flush=True)
            from hue_portal.core.embeddings import get_embedding_model

            if get_embedding_model():
                print("[PRELOAD] ✅ Embedding model preloaded successfully", flush=True)
            else:
                print("[PRELOAD] ⚠️ Embedding model not loaded", flush=True)
        except Exception as exc:
            print(f"[PRELOAD] ⚠️ Embedding model preload failed: {exc}", flush=True)

        # 2) LLM model — only attempted when the configured provider is
        # llama.cpp; DEFAULT_LLM_PROVIDER takes precedence over LLM_PROVIDER.
        provider = os.environ.get("DEFAULT_LLM_PROVIDER") or os.environ.get("LLM_PROVIDER", "")
        if provider.lower() != "llama_cpp":
            print(f"[PRELOAD] ⏭️ Skipping LLM preload (provider is {provider or 'not set'}, not llama_cpp)", flush=True)
        else:
            try:
                print("[PRELOAD] 📦 Preloading LLM model (llama.cpp)...", flush=True)
                from hue_portal.chatbot.llm_integration import get_llm_generator

                generator = get_llm_generator()
                # The generator counts as loaded only when its llama_cpp
                # backend attribute exists and is truthy.
                if generator and getattr(generator, "llama_cpp", None):
                    print("[PRELOAD] ✅ LLM model preloaded successfully", flush=True)
                else:
                    print("[PRELOAD] ⚠️ LLM model not loaded (may load on first request)", flush=True)
            except Exception as exc:
                print(f"[PRELOAD] ⚠️ LLM model preload failed: {exc} (will load on first request)", flush=True)

        # 3) Reranker model. Likewise best-effort.
        try:
            print("[PRELOAD] 📦 Preloading reranker model...", flush=True)
            from hue_portal.core.reranker import get_reranker

            if get_reranker():
                print("[PRELOAD] ✅ Reranker model preloaded successfully", flush=True)
            else:
                print("[PRELOAD] ⚠️ Reranker model not loaded (may load on first request)", flush=True)
        except Exception as exc:
            print(f"[PRELOAD] ⚠️ Reranker preload failed: {exc} (will load on first request)", flush=True)

        print("[PRELOAD] ✅ Model preload completed in worker process", flush=True)
    except Exception as exc:
        # Defensive catch-all: preload is purely an optimization, so any
        # unexpected error is logged and swallowed — models load on demand.
        print(f"[PRELOAD] ⚠️ Model preload error: {exc} (models will load on first request)", flush=True)
        import traceback

        traceback.print_exc()
 
1
+ hơvân