Davidtran99 committed on
Commit
7a88b22
·
1 Parent(s): 78adb6c

Update: disable Query Rewrite, optimize performance

Browse files
Files changed (2) hide show
  1. Dockerfile +1 -63
  2. README.md +1 -1
Dockerfile CHANGED
@@ -54,73 +54,11 @@ fi
54
  echo "[Docker] Collecting static files..."
55
  python /app/hue_portal/manage.py collectstatic --noinput || echo "[Docker] Collectstatic failed, continuing..."
56
 
57
- echo "[Docker] Preloading all models to avoid first-request timeout..."
58
- python -c "
59
- import os
60
- import sys
61
- os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'hue_portal.hue_portal.settings')
62
- import django
63
- django.setup()
64
-
65
- print('[Docker] 🔄 Starting model preload...', flush=True)
66
-
67
- # 1. Preload Embedding Model (BGE-M3)
68
- try:
69
- print('[Docker] 📦 Preloading embedding model (BGE-M3)...', flush=True)
70
- from hue_portal.core.embeddings import get_embedding_model
71
- embedding_model = get_embedding_model()
72
- if embedding_model:
73
- print('[Docker] ✅ Embedding model preloaded successfully', flush=True)
74
- else:
75
- print('[Docker] ⚠️ Embedding model not loaded', flush=True)
76
- except Exception as e:
77
- print(f'[Docker] ⚠️ Embedding model preload failed: {e}', flush=True)
78
-
79
- # 2. Preload LLM Model (llama.cpp)
80
- llm_provider = os.environ.get('DEFAULT_LLM_PROVIDER') or os.environ.get('LLM_PROVIDER', '')
81
- if llm_provider.lower() == 'llama_cpp':
82
- try:
83
- print('[Docker] 📦 Preloading LLM model (llama.cpp)...', flush=True)
84
- from hue_portal.chatbot.llm_integration import get_llm_generator
85
- llm_gen = get_llm_generator()
86
- if llm_gen and hasattr(llm_gen, 'llama_cpp') and llm_gen.llama_cpp:
87
- print('[Docker] ✅ LLM model preloaded successfully', flush=True)
88
- else:
89
- print('[Docker] ⚠️ LLM model not loaded (may load on first request)', flush=True)
90
- except Exception as e:
91
- print(f'[Docker] ⚠️ LLM model preload failed: {e} (will load on first request)', flush=True)
92
- else:
93
- print(f'[Docker] ⏭️ Skipping LLM preload (provider is {llm_provider or \"not set\"}, not llama_cpp)', flush=True)
94
-
95
- # 3. Preload Reranker Model
96
- try:
97
- print('[Docker] 📦 Preloading reranker model...', flush=True)
98
- from hue_portal.core.reranker import get_reranker
99
- reranker = get_reranker()
100
- if reranker:
101
- print('[Docker] ✅ Reranker model preloaded successfully', flush=True)
102
- else:
103
- print('[Docker] ⚠️ Reranker model not loaded (may load on first request)', flush=True)
104
- except Exception as e:
105
- print(f'[Docker] ⚠️ Reranker preload failed: {e} (will load on first request)', flush=True)
106
-
107
- print('[Docker] ✅ Model preload completed', flush=True)
108
- " || echo "[Docker] ⚠️ Model preload had errors (models will load on first request)"
109
-
110
  echo "[Docker] Starting gunicorn..."
111
- # Reduce tokenizers parallelism warnings and risk of fork deadlocks
112
- export TOKENIZERS_PARALLELISM=false
113
- # Shorter timeouts to avoid long hangs; adjust if needed
114
- cd /app/backend && export PYTHONPATH="/app/backend:${PYTHONPATH}" && exec gunicorn -b 0.0.0.0:7860 --timeout 600 --graceful-timeout 600 --worker-class sync --config python:hue_portal.hue_portal.gunicorn_app hue_portal.hue_portal.gunicorn_app:application
115
  EOF
116
 
117
  RUN chmod +x /entrypoint.sh
118
 
119
  EXPOSE 7860
120
  CMD ["/entrypoint.sh"]
121
-
122
- EXPOSE 7860
123
- CMD ["/entrypoint.sh"]
124
-
125
- EXPOSE 7860
126
- CMD ["/entrypoint.sh"]
 
54
  echo "[Docker] Collecting static files..."
55
  python /app/hue_portal/manage.py collectstatic --noinput || echo "[Docker] Collectstatic failed, continuing..."
56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  echo "[Docker] Starting gunicorn..."
58
+ exec gunicorn -b 0.0.0.0:7860 --timeout 1800 --graceful-timeout 1800 --worker-class sync hue_portal.hue_portal.wsgi:application
 
 
 
59
  EOF
60
 
61
  RUN chmod +x /entrypoint.sh
62
 
63
  EXPOSE 7860
64
  CMD ["/entrypoint.sh"]
 
 
 
 
 
 
README.md CHANGED
@@ -447,7 +447,7 @@ class SlowPathHandler:
447
  ## 🚀 Deployment
448
 
449
  ### Hugging Face Spaces
450
- - **Space:** `davidtran999/hue-portal-backend`
451
  - **SDK:** Docker
452
  - **Resources:** CPU, 16GB RAM (free tier)
453
  - **Database:** Railway PostgreSQL (external)
 
447
  ## 🚀 Deployment
448
 
449
  ### Hugging Face Spaces
450
+ - **Space:** `davidtran999/hue-portal-backend-v2`
451
  - **SDK:** Docker
452
  - **Resources:** CPU, 16GB RAM (free tier)
453
  - **Database:** Railway PostgreSQL (external)