Update app.py
app.py changed: a richer logging format, f-string timestamps in the endpoint logs, a smaller llama.cpp memory footprint, and new concurrency limits on the uvicorn server.
```diff
@@ -10,7 +10,7 @@ import asyncio
 import uvicorn
 
 # Set up logging
-logging.basicConfig(level=logging.INFO)
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
 logger = logging.getLogger(__name__)
 
 # Set up cache directory
```
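The new `format` string prefixes every record with a timestamp, logger name, and level, which is what makes the startup and endpoint logs below traceable over time. A minimal sketch of what it produces:

```python
import logging

# Same basicConfig as the new revision.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)
logger.info("models loading")
# -> 2025-01-01 12:00:00,000 - __main__ - INFO - models loading
```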
```diff
@@ -30,10 +30,10 @@ t5_model = None
 mistral = None
 models_loaded = False
 
-# Root endpoint
+# Root endpoint
 @app.get("/")
 async def root():
-    logger.info("Root endpoint called at
+    logger.info(f"Root endpoint called at {time.time()}")
     return JSONResponse(
         content={"message": "MGZON Smart Assistant is running"},
         headers={"Cache-Control": "no-cache", "Connection": "keep-alive"}
```
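For context, the endpoint this hunk touches, sketched as a self-contained app; the imports are assumed from the top of app.py, which this view does not show:

```python
import logging
import time

from fastapi import FastAPI
from fastapi.responses import JSONResponse

app = FastAPI()
logger = logging.getLogger(__name__)

@app.get("/")
async def root():
    # f-string, as in the new revision; the old call is cut off in this view
    logger.info(f"Root endpoint called at {time.time()}")
    return JSONResponse(
        content={"message": "MGZON Smart Assistant is running"},
        # no-cache stops intermediaries from caching the banner;
        # keep-alive reuses the connection for repeated polls
        headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
    )
```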
```diff
@@ -42,7 +42,7 @@ async def root():
 # Health check endpoint
 @app.get("/health")
 async def health_check():
-    logger.info("Health check endpoint called at
+    logger.info(f"Health check endpoint called at {time.time()}")
     if not models_loaded:
         logger.info("Returning 'loading' status")
         return JSONResponse(
```
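The hunk truncates right after `return JSONResponse(`, so the loading payload and status code below are assumptions; the visible logic is only that `/health` answers differently until `models_loaded` flips. Continuing the sketch above:

```python
models_loaded = False  # set to True by load_models() elsewhere in app.py

@app.get("/health")
async def health_check():
    logger.info(f"Health check endpoint called at {time.time()}")
    if not models_loaded:
        logger.info("Returning 'loading' status")
        # Assumed payload and 503; the diff cuts off here.
        return JSONResponse(content={"status": "loading"}, status_code=503)
    return JSONResponse(content={"status": "ok"})
```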
```diff
@@ -88,9 +88,9 @@ async def load_models():
     logger.info(f"Loading Mistral model from {gguf_path}")
     mistral = Llama(
         model_path=gguf_path,
-        n_ctx=
-        n_threads=
-        n_batch=
+        n_ctx=1024,    # reduced n_ctx to cut memory usage
+        n_threads=1,   # reduced threads to 1 to lighten the load
+        n_batch=256,   # reduced n_batch to lighten the load
         verbose=True
     )
     logger.info(f"Successfully loaded Mistral model from {gguf_path} in {time.time() - start_time} seconds")
```
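The numbers are plausible memory levers: the KV cache grows linearly with `n_ctx`, `n_batch` sizes the prompt-evaluation buffers, and `n_threads=1` trades throughput for a lighter CPU load. A hedged sketch using the llama-cpp-python constructor, with a placeholder path standing in for the `gguf_path` that app.py resolves itself:

```python
from llama_cpp import Llama

mistral = Llama(
    model_path="models/mistral.gguf",  # placeholder path
    n_ctx=1024,    # context window; KV-cache memory scales with this
    n_threads=1,   # single inference thread keeps CPU load down
    n_batch=256,   # tokens evaluated per batch during prompt processing
    verbose=True,  # log load details, matching the commit
)
result = mistral("Q: What is MGZON? A:", max_tokens=32)
print(result["choices"][0]["text"])
```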
```diff
@@ -105,7 +105,7 @@ async def load_models():
 # Run model loading in the background
 @app.on_event("startup")
 async def startup_event():
-    logger.info("Startup event triggered at
+    logger.info(f"Startup event triggered at {time.time()}")
     asyncio.create_task(load_models())
 
 # Define request schema
```
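`asyncio.create_task` schedules `load_models()` on the running event loop and returns immediately, so the server starts serving while the weights load; that is what lets `/health` report a loading state in the meantime. A minimal sketch of the pattern:

```python
import asyncio

async def load_models():
    global models_loaded
    await asyncio.sleep(5)  # stand-in for the real, slow model loading
    models_loaded = True

@app.on_event("startup")
async def startup_event():
    logger.info(f"Startup event triggered at {time.time()}")
    # Fire and forget: startup returns at once, loading continues in the background.
    asyncio.create_task(load_models())
```

One caveat: the `Llama(...)` constructor is synchronous, so if the real `load_models` builds it directly on the loop, requests stall for the duration of the load; `run_in_executor` is the usual way to offload it.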
```diff
@@ -153,6 +153,8 @@ if __name__ == "__main__":
         host="0.0.0.0",
         port=8080,
         log_level="info",
-        workers=1,
-        timeout_keep_alive=30
+        workers=1,
+        timeout_keep_alive=30,
+        limit_concurrency=10,   # reduce concurrent connections
+        limit_max_requests=100  # cap the maximum number of requests
     )
```
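`limit_concurrency` has uvicorn answer 503 once more than 10 connections are in flight, and `limit_max_requests` terminates the worker after 100 requests, a blunt guard against memory creep that relies on a supervisor to restart the process. A sketch of the full call; the `"app:app"` import string is an assumption, since this hunk does not show the first argument:

```python
import uvicorn

if __name__ == "__main__":
    uvicorn.run(
        "app:app",               # assumed import string
        host="0.0.0.0",
        port=8080,
        log_level="info",
        workers=1,
        timeout_keep_alive=30,   # close idle keep-alive connections after 30 s
        limit_concurrency=10,    # 503 once more than 10 connections are in flight
        limit_max_requests=100,  # terminate the worker after 100 requests
    )
```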