Update app.py
Browse files
app.py
CHANGED
|
@@ -30,6 +30,15 @@ t5_model = None
|
|
| 30 |
mistral = None
|
| 31 |
models_loaded = False
|
| 32 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
# Health check endpoint
|
| 34 |
@app.get("/health")
|
| 35 |
async def health_check():
|
|
@@ -80,7 +89,7 @@ async def load_models():
|
|
| 80 |
mistral = Llama(
|
| 81 |
model_path=gguf_path,
|
| 82 |
n_ctx=2048,
|
| 83 |
-
n_threads=
|
| 84 |
n_batch=512,
|
| 85 |
verbose=True
|
| 86 |
)
|
|
@@ -96,7 +105,7 @@ async def load_models():
|
|
| 96 |
# Run model loading in the background
|
| 97 |
@app.on_event("startup")
|
| 98 |
async def startup_event():
|
| 99 |
-
logger.info("Startup event triggered")
|
| 100 |
asyncio.create_task(load_models())
|
| 101 |
|
| 102 |
# Define request schema
|
|
@@ -107,7 +116,7 @@ class AskRequest(BaseModel):
|
|
| 107 |
# Endpoint: /ask
|
| 108 |
@app.post("/ask")
|
| 109 |
async def ask(req: AskRequest):
|
| 110 |
-
logger.info(f"Received ask request: {req.question}")
|
| 111 |
if not models_loaded:
|
| 112 |
logger.error("Models not loaded yet")
|
| 113 |
raise HTTPException(status_code=503, detail="Models are still loading, please try again later")
|
|
@@ -139,4 +148,11 @@ async def ask(req: AskRequest):
|
|
| 139 |
|
| 140 |
# Run the app
|
| 141 |
if __name__ == "__main__":
|
| 142 |
-
uvicorn.run(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
mistral = None
|
| 31 |
models_loaded = False
|
| 32 |
|
| 33 |
+
# Root endpoint to confirm server is running
@app.get("/")
async def root():
    """Liveness endpoint: report that the assistant service is reachable."""
    logger.info("Root endpoint called at %s", time.time())
    payload = {"message": "MGZON Smart Assistant is running"}
    # Disable caching so monitors always hit the live server; keep the
    # connection alive for quick follow-up probes.
    probe_headers = {"Cache-Control": "no-cache", "Connection": "keep-alive"}
    return JSONResponse(content=payload, headers=probe_headers)
|
| 41 |
+
|
| 42 |
# Health check endpoint
|
| 43 |
@app.get("/health")
|
| 44 |
async def health_check():
|
|
|
|
| 89 |
mistral = Llama(
|
| 90 |
model_path=gguf_path,
|
| 91 |
n_ctx=2048,
|
| 92 |
+
n_threads=2, # reduced threads to 2 to lower the CPU load even further
|
| 93 |
n_batch=512,
|
| 94 |
verbose=True
|
| 95 |
)
|
|
|
|
| 105 |
# Run model loading in the background
@app.on_event("startup")
async def startup_event():
    """On startup, schedule model loading in the background so the server
    starts serving (e.g. /health) without waiting for the models."""
    logger.info("Startup event triggered at %s", time.time())
    # Fire-and-forget: wrap the coroutine into a task on the running loop.
    asyncio.ensure_future(load_models())
|
| 110 |
|
| 111 |
# Define request schema
|
|
|
|
| 116 |
# Endpoint: /ask
|
| 117 |
@app.post("/ask")
|
| 118 |
async def ask(req: AskRequest):
|
| 119 |
+
logger.info(f"Received ask request: {req.question} at {time.time()}")
|
| 120 |
if not models_loaded:
|
| 121 |
logger.error("Models not loaded yet")
|
| 122 |
raise HTTPException(status_code=503, detail="Models are still loading, please try again later")
|
|
|
|
| 148 |
|
| 149 |
# Run the app
if __name__ == "__main__":
    # Serve on all interfaces; conservative settings to keep resource usage low.
    server_options = {
        "host": "0.0.0.0",
        "port": 8080,
        "log_level": "info",
        "workers": 1,  # single worker to reduce load
        "timeout_keep_alive": 30,  # keep-alive timeout to ensure responsive connections
    }
    uvicorn.run(app, **server_options)
|