MGZON committed on
Commit
0b445a6
·
verified ·
1 Parent(s): 547ce4f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -10
app.py CHANGED
@@ -10,7 +10,7 @@ import asyncio
10
  import uvicorn
11
 
12
  # Set up logging
13
- logging.basicConfig(level=logging.INFO)
14
  logger = logging.getLogger(__name__)
15
 
16
  # Set up cache directory
@@ -30,10 +30,10 @@ t5_model = None
30
  mistral = None
31
  models_loaded = False
32
 
33
- # Root endpoint to confirm server is running
34
  @app.get("/")
35
  async def root():
36
- logger.info("Root endpoint called at %s", time.time())
37
  return JSONResponse(
38
  content={"message": "MGZON Smart Assistant is running"},
39
  headers={"Cache-Control": "no-cache", "Connection": "keep-alive"}
@@ -42,7 +42,7 @@ async def root():
42
  # Health check endpoint
43
  @app.get("/health")
44
  async def health_check():
45
- logger.info("Health check endpoint called at %s", time.time())
46
  if not models_loaded:
47
  logger.info("Returning 'loading' status")
48
  return JSONResponse(
@@ -88,9 +88,9 @@ async def load_models():
88
  logger.info(f"Loading Mistral model from {gguf_path}")
89
  mistral = Llama(
90
  model_path=gguf_path,
91
- n_ctx=2048,
92
- n_threads=2, # reduced threads to 2 to lower the load further
93
- n_batch=512,
94
  verbose=True
95
  )
96
  logger.info(f"Successfully loaded Mistral model from {gguf_path} in {time.time() - start_time} seconds")
@@ -105,7 +105,7 @@ async def load_models():
105
  # Run model loading in the background
106
  @app.on_event("startup")
107
  async def startup_event():
108
- logger.info("Startup event triggered at %s", time.time())
109
  asyncio.create_task(load_models())
110
 
111
  # Define request schema
@@ -153,6 +153,8 @@ if __name__ == "__main__":
153
  host="0.0.0.0",
154
  port=8080,
155
  log_level="info",
156
- workers=1, # use a single worker to reduce load
157
- timeout_keep_alive=30 # ensure a fast response
 
 
158
  )
 
10
  import uvicorn
11
 
12
  # Set up logging
13
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
14
  logger = logging.getLogger(__name__)
15
 
16
  # Set up cache directory
 
30
  mistral = None
31
  models_loaded = False
32
 
33
+ # Root endpoint
34
  @app.get("/")
35
  async def root():
36
+ logger.info(f"Root endpoint called at {time.time()}")
37
  return JSONResponse(
38
  content={"message": "MGZON Smart Assistant is running"},
39
  headers={"Cache-Control": "no-cache", "Connection": "keep-alive"}
 
42
  # Health check endpoint
43
  @app.get("/health")
44
  async def health_check():
45
+ logger.info(f"Health check endpoint called at {time.time()}")
46
  if not models_loaded:
47
  logger.info("Returning 'loading' status")
48
  return JSONResponse(
 
88
  logger.info(f"Loading Mistral model from {gguf_path}")
89
  mistral = Llama(
90
  model_path=gguf_path,
91
+ n_ctx=1024, # reduced n_ctx to cut memory consumption
92
+ n_threads=1, # reduced threads to 1 to lower the load
93
+ n_batch=256, # reduced n_batch to lower the load
94
  verbose=True
95
  )
96
  logger.info(f"Successfully loaded Mistral model from {gguf_path} in {time.time() - start_time} seconds")
 
105
  # Run model loading in the background
106
  @app.on_event("startup")
107
  async def startup_event():
108
+ logger.info(f"Startup event triggered at {time.time()}")
109
  asyncio.create_task(load_models())
110
 
111
  # Define request schema
 
153
  host="0.0.0.0",
154
  port=8080,
155
  log_level="info",
156
+ workers=1,
157
+ timeout_keep_alive=30,
158
+ limit_concurrency=10, # limit concurrent connections
159
+ limit_max_requests=100 # limit the maximum number of requests per worker
160
  )