Update app.py
app.py changed: a richer logging format, f-string timestamps in the endpoint logs, a smaller llama.cpp memory footprint, and new concurrency limits on the uvicorn server.
```diff
@@ -10,7 +10,7 @@ import asyncio
 import uvicorn
 
 # Set up logging
-logging.basicConfig(level=logging.INFO)
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
 logger = logging.getLogger(__name__)
 
 # Set up cache directory
```
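The new `format` string prefixes every record with a timestamp, logger name, and level, which is what makes the startup and endpoint logs below traceable over time. A minimal sketch of what it produces:

```python
import logging

# Same basicConfig as the new revision.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)
logger.info("models loading")
# -> 2025-01-01 12:00:00,000 - __main__ - INFO - models loading
```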
```diff
@@ -30,10 +30,10 @@ t5_model = None
 mistral = None
 models_loaded = False
 
-# Root endpoint
+# Root endpoint
 @app.get("/")
 async def root():
-    logger.info("Root endpoint called at
+    logger.info(f"Root endpoint called at {time.time()}")
     return JSONResponse(
         content={"message": "MGZON Smart Assistant is running"},
         headers={"Cache-Control": "no-cache", "Connection": "keep-alive"}
```
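For context, the endpoint this hunk touches, sketched as a self-contained app; the imports are assumed from the top of app.py, which this view does not show:

```python
import logging
import time

from fastapi import FastAPI
from fastapi.responses import JSONResponse

app = FastAPI()
logger = logging.getLogger(__name__)

@app.get("/")
async def root():
    # f-string, as in the new revision; the old call is cut off in this view
    logger.info(f"Root endpoint called at {time.time()}")
    return JSONResponse(
        content={"message": "MGZON Smart Assistant is running"},
        # no-cache stops intermediaries from caching the banner;
        # keep-alive reuses the connection for repeated polls
        headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
    )
```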
```diff
@@ -42,7 +42,7 @@ async def root():
 # Health check endpoint
 @app.get("/health")
 async def health_check():
-    logger.info("Health check endpoint called at
+    logger.info(f"Health check endpoint called at {time.time()}")
     if not models_loaded:
         logger.info("Returning 'loading' status")
         return JSONResponse(
```
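The hunk truncates right after `return JSONResponse(`, so the loading payload and status code below are assumptions; the visible logic is only that `/health` answers differently until `models_loaded` flips. Continuing the sketch above:

```python
models_loaded = False  # set to True by load_models() elsewhere in app.py

@app.get("/health")
async def health_check():
    logger.info(f"Health check endpoint called at {time.time()}")
    if not models_loaded:
        logger.info("Returning 'loading' status")
        # Assumed payload and 503; the diff cuts off here.
        return JSONResponse(content={"status": "loading"}, status_code=503)
    return JSONResponse(content={"status": "ok"})
```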
```diff
@@ -88,9 +88,9 @@ async def load_models():
     logger.info(f"Loading Mistral model from {gguf_path}")
     mistral = Llama(
         model_path=gguf_path,
-        n_ctx=
-        n_threads=
-        n_batch=
+        n_ctx=1024,    # reduced n_ctx to cut memory usage
+        n_threads=1,   # reduced threads to 1 to lighten the load
+        n_batch=256,   # reduced n_batch to lighten the load
         verbose=True
     )
     logger.info(f"Successfully loaded Mistral model from {gguf_path} in {time.time() - start_time} seconds")
```
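The numbers are plausible memory levers: the KV cache grows linearly with `n_ctx`, `n_batch` sizes the prompt-evaluation buffers, and `n_threads=1` trades throughput for a lighter CPU load. A hedged sketch using the llama-cpp-python constructor, with a placeholder path standing in for the `gguf_path` that app.py resolves itself:

```python
from llama_cpp import Llama

mistral = Llama(
    model_path="models/mistral.gguf",  # placeholder path
    n_ctx=1024,    # context window; KV-cache memory scales with this
    n_threads=1,   # single inference thread keeps CPU load down
    n_batch=256,   # tokens evaluated per batch during prompt processing
    verbose=True,  # log load details, matching the commit
)
result = mistral("Q: What is MGZON? A:", max_tokens=32)
print(result["choices"][0]["text"])
```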
```diff
@@ -105,7 +105,7 @@ async def load_models():
 # Run model loading in the background
 @app.on_event("startup")
 async def startup_event():
-    logger.info("Startup event triggered at
+    logger.info(f"Startup event triggered at {time.time()}")
     asyncio.create_task(load_models())
 
 # Define request schema
```
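`asyncio.create_task` schedules `load_models()` on the running event loop and returns immediately, so the server starts serving while the weights load; that is what lets `/health` report a loading state in the meantime. A minimal sketch of the pattern:

```python
import asyncio

async def load_models():
    global models_loaded
    await asyncio.sleep(5)  # stand-in for the real, slow model loading
    models_loaded = True

@app.on_event("startup")
async def startup_event():
    logger.info(f"Startup event triggered at {time.time()}")
    # Fire and forget: startup returns at once, loading continues in the background.
    asyncio.create_task(load_models())
```

One caveat: the `Llama(...)` constructor is synchronous, so if the real `load_models` builds it directly on the loop, requests stall for the duration of the load; `run_in_executor` is the usual way to offload it.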
```diff
@@ -153,6 +153,8 @@ if __name__ == "__main__":
         host="0.0.0.0",
         port=8080,
         log_level="info",
-        workers=1,
-        timeout_keep_alive=30
+        workers=1,
+        timeout_keep_alive=30,
+        limit_concurrency=10,   # reduce concurrent connections
+        limit_max_requests=100  # cap the maximum number of requests
     )
```
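`limit_concurrency` has uvicorn answer 503 once more than 10 connections are in flight, and `limit_max_requests` terminates the worker after 100 requests, a blunt guard against memory creep that relies on a supervisor to restart the process. A sketch of the full call; the `"app:app"` import string is an assumption, since this hunk does not show the first argument:

```python
import uvicorn

if __name__ == "__main__":
    uvicorn.run(
        "app:app",               # assumed import string
        host="0.0.0.0",
        port=8080,
        log_level="info",
        workers=1,
        timeout_keep_alive=30,   # close idle keep-alive connections after 30 s
        limit_concurrency=10,    # 503 once more than 10 connections are in flight
        limit_max_requests=100,  # terminate the worker after 100 requests
    )
```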