Update app.py
app.py CHANGED
@@ -2,10 +2,12 @@ import os
 import logging
 import time
 from fastapi import FastAPI, HTTPException
+from fastapi.responses import JSONResponse
 from pydantic import BaseModel
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 from llama_cpp import Llama
 import asyncio
+import uvicorn
 
 # Set up logging
 logging.basicConfig(level=logging.INFO)
@@ -31,9 +33,18 @@ models_loaded = False
 # Health check endpoint
 @app.get("/health")
 async def health_check():
+    logger.info("Health check endpoint called at %s", time.time())
     if not models_loaded:
-
-
+        logger.info("Returning 'loading' status")
+        return JSONResponse(
+            content={"status": "loading", "message": "Models are still loading"},
+            headers={"Cache-Control": "no-cache", "Connection": "keep-alive"}
+        )
+    logger.info("Returning 'healthy' status")
+    return JSONResponse(
+        content={"status": "healthy"},
+        headers={"Cache-Control": "no-cache", "Connection": "keep-alive"}
+    )
 
 # Async function to load models
 async def load_models():
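The reworked /health endpoint above separates a "loading" state from "healthy", which is what lets a client or an orchestrator wait for model readiness instead of retrying /ask blindly. A minimal polling sketch, assuming the server listens on http://localhost:8080 as set in the __main__ block at the end of this diff; the timeout and poll interval are illustrative, not part of the commit:

import time
import requests

def wait_until_healthy(base_url: str = "http://localhost:8080", timeout: float = 600.0) -> bool:
    # Poll /health until the service reports "healthy" or the timeout expires.
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            status = requests.get(f"{base_url}/health", timeout=5).json().get("status")
            if status == "healthy":
                return True
        except requests.RequestException:
            pass  # server may not be up yet; keep polling
        time.sleep(2)  # GGUF models can take a while to load
    return False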
@@ -69,7 +80,7 @@ async def load_models():
     mistral = Llama(
         model_path=gguf_path,
         n_ctx=2048,
-        n_threads=4,
+        n_threads=4,
         n_batch=512,
         verbose=True
     )
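For the Llama constructor above: n_ctx is the context window in tokens, n_threads the number of CPU threads used for inference, and n_batch how many prompt tokens are evaluated per batch. One caveat: Llama() loads the GGUF weights synchronously, so even though load_models() is an async def scheduled as a task, the event loop (and with it /health) can stall during the load. A hedged way around that, a sketch only with illustrative helper names:

import asyncio
from llama_cpp import Llama

def _load_mistral_blocking(gguf_path: str) -> Llama:
    # Synchronous, CPU- and I/O-heavy: reads the GGUF weights from disk.
    return Llama(model_path=gguf_path, n_ctx=2048, n_threads=4, n_batch=512, verbose=True)

async def load_mistral(gguf_path: str) -> Llama:
    # Run the blocking constructor in the default thread pool so the
    # event loop stays responsive while the model loads.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, _load_mistral_blocking, gguf_path)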
@@ -85,6 +96,7 @@ async def load_models():
 # Run model loading in the background
 @app.on_event("startup")
 async def startup_event():
+    logger.info("Startup event triggered")
     asyncio.create_task(load_models())
 
 # Define request schema
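The @app.on_event("startup") hook used here still works but is deprecated in current FastAPI releases in favor of a lifespan context manager. A sketch of the equivalent wiring, assuming the same load_models() coroutine defined above:

import asyncio
from contextlib import asynccontextmanager
from fastapi import FastAPI

@asynccontextmanager
async def lifespan(app: FastAPI):
    # Schedule model loading without blocking server startup.
    task = asyncio.create_task(load_models())
    yield  # the application serves requests here
    task.cancel()  # best-effort cleanup on shutdown

app = FastAPI(lifespan=lifespan)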
@@ -95,10 +107,11 @@ class AskRequest(BaseModel):
 # Endpoint: /ask
 @app.post("/ask")
 async def ask(req: AskRequest):
+    logger.info(f"Received ask request: {req.question}")
     if not models_loaded:
+        logger.error("Models not loaded yet")
         raise HTTPException(status_code=503, detail="Models are still loading, please try again later")
 
-    logger.info(f"Received question: {req.question}")
     q = req.question.strip()
     if not q:
         logger.error("Empty question received")
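Per the AskRequest schema, /ask takes a JSON body with a question field and, as the final hunk below shows, answers with {"model": ..., "response": ...}; a 503 means the models are still loading. A client sketch with a simple retry; the URL, timeout, and backoff are illustrative, not part of the commit:

import time
import requests

def ask(question: str, base_url: str = "http://localhost:8080", retries: int = 5) -> dict:
    for attempt in range(retries):
        resp = requests.post(f"{base_url}/ask", json={"question": question}, timeout=120)
        if resp.status_code == 503:          # models still loading
            time.sleep(10 * (attempt + 1))   # simple linear backoff
            continue
        resp.raise_for_status()
        return resp.json()                   # {"model": ..., "response": ...}
    raise TimeoutError("Models never finished loading")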
@@ -122,4 +135,8 @@ async def ask(req: AskRequest):
         return {"model": model_name, "response": answer}
     except Exception as e:
         logger.error(f"Error processing request: {str(e)}")
-        raise HTTPException(status_code=500, detail=f"خطأ أثناء معالجة الطلب: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"خطأ أثناء معالجة الطلب: {str(e)}")  # Arabic: "Error while processing the request"
+
+# Run the app
+if __name__ == "__main__":
+    uvicorn.run(app, host="0.0.0.0", port=8080, log_level="info")
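Since the commit's observable behavior is the loading/healthy split on /health and the 503 on /ask, a small regression sketch using FastAPI's TestClient; the file name and assertions are illustrative, and TestClient requires httpx to be installed:

# test_app.py
from fastapi.testclient import TestClient
from app import app

def test_health_reports_a_known_status():
    # Entering the context manager fires the startup event, which only
    # schedules load_models(), so "loading" is a legitimate first answer.
    with TestClient(app) as client:
        resp = client.get("/health")
        assert resp.status_code == 200
        assert resp.json()["status"] in {"loading", "healthy"}

def test_ask_is_503_until_models_load():
    with TestClient(app) as client:
        resp = client.post("/ask", json={"question": "ping"})
        assert resp.status_code in {200, 503}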