Update app.py
app.py CHANGED
@@ -4,6 +4,11 @@ from pydantic import BaseModel, Field
 from contextlib import asynccontextmanager
 import re
 import os
+import logging
+
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
 
 try:
     from llama_cpp import Llama
@@ -14,14 +19,18 @@ MODEL_REPO = "bartowski/Phi-3.5-mini-instruct-GGUF"
 MODEL_FILE = "Phi-3.5-mini-instruct-Q4_K_M.gguf"
 
 llm = None
+model_loading = False
 
 @asynccontextmanager
 async def lifespan(app: FastAPI):
-    global llm
+    global llm, model_loading
     try:
-
+        logger.info("🚀 Starting model load...")
+        model_loading = True
+
+        # Set cache directory for Hugging Face Spaces
+        cache_dir = os.getenv("HF_HOME", "./models")
 
-        # Try to load model with error handling
         llm = Llama.from_pretrained(
             repo_id=MODEL_REPO,
             filename=MODEL_FILE,
@@ -31,18 +40,16 @@ async def lifespan(app: FastAPI):
             n_gpu_layers=0,
             verbose=False,
         )
-
+        model_loading = False
+        logger.info("✅ Model loaded and ready")
     except Exception as e:
-
-
-        print("1. Installed llama-cpp-python")
-        print("2. Have internet connection for model download")
-        print("3. Have sufficient disk space (~2GB)")
+        logger.error(f"❌ Model load error: {e}")
+        model_loading = False
         llm = None
 
     yield
 
-
+    logger.info("👋 Shutting down...")
     if llm:
         del llm
 
@@ -76,12 +83,62 @@ def clean_output(text: str) -> str:
     text = re.sub(r"\s+", " ", text)
     return text.strip()
 
+@app.get("/")
+def root():
+    """Root endpoint - returns status"""
+    return {
+        "status": "healthy",
+        "model_loaded": llm is not None,
+        "model_loading": model_loading,
+        "message": "AI Summarizer API is running"
+    }
+
+@app.get("/health")
+def health():
+    """Health check endpoint for container orchestration"""
+    if model_loading:
+        return {
+            "status": "starting",
+            "model_loaded": False,
+            "model_loading": True,
+            "message": "Model is loading, please wait..."
+        }
+
+    if llm is None:
+        return {
+            "status": "unhealthy",
+            "model_loaded": False,
+            "model_loading": False,
+            "message": "Model failed to load"
+        }
+
+    return {
+        "status": "healthy",
+        "model_loaded": True,
+        "model_loading": False,
+        "model_name": MODEL_FILE,
+        "message": "Ready to summarize"
+    }
+
+@app.get("/ready")
+def readiness():
+    """Readiness probe - returns 200 only when model is loaded"""
+    if llm is not None and not model_loading:
+        return {"ready": True}
+    raise HTTPException(status_code=503, detail="Model not ready")
+
 @app.post("/api/summarize")
 async def summarize(req: SummarizeRequest):
+    if model_loading:
+        raise HTTPException(
+            status_code=503,
+            detail="Model is still loading. Please wait and try again."
+        )
+
     if llm is None:
         raise HTTPException(
             status_code=503,
-            detail="Model not loaded. Check server logs"
+            detail="Model not loaded. Check server logs."
         )
 
     try:
@@ -106,6 +163,8 @@ Text:
             "long": 300
         }
 
+        logger.info(f"Summarizing text (length: {req.length})")
+
         output = llm(
             prompt,
             max_tokens=max_tokens_map.get(req.length, 140),
@@ -125,35 +184,25 @@ Text:
                 detail="Model produced empty output"
            )
 
+        logger.info("✅ Summary generated successfully")
+
         return {
             "summary": summary,
             "success": True,
             "length": req.length
         }
 
+    except HTTPException:
+        raise
     except Exception as e:
+        logger.error(f"Summarization error: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Summarization error: {str(e)}"
        )
 
-@app.get("/")
-def health():
-    return {
-        "status": "ok" if llm else "model_not_loaded",
-        "model": MODEL_FILE,
-        "ready": llm is not None
-    }
-
-@app.get("/health")
-def detailed_health():
-    return {
-        "status": "healthy" if llm else "unhealthy",
-        "model_loaded": llm is not None,
-        "model_name": MODEL_FILE,
-        "repo": MODEL_REPO
-    }
-
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=7860)
+    # Use PORT environment variable for Hugging Face Spaces
+    port = int(os.getenv("PORT", 7860))
+    uvicorn.run(app, host="0.0.0.0", port=port)
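
For quick verification, here is a minimal client sketch against the endpoints this commit adds: it polls /ready until the readiness probe returns 200, then posts a summarization request. The base URL is a placeholder, and the request body is an assumption; the diff shows a length field and a prompt containing "Text:", but the full SummarizeRequest schema lies outside the changed hunks.

import time
import requests  # any HTTP client works; requests is assumed installed

BASE_URL = "http://localhost:7860"  # placeholder; substitute the deployed Space URL

# Poll the readiness probe: /ready returns 200 only once the model is loaded,
# and 503 while it is still loading or has failed.
for _ in range(60):
    try:
        if requests.get(f"{BASE_URL}/ready", timeout=5).status_code == 200:
            break
    except requests.ConnectionError:
        pass  # server may not be accepting connections yet
    time.sleep(5)

# Body fields are assumptions: "text" is inferred from the "Text:" prompt shown
# in the diff context, "length" from max_tokens_map ("long" maps to 300 tokens).
resp = requests.post(
    f"{BASE_URL}/api/summarize",
    json={
        "text": "FastAPI is a modern, fast web framework for building APIs with Python.",
        "length": "long",
    },
    timeout=120,
)
resp.raise_for_status()
print(resp.json()["summary"])

Separating /ready, which answers 503 until the model is in memory, from /health, which always answers 200 with a status payload, lets an orchestrator delay routing traffic without restarting a Space that is merely slow to download the model.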