viskav committed
Commit 105b25f · verified · 1 parent: 939d683

Update app.py

Files changed (1):
  1. app.py +78 -29
app.py CHANGED
@@ -4,6 +4,11 @@ from pydantic import BaseModel, Field
 from contextlib import asynccontextmanager
 import re
 import os
+import logging
+
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)

 try:
     from llama_cpp import Llama
@@ -14,14 +19,18 @@ MODEL_REPO = "bartowski/Phi-3.5-mini-instruct-GGUF"
 MODEL_FILE = "Phi-3.5-mini-instruct-Q4_K_M.gguf"

 llm = None
+model_loading = False

 @asynccontextmanager
 async def lifespan(app: FastAPI):
-    global llm
+    global llm, model_loading
     try:
-        print("🚀 Loading Phi-3.5 Mini (Fast Summarizer)...")
+        logger.info("🚀 Starting model load...")
+        model_loading = True
+
+        # Set cache directory for Hugging Face Spaces
+        cache_dir = os.getenv("HF_HOME", "./models")

-        # Try to load model with error handling
         llm = Llama.from_pretrained(
             repo_id=MODEL_REPO,
             filename=MODEL_FILE,
@@ -31,18 +40,16 @@ async def lifespan(app: FastAPI):
             n_gpu_layers=0,
             verbose=False,
         )
-        print("✅ Model loaded successfully")
+        model_loading = False
+        logger.info("✅ Model loaded and ready")
     except Exception as e:
-        print(f"❌ Error loading model: {e}")
-        print("Make sure you have:")
-        print("1. Installed llama-cpp-python")
-        print("2. Have internet connection for model download")
-        print("3. Have sufficient disk space (~2GB)")
+        logger.error(f"❌ Model load error: {e}")
+        model_loading = False
         llm = None

     yield

-    print("🛑 Shutting down...")
+    logger.info("🛑 Shutting down...")
     if llm:
         del llm

@@ -76,12 +83,62 @@ def clean_output(text: str) -> str:
     text = re.sub(r"\s+", " ", text)
     return text.strip()

+@app.get("/")
+def root():
+    """Root endpoint - returns status"""
+    return {
+        "status": "healthy",
+        "model_loaded": llm is not None,
+        "model_loading": model_loading,
+        "message": "AI Summarizer API is running"
+    }
+
+@app.get("/health")
+def health():
+    """Health check endpoint for container orchestration"""
+    if model_loading:
+        return {
+            "status": "starting",
+            "model_loaded": False,
+            "model_loading": True,
+            "message": "Model is loading, please wait..."
+        }
+
+    if llm is None:
+        return {
+            "status": "unhealthy",
+            "model_loaded": False,
+            "model_loading": False,
+            "message": "Model failed to load"
+        }
+
+    return {
+        "status": "healthy",
+        "model_loaded": True,
+        "model_loading": False,
+        "model_name": MODEL_FILE,
+        "message": "Ready to summarize"
+    }
+
+@app.get("/ready")
+def readiness():
+    """Readiness probe - returns 200 only when model is loaded"""
+    if llm is not None and not model_loading:
+        return {"ready": True}
+    raise HTTPException(status_code=503, detail="Model not ready")
+
 @app.post("/api/summarize")
 async def summarize(req: SummarizeRequest):
+    if model_loading:
+        raise HTTPException(
+            status_code=503,
+            detail="Model is still loading. Please wait and try again."
+        )
+
     if llm is None:
         raise HTTPException(
             status_code=503,
-            detail="Model not loaded. Check server logs for errors."
+            detail="Model not loaded. Check server logs."
         )

     try:
@@ -106,6 +163,8 @@ Text:
             "long": 300
         }

+        logger.info(f"Summarizing text (length: {req.length})")
+
         output = llm(
             prompt,
             max_tokens=max_tokens_map.get(req.length, 140),
@@ -125,35 +184,25 @@ Text:
                 detail="Model produced empty output"
             )

+        logger.info("✅ Summary generated successfully")
+
         return {
             "summary": summary,
             "success": True,
             "length": req.length
         }

+    except HTTPException:
+        raise
     except Exception as e:
+        logger.error(f"Summarization error: {e}")
         raise HTTPException(
             status_code=500,
             detail=f"Summarization error: {str(e)}"
         )

-@app.get("/")
-def health():
-    return {
-        "status": "ok" if llm else "model_not_loaded",
-        "model": MODEL_FILE,
-        "ready": llm is not None
-    }
-
-@app.get("/health")
-def detailed_health():
-    return {
-        "status": "healthy" if llm else "unhealthy",
-        "model_loaded": llm is not None,
-        "model_name": MODEL_FILE,
-        "repo": MODEL_REPO
-    }
-
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=8000)
+    # Use PORT environment variable for Hugging Face Spaces
+    port = int(os.getenv("PORT", 7860))
+    uvicorn.run(app, host="0.0.0.0", port=port)
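
A quick way to exercise this change end to end is the client sketch below. It is not part of the commit and rests on two assumptions: that the SummarizeRequest model (defined outside this diff) accepts a text field alongside length, and that the app is listening on the default port 7860 set in the __main__ block above. The value "long" for length is visible in max_tokens_map, so it is used here.

# Hypothetical smoke-test client for the endpoints added in this commit.
# Assumptions: SummarizeRequest has a `text` field (only `length` is
# visible in this diff), and the app runs locally on port 7860.
import time

import requests

BASE_URL = "http://localhost:7860"

# Poll the new /ready probe; it returns 503 until the model is loaded.
for _ in range(60):
    try:
        if requests.get(f"{BASE_URL}/ready", timeout=5).ok:
            break
    except requests.ConnectionError:
        pass  # server not accepting connections yet
    time.sleep(5)
else:
    raise SystemExit("model never became ready")

# "length" must be a key of max_tokens_map; "long" maps to 300 tokens above.
resp = requests.post(
    f"{BASE_URL}/api/summarize",
    json={"text": "llama.cpp runs quantized GGUF models on CPU.", "length": "long"},
    timeout=120,
)
resp.raise_for_status()
print(resp.json()["summary"])

Polling /ready rather than / matches the intent of the split introduced here: / and /health always answer 200 so a Space landing check passes even while the model loads, while /ready is the only probe that signals readiness through its status code.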