VenkateshRoshan committed on
Commit
7115bd6
·
1 Parent(s): ff4ac0d

health check added

Browse files
Files changed (1) hide show
  1. app.py +41 -2
app.py CHANGED
@@ -85,7 +85,6 @@ class CustomerSupportBot:
85
  }
86
  return usage
87
 
88
-
89
  def create_chat_interface():
90
  bot = CustomerSupportBot(model_path="/app/models")
91
 
@@ -164,6 +163,46 @@ def create_chat_interface():
164
  # Enable the submit button only while the message box contains non-whitespace text
165
  msg.change(lambda x: gr.update(interactive=bool(x.strip())), inputs=[msg], outputs=[submit])
166
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
  return interface
168
 
169
  if __name__ == "__main__":
@@ -173,4 +212,4 @@ if __name__ == "__main__":
173
  server_name="0.0.0.0", # Makes the server accessible from other machines
174
  server_port=7860, # Specify the port
175
  debug=True
176
- )
 
85
  }
86
  return usage
87
 
 
88
  def create_chat_interface():
89
  bot = CustomerSupportBot(model_path="/app/models")
90
 
 
163
  # Enable the submit button only while the message box contains non-whitespace text
164
  msg.change(lambda x: gr.update(interactive=bool(x.strip())), inputs=[msg], outputs=[submit])
165
 
166
+ # Add health check endpoint
167
+ @interface.route("/ping", methods=["GET"])
168
+ def ping():
169
+ try:
170
+ # Check if model and tokenizer are loaded
171
+ if not hasattr(bot, 'model') or not hasattr(bot, 'tokenizer'):
172
+ return {"status": "unhealthy", "reason": "Model or tokenizer not loaded"}, 503
173
+
174
+ # Check if CUDA is available and model is on the correct device
175
+ if torch.cuda.is_available():
176
+ if not bot.model.device.type == 'cuda':
177
+ return {"status": "unhealthy", "reason": "Model not on GPU"}, 503
178
+
179
+ # Check memory usage
180
+ usage = bot.monitor_resources()
181
+ if usage["RAM (GB)"] > 30: # Example threshold
182
+ return {"status": "unhealthy", "reason": "High memory usage"}, 503
183
+
184
+ # Try a quick model inference to ensure it's working
185
+ try:
186
+ test_response = bot.generate_response("Test message")
187
+ if test_response.startswith("An error occurred"):
188
+ return {"status": "unhealthy", "reason": "Model inference failed"}, 503
189
+ except Exception as e:
190
+ return {"status": "unhealthy", "reason": f"Model inference error: {str(e)}"}, 503
191
+
192
+ return {
193
+ "status": "healthy",
194
+ "model_loaded": True,
195
+ "device": bot.device,
196
+ "resources": usage
197
+ }
198
+ except Exception as e:
199
+ return {"status": "unhealthy", "reason": str(e)}, 503
200
+
201
+ # Add secondary health endpoint
202
+ @interface.route("/health", methods=["GET"])
203
+ def health():
204
+ return {"status": "healthy"}
205
+
206
  return interface
207
 
208
  if __name__ == "__main__":
 
212
  server_name="0.0.0.0", # Makes the server accessible from other machines
213
  server_port=7860, # Specify the port
214
  debug=True
215
+ )