Update handler.py
handler.py  CHANGED  (+31 -2)
@@ -360,6 +360,11 @@ def generate_response(
 def query(payload: dict):
     """HF Endpoint entry (demo-like)."""
     global model_initialized, tokenizer, model, image_processor, context_len, args
+
+    # 🟢 Health-check shortcut: both {"health_check": true} and {"message": "health_check"} are supported
+    if payload.get("health_check") or payload.get("message") == "health_check":
+        return health_check()
+
     if not model_initialized:
         if not initialize_model():
             return {"error": "Model initialization failed"}
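For reference, a minimal local sketch of how the new shortcut can be exercised; the import path (handler) and the call pattern are illustrative assumptions, not part of this commit. Neither call loads the model, since query() returns health_check() before initialize_model() runs.

# Hypothetical local smoke test of the health-check shortcut (not in this commit).
from handler import query

print(query({"health_check": True}))       # boolean-flag form
print(query({"message": "health_check"}))  # message-string form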
@@ -396,14 +401,38 @@ def query(payload: dict):
         return {"error": f"Query failed: {e}"}
 
 def health_check():
-    return {
+    info = {
         "status": "healthy",
         "model_initialized": model_initialized,
-        "cuda_available": torch.cuda.is_available(),
         "llava_available": LLAVA_AVAILABLE,
         "transformers_available": TRANSFORMERS_AVAILABLE,
+        "cuda_available": torch.cuda.is_available(),
     }
 
+    if torch.cuda.is_available():
+        try:
+            device_index = torch.cuda.current_device()
+            props = torch.cuda.get_device_properties(device_index)
+            total_vram_gb = round(props.total_memory / (1024 ** 3), 2)
+            used_vram_gb = round(torch.cuda.memory_allocated(device_index) / (1024 ** 3), 2)
+            reserved_vram_gb = round(torch.cuda.memory_reserved(device_index) / (1024 ** 3), 2)
+
+            info.update({
+                "cuda_device_index": device_index,
+                "cuda_name": props.name,
+                "cuda_compute_capability": f"{props.major}.{props.minor}",
+                "cuda_total_vram_gb": total_vram_gb,
+                "cuda_used_vram_gb": used_vram_gb,
+                "cuda_reserved_vram_gb": reserved_vram_gb,
+                "torch_version": torch.__version__,
+                "cuda_runtime_version": torch.version.cuda,
+            })
+        except Exception as e:
+            info["cuda_error"] = str(e)
+
+    return info
+
+
 def get_model_info():
     if not model_initialized:
         return {"error": "Model not initialized"}
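Once the handler is deployed, the same shortcut can be used to read the enriched health payload remotely. The sketch below is illustrative only: the URL and token are placeholders, and it assumes the Inference Endpoint passes the JSON request body to query() unchanged.

import requests

# Placeholder URL and token for a hypothetical deployment; adjust to your endpoint.
API_URL = "https://<your-endpoint>.endpoints.huggingface.cloud"
HEADERS = {"Authorization": "Bearer <HF_TOKEN>", "Content-Type": "application/json"}

resp = requests.post(API_URL, headers=HEADERS, json={"health_check": True})
info = resp.json()

# On a GPU instance the CUDA fields added above (cuda_name, cuda_total_vram_gb,
# cuda_runtime_version, ...) should appear alongside "status" and "model_initialized".
print(info.get("status"), info.get("cuda_name"), info.get("cuda_total_vram_gb"))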