Use dynamic GPU check
Browse files
app.py
CHANGED
|
@@ -12,11 +12,12 @@ from datetime import datetime
|
|
| 12 |
from functools import lru_cache
|
| 13 |
|
| 14 |
# GPU 추론 관련 (선택적 임포트)
|
|
|
|
| 15 |
try:
|
| 16 |
import torch
|
| 17 |
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
|
| 18 |
from peft import PeftModel
|
| 19 |
-
|
| 20 |
|
| 21 |
# Debug info
|
| 22 |
print("=" * 50)
|
|
@@ -27,15 +28,22 @@ try:
|
|
| 27 |
print(f"GPU count: {torch.cuda.device_count()}")
|
| 28 |
print(f"GPU name: {torch.cuda.get_device_name(0)}")
|
| 29 |
else:
|
| 30 |
-
print("CUDA not available")
|
| 31 |
-
print(f"torch.backends.cudnn.enabled: {torch.backends.cudnn.enabled if hasattr(torch.backends, 'cudnn') else 'N/A'}")
|
| 32 |
print("=" * 50)
|
| 33 |
|
| 34 |
except ImportError as e:
|
| 35 |
-
GPU_AVAILABLE = False
|
| 36 |
print(f"Warning: Import error - {e}")
|
| 37 |
print("Running in mock mode")
|
| 38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
# ============================================================
|
| 40 |
# 모델 레지스트리 (HF Hub 경로)
|
| 41 |
# ============================================================
|
|
@@ -167,8 +175,8 @@ class ModelManager:
|
|
| 167 |
|
| 168 |
def load_model(self, model_name: str):
|
| 169 |
"""Load model with 4-bit quantization and LoRA adapter"""
|
| 170 |
-
if not GPU_AVAILABLE:
|
| 171 |
-
self.last_error = "GPU not available"
|
| 172 |
return False
|
| 173 |
|
| 174 |
if self.current_model_name == model_name:
|
|
|
|
| 12 |
from functools import lru_cache
|
| 13 |
|
| 14 |
# GPU 추론 관련 (선택적 임포트)
|
| 15 |
+
TORCH_AVAILABLE = False
|
| 16 |
try:
|
| 17 |
import torch
|
| 18 |
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
|
| 19 |
from peft import PeftModel
|
| 20 |
+
TORCH_AVAILABLE = True
|
| 21 |
|
| 22 |
# Debug info
|
| 23 |
print("=" * 50)
|
|
|
|
| 28 |
print(f"GPU count: {torch.cuda.device_count()}")
|
| 29 |
print(f"GPU name: {torch.cuda.get_device_name(0)}")
|
| 30 |
else:
|
| 31 |
+
print("CUDA not available at module load time")
|
|
|
|
| 32 |
print("=" * 50)
|
| 33 |
|
| 34 |
except ImportError as e:
|
|
|
|
| 35 |
print(f"Warning: Import error - {e}")
|
| 36 |
print("Running in mock mode")
|
| 37 |
|
| 38 |
+
def is_gpu_available():
|
| 39 |
+
"""Check GPU availability dynamically"""
|
| 40 |
+
if not TORCH_AVAILABLE:
|
| 41 |
+
return False
|
| 42 |
+
return torch.cuda.is_available()
|
| 43 |
+
|
| 44 |
+
# For backwards compatibility
|
| 45 |
+
GPU_AVAILABLE = is_gpu_available()
|
| 46 |
+
|
| 47 |
# ============================================================
|
| 48 |
# 모델 레지스트리 (HF Hub 경로)
|
| 49 |
# ============================================================
|
|
|
|
| 175 |
|
| 176 |
def load_model(self, model_name: str):
|
| 177 |
"""Load model with 4-bit quantization and LoRA adapter"""
|
| 178 |
+
if not is_gpu_available():
|
| 179 |
+
self.last_error = f"GPU not available (TORCH_AVAILABLE={TORCH_AVAILABLE}, cuda={torch.cuda.is_available() if TORCH_AVAILABLE else 'N/A'})"
|
| 180 |
return False
|
| 181 |
|
| 182 |
if self.current_model_name == model_name:
|