developer-lunark committed on
Commit
bf5bc18
·
verified ·
1 Parent(s): f383abd

Use dynamic GPU check

Browse files
Files changed (1) hide show
  1. app.py +14 -6
app.py CHANGED
@@ -12,11 +12,12 @@ from datetime import datetime
12
  from functools import lru_cache
13
 
14
  # GPU 추론 관련 (선택적 임포트)
 
15
  try:
16
  import torch
17
  from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
18
  from peft import PeftModel
19
- GPU_AVAILABLE = torch.cuda.is_available()
20
 
21
  # Debug info
22
  print("=" * 50)
@@ -27,15 +28,22 @@ try:
27
  print(f"GPU count: {torch.cuda.device_count()}")
28
  print(f"GPU name: {torch.cuda.get_device_name(0)}")
29
  else:
30
- print("CUDA not available - checking why...")
31
- print(f"torch.backends.cudnn.enabled: {torch.backends.cudnn.enabled if hasattr(torch.backends, 'cudnn') else 'N/A'}")
32
  print("=" * 50)
33
 
34
  except ImportError as e:
35
- GPU_AVAILABLE = False
36
  print(f"Warning: Import error - {e}")
37
  print("Running in mock mode")
38
 
 
 
 
 
 
 
 
 
 
39
  # ============================================================
40
  # 모델 레지스트리 (HF Hub 경로)
41
  # ============================================================
@@ -167,8 +175,8 @@ class ModelManager:
167
 
168
  def load_model(self, model_name: str):
169
  """Load model with 4-bit quantization and LoRA adapter"""
170
- if not GPU_AVAILABLE:
171
- self.last_error = "GPU not available"
172
  return False
173
 
174
  if self.current_model_name == model_name:
 
12
  from functools import lru_cache
13
 
14
  # GPU 추론 관련 (선택적 임포트)
15
+ TORCH_AVAILABLE = False
16
  try:
17
  import torch
18
  from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
19
  from peft import PeftModel
20
+ TORCH_AVAILABLE = True
21
 
22
  # Debug info
23
  print("=" * 50)
 
28
  print(f"GPU count: {torch.cuda.device_count()}")
29
  print(f"GPU name: {torch.cuda.get_device_name(0)}")
30
  else:
31
+ print("CUDA not available at module load time")
 
32
  print("=" * 50)
33
 
34
  except ImportError as e:
 
35
  print(f"Warning: Import error - {e}")
36
  print("Running in mock mode")
37
 
38
def is_gpu_available():
    """Return True when torch imported successfully and CUDA reports a device.

    Checked dynamically (not cached) so a GPU that becomes visible after
    module import is still detected by later callers.
    """
    # Short-circuit: never touch `torch` when the optional import failed,
    # otherwise this would raise NameError in CPU-only / mock mode.
    return TORCH_AVAILABLE and torch.cuda.is_available()


# Import-time snapshot kept for callers that still read the old constant.
GPU_AVAILABLE = is_gpu_available()
46
+
47
  # ============================================================
48
  # 모델 레지스트리 (HF Hub 경로)
49
  # ============================================================
 
175
 
176
  def load_model(self, model_name: str):
177
  """Load model with 4-bit quantization and LoRA adapter"""
178
+ if not is_gpu_available():
179
+ self.last_error = f"GPU not available (TORCH_AVAILABLE={TORCH_AVAILABLE}, cuda={torch.cuda.is_available() if TORCH_AVAILABLE else 'N/A'})"
180
  return False
181
 
182
  if self.current_model_name == model_name: