developer-lunark committed on
Commit
bf5bc18
·
verified ·
1 Parent(s): f383abd

Use dynamic GPU check

Browse files
Files changed (1) hide show
  1. app.py +14 -6
app.py CHANGED
@@ -12,11 +12,12 @@ from datetime import datetime
12
  from functools import lru_cache
13
 
14
  # GPU 추론 관련 (선택적 임포트)
 
15
  try:
16
  import torch
17
  from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
18
  from peft import PeftModel
19
- GPU_AVAILABLE = torch.cuda.is_available()
20
 
21
  # Debug info
22
  print("=" * 50)
@@ -27,15 +28,22 @@ try:
27
  print(f"GPU count: {torch.cuda.device_count()}")
28
  print(f"GPU name: {torch.cuda.get_device_name(0)}")
29
  else:
30
- print("CUDA not available - checking why...")
31
- print(f"torch.backends.cudnn.enabled: {torch.backends.cudnn.enabled if hasattr(torch.backends, 'cudnn') else 'N/A'}")
32
  print("=" * 50)
33
 
34
  except ImportError as e:
35
- GPU_AVAILABLE = False
36
  print(f"Warning: Import error - {e}")
37
  print("Running in mock mode")
38
 
 
 
 
 
 
 
 
 
 
39
  # ============================================================
40
  # 모델 레지스트리 (HF Hub 경로)
41
  # ============================================================
@@ -167,8 +175,8 @@ class ModelManager:
167
 
168
  def load_model(self, model_name: str):
169
  """Load model with 4-bit quantization and LoRA adapter"""
170
- if not GPU_AVAILABLE:
171
- self.last_error = "GPU not available"
172
  return False
173
 
174
  if self.current_model_name == model_name:
 
12
  from functools import lru_cache
13
 
14
  # GPU 추론 관련 (선택적 임포트)
15
+ TORCH_AVAILABLE = False
16
  try:
17
  import torch
18
  from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
19
  from peft import PeftModel
20
+ TORCH_AVAILABLE = True
21
 
22
  # Debug info
23
  print("=" * 50)
 
28
  print(f"GPU count: {torch.cuda.device_count()}")
29
  print(f"GPU name: {torch.cuda.get_device_name(0)}")
30
  else:
31
+ print("CUDA not available at module load time")
 
32
  print("=" * 50)
33
 
34
  except ImportError as e:
 
35
  print(f"Warning: Import error - {e}")
36
  print("Running in mock mode")
37
 
38
def is_gpu_available():
    """Return True when torch imported successfully and CUDA reports a device.

    Checked dynamically (not cached) so a GPU that becomes visible after
    module import is still detected by later callers.
    """
    # Short-circuit: never touch `torch` when the optional import failed,
    # otherwise this would raise NameError in CPU-only / mock mode.
    return TORCH_AVAILABLE and torch.cuda.is_available()


# Import-time snapshot kept for callers that still read the old constant.
GPU_AVAILABLE = is_gpu_available()
46
+
47
  # ============================================================
48
  # 모델 레지스트리 (HF Hub 경로)
49
  # ============================================================
 
175
 
176
  def load_model(self, model_name: str):
177
  """Load model with 4-bit quantization and LoRA adapter"""
178
+ if not is_gpu_available():
179
+ self.last_error = f"GPU not available (TORCH_AVAILABLE={TORCH_AVAILABLE}, cuda={torch.cuda.is_available() if TORCH_AVAILABLE else 'N/A'})"
180
  return False
181
 
182
  if self.current_model_name == model_name: