Spaces:

X-iZhang
/

CCD

Running

App Files Files Community

X-iZhang committed on Oct 6, 2025

Commit

842642b

verified ·

1 Parent(s): ab315c5

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -47

app.py CHANGED Viewed

@@ -1,15 +1,11 @@
 import os
 # Force CPU-only in this process by hiding CUDA devices (set before importing heavy libs)
-os.environ['CUDA_VISIBLE_DEVICES'] = ''
-os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
 import torch
 import gradio as gr
 import time
-# Force CPU device globally by overriding torch.cuda.is_available
-torch.cuda.is_available = lambda: False
 # =========================================
 # Safe Libra Hook (CPU fallback + dtype fix)
 # This hook must run before any heavyweight libra model-loading occurs.
@@ -23,16 +19,14 @@ _original_load_pretrained_model = getattr(builder, 'load_pretrained_model', None
 def safe_load_pretrained_model(model_path, model_base=None, model_name=None, **kwargs):
     print("[INFO] Hook activated: safe_load_pretrained_model()")
-    # Complete model_name to avoid .lower() on None
     if model_name is None:
         model_name = model_path
-    # Force CPU parameters when calling original function
     kwargs = dict(kwargs)
-    kwargs['device'] = 'cpu'
-    kwargs['device_map'] = 'cpu'
-    kwargs.setdefault('torch_dtype', torch.float32)
-    kwargs.setdefault('low_cpu_mem_usage', True)
     if _original_load_pretrained_model is None:
         raise RuntimeError('Original load_pretrained_model not found in builder')
@@ -56,31 +50,20 @@ def safe_load_pretrained_model(model_path, model_base=None, model_name=None, **k
         # propagate other errors
         raise
-    # Force all model components to CPU with float32 for compatibility
-    print('[INFO] Forcing all components to CPU with float32 dtype...')
-    try:
-        model = model.to(device='cpu', dtype=torch.float32)
-        print('[INFO] Model moved to CPU (float32).')
-    except Exception as e:
-        print(f"[WARN] Could not move model to cpu/float32: {e}")
-    try:
-        if hasattr(model, 'get_vision_tower'):
             vt = model.get_vision_tower()
-            if vt is not None:
-                vt = vt.to(device='cpu', dtype=torch.float32)
-                print('[INFO] Vision tower moved to CPU (float32).')
-    except Exception as e:
-        print(f"[WARN] Could not move vision_tower to cpu/float32: {e}")
-    try:
-        if hasattr(model, 'get_model'):
-            inner_model = model.get_model()
-            if inner_model is not None:
-                inner_model = inner_model.to(device='cpu', dtype=torch.float32)
-                print('[INFO] Inner model moved to CPU (float32).')
-    except Exception as e:
-        print(f"[WARN] Could not move inner model to cpu/float32: {e}")
     return tokenizer, model, image_processor, context_len
@@ -97,12 +80,7 @@ def safe_load_model(model_path, model_base=None, model_name=None):
 run_libra.load_model = safe_load_model
-# Now import CCD and hook ccd_utils to force CPU for expert models
-import ccd.ccd_utils as ccd_utils_module
-ccd_utils_module._DEVICE = torch.device('cpu')
-print('[INFO] Forced ccd_utils._DEVICE to CPU')
-# Now import the evaluation functions
 from ccd import ccd_eval, run_eval
 from libra.eval.run_libra import load_model
@@ -126,14 +104,13 @@ _loaded_models = {}
 # Environment Setup
 # =========================================
 def setup_environment():
-    print("🔹 Running in CPU-only mode (forced for Hugging Face Spaces)")
     os.environ['TOKENIZERS_PARALLELISM'] = 'false'
     os.environ['TRANSFORMERS_CACHE'] = './cache'
-    # Set number of threads for CPU inference
-    num_threads = min(os.cpu_count() or 4, 8)
-    torch.set_num_threads(num_threads)
-    print(f"🔹 Using {num_threads} CPU threads")
 # =========================================

 import os
 # Force CPU-only in this process by hiding CUDA devices (set before importing heavy libs)
+os.environ.setdefault('CUDA_VISIBLE_DEVICES', '')
 import torch
 import gradio as gr
 import time
 # =========================================
 # Safe Libra Hook (CPU fallback + dtype fix)
 # This hook must run before any heavyweight libra model-loading occurs.
 def safe_load_pretrained_model(model_path, model_base=None, model_name=None, **kwargs):
     print("[INFO] Hook activated: safe_load_pretrained_model()")
+    # Fill in model_name to avoid calling .lower() on None
     if model_name is None:
         model_name = model_path
+    # Call the original function with CPU parameters (as defaults) to avoid CUDA initialization where possible
     kwargs = dict(kwargs)
+    kwargs.setdefault('device', 'cpu')
+    kwargs.setdefault('device_map', 'cpu')
     if _original_load_pretrained_model is None:
         raise RuntimeError('Original load_pretrained_model not found in builder')
         # propagate other errors
         raise
+    # On CPU, try to upcast the model and vision tower to float32 to reduce CPU compatibility issues
+    if not torch.cuda.is_available():
+        try:
+            model.to(dtype=torch.float32)
+        except Exception as e:
+            print(f"[WARN] Could not upcast LM to float32: {e}")
+        try:
             vt = model.get_vision_tower()
+            vt.to(device='cpu', dtype=torch.float32)
+            print('[INFO] Vision tower moved to cpu (float32).')
+        except Exception as e:
+            print(f"[WARN] Could not move vision_tower to cpu/float32: {e}")
+    else:
+        print('[INFO] GPU available — keeping original device/dtype behavior.')
     return tokenizer, model, image_processor, context_len
 run_libra.load_model = safe_load_model
+# Now import CCD and the other hooked symbols (imported after the hooks are installed so they take effect)
 from ccd import ccd_eval, run_eval
 from libra.eval.run_libra import load_model
 # Environment Setup
 # =========================================
 def setup_environment():
+    if torch.cuda.is_available():
+        print("🔹 Using GPU:", torch.cuda.get_device_name(0))
+    else:
+        print("🔹 Using CPU")
     os.environ['TOKENIZERS_PARALLELISM'] = 'false'
     os.environ['TRANSFORMERS_CACHE'] = './cache'
+    torch.set_num_threads(4)
 # =========================================