mclemcrew committed on
Commit
567c1ca
·
1 Parent(s): 0c81883

try again

Browse files
Files changed (1) hide show
  1. app.py +12 -24
app.py CHANGED
@@ -51,34 +51,22 @@ def load_model():
51
  processor = AutoProcessor.from_pretrained(MODEL_ID)
52
  logger.info("Processor loaded successfully")
53
 
54
- # Check if Accelerate is available
55
- try:
56
- import accelerate
57
- logger.info(f"Accelerate version: {accelerate.__version__}")
58
- has_accelerate = True
59
- except ImportError:
60
- logger.warning("Accelerate not found. Will load model without device mapping.")
61
- has_accelerate = False
62
 
63
- # Check if GPU is available
64
  if torch.cuda.is_available():
65
  gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1024**3
66
  logger.info(f"GPU memory: {gpu_memory:.2f} GB")
67
 
68
- if has_accelerate:
69
- # With Accelerate, use device mapping
70
- logger.info("Loading model with FP16 precision on GPU")
71
- model = Qwen2AudioForConditionalGeneration.from_pretrained(
72
- MODEL_ID,
73
- torch_dtype=torch.float16,
74
- device_map="auto",
75
- low_cpu_mem_usage=True
76
- )
77
- else:
78
- # Without Accelerate, load directly to GPU
79
- logger.info("Loading model directly to GPU")
80
- model = Qwen2AudioForConditionalGeneration.from_pretrained(MODEL_ID)
81
- model = model.to("cuda").half() # Move to GPU and convert to FP16
82
  else:
83
  # Load on CPU if no GPU
84
  logger.info("Loading model on CPU")
@@ -90,7 +78,7 @@ def load_model():
90
  except Exception as e:
91
  logger.error(f"Error loading model or processor: {e}")
92
  raise
93
-
94
  def process_audio(audio_url):
95
  """Process audio from URL"""
96
  logger.info(f"Processing audio: {audio_url}")
 
51
  processor = AutoProcessor.from_pretrained(MODEL_ID)
52
  logger.info("Processor loaded successfully")
53
 
54
+ # Force disable bitsandbytes integration
55
+ os.environ["DISABLE_BITSANDBYTES_CUDA_SETUP"] = "TRUE"
 
 
 
 
 
 
56
 
 
57
  if torch.cuda.is_available():
58
  gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1024**3
59
  logger.info(f"GPU memory: {gpu_memory:.2f} GB")
60
 
61
+ # Load directly with FP16 but without 8-bit quantization
62
+ logger.info("Loading model with FP16 precision")
63
+ model = Qwen2AudioForConditionalGeneration.from_pretrained(
64
+ MODEL_ID,
65
+ torch_dtype=torch.float16,
66
+ device_map="auto",
67
+ quantization_config=None, # Explicitly disable quantization
68
+ low_cpu_mem_usage=True
69
+ )
 
 
 
 
 
70
  else:
71
  # Load on CPU if no GPU
72
  logger.info("Loading model on CPU")
 
78
  except Exception as e:
79
  logger.error(f"Error loading model or processor: {e}")
80
  raise
81
+
82
  def process_audio(audio_url):
83
  """Process audio from URL"""
84
  logger.info(f"Processing audio: {audio_url}")