mclemcrew committed on
Commit
53c24d3
·
1 Parent(s): 89b08d6
Files changed (3) hide show
  1. app.py +31 -16
  2. bitsandbytes +1 -0
  3. requirements.txt +3 -3
app.py CHANGED
@@ -51,29 +51,43 @@ def load_model():
51
  processor = AutoProcessor.from_pretrained(MODEL_ID)
52
  logger.info("Processor loaded successfully")
53
 
54
- # Explicitly avoid any quantization/bitsandbytes paths
55
- logger.info(f"Loading model with direct GPU loading")
56
 
57
- # Check if GPU is available
58
  if torch.cuda.is_available():
59
  try:
60
- # Try direct GPU loading with FP16
61
- logger.info("Using FP16 precision on GPU")
62
- # Override the device_map to be more explicit
63
- model = Qwen2AudioForConditionalGeneration.from_pretrained(
64
- MODEL_ID,
65
- torch_dtype=torch.float16, # Use float16 precision
66
- device_map="auto",
67
- # Explicitly disable any 8-bit or 4-bit quantization
68
- load_in_8bit=False,
69
- load_in_4bit=False,
70
- )
71
- logger.info("Model loaded successfully with FP16 on GPU")
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  except Exception as gpu_error:
73
  logger.warning(f"GPU loading failed: {gpu_error}. Falling back to CPU.")
74
  model = Qwen2AudioForConditionalGeneration.from_pretrained(
75
  MODEL_ID,
76
  device_map="cpu",
 
77
  )
78
  logger.info("Model loaded successfully on CPU")
79
  else:
@@ -81,9 +95,10 @@ def load_model():
81
  model = Qwen2AudioForConditionalGeneration.from_pretrained(
82
  MODEL_ID,
83
  device_map="cpu",
 
84
  )
85
  logger.info("Model loaded successfully on CPU")
86
-
87
  model.eval()
88
  log_gpu_memory("After model loading")
89
  return model, processor
 
51
  processor = AutoProcessor.from_pretrained(MODEL_ID)
52
  logger.info("Processor loaded successfully")
53
 
54
+ # Skip quantization attempts since we know it's problematic with CUDA 12.4
55
+ logger.info(f"Loading model with optimized settings for your environment")
56
 
57
+ # Check if GPU is available and has enough memory
58
  if torch.cuda.is_available():
59
  try:
60
+ # Get GPU memory info
61
+ gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1024**3
62
+ logger.info(f"GPU memory: {gpu_memory:.2f} GB")
63
+
64
+ # If GPU has enough memory, try loading directly without quantization
65
+ if gpu_memory > 16: # For GPUs with >16GB memory
66
+ logger.info("Using FP16 precision on GPU")
67
+ model = Qwen2AudioForConditionalGeneration.from_pretrained(
68
+ MODEL_ID,
69
+ torch_dtype=torch.float16,
70
+ device_map="auto",
71
+ low_cpu_mem_usage=True
72
+ )
73
+ logger.info("Model loaded successfully with FP16")
74
+ else:
75
+ # For smaller GPUs, use CPU offloading
76
+ logger.info("Using CPU offloading for model components")
77
+ model = Qwen2AudioForConditionalGeneration.from_pretrained(
78
+ MODEL_ID,
79
+ torch_dtype=torch.float16,
80
+ device_map="auto",
81
+ offload_folder="offload",
82
+ low_cpu_mem_usage=True
83
+ )
84
+ logger.info("Model loaded successfully with CPU offloading")
85
  except Exception as gpu_error:
86
  logger.warning(f"GPU loading failed: {gpu_error}. Falling back to CPU.")
87
  model = Qwen2AudioForConditionalGeneration.from_pretrained(
88
  MODEL_ID,
89
  device_map="cpu",
90
+ low_cpu_mem_usage=True
91
  )
92
  logger.info("Model loaded successfully on CPU")
93
  else:
 
95
  model = Qwen2AudioForConditionalGeneration.from_pretrained(
96
  MODEL_ID,
97
  device_map="cpu",
98
+ low_cpu_mem_usage=True
99
  )
100
  logger.info("Model loaded successfully on CPU")
101
+
102
  model.eval()
103
  log_gpu_memory("After model loading")
104
  return model, processor
bitsandbytes ADDED
@@ -0,0 +1 @@
 
 
1
+ Subproject commit e82f72b3acd37bfa9f32773e8844ac7bafad2b19
requirements.txt CHANGED
@@ -1,11 +1,11 @@
1
  gradio==4.44.1
2
- transformers>=4.35.0
3
  torch>=2.0.1
4
- accelerate>=0.20.0
5
  numpy>=1.24.0
6
  librosa>=0.10.0
7
  soundfile>=0.12.1
8
  requests>=2.28.0
9
  pillow>=9.5.0
10
  huggingface_hub>=0.16.0
11
- scikit-learn>=1.0.2
 
 
 
1
  gradio==4.44.1
 
2
  torch>=2.0.1
 
3
  numpy>=1.24.0
4
  librosa>=0.10.0
5
  soundfile>=0.12.1
6
  requests>=2.28.0
7
  pillow>=9.5.0
8
  huggingface_hub>=0.16.0
9
+ scikit-learn>=1.0.2
10
+ git+https://github.com/huggingface/accelerate.git
11
+ git+https://github.com/huggingface/transformers.git