mclemcrew committed on
Commit
c1a0ce1
·
1 Parent(s): 7324297
Files changed (2) hide show
  1. app.py +26 -41
  2. requirements.txt +1 -1
app.py CHANGED
@@ -51,53 +51,38 @@ def load_model():
51
  processor = AutoProcessor.from_pretrained(MODEL_ID)
52
  logger.info("Processor loaded successfully")
53
 
54
- # Skip quantization attempts since we know it's problematic with CUDA 12.4
55
- logger.info(f"Loading model with optimized settings for your environment")
 
 
 
 
 
 
56
 
57
- # Check if GPU is available and has enough memory
58
  if torch.cuda.is_available():
59
- try:
60
- # Get GPU memory info
61
- gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1024**3
62
- logger.info(f"GPU memory: {gpu_memory:.2f} GB")
63
-
64
- # If GPU has enough memory, try loading directly without quantization
65
- if gpu_memory > 16: # For GPUs with >16GB memory
66
- logger.info("Using FP16 precision on GPU")
67
- model = Qwen2AudioForConditionalGeneration.from_pretrained(
68
- MODEL_ID,
69
- torch_dtype=torch.float16,
70
- device_map="auto",
71
- low_cpu_mem_usage=True
72
- )
73
- logger.info("Model loaded successfully with FP16")
74
- else:
75
- # For smaller GPUs, use CPU offloading
76
- logger.info("Using CPU offloading for model components")
77
- model = Qwen2AudioForConditionalGeneration.from_pretrained(
78
- MODEL_ID,
79
- torch_dtype=torch.float16,
80
- device_map="auto",
81
- offload_folder="offload",
82
- low_cpu_mem_usage=True
83
- )
84
- logger.info("Model loaded successfully with CPU offloading")
85
- except Exception as gpu_error:
86
- logger.warning(f"GPU loading failed: {gpu_error}. Falling back to CPU.")
87
  model = Qwen2AudioForConditionalGeneration.from_pretrained(
88
- MODEL_ID,
89
- device_map="cpu",
 
90
  low_cpu_mem_usage=True
91
  )
92
- logger.info("Model loaded successfully on CPU")
 
 
 
 
93
  else:
94
  # Load on CPU if no GPU
95
- model = Qwen2AudioForConditionalGeneration.from_pretrained(
96
- MODEL_ID,
97
- device_map="cpu",
98
- low_cpu_mem_usage=True
99
- )
100
- logger.info("Model loaded successfully on CPU")
101
 
102
  model.eval()
103
  log_gpu_memory("After model loading")
@@ -105,7 +90,7 @@ def load_model():
105
  except Exception as e:
106
  logger.error(f"Error loading model or processor: {e}")
107
  raise
108
-
109
  def process_audio(audio_url):
110
  """Process audio from URL"""
111
  logger.info(f"Processing audio: {audio_url}")
 
51
  processor = AutoProcessor.from_pretrained(MODEL_ID)
52
  logger.info("Processor loaded successfully")
53
 
54
+ # Check if Accelerate is available
55
+ try:
56
+ import accelerate
57
+ logger.info(f"Accelerate version: {accelerate.__version__}")
58
+ has_accelerate = True
59
+ except ImportError:
60
+ logger.warning("Accelerate not found. Will load model without device mapping.")
61
+ has_accelerate = False
62
 
63
+ # Check if GPU is available
64
  if torch.cuda.is_available():
65
+ gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1024**3
66
+ logger.info(f"GPU memory: {gpu_memory:.2f} GB")
67
+
68
+ if has_accelerate:
69
+ # With Accelerate, use device mapping
70
+ logger.info("Loading model with FP16 precision on GPU")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  model = Qwen2AudioForConditionalGeneration.from_pretrained(
72
+ MODEL_ID,
73
+ torch_dtype=torch.float16,
74
+ device_map="auto",
75
  low_cpu_mem_usage=True
76
  )
77
+ else:
78
+ # Without Accelerate, load directly to GPU
79
+ logger.info("Loading model directly to GPU")
80
+ model = Qwen2AudioForConditionalGeneration.from_pretrained(MODEL_ID)
81
+ model = model.to("cuda").half() # Move to GPU and convert to FP16
82
  else:
83
  # Load on CPU if no GPU
84
+ logger.info("Loading model on CPU")
85
+ model = Qwen2AudioForConditionalGeneration.from_pretrained(MODEL_ID)
 
 
 
 
86
 
87
  model.eval()
88
  log_gpu_memory("After model loading")
 
90
  except Exception as e:
91
  logger.error(f"Error loading model or processor: {e}")
92
  raise
93
+
94
  def process_audio(audio_url):
95
  """Process audio from URL"""
96
  logger.info(f"Processing audio: {audio_url}")
requirements.txt CHANGED
@@ -4,7 +4,7 @@ transformers
4
  datasets
5
  peft
6
  bitsandbytes==0.41.1
7
- accelerate==0.25.0
8
  hf_transfer
9
  tensorboard
10
  requests
 
4
  datasets
5
  peft
6
  bitsandbytes==0.41.1
7
+ accelerate>=0.26.0
8
  hf_transfer
9
  tensorboard
10
  requests