mclemcrew committed on
Commit
ed55f0b
·
1 Parent(s): f533e2c

updates for app

Browse files
Files changed (2) hide show
  1. app.py +51 -10
  2. requirements.txt +3 -1
app.py CHANGED
@@ -51,17 +51,58 @@ def load_model():
51
  processor = AutoProcessor.from_pretrained(MODEL_ID)
52
  logger.info("Processor loaded successfully")
53
 
54
- # Load model with basic FP16 config - using the correct model class
55
- logger.info(f"Loading model from {MODEL_ID}")
56
- model = Qwen2AudioForConditionalGeneration.from_pretrained(
57
- MODEL_ID,
58
- torch_dtype=torch.float16,
59
- device_map="auto",
60
- low_cpu_mem_usage=True
61
- )
62
- model.eval()
63
- logger.info("Model loaded successfully")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
 
65
  log_gpu_memory("After model loading")
66
  return model, processor
67
  except Exception as e:
 
51
  processor = AutoProcessor.from_pretrained(MODEL_ID)
52
  logger.info("Processor loaded successfully")
53
 
54
+ # Try loading model with quantization first
55
+ try:
56
+ logger.info(f"Attempting to load model with quantization from {MODEL_ID}")
57
+ from transformers import BitsAndBytesConfig
58
+
59
+ # Configure BitsAndBytes for 4-bit quantization
60
+ bnb_config = BitsAndBytesConfig(
61
+ load_in_4bit=True,
62
+ bnb_4bit_use_double_quant=True,
63
+ bnb_4bit_quant_type="nf4",
64
+ bnb_4bit_compute_dtype=torch.float16
65
+ )
66
+
67
+ model = Qwen2AudioForConditionalGeneration.from_pretrained(
68
+ MODEL_ID,
69
+ quantization_config=bnb_config,
70
+ device_map="auto",
71
+ low_cpu_mem_usage=True
72
+ )
73
+ logger.info("Model loaded successfully with quantization")
74
+ except Exception as quant_error:
75
+ # If quantization fails, fall back to basic loading
76
+ logger.warning(f"Quantization failed: {quant_error}. Falling back to standard loading.")
77
+
78
+ # Try FP16 if GPU available
79
+ if torch.cuda.is_available():
80
+ try:
81
+ model = Qwen2AudioForConditionalGeneration.from_pretrained(
82
+ MODEL_ID,
83
+ torch_dtype=torch.float16,
84
+ device_map="auto",
85
+ low_cpu_mem_usage=True
86
+ )
87
+ logger.info("Model loaded successfully with FP16")
88
+ except Exception as fp16_error:
89
+ logger.warning(f"FP16 loading failed: {fp16_error}. Falling back to CPU.")
90
+ model = Qwen2AudioForConditionalGeneration.from_pretrained(
91
+ MODEL_ID,
92
+ device_map="cpu",
93
+ low_cpu_mem_usage=True
94
+ )
95
+ logger.info("Model loaded successfully on CPU")
96
+ else:
97
+ # Load on CPU if no GPU
98
+ model = Qwen2AudioForConditionalGeneration.from_pretrained(
99
+ MODEL_ID,
100
+ device_map="cpu",
101
+ low_cpu_mem_usage=True
102
+ )
103
+ logger.info("Model loaded successfully on CPU")
104
 
105
+ model.eval()
106
  log_gpu_memory("After model loading")
107
  return model, processor
108
  except Exception as e:
requirements.txt CHANGED
@@ -7,4 +7,6 @@ librosa>=0.10.0
7
  soundfile>=0.12.1
8
  requests>=2.28.0
9
  pillow>=9.5.0
10
- huggingface_hub>=0.16.0
 
 
 
7
  soundfile>=0.12.1
8
  requests>=2.28.0
9
  pillow>=9.5.0
10
+ huggingface_hub>=0.16.0
11
+ bitsandbytes>=0.41.0
12
+ scikit-learn>=1.0.2