updates for app
- app.py +51 -10
- requirements.txt +3 -1
app.py
CHANGED
@@ -51,17 +51,58 @@ def load_model():
         processor = AutoProcessor.from_pretrained(MODEL_ID)
         logger.info("Processor loaded successfully")
 
-        #
-
-
-
-
-
-
-
-
-
+        # Try loading the model with quantization first
+        try:
+            logger.info(f"Attempting to load model with quantization from {MODEL_ID}")
+            from transformers import BitsAndBytesConfig
+
+            # Configure BitsAndBytes for 4-bit quantization
+            bnb_config = BitsAndBytesConfig(
+                load_in_4bit=True,
+                bnb_4bit_use_double_quant=True,
+                bnb_4bit_quant_type="nf4",
+                bnb_4bit_compute_dtype=torch.float16
+            )
+
+            model = Qwen2AudioForConditionalGeneration.from_pretrained(
+                MODEL_ID,
+                quantization_config=bnb_config,
+                device_map="auto",
+                low_cpu_mem_usage=True
+            )
+            logger.info("Model loaded successfully with quantization")
+        except Exception as quant_error:
+            # If quantization fails, fall back to basic loading
+            logger.warning(f"Quantization failed: {quant_error}. Falling back to standard loading.")
+
+            # Try FP16 if GPU available
+            if torch.cuda.is_available():
+                try:
+                    model = Qwen2AudioForConditionalGeneration.from_pretrained(
+                        MODEL_ID,
+                        torch_dtype=torch.float16,
+                        device_map="auto",
+                        low_cpu_mem_usage=True
+                    )
+                    logger.info("Model loaded successfully with FP16")
+                except Exception as fp16_error:
+                    logger.warning(f"FP16 loading failed: {fp16_error}. Falling back to CPU.")
+                    model = Qwen2AudioForConditionalGeneration.from_pretrained(
+                        MODEL_ID,
+                        device_map="cpu",
+                        low_cpu_mem_usage=True
+                    )
+                    logger.info("Model loaded successfully on CPU")
+            else:
+                # Load on CPU if no GPU
+                model = Qwen2AudioForConditionalGeneration.from_pretrained(
+                    MODEL_ID,
+                    device_map="cpu",
+                    low_cpu_mem_usage=True
+                )
+                logger.info("Model loaded successfully on CPU")
 
+        model.eval()
         log_gpu_memory("After model loading")
         return model, processor
     except Exception as e:
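The change gives load_model() a three-tier fallback: 4-bit NF4 quantization via bitsandbytes, then FP16 on a CUDA GPU, then full precision on CPU. Below is a condensed, self-contained sketch of that chain for readability; the model id is a placeholder (app.py defines its own MODEL_ID), and the logging and the FP16-to-CPU sub-fallback are omitted for brevity. It is illustrative, not the committed code.

# Condensed sketch of the fallback chain above; names marked as placeholders are not from app.py.
import torch
from transformers import AutoProcessor, BitsAndBytesConfig, Qwen2AudioForConditionalGeneration

MODEL_ID = "Qwen/Qwen2-Audio-7B-Instruct"  # placeholder; app.py defines the real value

def load_model_with_fallback(model_id: str = MODEL_ID):
    """Try 4-bit NF4 quantization, then FP16 on GPU, then plain CPU loading."""
    processor = AutoProcessor.from_pretrained(model_id)
    try:
        # Tier 1: 4-bit NF4 quantization (needs bitsandbytes and a CUDA device)
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.float16,
        )
        model = Qwen2AudioForConditionalGeneration.from_pretrained(
            model_id, quantization_config=bnb_config, device_map="auto", low_cpu_mem_usage=True
        )
    except Exception:
        if torch.cuda.is_available():
            # Tier 2: half precision on the GPU
            model = Qwen2AudioForConditionalGeneration.from_pretrained(
                model_id, torch_dtype=torch.float16, device_map="auto", low_cpu_mem_usage=True
            )
        else:
            # Tier 3: full precision on the CPU
            model = Qwen2AudioForConditionalGeneration.from_pretrained(
                model_id, device_map="cpu", low_cpu_mem_usage=True
            )
    model.eval()  # inference only, matching the new line added in the diff
    return model, processor

The practical effect is that the Space can still start when bitsandbytes or a GPU is unavailable, trading memory and speed for availability.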
requirements.txt
CHANGED
@@ -7,4 +7,6 @@ librosa>=0.10.0
 soundfile>=0.12.1
 requests>=2.28.0
 pillow>=9.5.0
-huggingface_hub>=0.16.0
+huggingface_hub>=0.16.0
+bitsandbytes>=0.41.0
+scikit-learn>=1.0.2