import logging
from typing import Any, Dict, Optional, Tuple, Union

from transformers import (
    AutoConfig,
    AutoModel,
    AutoTokenizer,
    PreTrainedModel,
    PreTrainedTokenizer,
)

logger = logging.getLogger(__name__)


def load_model_with_quantization_fallback(
    model_name: str = "deepseek-ai/DeepSeek-R1",
    trust_remote_code: bool = True,
    device_map: Optional[Union[str, Dict[str, Any]]] = "auto",
    **kwargs,
) -> Tuple[PreTrainedModel, PreTrainedTokenizer]:
    """Load a model and tokenizer, falling back to an unquantized load when the
    checkpoint's quantization scheme is not recognized by the installed
    version of transformers."""
    try:
        # First attempt: load the checkpoint exactly as published.
        model = AutoModel.from_pretrained(
            model_name,
            trust_remote_code=trust_remote_code,
            device_map=device_map,
            **kwargs,
        )
        # The tokenizer may also ship custom code, so forward trust_remote_code here too.
        tokenizer = AutoTokenizer.from_pretrained(
            model_name, trust_remote_code=trust_remote_code
        )
        logger.info("Model loaded successfully with original configuration")
        return model, tokenizer
    except ValueError as e:
        if "Unknown quantization type" in str(e):
            logger.warning(
                "Quantization type not supported directly. "
                "Attempting to load without quantization..."
            )
            # Fallback: strip the unsupported quantization settings from the
            # config and retry the load with that modified config.
            config = AutoConfig.from_pretrained(
                model_name, trust_remote_code=trust_remote_code
            )
            if hasattr(config, "quantization_config"):
                delattr(config, "quantization_config")
            try:
                model = AutoModel.from_pretrained(
                    model_name,
                    config=config,
                    trust_remote_code=trust_remote_code,
                    device_map=device_map,
                    **kwargs,
                )
                tokenizer = AutoTokenizer.from_pretrained(
                    model_name, trust_remote_code=trust_remote_code
                )
                logger.info("Model loaded successfully without quantization")
                return model, tokenizer
            except Exception as inner_e:
                logger.error(f"Failed to load model without quantization: {str(inner_e)}")
                raise
        else:
            logger.error(f"Unexpected error during model loading: {str(e)}")
            raise
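

# Minimal usage sketch (not part of the original function): the torch_dtype
# kwarg shown here is an illustrative assumption forwarded via **kwargs to
# from_pretrained; adjust it to whatever your environment supports.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    model, tokenizer = load_model_with_quantization_fallback(
        "deepseek-ai/DeepSeek-R1",
        torch_dtype="auto",  # passed through **kwargs
    )
    print(type(model).__name__, type(tokenizer).__name__)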