DeepSeek-R1 / config.json
jana0010's picture
Update config.json
e75c925 verified
raw
history blame
1.89 kB
Invalid JSON: Unexpected token 'd', "def load_m"... is not valid JSON
def load_model_with_quantization_fallback(
    model_name: str = "deepseek-ai/DeepSeek-R1",
    trust_remote_code: bool = True,
    device_map: Optional[Union[str, Dict[str, Any]]] = "auto",
    **kwargs
) -> Tuple[PreTrainedModel, PreTrainedTokenizer]:
    """Load a Hugging Face model and tokenizer, falling back to an
    unquantized load when the checkpoint's quantization scheme is not
    supported by the installed libraries.

    Args:
        model_name: Hub repo id or local path of the checkpoint.
        trust_remote_code: Allow execution of the repo's custom modeling
            code (required for models like DeepSeek-R1 that ship their
            own architecture code).
        device_map: Passed through to ``from_pretrained`` for device
            placement (``"auto"`` lets accelerate shard the model).
        **kwargs: Extra keyword arguments forwarded to
            ``AutoModel.from_pretrained``.

    Returns:
        A ``(model, tokenizer)`` tuple.

    Raises:
        ValueError: For loading errors other than an unknown
            quantization type.
        Exception: Whatever the fallback load raises if it also fails.
    """
    try:
        model = AutoModel.from_pretrained(
            model_name,
            trust_remote_code=trust_remote_code,
            device_map=device_map,
            **kwargs
        )
        # Pass trust_remote_code here too: custom-code checkpoints often
        # ship a custom tokenizer class as well (previously omitted,
        # inconsistent with the fallback path below).
        tokenizer = AutoTokenizer.from_pretrained(
            model_name,
            trust_remote_code=trust_remote_code
        )
        logger.info("Model loaded successfully with original configuration")
        return model, tokenizer
    except ValueError as e:
        # transformers raises ValueError with this message when the
        # checkpoint's quantization_config names a backend it doesn't know.
        if "Unknown quantization type" in str(e):
            logger.warning(
                "Quantization type not supported directly. "
                "Attempting to load without quantization..."
            )
            config = AutoConfig.from_pretrained(
                model_name,
                trust_remote_code=trust_remote_code
            )
            # Strip the offending quantization block so from_pretrained
            # loads the weights in their unquantized dtype instead.
            if hasattr(config, "quantization_config"):
                delattr(config, "quantization_config")
            try:
                model = AutoModel.from_pretrained(
                    model_name,
                    config=config,
                    trust_remote_code=trust_remote_code,
                    device_map=device_map,
                    **kwargs
                )
                tokenizer = AutoTokenizer.from_pretrained(
                    model_name,
                    trust_remote_code=trust_remote_code
                )
                logger.info("Model loaded successfully without quantization")
                return model, tokenizer
            except Exception as inner_e:
                logger.error(f"Failed to load model without quantization: {str(inner_e)}")
                raise
        else:
            # Any other ValueError is unexpected: log and propagate as-is.
            logger.error(f"Unexpected error during model loading: {str(e)}")
            raise