DeepSeek-R1 / config.json
jana0010's picture
Update config.json
e75c925 verified
raw
history blame
1.89 kB
Invalid JSON: Unexpected token 'd', "def load_m"... is not valid JSON
def load_model_with_quantization_fallback(
    model_name: str = "deepseek-ai/DeepSeek-R1",
    trust_remote_code: bool = True,
    device_map: Optional[Union[str, Dict[str, Any]]] = "auto",
    **kwargs
) -> Tuple[PreTrainedModel, PreTrainedTokenizer]:
    """Load a Hugging Face model and tokenizer, falling back to an
    unquantized load when the checkpoint's quantization scheme is not
    supported by the installed libraries.

    Args:
        model_name: Hub repo id or local path of the checkpoint.
        trust_remote_code: Allow execution of the repo's custom modeling
            code (required for models like DeepSeek-R1 that ship their
            own architecture code).
        device_map: Passed through to ``from_pretrained`` for device
            placement (``"auto"`` lets accelerate shard the model).
        **kwargs: Extra keyword arguments forwarded to
            ``AutoModel.from_pretrained``.

    Returns:
        A ``(model, tokenizer)`` tuple.

    Raises:
        ValueError: For loading errors other than an unknown
            quantization type.
        Exception: Whatever the fallback load raises if it also fails.
    """
    try:
        model = AutoModel.from_pretrained(
            model_name,
            trust_remote_code=trust_remote_code,
            device_map=device_map,
            **kwargs
        )
        # Pass trust_remote_code here too: custom-code checkpoints often
        # ship a custom tokenizer class as well (previously omitted,
        # inconsistent with the fallback path below).
        tokenizer = AutoTokenizer.from_pretrained(
            model_name,
            trust_remote_code=trust_remote_code
        )
        logger.info("Model loaded successfully with original configuration")
        return model, tokenizer
    except ValueError as e:
        # transformers raises ValueError with this message when the
        # checkpoint's quantization_config names a backend it doesn't know.
        if "Unknown quantization type" in str(e):
            logger.warning(
                "Quantization type not supported directly. "
                "Attempting to load without quantization..."
            )
            config = AutoConfig.from_pretrained(
                model_name,
                trust_remote_code=trust_remote_code
            )
            # Strip the offending quantization block so from_pretrained
            # loads the weights in their unquantized dtype instead.
            if hasattr(config, "quantization_config"):
                delattr(config, "quantization_config")
            try:
                model = AutoModel.from_pretrained(
                    model_name,
                    config=config,
                    trust_remote_code=trust_remote_code,
                    device_map=device_map,
                    **kwargs
                )
                tokenizer = AutoTokenizer.from_pretrained(
                    model_name,
                    trust_remote_code=trust_remote_code
                )
                logger.info("Model loaded successfully without quantization")
                return model, tokenizer
            except Exception as inner_e:
                logger.error(f"Failed to load model without quantization: {str(inner_e)}")
                raise
        else:
            # Any other ValueError is unexpected: log and propagate as-is.
            logger.error(f"Unexpected error during model loading: {str(e)}")
            raise