{
"model_config": {
"name": "QwenVL",
"model_path": "Qwen/Qwen2.5-VL-72B-Instruct",
"trust_remote_code": true,
"torch_dtype": "auto",
"device_map": "auto",
"low_cpu_mem_usage": true,
"use_cache": false,
"cache_dir": null
},
"compression_config": {
"name": "PTQ",
"quantization": {
"name": "fp8_dynamic",
"bits": 8,
"quant_method": {
"weight": "per-tensor",
"activation": "per-tensor"
},
"quant_helpers": [],
"smooth_alpha": 0.5,
"low_memory": false,
"modules_to_quantize": [],
"zero_point": true,
"mse_range": false,
"ignore_layers": [
"model.visual.patch_embed.proj",
"model.lm_head",
"model.language_model.embed_tokens",
"model.visual.merger.mlp.0",
"model.visual.merger.mlp.2",
"lm_head"
],
"quant_analyse": false,
"quant_vit": true
},
"cache": null
},
"dataset_config": null,
"global_config": {
"save_path": "./qwen2_5_vl-72b_fp8_dynamic",
"max_seq_length": 2048,
"hidden_size": 8192,
"model_arch_type": "qwen2_5_vl",
"deploy_backend": "vllm"
},
"infer_config": null,
"debug_info": {
"python": "3.12.11 (main, Jun 4 2025, 08:56:18) [GCC 11.4.0]",
"angelslim": {
"name": "angelslim",
"version": "677193e9ad36197c3a8ff1caa82dd230abf2447e",
"source": "git"
},
"torch": {
"name": "torch",
"version": "2.7.1",
"source": "pip"
},
"transformers": {
"name": "transformers",
"version": "4.53.3",
"source": "pip"
},
"torch_cuda_version": "12.6"
}
}