{
"backend": {
"name": "pytorch",
"version": "2.1.0+rocm5.6",
"_target_": "optimum_benchmark.backends.pytorch.backend.PyTorchBackend",
"model": "TheBloke/Mistral-7B-Instruct-v0.1-AWQ",
"task": "text-generation",
"library": "transformers",
"device": "cuda",
"device_ids": "0",
"seed": 42,
"inter_op_num_threads": null,
"intra_op_num_threads": null,
"hub_kwargs": {
"revision": "main",
"force_download": false,
"local_files_only": false,
"trust_remote_code": false
},
"no_weights": true,
"device_map": null,
"torch_dtype": null,
"amp_autocast": false,
"amp_dtype": null,
"eval_mode": true,
"to_bettertransformer": false,
"low_cpu_mem_usage": null,
"attn_implementation": null,
"cache_implementation": null,
"torch_compile": false,
"torch_compile_config": {},
"quantization_scheme": "awq",
"quantization_config": {
"version": "exllama"
},
"deepspeed_inference": false,
"deepspeed_inference_config": {},
"peft_type": null,
"peft_config": {}
},
"launcher": {
"name": "process",
"_target_": "optimum_benchmark.launchers.process.launcher.ProcessLauncher",
"device_isolation": false,
"start_method": "spawn"
},
"benchmark": {
"name": "inference",
"_target_": "optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark",
"duration": 10,
"warmup_runs": 10,
"input_shapes": {
"batch_size": 4,
"num_choices": 2,
"sequence_length": 128
},
"new_tokens": null,
"energy": false,
"memory": true,
"latency": true,
"forward_kwargs": {},
"generate_kwargs": {
"max_new_tokens": 128,
"min_new_tokens": 128
},
"call_kwargs": {}
},
"experiment_name": "awq-exllamav2",
"task": null,
"model": null,
"device": null,
"library": null,
"environment": {
"cpu": " AMD EPYC 7763 64-Core Processor",
"cpu_count": 128,
"cpu_ram_mb": 1082015.236096,
"system": "Linux",
"machine": "x86_64",
"platform": "Linux-5.15.0-84-generic-x86_64-with-glibc2.35",
"processor": "x86_64",
"python_version": "3.9.18",
"gpu": [
"AMD INSTINCT MI250 (MCM) OAM AC MBA",
"AMD INSTINCT MI250 (MCM) OAM AC MBA",
"AMD INSTINCT MI250 (MCM) OAM AC MBA",
"AMD INSTINCT MI250 (MCM) OAM AC MBA",
"AMD INSTINCT MI250 (MCM) OAM AC MBA",
"AMD INSTINCT MI250 (MCM) OAM AC MBA",
"AMD INSTINCT MI250 (MCM) OAM AC MBA",
"AMD INSTINCT MI250 (MCM) OAM AC MBA"
],
"gpu_count": 8,
"gpu_vram_mb": 549621596160,
"optimum_benchmark_version": "0.2.0",
"optimum_benchmark_commit": "09f95ce7707eb32c7880f72cdd9e14e8b7554315",
"transformers_version": "4.37.2",
"transformers_commit": null,
"accelerate_version": "0.24.1",
"accelerate_commit": null,
"diffusers_version": "0.26.3",
"diffusers_commit": null,
"optimum_version": "1.14.1",
"optimum_commit": null,
"timm_version": "0.9.16",
"timm_commit": null,
"peft_version": "0.8.2",
"peft_commit": null
}
}