from typing import Optional

import fire
import torch

import moe_peft
def main(
    base_model: str,
    task_name: str,
    data_path: Optional[str] = None,
    lora_weights: Optional[str] = None,
    load_16bit: bool = True,
    load_8bit: bool = False,
    load_4bit: bool = False,
    flash_attn: bool = False,
    save_file: Optional[str] = None,
    batch_size: int = 32,
    router_profile: bool = False,
    device: str = moe_peft.executor.default_device_name(),
):
    """Evaluate a pretrained model (optionally with a LoRA adapter) on a task.

    Args:
        base_model: Path or hub name of the pretrained base model.
        task_name: Name of the evaluation task handed to ``moe_peft.evaluate``.
        data_path: Optional path to the evaluation data set.
        lora_weights: Optional LoRA adapter to load; when omitted, a fresh
            default adapter is initialized instead.
        load_16bit: Load weights as bfloat16 (otherwise float32).
        load_8bit: Quantize weights to 8-bit; takes precedence over 4-bit.
        load_4bit: Quantize weights to 4-bit (ignored when load_8bit is set).
        flash_attn: Use the "flash_attn" attention implementation ("eager"
            otherwise).
        save_file: Optional file path to write evaluation results to.
        batch_size: Evaluation batch size.
        router_profile: Enable MoE router profiling during evaluation.
        device: Device to run on; defaults to the executor's default device.
    """
    moe_peft.setup_logging("INFO")

    # Bail out early when the execution backend is unavailable.
    if not moe_peft.executor.check_available():
        exit(-1)

    model = moe_peft.LLMModel.from_pretrained(
        base_model,
        device=device,
        attn_impl="flash_attn" if flash_attn else "eager",
        # 8-bit wins when both quantization flags are set; None = no quantization.
        bits=(8 if load_8bit else (4 if load_4bit else None)),
        load_dtype=torch.bfloat16 if load_16bit else torch.float32,
    )
    tokenizer = moe_peft.Tokenizer(base_model)

    if lora_weights:
        adapter_name = model.load_adapter(lora_weights)
    else:
        adapter_name = model.init_adapter(
            moe_peft.AdapterConfig(adapter_name="default")
        )

    # Fixed local-variable typo: "paramas" -> "params".
    evaluate_params = moe_peft.EvaluateConfig(
        adapter_name=adapter_name,
        task_name=task_name,
        data_path=data_path,
        batch_size=batch_size,
        router_profile=router_profile,
    )
    moe_peft.evaluate(model, tokenizer, [evaluate_params], save_file=save_file)
| if __name__ == "__main__": | |
| fire.Fire(main) | |