yuccaaa committed on
Commit
5e94860
·
verified ·
1 Parent(s): ad719e4

Upload ms-swift/examples/infer/demo_hf.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. ms-swift/examples/infer/demo_hf.py +61 -0
ms-swift/examples/infer/demo_hf.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def infer_hf():
    """Answer the demo prompt using transformers + peft directly.

    Downloads the 'Qwen/Qwen2.5-7B-Instruct' base model and the
    'swift/test_lora' adapter through modelscope, attaches the adapter to
    the base model, renders a two-turn (system + user) conversation with the
    tokenizer's chat template and generates up to 512 new tokens with
    sampling disabled.

    Returns:
        The decoded reply text (special tokens skipped). The reply is also
        printed to stdout.
    """
    from transformers import AutoModelForCausalLM, AutoTokenizer
    from peft import PeftModel
    from modelscope import snapshot_download

    base_path = snapshot_download('Qwen/Qwen2.5-7B-Instruct')
    lora_path = snapshot_download('swift/test_lora')

    base_model = AutoModelForCausalLM.from_pretrained(
        base_path,
        torch_dtype='auto',
        device_map='auto',
        trust_remote_code=True,
    )
    # Wrap the base model with the downloaded adapter weights.
    peft_model = PeftModel.from_pretrained(base_model, lora_path)

    tok = AutoTokenizer.from_pretrained(base_path, trust_remote_code=True)

    system_turn = {'role': 'system', 'content': 'You are a helpful assistant.'}
    user_turn = {'role': 'user', 'content': 'who are you?'}
    prompt = tok.apply_chat_template(
        [system_turn, user_turn],
        tokenize=False,
        add_generation_prompt=True,
    )

    # The prompt is already rendered by the chat template, so the tokenizer
    # is told not to add special tokens of its own.
    encoded = tok([prompt], return_tensors='pt', add_special_tokens=False)
    encoded = encoded.to(peft_model.device)

    full_sequences = peft_model.generate(**encoded, max_new_tokens=512, do_sample=False)

    # Drop the leading prompt-length slice of every returned sequence so
    # only the newly generated tokens are decoded.
    completions = []
    for prompt_ids, sequence in zip(encoded.input_ids, full_sequences):
        completions.append(sequence[len(prompt_ids):])

    decoded = tok.batch_decode(completions, skip_special_tokens=True)
    response = decoded[0]
    print(f'response: {response}')
    return response
31
+
32
+
33
def infer_swift():
    """Answer the demo prompt using the swift inference engine.

    Downloads the 'Qwen/Qwen2.5-7B-Instruct' base model and the
    'swift/test_lora' adapter through modelscope, loads the adapter with
    ``Swift.from_pretrained``, builds a ``PtEngine`` from the model and its
    template, and submits a single two-turn (system + user) request with
    ``max_tokens=512`` and ``temperature=0``.

    Returns:
        The message content of the first choice of the first response. The
        reply is also printed to stdout.
    """
    from swift.llm import get_model_tokenizer, get_template, InferRequest, RequestConfig, PtEngine
    from modelscope import snapshot_download
    from swift.tuners import Swift

    base_path = snapshot_download('Qwen/Qwen2.5-7B-Instruct')
    lora_path = snapshot_download('swift/test_lora')

    base_model, tok = get_model_tokenizer(base_path, device_map='auto')
    tuned_model = Swift.from_pretrained(base_model, lora_path)

    # The template name is read from the adapter-wrapped model's metadata.
    chat_template = get_template(tuned_model.model_meta.template, tok)
    engine = PtEngine.from_model_template(tuned_model, chat_template)

    system_turn = {'role': 'system', 'content': 'You are a helpful assistant.'}
    user_turn = {'role': 'user', 'content': 'who are you?'}
    request = InferRequest(messages=[system_turn, user_turn])

    config = RequestConfig(max_tokens=512, temperature=0)
    results = engine.infer([request], request_config=config)

    first_choice = results[0].choices[0]
    response = first_choice.message.content
    print(f'response: {response}')
    return response
56
+
57
+
58
if __name__ == '__main__':
    # Run the same prompt through both inference paths and require that they
    # produce the same reply.
    response = infer_hf()
    response2 = infer_swift()
    # Raise explicitly instead of using a bare `assert`: assert statements
    # are removed when Python runs with -O, which would silently skip the
    # comparison. The exception type stays AssertionError, and the message
    # shows both replies so a mismatch can be diagnosed from the traceback.
    if response != response2:
        raise AssertionError('infer_hf and infer_swift returned different responses:\n'
                             f'  infer_hf:    {response!r}\n'
                             f'  infer_swift: {response2!r}')