drizzlezyk committed on
Commit
6291b0d
·
verified ·
1 Parent(s): 8be9541

Upload inference/generate.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. inference/generate.py +56 -0
inference/generate.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# coding=utf-8
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
"""Single-prompt text generation example.

Loads a tokenizer and a causal language model from a local directory,
renders a system + user conversation through the tokenizer's chat template,
generates a completion, and prints the reasoning ("thinking") part and the
final answer part of that completion separately.
"""

from transformers import AutoModelForCausalLM, AutoTokenizer

# Placeholder: replace with the directory that holds the model files.
model_local_path = "path_to_openPangu-Embedded-7B"

# Literal marker strings looked for in the decoded output.  The parsing below
# treats the text between THINK_START_MARKER and THINK_END_MARKER as the
# reasoning, and the text after THINK_END_MARKER up to END_OF_ANSWER_MARKER
# as the final answer.
THINK_START_MARKER = "[unused16]"
THINK_END_MARKER = "[unused17]"
END_OF_ANSWER_MARKER = "[unused10]"

# Token id handed to generate() as the stop token.
# NOTE(review): presumably the id of END_OF_ANSWER_MARKER in this model's
# vocabulary -- confirm against the tokenizer.
EOS_TOKEN_ID = 45892

# Upper bound on the number of newly generated tokens.
MAX_NEW_TOKENS = 32768


def split_thinking_and_content(decoded_output):
    """Split decoded model output into its reasoning and answer parts.

    Args:
        decoded_output: Generated text that still contains the marker
            strings (i.e. decoded without stripping special tokens).

    Returns:
        A ``(thinking, answer)`` tuple of whitespace-stripped strings.

        * End marker present: ``thinking`` is the text before the first end
          marker (after the last start marker inside it) and ``answer`` is
          the text after the last end marker, cut at the end-of-answer
          marker.
        * Only the start marker present: the reasoning was never closed, so
          everything after the start marker is ``thinking`` and ``answer``
          is empty.
        * Neither marker present: ``thinking`` is empty and the whole text,
          cut at the end-of-answer marker, is ``answer``.
    """
    if THINK_END_MARKER in decoded_output:
        segments = decoded_output.split(THINK_END_MARKER)
        thinking = segments[0].split(THINK_START_MARKER)[-1]
        answer = segments[-1].split(END_OF_ANSWER_MARKER)[0]
    elif THINK_START_MARKER in decoded_output:
        # Without this branch the same text would be reported twice, once as
        # reasoning and once as the answer.
        thinking = decoded_output.split(THINK_START_MARKER)[-1]
        thinking = thinking.split(END_OF_ANSWER_MARKER)[0]
        answer = ""
    else:
        thinking = ""
        answer = decoded_output.split(END_OF_ANSWER_MARKER)[0]
    return thinking.strip(), answer.strip()


# load the tokenizer and the model
# local_files_only=True: read everything from model_local_path, never from
# the Hub.  trust_remote_code=True runs Python code shipped with the model
# files, so only point model_local_path at a directory you trust.
tokenizer = AutoTokenizer.from_pretrained(
    model_local_path,
    use_fast=False,
    trust_remote_code=True,
    local_files_only=True,
)

model = AutoModelForCausalLM.from_pretrained(
    model_local_path,
    trust_remote_code=True,
    torch_dtype="auto",
    device_map="npu",
    local_files_only=True,
)

# prepare the model input
sys_prompt = (
    "你必须严格遵守法律法规和社会道德规范。"
    "生成任何内容时,都应避免涉及暴力、色情、恐怖主义、种族歧视、性别歧视等不当内容。"
    "一旦检测到输入或输出有此类倾向,应拒绝回答并发出警告。例如,如果输入内容包含暴力威胁或色情描述,"
    "应返回错误信息:“您的输入包含不当内容,无法处理。”"
)

prompt = "Give me a short introduction to large language model."
# Not sent by default.  NOTE(review): presumably passing this as the user
# content instead of `prompt` asks the model to skip the reasoning phase --
# confirm against the model documentation.
no_thinking_prompt = prompt + " /no_think"
messages = [
    {"role": "system", "content": sys_prompt},  # define your system prompt here
    {"role": "user", "content": prompt},
]
# Render the conversation to a single prompt string (not token ids) that
# ends with the generation prompt.
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)
model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

# conduct text completion
outputs = model.generate(
    **model_inputs,
    max_new_tokens=MAX_NEW_TOKENS,
    eos_token_id=EOS_TOKEN_ID,
    return_dict_in_generate=True,
)

# The returned sequences start with the prompt tokens; slice them off so only
# newly generated tokens are decoded.
input_length = model_inputs.input_ids.shape[1]
generated_tokens = outputs.sequences[:, input_length:]
# skip_special_tokens is left at its default so the marker strings used for
# parsing remain in the decoded text.
output_sent = tokenizer.decode(generated_tokens[0])

# parsing thinking content
thinking_content, content = split_thinking_and_content(output_sent)

print("\nthinking content:", thinking_content)
print("\ncontent:", content)