update README.md
Browse files
README.md
CHANGED
|
@@ -151,9 +151,9 @@ model_inputs = tokenizer([prompt_text], return_tensors="pt").to(device)
|
|
| 151 |
|
| 152 |
model_outputs = model.generate(
|
| 153 |
**model_inputs,
|
| 154 |
-
max_new_tokens=
|
| 155 |
-
top_p=0.
|
| 156 |
-
temperature=0.
|
| 157 |
)
|
| 158 |
output_token_ids = [
|
| 159 |
model_outputs[i][len(model_inputs[i]):] for i in range(len(model_inputs['input_ids']))
|
|
@@ -242,8 +242,8 @@ response = client.chat.completions.create(
|
|
| 242 |
messages=[
|
| 243 |
{"role": "user", "content": "Write an article about Artificial Intelligence."},
|
| 244 |
],
|
| 245 |
-
temperature=0.
|
| 246 |
-
max_tokens=
|
| 247 |
)
|
| 248 |
|
| 249 |
print(response.choices[0].message.content)
|
|
@@ -263,7 +263,7 @@ from transformers import AutoTokenizer
|
|
| 263 |
from vllm import LLM, SamplingParams
|
| 264 |
|
| 265 |
model_name = "openbmb/MiniCPM4.1-8B"
|
| 266 |
-
prompt = [{"role": "user", "content": "
|
| 267 |
|
| 268 |
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
|
| 269 |
input_text = tokenizer.apply_chat_template(prompt, tokenize=False, add_generation_prompt=True)
|
|
@@ -275,7 +275,7 @@ llm = LLM(
|
|
| 275 |
dtype="bfloat16",
|
| 276 |
gpu_memory_utilization=0.8,
|
| 277 |
)
|
| 278 |
-
sampling_params = SamplingParams(top_p=0.
|
| 279 |
|
| 280 |
outputs = llm.generate(prompts=input_text, sampling_params=sampling_params)
|
| 281 |
|
|
@@ -301,8 +301,8 @@ response = client.chat.completions.create(
|
|
| 301 |
messages=[
|
| 302 |
{"role": "user", "content": "Write an article about Artificial Intelligence."},
|
| 303 |
],
|
| 304 |
-
temperature=0.
|
| 305 |
-
max_tokens=
|
| 306 |
extra_body=dict(add_special_tokens=True), # Ensures special tokens are added for chat template
|
| 307 |
|
| 308 |
)
|
|
|
|
| 151 |
|
| 152 |
model_outputs = model.generate(
|
| 153 |
**model_inputs,
|
| 154 |
+
max_new_tokens=32768,
|
| 155 |
+
top_p=0.95,
|
| 156 |
+
temperature=0.6
|
| 157 |
)
|
| 158 |
output_token_ids = [
|
| 159 |
model_outputs[i][len(model_inputs[i]):] for i in range(len(model_inputs['input_ids']))
|
|
|
|
| 242 |
messages=[
|
| 243 |
{"role": "user", "content": "Write an article about Artificial Intelligence."},
|
| 244 |
],
|
| 245 |
+
temperature=0.6,
|
| 246 |
+
max_tokens=32768,
|
| 247 |
)
|
| 248 |
|
| 249 |
print(response.choices[0].message.content)
|
|
|
|
| 263 |
from vllm import LLM, SamplingParams
|
| 264 |
|
| 265 |
model_name = "openbmb/MiniCPM4.1-8B"
|
| 266 |
+
prompt = [{"role": "user", "content": "Write an article about Artificial Intelligence."}]
|
| 267 |
|
| 268 |
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
|
| 269 |
input_text = tokenizer.apply_chat_template(prompt, tokenize=False, add_generation_prompt=True)
|
|
|
|
| 275 |
dtype="bfloat16",
|
| 276 |
gpu_memory_utilization=0.8,
|
| 277 |
)
|
| 278 |
+
sampling_params = SamplingParams(top_p=0.95, temperature=0.6, max_tokens=32768)
|
| 279 |
|
| 280 |
outputs = llm.generate(prompts=input_text, sampling_params=sampling_params)
|
| 281 |
|
|
|
|
| 301 |
messages=[
|
| 302 |
{"role": "user", "content": "Write an article about Artificial Intelligence."},
|
| 303 |
],
|
| 304 |
+
temperature=0.6,
|
| 305 |
+
max_tokens=32768,
|
| 306 |
extra_body=dict(add_special_tokens=True), # Ensures special tokens are added for chat template
|
| 307 |
|
| 308 |
)
|