Update LongRoPE setting
Browse files
README.md
CHANGED
|
@@ -88,9 +88,9 @@ MiniCPM4.1 natively supports context lengths of up to 65,536 (64K) tokens. To rep
|
|
| 88 |
...,
|
| 89 |
"rope_scaling": {
|
| 90 |
"rope_type": "longrope",
|
| 91 |
-
"long_factor": [0.
|
| 92 |
-
"short_factor": [0.
|
| 93 |
-
"original_max_position_embeddings":
|
| 94 |
}
|
| 95 |
}
|
| 96 |
```
|
|
@@ -208,9 +208,9 @@ You can apply the LongRoPE factor modification by modifying the model files. Spe
|
|
| 208 |
...,
|
| 209 |
"rope_scaling": {
|
| 210 |
"rope_type": "longrope",
|
| 211 |
-
"long_factor": [0.
|
| 212 |
-
"short_factor": [0.
|
| 213 |
-
"original_max_position_embeddings":
|
| 214 |
}
|
| 215 |
}
|
| 216 |
```
|
|
@@ -263,7 +263,7 @@ from transformers import AutoTokenizer
|
|
| 263 |
from vllm import LLM, SamplingParams
|
| 264 |
|
| 265 |
model_name = "openbmb/MiniCPM4.1-8B"
|
| 266 |
-
prompt = [{"role": "user", "content": "Please recommend 5 tourist attractions in Beijing.
|
| 267 |
|
| 268 |
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
|
| 269 |
input_text = tokenizer.apply_chat_template(prompt, tokenize=False, add_generation_prompt=True)
|
|
@@ -271,11 +271,11 @@ input_text = tokenizer.apply_chat_template(prompt, tokenize=False, add_generatio
|
|
| 271 |
llm = LLM(
|
| 272 |
model=model_name,
|
| 273 |
trust_remote_code=True,
|
| 274 |
-
max_num_batched_tokens=
|
| 275 |
dtype="bfloat16",
|
| 276 |
gpu_memory_utilization=0.8,
|
| 277 |
)
|
| 278 |
-
sampling_params = SamplingParams(top_p=0.7, temperature=0.7, max_tokens=
|
| 279 |
|
| 280 |
outputs = llm.generate(prompts=input_text, sampling_params=sampling_params)
|
| 281 |
|
|
|
|
| 88 |
...,
|
| 89 |
"rope_scaling": {
|
| 90 |
"rope_type": "longrope",
|
| 91 |
+
"long_factor": [0.9982316082870437, 1.033048153422584, 1.0749920956484724, 1.1255096879436193, 1.1863348602111476, 1.259543828902579, 1.3476188888731149, 1.4535223827776373, 1.5807816745852985, 1.7335856049489526, 1.9168922912975785, 2.1365471404135326, 2.3994084200118646, 2.713475511863602, 3.0880118452194134, 3.533650295140154, 4.062463396503134, 4.687974098908333, 5.425075306704039, 6.289818967956352, 7.29902962722721, 8.6357018163639, 10.210822723989212, 12.053807765671676, 14.193944598909404, 16.65780676784363, 19.463620727694074, 22.628311203524586, 26.150106147261315, 30.02526691405111, 34.23183327975347, 38.73811934094828, 43.502489489729555, 48.47627117965394, 53.61139491762471, 58.857366522037935, 64.16798299215064, 69.51359464319125, 74.86555458220285, 80.21497790341579, 85.55322183307433, 90.89611806932027, 96.26245306514224, 101.68269304046481, 107.18619510219668, 112.82253283014026, 118.63764063163615, 119.88866203644656, 120.9462882391725, 121.837565139014, 122.58663780572562, 123.2147719894291, 123.74049454862576, 124.17980424685767, 124.54641761955492, 124.85202548028222, 125.10654406389756, 125.31835105170659, 125.49450117164764, 125.64091910903052, 125.76256945356558, 125.86360463815589, 125.94749252260765, 126.01712561287873],
|
| 92 |
+
"short_factor": [0.9982316082870437, 1.033048153422584, 1.0749920956484724, 1.1255096879436193, 1.1863348602111476, 1.259543828902579, 1.3476188888731149, 1.4535223827776373, 1.5807816745852985, 1.7335856049489526, 1.9168922912975785, 2.1365471404135326, 2.3994084200118646, 2.713475511863602, 3.0880118452194134, 3.533650295140154, 4.062463396503134, 4.687974098908333, 5.425075306704039, 6.289818967956352, 7.29902962722721, 8.6357018163639, 10.210822723989212, 12.053807765671676, 14.193944598909404, 16.65780676784363, 19.463620727694074, 22.628311203524586, 26.150106147261315, 30.02526691405111, 34.23183327975347, 38.73811934094828, 43.502489489729555, 48.47627117965394, 53.61139491762471, 58.857366522037935, 64.16798299215064, 69.51359464319125, 74.86555458220285, 80.21497790341579, 85.55322183307433, 90.89611806932027, 96.26245306514224, 101.68269304046481, 107.18619510219668, 112.82253283014026, 118.63764063163615, 119.88866203644656, 120.9462882391725, 121.837565139014, 122.58663780572562, 123.2147719894291, 123.74049454862576, 124.17980424685767, 124.54641761955492, 124.85202548028222, 125.10654406389756, 125.31835105170659, 125.49450117164764, 125.64091910903052, 125.76256945356558, 125.86360463815589, 125.94749252260765, 126.01712561287873],
|
| 93 |
+
"original_max_position_embeddings": 65536
|
| 94 |
}
|
| 95 |
}
|
| 96 |
```
|
|
|
|
| 208 |
...,
|
| 209 |
"rope_scaling": {
|
| 210 |
"rope_type": "longrope",
|
| 211 |
+
"long_factor": [0.9982316082870437, 1.033048153422584, 1.0749920956484724, 1.1255096879436193, 1.1863348602111476, 1.259543828902579, 1.3476188888731149, 1.4535223827776373, 1.5807816745852985, 1.7335856049489526, 1.9168922912975785, 2.1365471404135326, 2.3994084200118646, 2.713475511863602, 3.0880118452194134, 3.533650295140154, 4.062463396503134, 4.687974098908333, 5.425075306704039, 6.289818967956352, 7.29902962722721, 8.6357018163639, 10.210822723989212, 12.053807765671676, 14.193944598909404, 16.65780676784363, 19.463620727694074, 22.628311203524586, 26.150106147261315, 30.02526691405111, 34.23183327975347, 38.73811934094828, 43.502489489729555, 48.47627117965394, 53.61139491762471, 58.857366522037935, 64.16798299215064, 69.51359464319125, 74.86555458220285, 80.21497790341579, 85.55322183307433, 90.89611806932027, 96.26245306514224, 101.68269304046481, 107.18619510219668, 112.82253283014026, 118.63764063163615, 119.88866203644656, 120.9462882391725, 121.837565139014, 122.58663780572562, 123.2147719894291, 123.74049454862576, 124.17980424685767, 124.54641761955492, 124.85202548028222, 125.10654406389756, 125.31835105170659, 125.49450117164764, 125.64091910903052, 125.76256945356558, 125.86360463815589, 125.94749252260765, 126.01712561287873],
|
| 212 |
+
"short_factor": [0.9982316082870437, 1.033048153422584, 1.0749920956484724, 1.1255096879436193, 1.1863348602111476, 1.259543828902579, 1.3476188888731149, 1.4535223827776373, 1.5807816745852985, 1.7335856049489526, 1.9168922912975785, 2.1365471404135326, 2.3994084200118646, 2.713475511863602, 3.0880118452194134, 3.533650295140154, 4.062463396503134, 4.687974098908333, 5.425075306704039, 6.289818967956352, 7.29902962722721, 8.6357018163639, 10.210822723989212, 12.053807765671676, 14.193944598909404, 16.65780676784363, 19.463620727694074, 22.628311203524586, 26.150106147261315, 30.02526691405111, 34.23183327975347, 38.73811934094828, 43.502489489729555, 48.47627117965394, 53.61139491762471, 58.857366522037935, 64.16798299215064, 69.51359464319125, 74.86555458220285, 80.21497790341579, 85.55322183307433, 90.89611806932027, 96.26245306514224, 101.68269304046481, 107.18619510219668, 112.82253283014026, 118.63764063163615, 119.88866203644656, 120.9462882391725, 121.837565139014, 122.58663780572562, 123.2147719894291, 123.74049454862576, 124.17980424685767, 124.54641761955492, 124.85202548028222, 125.10654406389756, 125.31835105170659, 125.49450117164764, 125.64091910903052, 125.76256945356558, 125.86360463815589, 125.94749252260765, 126.01712561287873],
|
| 213 |
+
"original_max_position_embeddings": 65536
|
| 214 |
}
|
| 215 |
}
|
| 216 |
```
|
|
|
|
| 263 |
from vllm import LLM, SamplingParams
|
| 264 |
|
| 265 |
model_name = "openbmb/MiniCPM4.1-8B"
|
| 266 |
+
prompt = [{"role": "user", "content": "Please recommend 5 tourist attractions in Beijing."}]
|
| 267 |
|
| 268 |
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
|
| 269 |
input_text = tokenizer.apply_chat_template(prompt, tokenize=False, add_generation_prompt=True)
|
|
|
|
| 271 |
llm = LLM(
|
| 272 |
model=model_name,
|
| 273 |
trust_remote_code=True,
|
| 274 |
+
max_num_batched_tokens=65536,
|
| 275 |
dtype="bfloat16",
|
| 276 |
gpu_memory_utilization=0.8,
|
| 277 |
)
|
| 278 |
+
sampling_params = SamplingParams(top_p=0.7, temperature=0.7, max_tokens=8192, repetition_penalty=1.02)
|
| 279 |
|
| 280 |
outputs = llm.generate(prompts=input_text, sampling_params=sampling_params)
|
| 281 |
|