Update src/distilabel_dataset_generator/pipelines/sft.py
Browse files
src/distilabel_dataset_generator/pipelines/sft.py
CHANGED
|
@@ -114,7 +114,7 @@ The prompt you write should follow the same style and structure as the following
|
|
| 114 |
User dataset description:
|
| 115 |
"""
|
| 116 |
|
| 117 |
-
MODEL = "meta-llama/Meta-Llama-3.1-70B-Instruct"
|
| 118 |
DEFAULT_DATASET_DESCRIPTIONS = (
|
| 119 |
"A chemistry dataset for an assistant that explains chemical reactions and formulas.",
|
| 120 |
"A dataset for an assistant that work in the customer support domain.",
|
|
@@ -207,7 +207,7 @@ def get_pipeline(num_turns, num_rows, system_prompt):
|
|
| 207 |
generation_kwargs={
|
| 208 |
"temperature": 0.8, # it's the best value for Llama 3.1 70B Instruct
|
| 209 |
"do_sample": True,
|
| 210 |
-
"max_new_tokens":
|
| 211 |
"stop_sequences": _STOP_SEQUENCES,
|
| 212 |
},
|
| 213 |
),
|
|
@@ -224,7 +224,7 @@ def get_pipeline(num_turns, num_rows, system_prompt):
|
|
| 224 |
model_id=MODEL,
|
| 225 |
tokenizer_id=MODEL,
|
| 226 |
api_key=api_key,
|
| 227 |
-
generation_kwargs={"temperature": 0.8, "max_new_tokens":
|
| 228 |
),
|
| 229 |
system_prompt=system_prompt,
|
| 230 |
output_mappings={"generation": "completion"},
|
|
|
|
| 114 |
User dataset description:
|
| 115 |
"""
|
| 116 |
|
| 117 |
+
MODEL = "meta-llama/Meta-Llama-3.1-8B-Instruct"
|
| 118 |
DEFAULT_DATASET_DESCRIPTIONS = (
|
| 119 |
"A chemistry dataset for an assistant that explains chemical reactions and formulas.",
|
| 120 |
"A dataset for an assistant that work in the customer support domain.",
|
|
|
|
| 207 |
generation_kwargs={
|
| 208 |
"temperature": 0.8, # it's the best value for Llama 3.1 70B Instruct
|
| 209 |
"do_sample": True,
|
| 210 |
+
"max_new_tokens": 256,
|
| 211 |
"stop_sequences": _STOP_SEQUENCES,
|
| 212 |
},
|
| 213 |
),
|
|
|
|
| 224 |
model_id=MODEL,
|
| 225 |
tokenizer_id=MODEL,
|
| 226 |
api_key=api_key,
|
| 227 |
+
generation_kwargs={"temperature": 0.8, "max_new_tokens": 256},
|
| 228 |
),
|
| 229 |
system_prompt=system_prompt,
|
| 230 |
output_mappings={"generation": "completion"},
|