Improve template pipeline
Browse files
src/distilabel_dataset_generator/apps/sft.py
CHANGED
|
@@ -150,6 +150,10 @@ from distilabel.llms import InferenceEndpointsLLM
|
|
| 150 |
|
| 151 |
MODEL = "{MODEL}"
|
| 152 |
SYSTEM_PROMPT = "{system_prompt}"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
|
| 154 |
with Pipeline(name="sft") as pipeline:
|
| 155 |
magpie = MagpieGenerator(
|
|
@@ -170,8 +174,8 @@ with Pipeline(name="sft") as pipeline:
|
|
| 170 |
],
|
| 171 |
}}
|
| 172 |
),
|
| 173 |
-
n_turns=
|
| 174 |
-
num_rows=
|
| 175 |
system_prompt=SYSTEM_PROMPT,
|
| 176 |
)
|
| 177 |
|
|
|
|
| 150 |
|
| 151 |
MODEL = "{MODEL}"
|
| 152 |
SYSTEM_PROMPT = "{system_prompt}"
|
| 153 |
+
# increase this to generate multi-turn conversations
|
| 154 |
+
NUM_TURNS = 1
|
| 155 |
+
# increase this to generate a larger dataset
|
| 156 |
+
NUM_ROWS = 100
|
| 157 |
|
| 158 |
with Pipeline(name="sft") as pipeline:
|
| 159 |
magpie = MagpieGenerator(
|
|
|
|
| 174 |
],
|
| 175 |
}}
|
| 176 |
),
|
| 177 |
+
n_turns=NUM_TURNS,
|
| 178 |
+
num_rows=NUM_ROWS,
|
| 179 |
system_prompt=SYSTEM_PROMPT,
|
| 180 |
)
|
| 181 |
|