Spaces:
Runtime error
Runtime error
Commit
·
7a3ba19
1
Parent(s):
8a94398
feat: update flow sample dataset
Browse files
src/distilabel_dataset_generator/apps/sft.py
CHANGED
|
@@ -175,13 +175,13 @@ with gr.Blocks(
|
|
| 175 |
|
| 176 |
gr.Markdown("## Iterate on a sample dataset")
|
| 177 |
with gr.Column() as main_ui:
|
| 178 |
-
dataset_description = gr.
|
| 179 |
label="Give a precise description of the assistant or tool. Don't describe the dataset",
|
| 180 |
value=DEFAULT_DATASET_DESCRIPTIONS[0],
|
| 181 |
)
|
| 182 |
examples = gr.Examples(
|
| 183 |
elem_id="system_prompt_examples",
|
| 184 |
-
examples=[[example] for example in DEFAULT_DATASET_DESCRIPTIONS
|
| 185 |
inputs=[dataset_description],
|
| 186 |
)
|
| 187 |
with gr.Row():
|
|
@@ -189,13 +189,13 @@ with gr.Blocks(
|
|
| 189 |
btn_generate_system_prompt = gr.Button(value="Generate sample")
|
| 190 |
gr.Column(scale=1)
|
| 191 |
|
| 192 |
-
system_prompt = gr.
|
| 193 |
label="System prompt for dataset generation. You can tune it and regenerate the sample",
|
| 194 |
value=DEFAULT_SYSTEM_PROMPTS[0],
|
| 195 |
)
|
| 196 |
|
| 197 |
with gr.Row():
|
| 198 |
-
|
| 199 |
value=DEFAULT_DATASETS[0],
|
| 200 |
label="Sample dataset. Prompts and completions truncated to 256 tokens.",
|
| 201 |
interactive=False,
|
|
@@ -217,14 +217,14 @@ with gr.Blocks(
|
|
| 217 |
).then(
|
| 218 |
fn=generate_sample_dataset,
|
| 219 |
inputs=[system_prompt],
|
| 220 |
-
outputs=[
|
| 221 |
show_progress=True,
|
| 222 |
)
|
| 223 |
|
| 224 |
btn_generate_sample_dataset.click(
|
| 225 |
fn=generate_sample_dataset,
|
| 226 |
inputs=[system_prompt],
|
| 227 |
-
outputs=[
|
| 228 |
show_progress=True,
|
| 229 |
)
|
| 230 |
|
|
@@ -302,6 +302,12 @@ with gr.Blocks(
|
|
| 302 |
def hide_success_message():
|
| 303 |
return gr.Markdown(visible=False)
|
| 304 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 305 |
btn_generate_full_dataset.click(
|
| 306 |
fn=hide_success_message,
|
| 307 |
outputs=[success_message],
|
|
|
|
| 175 |
|
| 176 |
gr.Markdown("## Iterate on a sample dataset")
|
| 177 |
with gr.Column() as main_ui:
|
| 178 |
+
dataset_description = gr.Textbox(
|
| 179 |
label="Give a precise description of the assistant or tool. Don't describe the dataset",
|
| 180 |
value=DEFAULT_DATASET_DESCRIPTIONS[0],
|
| 181 |
)
|
| 182 |
examples = gr.Examples(
|
| 183 |
elem_id="system_prompt_examples",
|
| 184 |
+
examples=[[example] for example in DEFAULT_DATASET_DESCRIPTIONS],
|
| 185 |
inputs=[dataset_description],
|
| 186 |
)
|
| 187 |
with gr.Row():
|
|
|
|
| 189 |
btn_generate_system_prompt = gr.Button(value="Generate sample")
|
| 190 |
gr.Column(scale=1)
|
| 191 |
|
| 192 |
+
system_prompt = gr.Textbox(
|
| 193 |
label="System prompt for dataset generation. You can tune it and regenerate the sample",
|
| 194 |
value=DEFAULT_SYSTEM_PROMPTS[0],
|
| 195 |
)
|
| 196 |
|
| 197 |
with gr.Row():
|
| 198 |
+
sample_dataset = gr.DataFrame(
|
| 199 |
value=DEFAULT_DATASETS[0],
|
| 200 |
label="Sample dataset. Prompts and completions truncated to 256 tokens.",
|
| 201 |
interactive=False,
|
|
|
|
| 217 |
).then(
|
| 218 |
fn=generate_sample_dataset,
|
| 219 |
inputs=[system_prompt],
|
| 220 |
+
outputs=[sample_dataset],
|
| 221 |
show_progress=True,
|
| 222 |
)
|
| 223 |
|
| 224 |
btn_generate_sample_dataset.click(
|
| 225 |
fn=generate_sample_dataset,
|
| 226 |
inputs=[system_prompt],
|
| 227 |
+
outputs=[sample_dataset],
|
| 228 |
show_progress=True,
|
| 229 |
)
|
| 230 |
|
|
|
|
| 302 |
def hide_success_message():
|
| 303 |
return gr.Markdown(visible=False)
|
| 304 |
|
| 305 |
+
sample_dataset.change(
|
| 306 |
+
fn=lambda x: x,
|
| 307 |
+
inputs=[sample_dataset],
|
| 308 |
+
outputs=[final_dataset],
|
| 309 |
+
)
|
| 310 |
+
|
| 311 |
btn_generate_full_dataset.click(
|
| 312 |
fn=hide_success_message,
|
| 313 |
outputs=[success_message],
|
src/distilabel_dataset_generator/pipelines/sft.py
CHANGED
|
@@ -117,6 +117,7 @@ User dataset description:
|
|
| 117 |
MODEL = "meta-llama/Meta-Llama-3.1-70B-Instruct"
|
| 118 |
DEFAULT_DATASET_DESCRIPTIONS = (
|
| 119 |
"assistant that solves complex math problems using python. The assistant always answers in Python to problems described in natural language",
|
|
|
|
| 120 |
"highly proficient assistant for PyTorch and CUDA expert developers to resolve complex issues",
|
| 121 |
"skilled high school math assistant who helps students solve problems",
|
| 122 |
"attentive and well-educated customer service assistant for a clothes e-commerce platform",
|
|
|
|
| 117 |
MODEL = "meta-llama/Meta-Llama-3.1-70B-Instruct"
|
| 118 |
DEFAULT_DATASET_DESCRIPTIONS = (
|
| 119 |
"assistant that solves complex math problems using python. The assistant always answers in Python to problems described in natural language",
|
| 120 |
+
"a super helpful and intelligent assistant that answers using chain of thought, analysing the question, defining the steps to solve it, reflecting and revising its assumptions before responding",
|
| 121 |
"highly proficient assistant for PyTorch and CUDA expert developers to resolve complex issues",
|
| 122 |
"skilled high school math assistant who helps students solve problems",
|
| 123 |
"attentive and well-educated customer service assistant for a clothes e-commerce platform",
|