Commit
·
55add14
1
Parent(s):
7fcc088
chore: add token to env var
Browse files
src/distilabel_dataset_generator/sft.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
import multiprocessing
|
|
|
|
| 2 |
|
| 3 |
import gradio as gr
|
| 4 |
import pandas as pd
|
|
@@ -122,6 +123,7 @@ MODEL = "meta-llama/Meta-Llama-3.1-70B-Instruct"
|
|
| 122 |
def _run_pipeline(
|
| 123 |
result_queue, _num_turns, _num_rows, _system_prompt, _token: OAuthToken = None
|
| 124 |
):
|
|
|
|
| 125 |
with Pipeline(name="sft") as pipeline:
|
| 126 |
magpie_step = MagpieGenerator(
|
| 127 |
llm=InferenceEndpointsLLM(
|
|
@@ -142,6 +144,7 @@ def _run_pipeline(
|
|
| 142 |
|
| 143 |
|
| 144 |
def _generate_system_prompt(_dataset_description, _token: OAuthToken = None):
|
|
|
|
| 145 |
generate_description = TextGeneration(
|
| 146 |
llm=InferenceEndpointsLLM(
|
| 147 |
model_id=MODEL,
|
|
@@ -185,6 +188,7 @@ def _generate_dataset(
|
|
| 185 |
distiset = result_queue.get()
|
| 186 |
|
| 187 |
if _dataset_name is not None:
|
|
|
|
| 188 |
gr.Info("Pushing dataset to Hugging Face Hub...")
|
| 189 |
distiset.push_to_hub(
|
| 190 |
repo_id=_dataset_name,
|
|
|
|
| 1 |
import multiprocessing
|
| 2 |
+
import os
|
| 3 |
|
| 4 |
import gradio as gr
|
| 5 |
import pandas as pd
|
|
|
|
| 123 |
def _run_pipeline(
|
| 124 |
result_queue, _num_turns, _num_rows, _system_prompt, _token: OAuthToken = None
|
| 125 |
):
|
| 126 |
+
os.environ["HF_TOKEN"] = _token.token
|
| 127 |
with Pipeline(name="sft") as pipeline:
|
| 128 |
magpie_step = MagpieGenerator(
|
| 129 |
llm=InferenceEndpointsLLM(
|
|
|
|
| 144 |
|
| 145 |
|
| 146 |
def _generate_system_prompt(_dataset_description, _token: OAuthToken = None):
|
| 147 |
+
os.environ["HF_TOKEN"] = _token.token
|
| 148 |
generate_description = TextGeneration(
|
| 149 |
llm=InferenceEndpointsLLM(
|
| 150 |
model_id=MODEL,
|
|
|
|
| 188 |
distiset = result_queue.get()
|
| 189 |
|
| 190 |
if _dataset_name is not None:
|
| 191 |
+
os.environ["HF_TOKEN"] = _token.token
|
| 192 |
gr.Info("Pushing dataset to Hugging Face Hub...")
|
| 193 |
distiset.push_to_hub(
|
| 194 |
repo_id=_dataset_name,
|