Spaces:
Runtime error
Runtime error
menouar
committed on
Commit
·
e75ffde
1
Parent(s):
592b663
Update the generated Notebook to push properly to HF
Browse files- app.py +32 -2
- utils/__init__.py +6 -0
- utils/components_creator.py +13 -5
- utils/notebook_generator.py +30 -3
app.py
CHANGED
|
@@ -68,6 +68,10 @@ def change_model_selection(model_id):
|
|
| 68 |
return None
|
| 69 |
|
| 70 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
def check_valid_input(value):
|
| 72 |
if isinstance(value, str):
|
| 73 |
return value and value.strip()
|
|
@@ -186,10 +190,12 @@ def generate_code(components: dict[Component, Any]):
|
|
| 186 |
|
| 187 |
create_merge_lora_cells(notebook['cells'], output_dir)
|
| 188 |
|
|
|
|
|
|
|
| 189 |
if push_to_hub:
|
| 190 |
if not should_login:
|
| 191 |
create_login_hf_cells(notebook['cells'])
|
| 192 |
-
push_merged_model_cells(notebook['cells'], output_dir)
|
| 193 |
|
| 194 |
file_name = f"{finetuning_notebook}.ipynb"
|
| 195 |
|
|
@@ -279,7 +285,10 @@ with gr.Blocks(css=css, theme=gr.themes.Soft(text_size='lg', font=["monospace"],
|
|
| 279 |
gr.HTML("<h2 style='text-align: center;'>Outputs</h2>")
|
| 280 |
with gr.Row():
|
| 281 |
with centered_column():
|
| 282 |
-
|
|
|
|
|
|
|
|
|
|
| 283 |
with centered_column():
|
| 284 |
all_components.update(add_outputs1())
|
| 285 |
|
|
@@ -308,4 +317,25 @@ with gr.Blocks(css=css, theme=gr.themes.Soft(text_size='lg', font=["monospace"],
|
|
| 308 |
outputs=version_selection
|
| 309 |
)
|
| 310 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 311 |
demo.launch(allowed_paths=["/"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
return None
|
| 69 |
|
| 70 |
|
| 71 |
+
def display_push_type(value):
    """Build the Radio update that shows or hides the push-type selector.

    Used as the ``change`` handler of the push_to_hub component, with the
    push-type radio as its output.

    Args:
        value: Current value of the push_to_hub component; forwarded
            unchanged as the radio's ``visible`` flag.

    Returns:
        A ``gr.Radio`` carrying only the new visibility setting.
    """
    radio_update = gr.Radio(visible=value)
    return radio_update
|
| 73 |
+
|
| 74 |
+
|
| 75 |
def check_valid_input(value):
|
| 76 |
if isinstance(value, str):
|
| 77 |
return value and value.strip()
|
|
|
|
| 190 |
|
| 191 |
create_merge_lora_cells(notebook['cells'], output_dir)
|
| 192 |
|
| 193 |
+
push_type_value = get_value(components, PUSH_TYPE_ID)
|
| 194 |
+
|
| 195 |
if push_to_hub:
|
| 196 |
if not should_login:
|
| 197 |
create_login_hf_cells(notebook['cells'])
|
| 198 |
+
push_merged_model_cells(notebook['cells'], output_dir, push_type_value)
|
| 199 |
|
| 200 |
file_name = f"{finetuning_notebook}.ipynb"
|
| 201 |
|
|
|
|
| 285 |
gr.HTML("<h2 style='text-align: center;'>Outputs</h2>")
|
| 286 |
with gr.Row():
|
| 287 |
with centered_column():
|
| 288 |
+
output_dir_cmp, push_to_hub_cmp = add_outputs()
|
| 289 |
+
all_components.update({output_dir_cmp, push_to_hub_cmp})
|
| 290 |
+
push_type_cmp = add_push_type_cmp()
|
| 291 |
+
all_components.update({push_type_cmp})
|
| 292 |
with centered_column():
|
| 293 |
all_components.update(add_outputs1())
|
| 294 |
|
|
|
|
| 317 |
outputs=version_selection
|
| 318 |
)
|
| 319 |
|
| 320 |
+
push_to_hub_cmp.change(
|
| 321 |
+
fn=display_push_type,
|
| 322 |
+
inputs=push_to_hub_cmp,
|
| 323 |
+
outputs=push_type_cmp
|
| 324 |
+
)
|
| 325 |
+
|
| 326 |
demo.launch(allowed_paths=["/"])
|
| 327 |
+
|
| 328 |
+
# Draft: upload the TensorBoard metrics to the Hub (kept in a string literal below, so it is not executed).
|
| 329 |
+
"""
|
| 330 |
+
import os
|
| 331 |
+
from huggingface_hub import Repository
|
| 332 |
+
|
| 333 |
+
# Create a repository object
|
| 334 |
+
repo = Repository("Menouar/ft-phi-1")
|
| 335 |
+
|
| 336 |
+
# Push the runs directory
|
| 337 |
+
os.system(f"git -C {repo.local_dir} add output_dir/runs")
|
| 338 |
+
repo.git_commit("Adding TensorBoard logs")
|
| 339 |
+
repo.push_to_hub(commit_message="Adding TensorBoard logs")
|
| 340 |
+
|
| 341 |
+
"""
|
utils/__init__.py
CHANGED
|
@@ -32,8 +32,14 @@ LEARNING_RATE_ID = "learning_rate"
|
|
| 32 |
MAX_GRAD_NORM_ID = "max_grad_norm"
|
| 33 |
WARMUP_RATIO_ID = "warmup_ratio"
|
| 34 |
LR_SCHEDULER_TYPE_ID = "lr_scheduler_type"
|
|
|
|
| 35 |
OUTPUT_DIR_ID = "output_dir"
|
|
|
|
| 36 |
PUSH_TO_HUB_ID = "push_to_hub"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
REPORT_TO_ID = "report_to"
|
| 38 |
|
| 39 |
MAX_SEQ_LENGTH_ID = "max_seq_length"
|
|
|
|
| 32 |
MAX_GRAD_NORM_ID = "max_grad_norm"
|
| 33 |
WARMUP_RATIO_ID = "warmup_ratio"
|
| 34 |
LR_SCHEDULER_TYPE_ID = "lr_scheduler_type"
|
| 35 |
+
|
| 36 |
OUTPUT_DIR_ID = "output_dir"
|
| 37 |
+
|
| 38 |
PUSH_TO_HUB_ID = "push_to_hub"
|
| 39 |
+
PUSH_TYPE_ID = "push_type"
|
| 40 |
+
PUSH_TYPES_ALL = "Push all the outputs"
|
| 41 |
+
PUSH_TYPES_ONLY_MODEL = "Push only the Model and Tokenizer"
|
| 42 |
+
|
| 43 |
REPORT_TO_ID = "report_to"
|
| 44 |
|
| 45 |
MAX_SEQ_LENGTH_ID = "max_seq_length"
|
utils/components_creator.py
CHANGED
|
@@ -167,7 +167,7 @@ def add_training_args_3() -> Set[Component]:
|
|
| 167 |
return out_components
|
| 168 |
|
| 169 |
|
| 170 |
-
def add_outputs() ->
|
| 171 |
output_dir = gr.Textbox(interactive=True,
|
| 172 |
label="output_dir",
|
| 173 |
info='The output directory where the model predictions and checkpoints will be written.',
|
|
@@ -178,10 +178,18 @@ def add_outputs() -> Set[Component]:
|
|
| 178 |
"True, you must specify 'HF_TOKEN'.",
|
| 179 |
elem_id=PUSH_TO_HUB_ID)
|
| 180 |
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
|
| 186 |
|
| 187 |
def add_outputs1() -> Set[Component]:
|
|
|
|
| 167 |
return out_components
|
| 168 |
|
| 169 |
|
| 170 |
+
def add_outputs() -> (Component, Component):
|
| 171 |
output_dir = gr.Textbox(interactive=True,
|
| 172 |
label="output_dir",
|
| 173 |
info='The output directory where the model predictions and checkpoints will be written.',
|
|
|
|
| 178 |
"True, you must specify 'HF_TOKEN'.",
|
| 179 |
elem_id=PUSH_TO_HUB_ID)
|
| 180 |
|
| 181 |
+
return output_dir, push_to_hub
|
| 182 |
+
|
| 183 |
+
|
| 184 |
+
def add_push_type_cmp() -> Component:
    """Create the radio that selects which outputs are pushed.

    The radio offers ``PUSH_TYPES_ONLY_MODEL`` and ``PUSH_TYPES_ALL``,
    defaults to ``PUSH_TYPES_ALL``, and is created hidden
    (``visible=False``).

    Returns:
        The configured ``gr.Radio``, identified by ``PUSH_TYPE_ID``.
    """
    # Order matters for display: "only model" is listed first.
    push_choices = [PUSH_TYPES_ONLY_MODEL, PUSH_TYPES_ALL]
    return gr.Radio(
        push_choices,
        label="Output Push Option",
        info="Select whether to push only the Model and Tokenizer or all the outputs.",
        interactive=True,
        visible=False,
        value=PUSH_TYPES_ALL,
        elem_id=PUSH_TYPE_ID,
    )
|
| 193 |
|
| 194 |
|
| 195 |
def add_outputs1() -> Set[Component]:
|
utils/notebook_generator.py
CHANGED
|
@@ -2,7 +2,7 @@ from typing import Optional
|
|
| 2 |
|
| 3 |
import nbformat as nbf
|
| 4 |
|
| 5 |
-
from utils import FTDataSet
|
| 6 |
|
| 7 |
|
| 8 |
def create_install_libraries_cells(cells: list):
|
|
@@ -389,7 +389,7 @@ merged_model.save_pretrained("{output_dir}", safe_serialization=True, max_shard_
|
|
| 389 |
cells.append(code_cell)
|
| 390 |
|
| 391 |
|
| 392 |
-
def push_merged_model_cells(cells: list, output_dir):
|
| 393 |
text_cell = nbf.v4.new_markdown_cell(
|
| 394 |
"""### Push the Merged model as well as the Tokenizer to HF hub""")
|
| 395 |
|
|
@@ -398,6 +398,33 @@ merged_model.push_to_hub("{output_dir}", use_temp_dir=False)
|
|
| 398 |
|
| 399 |
tokenizer.push_to_hub("{output_dir}", use_temp_dir=False)
|
| 400 |
"""
|
| 401 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 402 |
cells.append(text_cell)
|
| 403 |
cells.append(code_cell)
|
|
|
|
| 2 |
|
| 3 |
import nbformat as nbf
|
| 4 |
|
| 5 |
+
from utils import FTDataSet, PUSH_TYPES_ONLY_MODEL
|
| 6 |
|
| 7 |
|
| 8 |
def create_install_libraries_cells(cells: list):
|
|
|
|
| 389 |
cells.append(code_cell)
|
| 390 |
|
| 391 |
|
| 392 |
+
def push_merged_model_cells(cells: list, output_dir, push_type_value):
|
| 393 |
text_cell = nbf.v4.new_markdown_cell(
|
| 394 |
"""### Push the Merged model as well as the Tokenizer to HF hub""")
|
| 395 |
|
|
|
|
| 398 |
|
| 399 |
tokenizer.push_to_hub("{output_dir}", use_temp_dir=False)
|
| 400 |
"""
|
| 401 |
+
|
| 402 |
+
code_all = f"""
|
| 403 |
+
|
| 404 |
+
from huggingface_hub import HfApi, HfFolder
|
| 405 |
+
|
| 406 |
+
# Instantiate the HfApi class
|
| 407 |
+
api = HfApi()
|
| 408 |
+
|
| 409 |
+
# Your Hugging Face repository
|
| 410 |
+
repo_name = "Menouar/test"
|
| 411 |
+
|
| 412 |
+
# Create a repository on the Hugging Face Hub
|
| 413 |
+
api.create_repo(token=HfFolder.get_token(), name=repo_name, repo_type="model")
|
| 414 |
+
|
| 415 |
+
# Path to your local folder
|
| 416 |
+
folder_path = "{output_dir}"
|
| 417 |
+
|
| 418 |
+
# Create a repository object
|
| 419 |
+
repo = Repository(local_dir=folder_path, clone_from=repo_name)
|
| 420 |
+
|
| 421 |
+
# Commit and push your changes
|
| 422 |
+
repo.git_add(commit_message="Initial commit", git_push=True)
|
| 423 |
+
"""
|
| 424 |
+
|
| 425 |
+
if push_type_value == PUSH_TYPES_ONLY_MODEL:
|
| 426 |
+
code_cell = nbf.v4.new_code_cell(code)
|
| 427 |
+
else:
|
| 428 |
+
code_cell = nbf.v4.new_code_cell(code_all)
|
| 429 |
cells.append(text_cell)
|
| 430 |
cells.append(code_cell)
|