# NOTE(review): the three lines below the shebang's position were page residue
# from the Hugging Face Spaces UI ("Spaces:" header and a "Build error" status
# badge captured twice). They are not source code; kept here as comments so the
# file parses:
# Spaces:
# Build error
# Build error
| #!/usr/bin/env python3 | |
| """ | |
| Hugging Face Spaces App for LoRA Fine-tuning | |
| Provides a Gradio UI for training Nemotron-3-8B on HF infrastructure | |
| """ | |
| import os | |
| import gradio as gr | |
| import torch | |
| from datetime import datetime | |
| from datasets import load_dataset | |
| from transformers import ( | |
| AutoModelForCausalLM, | |
| AutoTokenizer, | |
| BitsAndBytesConfig, | |
| TrainingArguments, | |
| Trainer, | |
| DataCollatorForLanguageModeling, | |
| TrainerCallback | |
| ) | |
| from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training | |
| from huggingface_hub import HfApi, create_repo, upload_folder | |
| import threading | |
class ProgressCallback(TrainerCallback):
    """Trainer callback that forwards training progress to a UI callback.

    Parameters
    ----------
    progress_callback : callable
        Invoked as ``progress_callback(fraction, message)`` where ``fraction``
        is a float in ``[0, 1]`` and ``message`` is a human-readable status.
    """

    def __init__(self, progress_callback):
        self.progress_callback = progress_callback
        self.current_step = 0   # last global step seen
        self.total_steps = 0    # resolved max_steps, set at train begin

    def on_train_begin(self, args, state, control, **kwargs):
        # max_steps is computed by the Trainer before training starts.
        self.total_steps = state.max_steps
        self.progress_callback(0, f"Starting training for {self.total_steps} steps...")

    def on_step_end(self, args, state, control, **kwargs):
        self.current_step = state.global_step
        progress = self.current_step / self.total_steps if self.total_steps > 0 else 0
        # BUG FIX: the original did state.log_history[-1].get('loss', 'N/A')
        # inside an f-string with the ':.4f' spec. That (a) raises IndexError
        # while log_history is still empty (every step before the first
        # logging event, since logging_steps=10), and (b) raises ValueError
        # when the fallback string 'N/A' is formatted with '.4f'. Extract the
        # loss defensively and only apply the float format when it is numeric.
        loss = state.log_history[-1].get("loss") if state.log_history else None
        loss_text = f"{loss:.4f}" if isinstance(loss, (int, float)) else "N/A"
        self.progress_callback(
            progress,
            f"Step {self.current_step}/{self.total_steps} | Loss: {loss_text}"
        )

    def on_train_end(self, args, state, control, **kwargs):
        self.progress_callback(1.0, "Training completed!")
def train_model(
    model_name,
    dataset_name,
    hf_token,
    output_repo_name,
    lora_r,
    lora_alpha,
    learning_rate,
    num_epochs,
    batch_size,
    gradient_accumulation_steps,
    max_seq_length,
    use_4bit,
    push_to_hub,
    progress=gr.Progress()
):
    """Fine-tune a causal LM with LoRA and report progress to the Gradio UI.

    Parameters mirror the UI widgets defined below (model/dataset identifiers,
    HF token, output repo name, LoRA and optimizer hyperparameters, and the
    4-bit / push-to-hub toggles). ``progress`` is injected by Gradio.

    Returns a human-readable status string; this function never raises —
    all errors are reported through the returned message.
    """
    try:
        # --- Input validation -------------------------------------------
        if not model_name or not dataset_name:
            return "Error: Model name and dataset name are required!"
        if push_to_hub and not hf_token:
            return "Error: HF token required to push to hub!"

        # Make the token visible to downstream HF library calls that read
        # the environment rather than taking an explicit token argument.
        if hf_token:
            os.environ["HF_TOKEN"] = hf_token

        output_dir = f"./outputs/{output_repo_name or 'lora-model'}"

        def update_progress(value, message):
            # Bridge to the Gradio progress bar (and the ProgressCallback).
            progress(value, desc=message)

        # --- Tokenizer ---------------------------------------------------
        update_progress(0.05, "Loading tokenizer...")
        tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, token=hf_token)
        if tokenizer.pad_token is None:
            # Causal LMs often ship without a pad token; reuse EOS so that
            # padding="max_length" below works.
            tokenizer.pad_token = tokenizer.eos_token
            tokenizer.pad_token_id = tokenizer.eos_token_id

        # --- Quantization -------------------------------------------------
        bnb_config = None
        if use_4bit:
            update_progress(0.1, "Configuring 4-bit quantization...")
            bnb_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_use_double_quant=True,
                bnb_4bit_quant_type="nf4",
                bnb_4bit_compute_dtype=torch.bfloat16
            )

        # --- Base model ---------------------------------------------------
        update_progress(0.15, "Loading model...")
        # BUG FIX: the original loaded non-quantized models in float16 while
        # TrainingArguments below unconditionally sets bf16=True (and the
        # 4-bit compute dtype is bfloat16). Mixing fp16 weights with bf16
        # training is inconsistent; load bfloat16 in both paths so the model
        # dtype matches the trainer configuration.
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            quantization_config=bnb_config,
            device_map="auto",
            trust_remote_code=True,
            torch_dtype=torch.bfloat16,
            token=hf_token
        )
        if use_4bit:
            # Casts norms/embeddings and enables input grads for k-bit training.
            model = prepare_model_for_kbit_training(model)

        # --- LoRA adapters ------------------------------------------------
        update_progress(0.2, "Configuring LoRA...")
        lora_config = LoraConfig(
            r=lora_r,
            lora_alpha=lora_alpha,
            # All attention + MLP projections of a Llama-style architecture.
            target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
            lora_dropout=0.05,
            bias="none",
            task_type="CAUSAL_LM"
        )
        model = get_peft_model(model, lora_config)

        trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
        all_params = sum(p.numel() for p in model.parameters())
        trainable_percent = 100 * trainable_params / all_params
        update_progress(0.25, f"LoRA setup complete. Trainable: {trainable_percent:.2f}%")

        # --- Dataset loading + normalization ------------------------------
        update_progress(0.3, f"Loading dataset: {dataset_name}")
        dataset = load_dataset(dataset_name, token=hf_token)

        def format_instruction(example):
            # Normalize heterogeneous dataset schemas into a single "text"
            # field, trying the known column pairs in priority order.
            if 'question' in example and 'answer' in example:
                text = f"### Question:\n{example['question']}\n\n### Answer:\n{example['answer']}"
            elif 'instruction' in example and 'response' in example:
                text = f"### Instruction:\n{example['instruction']}\n\n### Response:\n{example['response']}"
            elif 'text' in example:
                text = example['text']
            else:
                # Fallback: pair the first two string columns as input/output.
                keys = [k for k in example.keys() if isinstance(example[k], str)]
                if len(keys) >= 2:
                    text = f"### Input:\n{example[keys[0]]}\n\n### Output:\n{example[keys[1]]}"
                else:
                    text = str(example)
            return {"text": text}

        update_progress(0.35, "Formatting dataset...")
        # DatasetDict exposes column_names per split; a bare Dataset directly.
        column_names = dataset['train'].column_names if 'train' in dataset else dataset.column_names
        dataset = dataset.map(format_instruction, remove_columns=column_names)

        # --- Tokenization -------------------------------------------------
        def tokenize_function(examples):
            outputs = tokenizer(
                examples["text"],
                truncation=True,
                max_length=max_seq_length,
                padding="max_length",
                return_tensors=None
            )
            # Standard causal-LM objective: labels are the inputs themselves;
            # the collator/model handle the shift internally.
            outputs["labels"] = outputs["input_ids"].copy()
            return outputs

        update_progress(0.4, "Tokenizing dataset...")
        tokenized_dataset = dataset.map(
            tokenize_function,
            batched=True,
            remove_columns=["text"],
            desc="Tokenizing"
        )

        # --- Train/eval split --------------------------------------------
        if 'train' in tokenized_dataset:
            train_dataset = tokenized_dataset['train']
            eval_dataset = tokenized_dataset.get('validation', None)
        else:
            # No named splits: carve out 10% for evaluation.
            split = tokenized_dataset.train_test_split(test_size=0.1)
            train_dataset = split['train']
            eval_dataset = split['test']

        # --- Trainer configuration ---------------------------------------
        update_progress(0.45, "Setting up training configuration...")
        training_args = TrainingArguments(
            output_dir=output_dir,
            num_train_epochs=num_epochs,
            per_device_train_batch_size=batch_size,
            gradient_accumulation_steps=gradient_accumulation_steps,
            learning_rate=learning_rate,
            warmup_ratio=0.03,
            logging_steps=10,
            save_steps=100,
            save_total_limit=2,
            fp16=False,
            bf16=True,  # matches the bfloat16 model load above
            # Paged 8-bit AdamW pairs with bitsandbytes quantization.
            optim="paged_adamw_8bit" if use_4bit else "adamw_torch",
            gradient_checkpointing=True,
            max_grad_norm=0.3,
            lr_scheduler_type="cosine",
            report_to="none",
            save_strategy="steps",
            group_by_length=True,
            push_to_hub=push_to_hub,
            hub_token=hf_token if push_to_hub else None,
            hub_model_id=output_repo_name if push_to_hub else None,
        )

        # mlm=False -> causal-LM collation (no masked-token objective).
        data_collator = DataCollatorForLanguageModeling(
            tokenizer=tokenizer,
            mlm=False
        )

        update_progress(0.5, "Initializing trainer...")
        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=eval_dataset,
            data_collator=data_collator,
            tokenizer=tokenizer,
            callbacks=[ProgressCallback(update_progress)]
        )

        # --- Train + save -------------------------------------------------
        update_progress(0.55, f"Starting training on {len(train_dataset)} examples...")
        trainer.train()

        update_progress(0.95, "Saving model...")
        model.save_pretrained(output_dir)      # saves LoRA adapter weights
        tokenizer.save_pretrained(output_dir)

        # --- Optional hub upload -----------------------------------------
        if push_to_hub and hf_token:
            update_progress(0.97, "Pushing to Hugging Face Hub...")
            try:
                api = HfApi(token=hf_token)
                repo_id = output_repo_name
                try:
                    create_repo(repo_id, token=hf_token, exist_ok=True)
                except Exception:
                    # BUG FIX: was a bare `except:` which also swallows
                    # KeyboardInterrupt/SystemExit. Repo creation remains
                    # best-effort (exist_ok already tolerates an existing
                    # repo; the upload below will surface real auth errors).
                    pass
                api.upload_folder(
                    folder_path=output_dir,
                    repo_id=repo_id,
                    token=hf_token
                )
                update_progress(1.0, "Training complete! Model pushed to hub.")
                return f"β Training complete!\n\nModel saved to: {output_dir}\nUploaded to: https://huggingface.co/{repo_id}\n\nTrainable parameters: {trainable_percent:.2f}%"
            except Exception as e:
                return f"β οΈ Training complete but upload failed: {str(e)}\n\nModel saved locally to: {output_dir}"

        update_progress(1.0, "Training complete!")
        return f"β Training complete!\n\nModel saved to: {output_dir}\n\nTrainable parameters: {trainable_percent:.2f}%"
    except Exception as e:
        # Top-level boundary: report the failure to the UI as a string.
        return f"β Error during training: {str(e)}"
# Gradio Interface
# Declarative UI: left column holds model/dataset/auth inputs, right column
# the LoRA and optimizer hyperparameters; the button wires all widgets into
# train_model above (input order must match train_model's parameter order).
with gr.Blocks(title="LoRA Fine-tuning on Hugging Face", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # π LoRA Fine-tuning for Nemotron-3-8B
    Train your model on Hugging Face infrastructure with PEFT and 4-bit quantization
    """)
    with gr.Row():
        with gr.Column():
            gr.Markdown("### Model & Dataset Configuration")
            model_name = gr.Textbox(
                label="Base Model",
                value="nvidia/nemotron-3-8b-base-4k",
                placeholder="nvidia/nemotron-3-8b-base-4k"
            )
            dataset_name = gr.Textbox(
                label="Dataset",
                value="crazycog/linux-sysadmin-qa-askhole-v1",
                placeholder="username/dataset-name"
            )
            # type="password" masks the token in the UI.
            hf_token = gr.Textbox(
                label="HuggingFace Token (optional, for private datasets/pushing to hub)",
                type="password",
                placeholder="hf_..."
            )
            output_repo_name = gr.Textbox(
                label="Output Repository Name",
                value="nemotron-lora-sysadmin",
                placeholder="username/model-name"
            )
        with gr.Column():
            gr.Markdown("### Training Configuration")
            use_4bit = gr.Checkbox(label="Use 4-bit Quantization", value=True)
            lora_r = gr.Slider(minimum=4, maximum=64, value=16, step=4, label="LoRA Rank (r)")
            lora_alpha = gr.Slider(minimum=8, maximum=128, value=32, step=8, label="LoRA Alpha")
            learning_rate = gr.Number(label="Learning Rate", value=2e-4)
            num_epochs = gr.Slider(minimum=1, maximum=10, value=3, step=1, label="Number of Epochs")
            batch_size = gr.Slider(minimum=1, maximum=16, value=4, step=1, label="Batch Size")
            gradient_accumulation_steps = gr.Slider(minimum=1, maximum=16, value=4, step=1, label="Gradient Accumulation Steps")
            max_seq_length = gr.Slider(minimum=512, maximum=4096, value=2048, step=512, label="Max Sequence Length")
            push_to_hub = gr.Checkbox(label="Push to Hugging Face Hub", value=False)
    train_btn = gr.Button("π Start Training", variant="primary", size="lg")
    output = gr.Textbox(label="Training Status", lines=10, interactive=False)
    # Wire the button to the training function; the gr.Progress parameter of
    # train_model is injected automatically and is NOT listed in `inputs`.
    train_btn.click(
        fn=train_model,
        inputs=[
            model_name,
            dataset_name,
            hf_token,
            output_repo_name,
            lora_r,
            lora_alpha,
            learning_rate,
            num_epochs,
            batch_size,
            gradient_accumulation_steps,
            max_seq_length,
            use_4bit,
            push_to_hub
        ],
        outputs=output
    )
    gr.Markdown("""
    ### π Notes
    - Training requires GPU access (upgrade your Space to GPU)
    - 4-bit quantization recommended for 24GB VRAM
    - HF token required for private datasets or pushing to hub
    - Training time depends on dataset size and configuration
    """)

# Script entry point: launch the Gradio server (Spaces calls this on boot).
if __name__ == "__main__":
    demo.launch()