#!/usr/bin/env python3
"""
Hugging Face Spaces App for LoRA Fine-tuning
Provides a Gradio UI for training Nemotron-3-8B on HF infrastructure
"""
import os
import gradio as gr
import torch
from datasets import load_dataset
from transformers import (
AutoModelForCausalLM,
AutoTokenizer,
BitsAndBytesConfig,
TrainingArguments,
Trainer,
DataCollatorForLanguageModeling,
TrainerCallback
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from huggingface_hub import HfApi, create_repo
class ProgressCallback(TrainerCallback):
"""Custom callback to track training progress"""
def __init__(self, progress_callback):
self.progress_callback = progress_callback
self.current_step = 0
self.total_steps = 0
def on_train_begin(self, args, state, control, **kwargs):
self.total_steps = state.max_steps
self.progress_callback(0, f"Starting training for {self.total_steps} steps...")
    def on_step_end(self, args, state, control, **kwargs):
        self.current_step = state.global_step
        progress = self.current_step / self.total_steps if self.total_steps > 0 else 0
        # log_history is empty until the first logging step and may not contain
        # a loss entry, so guard before formatting it.
        last_loss = state.log_history[-1].get("loss") if state.log_history else None
        loss_str = f"{last_loss:.4f}" if last_loss is not None else "N/A"
        self.progress_callback(
            progress,
            f"Step {self.current_step}/{self.total_steps} | Loss: {loss_str}"
        )
def on_train_end(self, args, state, control, **kwargs):
self.progress_callback(1.0, "Training completed!")
def train_model(
model_name,
dataset_name,
hf_token,
output_repo_name,
lora_r,
lora_alpha,
learning_rate,
num_epochs,
batch_size,
gradient_accumulation_steps,
max_seq_length,
use_4bit,
push_to_hub,
progress=gr.Progress()
):
"""Main training function with progress tracking"""
try:
# Validate inputs
if not model_name or not dataset_name:
return "Error: Model name and dataset name are required!"
if push_to_hub and not hf_token:
return "Error: HF token required to push to hub!"
# Set HF token
if hf_token:
os.environ["HF_TOKEN"] = hf_token
output_dir = f"./outputs/{output_repo_name or 'lora-model'}"
# Progress tracking
def update_progress(value, message):
progress(value, desc=message)
# Load tokenizer
update_progress(0.05, "Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, token=hf_token)
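        # Many base-model tokenizers ship without a pad token; reuse EOS so the
        # fixed-length padding used during tokenization works.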
if tokenizer.pad_token is None:
tokenizer.pad_token = tokenizer.eos_token
tokenizer.pad_token_id = tokenizer.eos_token_id
# Configure quantization
bnb_config = None
if use_4bit:
update_progress(0.1, "Configuring 4-bit quantization...")
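        # NF4 with double quantization stores the frozen base weights at roughly
        # 4 bits per parameter while matmuls run in bfloat16, cutting the memory
        # needed for the 8B base model to roughly a quarter of full precision.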
bnb_config = BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_use_double_quant=True,
bnb_4bit_quant_type="nf4",
bnb_4bit_compute_dtype=torch.bfloat16
)
# Load model
update_progress(0.15, "Loading model...")
model = AutoModelForCausalLM.from_pretrained(
model_name,
quantization_config=bnb_config,
device_map="auto",
trust_remote_code=True,
            torch_dtype=torch.bfloat16,  # keep dtype consistent with bf16=True in TrainingArguments below
token=hf_token
)
if use_4bit:
model = prepare_model_for_kbit_training(model)
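        # Freezes the quantized weights, upcasts norm layers to fp32 for
        # stability, and enables input gradients so gradient checkpointing
        # works with the LoRA adapters.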
# Setup LoRA
update_progress(0.2, "Configuring LoRA...")
lora_config = LoraConfig(
r=lora_r,
lora_alpha=lora_alpha,
target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
lora_dropout=0.05,
bias="none",
task_type="CAUSAL_LM"
)
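        # Effective LoRA scaling is alpha/r (32/16 = 2.0 at the UI defaults).
        # The target modules match Llama-style attention and MLP projection
        # names and may need adjusting for architectures that name them differently.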
model = get_peft_model(model, lora_config)
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
all_params = sum(p.numel() for p in model.parameters())
trainable_percent = 100 * trainable_params / all_params
update_progress(0.25, f"LoRA setup complete. Trainable: {trainable_percent:.2f}%")
# Load dataset
update_progress(0.3, f"Loading dataset: {dataset_name}")
dataset = load_dataset(dataset_name, token=hf_token)
# Format dataset
def format_instruction(example):
if 'question' in example and 'answer' in example:
text = f"### Question:\n{example['question']}\n\n### Answer:\n{example['answer']}"
elif 'instruction' in example and 'response' in example:
text = f"### Instruction:\n{example['instruction']}\n\n### Response:\n{example['response']}"
elif 'text' in example:
text = example['text']
else:
keys = [k for k in example.keys() if isinstance(example[k], str)]
if len(keys) >= 2:
text = f"### Input:\n{example[keys[0]]}\n\n### Output:\n{example[keys[1]]}"
else:
text = str(example)
return {"text": text}
update_progress(0.35, "Formatting dataset...")
column_names = dataset['train'].column_names if 'train' in dataset else dataset.column_names
dataset = dataset.map(format_instruction, remove_columns=column_names)
# Tokenize
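        # Padding every example to max_seq_length keeps batches a fixed size at
        # the cost of extra compute on short samples; labels mirror input_ids
        # for standard next-token prediction.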
def tokenize_function(examples):
outputs = tokenizer(
examples["text"],
truncation=True,
max_length=max_seq_length,
padding="max_length",
return_tensors=None
)
outputs["labels"] = outputs["input_ids"].copy()
return outputs
update_progress(0.4, "Tokenizing dataset...")
tokenized_dataset = dataset.map(
tokenize_function,
batched=True,
remove_columns=["text"],
desc="Tokenizing"
)
# Prepare data splits
if 'train' in tokenized_dataset:
train_dataset = tokenized_dataset['train']
eval_dataset = tokenized_dataset.get('validation', None)
else:
split = tokenized_dataset.train_test_split(test_size=0.1)
train_dataset = split['train']
eval_dataset = split['test']
# Training arguments
update_progress(0.45, "Setting up training configuration...")
training_args = TrainingArguments(
output_dir=output_dir,
num_train_epochs=num_epochs,
per_device_train_batch_size=batch_size,
gradient_accumulation_steps=gradient_accumulation_steps,
learning_rate=learning_rate,
warmup_ratio=0.03,
logging_steps=10,
save_steps=100,
save_total_limit=2,
fp16=False,
bf16=True,
optim="paged_adamw_8bit" if use_4bit else "adamw_torch",
gradient_checkpointing=True,
max_grad_norm=0.3,
lr_scheduler_type="cosine",
report_to="none",
save_strategy="steps",
group_by_length=True,
push_to_hub=push_to_hub,
hub_token=hf_token if push_to_hub else None,
hub_model_id=output_repo_name if push_to_hub else None,
)
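        # Effective batch size = batch_size * gradient_accumulation_steps
        # (4 * 4 = 16 at the UI defaults). bf16 requires an Ampere-or-newer GPU;
        # paged_adamw_8bit keeps optimizer state in 8-bit to save memory.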
# Data collator
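        # mlm=False selects the causal-LM objective; the collator rebuilds labels
        # from input_ids and masks pad tokens to -100 so padding does not count
        # toward the loss (with pad == EOS here, trailing EOS tokens are masked too).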
data_collator = DataCollatorForLanguageModeling(
tokenizer=tokenizer,
mlm=False
)
# Trainer
update_progress(0.5, "Initializing trainer...")
trainer = Trainer(
model=model,
args=training_args,
train_dataset=train_dataset,
eval_dataset=eval_dataset,
data_collator=data_collator,
tokenizer=tokenizer,
callbacks=[ProgressCallback(update_progress)]
)
# Train
update_progress(0.55, f"Starting training on {len(train_dataset)} examples...")
trainer.train()
# Save model
update_progress(0.95, "Saving model...")
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)
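        # save_pretrained on a PEFT model writes only the LoRA adapter weights
        # and config, not the full base model; load it later with PeftModel on
        # top of the original base checkpoint.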
# Push to hub if requested
if push_to_hub and hf_token:
update_progress(0.97, "Pushing to Hugging Face Hub...")
            try:
                api = HfApi(token=hf_token)
                repo_id = output_repo_name
                # exist_ok=True makes repo creation idempotent, so no bare
                # except is needed around it
                create_repo(repo_id, token=hf_token, exist_ok=True)
                # Upload the saved adapter and tokenizer files
                api.upload_folder(
                    folder_path=output_dir,
                    repo_id=repo_id,
                    token=hf_token
                )
update_progress(1.0, "Training complete! Model pushed to hub.")
return f"βœ… Training complete!\n\nModel saved to: {output_dir}\nUploaded to: https://huggingface.co/{repo_id}\n\nTrainable parameters: {trainable_percent:.2f}%"
except Exception as e:
return f"⚠️ Training complete but upload failed: {str(e)}\n\nModel saved locally to: {output_dir}"
update_progress(1.0, "Training complete!")
return f"βœ… Training complete!\n\nModel saved to: {output_dir}\n\nTrainable parameters: {trainable_percent:.2f}%"
except Exception as e:
return f"❌ Error during training: {str(e)}"
# Gradio Interface
with gr.Blocks(title="LoRA Fine-tuning on Hugging Face", theme=gr.themes.Soft()) as demo:
gr.Markdown("""
# πŸš€ LoRA Fine-tuning for Nemotron-3-8B
Train your model on Hugging Face infrastructure with PEFT and 4-bit quantization
""")
with gr.Row():
with gr.Column():
gr.Markdown("### Model & Dataset Configuration")
model_name = gr.Textbox(
label="Base Model",
value="nvidia/nemotron-3-8b-base-4k",
placeholder="nvidia/nemotron-3-8b-base-4k"
)
dataset_name = gr.Textbox(
label="Dataset",
value="crazycog/linux-sysadmin-qa-askhole-v1",
placeholder="username/dataset-name"
)
hf_token = gr.Textbox(
label="HuggingFace Token (optional, for private datasets/pushing to hub)",
type="password",
placeholder="hf_..."
)
output_repo_name = gr.Textbox(
label="Output Repository Name",
value="nemotron-lora-sysadmin",
placeholder="username/model-name"
)
with gr.Column():
gr.Markdown("### Training Configuration")
use_4bit = gr.Checkbox(label="Use 4-bit Quantization", value=True)
lora_r = gr.Slider(minimum=4, maximum=64, value=16, step=4, label="LoRA Rank (r)")
lora_alpha = gr.Slider(minimum=8, maximum=128, value=32, step=8, label="LoRA Alpha")
learning_rate = gr.Number(label="Learning Rate", value=2e-4)
num_epochs = gr.Slider(minimum=1, maximum=10, value=3, step=1, label="Number of Epochs")
batch_size = gr.Slider(minimum=1, maximum=16, value=4, step=1, label="Batch Size")
gradient_accumulation_steps = gr.Slider(minimum=1, maximum=16, value=4, step=1, label="Gradient Accumulation Steps")
max_seq_length = gr.Slider(minimum=512, maximum=4096, value=2048, step=512, label="Max Sequence Length")
push_to_hub = gr.Checkbox(label="Push to Hugging Face Hub", value=False)
train_btn = gr.Button("πŸš€ Start Training", variant="primary", size="lg")
output = gr.Textbox(label="Training Status", lines=10, interactive=False)
train_btn.click(
fn=train_model,
inputs=[
model_name,
dataset_name,
hf_token,
output_repo_name,
lora_r,
lora_alpha,
learning_rate,
num_epochs,
batch_size,
gradient_accumulation_steps,
max_seq_length,
use_4bit,
push_to_hub
],
outputs=output
)
gr.Markdown("""
### πŸ“ Notes
    - Training requires a GPU (upgrade your Space to GPU hardware)
    - 4-bit quantization is recommended on GPUs with 24 GB of VRAM or less
    - An HF token is required for private datasets or for pushing to the Hub
- Training time depends on dataset size and configuration
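    - The dataset formatter auto-detects `question`/`answer`, `instruction`/`response`,
      or a single `text` column (other string columns fall back to a generic
      Input/Output template). A minimal sketch of publishing a compatible dataset
      (illustrative rows; `username/my-qa-dataset` is a placeholder repo id):

    ```python
    from datasets import Dataset

    rows = [
        {"question": "How do I check disk usage?", "answer": "Run `df -h`."},
        {"question": "How do I list listening ports?", "answer": "Run `ss -tulpn`."},
    ]
    # Requires being logged in (huggingface-cli login) or passing token=...
    Dataset.from_list(rows).push_to_hub("username/my-qa-dataset")
    ```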
""")
if __name__ == "__main__":
demo.launch()