import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    pipeline,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer
from fastapi import FastAPI
from pydantic import BaseModel
import uvicorn
# --- Configuration ---
base_model_name = "unsloth/llama-3-8b-Instruct-bnb-4bit"
output_dir = "/data/fine_tuning"
dataset_path = "dataset.jsonl"
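# NOTE: dataset.jsonl is assumed to contain one JSON object per line with a
# "text" field already rendered in the Llama-3 chat format, e.g.:
# {"text": "<|start_header_id|>user<|end_header_id|>\n\nHi<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nHello!<|eot_id|>"}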
# --- Initialize model and tokenizer variables ---
model = None
tokenizer = None
# --- Training Logic ---
# Check if a fine-tuned model adapter already exists
if not os.path.exists(os.path.join(output_dir, 'adapter_config.json')):
print("No fine-tuned model found. Starting training...")
# Load dataset
dataset = load_dataset("json", data_files=dataset_path, split="train")
    # Load base model for training
    model = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        device_map="auto",
        trust_remote_code=True,
    )
    model.config.use_cache = False
    tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"
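    # Right padding is the usual choice when fine-tuning causal LMs; left
    # padding only becomes relevant for batched generation at inference time.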
    # Configure LoRA
    peft_config = LoraConfig(
        r=16,
        lora_alpha=16,
        lora_dropout=0.1,
        bias="none",
        task_type="CAUSAL_LM",
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    )
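    # target_modules above covers every linear projection in a Llama block:
    # q/k/v/o_proj (attention) and gate/up/down_proj (MLP).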
    # Training args
    training_arguments = TrainingArguments(
        output_dir=output_dir,
        num_train_epochs=1,
        per_device_train_batch_size=2,
        gradient_accumulation_steps=2,
        optim="paged_adamw_32bit",
        logging_steps=10,
        learning_rate=2e-4,
        fp16=True,
        max_grad_norm=0.3,
        max_steps=-1,
        warmup_ratio=0.03,
        group_by_length=True,
        lr_scheduler_type="linear",
        push_to_hub=True,
        hub_model_id="Nutnell/direct-ed-finetune-job",
    )
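    # push_to_hub requires Hugging Face credentials at runtime (e.g. an
    # HF_TOKEN secret on the Space); without them the final push will fail.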
    # Initialize Trainer
    trainer = SFTTrainer(
        model=model,
        train_dataset=dataset,
        peft_config=peft_config,
        dataset_text_field="text",  # Ensure your dataset has a 'text' column
        tokenizer=tokenizer,  # Pass the tokenizer explicitly so the pad-token setup above is used
        args=training_arguments,
    )
    # Train the model
    trainer.train()
    # Save the trained adapter
    trainer.model.save_pretrained(output_dir)
    print(f"Fine-tuned model adapter saved to {output_dir}")
    model = trainer.model
    model.config.use_cache = True  # Re-enable the KV cache for generation
# --- Inference Logic ---
# If training did not run, load the existing model
else:
print("Found existing fine-tuned model. Loading for inference...")
# Load the base model
base_model = AutoModelForCausalLM.from_pretrained(
base_model_name,
device_map="auto",
trust_remote_code=True,
)
# Apply the PEFT adapter
model = PeftModel.from_pretrained(base_model, output_dir)
tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
# --- Create Inference Pipeline ---
print("Setting up inference pipeline...")
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device_map="auto")
print("Inference pipeline ready.")
# --- FastAPI App ---
# PYDANTIC MODEL FOR THE REQUEST BODY
class GenerateRequest(BaseModel):
    prompt: str
app = FastAPI(title="Fine-tuned LLaMA API")
@app.get("/")
def home():
return {"status": "ok", "message": "Fine-tuned LLaMA is ready."}
@app.post("/generate")
def generate(request: GenerateRequest):
    # Wrap the user prompt in the Llama-3 chat template
    formatted_prompt = f"<|start_header_id|>user<|end_header_id|>\n\n{request.prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
    # return_full_text=False returns only the completion, not the echoed prompt
    outputs = pipe(formatted_prompt, max_new_tokens=200, do_sample=True, temperature=0.7, return_full_text=False)
    return {"response": outputs[0]["generated_text"]}
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)
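
# Example request once the server is up (port matches uvicorn.run above):
#   curl -X POST http://localhost:7860/generate \
#        -H "Content-Type: application/json" \
#        -d '{"prompt": "Explain LoRA in one sentence."}'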