File size: 4,004 Bytes
23ed3ee
 
 
cb59384
 
 
 
 
 
 
b99b2f2
cb59384
b99b2f2
cb59384
b99b2f2
4146ecf
b99b2f2
cb59384
a89712a
cb59384
a89712a
cb59384
 
a89712a
47dca3b
 
 
a89712a
23ed3ee
47dca3b
b99b2f2
 
23ed3ee
b99b2f2
 
23ed3ee
b99b2f2
 
 
 
 
 
 
 
 
 
 
 
 
 
47dca3b
b99b2f2
 
 
 
 
 
 
23ed3ee
b99b2f2
 
 
 
 
 
 
 
 
 
 
 
 
 
f2dc681
23ed3ee
b99b2f2
 
23ed3ee
b99b2f2
 
 
 
23ed3ee
b99b2f2
 
47dca3b
23ed3ee
b99b2f2
23ed3ee
 
b99b2f2
47dca3b
 
 
 
23ed3ee
 
47dca3b
 
23ed3ee
 
47dca3b
 
 
 
 
23ed3ee
47dca3b
 
 
23ed3ee
 
47dca3b
b99b2f2
47dca3b
cb59384
23ed3ee
a89712a
23ed3ee
 
a89712a
 
 
a5f4140
cb59384
b99b2f2
a5f4140
 
cb59384
b99b2f2
a5f4140
23ed3ee
a5f4140
23ed3ee
a5f4140
23ed3ee
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139



import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    pipeline
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer
from fastapi import FastAPI
from pydantic import BaseModel
import uvicorn

# --- Configuration ---
base_model_name = "unsloth/llama-3-8b-Instruct-bnb-4bit"
output_dir = "/data/fine_tuning"
dataset_path = "dataset.jsonl"

# --- Initialize model and tokenizer variables ---
model = None
tokenizer = None

# --- Training Logic ---
# Check if a fine-tuned model adapter already exists
if not os.path.exists(os.path.join(output_dir, 'adapter_config.json')):
    print("No fine-tuned model found. Starting training...")

    # Load dataset
    dataset = load_dataset("json", data_files=dataset_path, split="train")

    # Load base model for training
    model = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        device_map="auto",
        trust_remote_code=True,
    )
    model.config.use_cache = False

    tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"

    # Configure LoRA
    peft_config = LoraConfig(
        r=16,
        lora_alpha=16,
        lora_dropout=0.1,
        bias="none",
        task_type="CAUSAL_LM",
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    )

    # Training args
    training_arguments = TrainingArguments(
        output_dir=output_dir,
        num_train_epochs=1,
        per_device_train_batch_size=2,
        gradient_accumulation_steps=2,
        optim="paged_adamw_32bit",
        logging_steps=10,
        learning_rate=2e-4,
        fp16=True,
        max_grad_norm=0.3,
        max_steps=-1,
        warmup_ratio=0.03,
        group_by_length=True,
        lr_scheduler_type="linear",
        push_to_hub=True,
        hub_model_id = "Nutnell/direct-ed-finetune-job"
    )

    # Initialize Trainer
    trainer = SFTTrainer(
        model=model,
        train_dataset=dataset,
        peft_config=peft_config,
        dataset_text_field="text", # Ensure your dataset has a 'text' column
        args=training_arguments,
    )
    
    # Train the model
    trainer.train()
    
    # Save the trained adapter
    trainer.model.save_pretrained(output_dir)
    print(f"Fine-tuned model adapter saved to {output_dir}")
    
    model = trainer.model

# --- Inference Logic ---
# If training did not run, load the existing model
else:
    print("Found existing fine-tuned model. Loading for inference...")
    
    # Load the base model
    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        device_map="auto",
        trust_remote_code=True,
    )
    # Apply the PEFT adapter
    model = PeftModel.from_pretrained(base_model, output_dir)
    tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)


# --- Create Inference Pipeline ---
print("Setting up inference pipeline...")
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device_map="auto")
print("Inference pipeline ready.")


# --- FastAPI App ---

# PYDANTIC MODEL FOR THE REQUEST BODY
class GenerateRequest(BaseModel):
    prompt: str

app = FastAPI(title="Fine-tuned LLaMA API")

@app.get("/")
def home():
    return {"status": "ok", "message": "Fine-tuned LLaMA is ready."}

@app.post("/generate")
def generate(request: GenerateRequest):
    # Access the prompt from the request object
    formatted_prompt = f"<|start_header_id|>user<|end_header_id|>\n\n{request.prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
    
    outputs = pipe(formatted_prompt, max_new_tokens=200, do_sample=True, temperature=0.7)
    return {"response": outputs[0]["generated_text"]}

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)