Nutnell committed on
Commit
b99b2f2
·
verified ·
1 Parent(s): 255a9c8

Update fine_tune.py

Browse files
Files changed (1) hide show
  1. fine_tune.py +81 -68
fine_tune.py CHANGED
@@ -1,6 +1,4 @@
1
-
2
- # fine_tuning/fine_tune.py
3
-
4
  import os
5
  import torch
6
  from datasets import load_dataset
@@ -8,85 +6,100 @@ from transformers import (
8
  AutoModelForCausalLM,
9
  AutoTokenizer,
10
  TrainingArguments,
11
- BitsAndBytesConfig
12
  )
13
- from peft import LoraConfig
14
  from trl import SFTTrainer
15
-
 
16
 
17
  base_model_name = "unsloth/llama-3-8b-Instruct-bnb-4bit"
18
- output_dir = "/tmp/fine_tuning"
19
 
20
  dataset_path = "dataset.jsonl"
21
 
22
-
23
- # Load the Dataset
24
- print("Loading dataset...")
25
- dataset = load_dataset("json", data_files=dataset_path, split="train")
26
- print("Dataset loaded successfully.")
27
-
28
-
29
- # Load the Base Model & Tokenizer
30
- print(f"Loading base model: {base_model_name}...")
31
- model = AutoModelForCausalLM.from_pretrained(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  base_model_name,
33
  device_map="auto",
34
  trust_remote_code=True,
35
  )
36
- model.config.use_cache = False
37
- print("Base model loaded successfully.")
38
-
39
  tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
40
- if tokenizer.pad_token is None:
41
- tokenizer.pad_token = tokenizer.eos_token
42
- tokenizer.padding_side = "right"
43
- print("Tokenizer loaded and configured.")
44
 
 
45
 
46
- # Configure PEFT (LoRA)
47
- peft_config = LoraConfig(
48
- lora_alpha=16,
49
- lora_dropout=0.1,
50
- r=16,
51
- bias="none",
52
- task_type="CAUSAL_LM",
53
- target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
54
- )
55
- print("LoRA configured.")
56
-
57
-
58
- # Define Training Arguments
59
- training_arguments = TrainingArguments(
60
- output_dir=output_dir,
61
- num_train_epochs=1,
62
- per_device_train_batch_size=2,
63
- gradient_accumulation_steps=2,
64
- optim="paged_adamw_32bit",
65
- logging_steps=10,
66
- learning_rate=2e-4,
67
- weight_decay=0.01,
68
- fp16=True,
69
- max_grad_norm=0.3,
70
- max_steps=-1,
71
- warmup_ratio=0.03,
72
- group_by_length=True,
73
- lr_scheduler_type="linear",
74
- )
75
- print("Training arguments set.")
76
-
77
 
78
- # Initialize and Start Training
79
- trainer = SFTTrainer(
80
- model=model,
81
- train_dataset=dataset,
82
- peft_config=peft_config,
83
- args=training_arguments,
84
- )
85
- print("Trainer initialized. Starting the fine-tuning process...")
86
- trainer.train()
87
- print("Training complete.")
88
 
 
 
 
 
89
 
90
- # Save the Final Model
91
- trainer.model.save_pretrained(output_dir)
92
- print(f"Fine-tuned model adapter saved to {output_dir}")
 
1
+ # fine_tune.py
 
 
2
  import os
3
  import torch
4
  from datasets import load_dataset
 
6
  AutoModelForCausalLM,
7
  AutoTokenizer,
8
  TrainingArguments,
9
+ pipeline
10
  )
11
+ from peft import LoraConfig, PeftModel
12
  from trl import SFTTrainer
13
+ from fastapi import FastAPI
14
+ import uvicorn
15
 
16
  base_model_name = "unsloth/llama-3-8b-Instruct-bnb-4bit"
17
+ output_dir = "fine_tuning" # persist inside /code
18
 
19
  dataset_path = "dataset.jsonl"
20
 
21
+ # --- Check if already fine-tuned ---
22
+ if not os.path.exists(output_dir):
23
+ print("No fine-tuned model found. Starting training...")
24
+
25
+ # Load dataset
26
+ dataset = load_dataset("json", data_files=dataset_path, split="train")
27
+
28
+ # Load base model + tokenizer
29
+ model = AutoModelForCausalLM.from_pretrained(
30
+ base_model_name,
31
+ device_map="auto",
32
+ trust_remote_code=True,
33
+ )
34
+ model.config.use_cache = False
35
+
36
+ tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
37
+ if tokenizer.pad_token is None:
38
+ tokenizer.pad_token = tokenizer.eos_token
39
+ tokenizer.padding_side = "right"
40
+
41
+ # Configure LoRA
42
+ peft_config = LoraConfig(
43
+ lora_alpha=16,
44
+ lora_dropout=0.1,
45
+ r=16,
46
+ bias="none",
47
+ task_type="CAUSAL_LM",
48
+ target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
49
+ )
50
+
51
+ # Training args
52
+ training_arguments = TrainingArguments(
53
+ output_dir=output_dir,
54
+ num_train_epochs=1,
55
+ per_device_train_batch_size=2,
56
+ gradient_accumulation_steps=2,
57
+ optim="paged_adamw_32bit",
58
+ logging_steps=10,
59
+ learning_rate=2e-4,
60
+ weight_decay=0.01,
61
+ fp16=True,
62
+ max_grad_norm=0.3,
63
+ max_steps=-1,
64
+ warmup_ratio=0.03,
65
+ group_by_length=True,
66
+ lr_scheduler_type="linear",
67
+ )
68
+
69
+ # Train
70
+ trainer = SFTTrainer(
71
+ model=model,
72
+ train_dataset=dataset,
73
+ peft_config=peft_config,
74
+ args=training_arguments,
75
+ )
76
+ trainer.train()
77
+ trainer.model.save_pretrained(output_dir)
78
+ print(f"Fine-tuned model saved to {output_dir}")
79
+
80
# --- Load model for inference ---
print("Loading fine-tuned model...")

# Tokenizer first (independent of the model weights).
tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)

# Frozen base weights, then the saved LoRA adapter stacked on top.
foundation = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map="auto",
    trust_remote_code=True,
)
model = PeftModel.from_pretrained(foundation, output_dir)

# Text-generation pipeline wrapping the adapted model + tokenizer.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device_map="auto")

# --- FastAPI app ---
app = FastAPI(title="Fine-tuned LLaMA API")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
 
95
@app.get("/")
def home():
    """Health-check endpoint: reports that the service is up and serving."""
    payload = {"status": "ok", "message": "Fine-tuned LLaMA is ready."}
    return payload
 
 
 
 
 
 
 
98
 
99
@app.post("/generate")
def generate(prompt: str):
    """Run the fine-tuned pipeline on *prompt* and return its completion.

    NOTE(review): a bare ``str`` parameter on a POST route is interpreted by
    FastAPI as a query parameter (``?prompt=...``), not the request body —
    confirm that is what clients send. The returned ``generated_text``
    includes the prompt itself, as that is the pipeline's default.
    """
    completions = pipe(prompt, max_new_tokens=200, do_sample=True, temperature=0.7)
    first = completions[0]
    return {"response": first["generated_text"]}
103
 
104
def _serve() -> None:
    """Launch the API server on all interfaces, port 7860 (HF Spaces convention)."""
    uvicorn.run(app, host="0.0.0.0", port=7860)


if __name__ == "__main__":
    _serve()