Nutnell committed on
Commit 47dca3b · verified · 1 Parent(s): 478ed02

Update fine_tune.py

Files changed (1)
  1. fine_tune.py +44 -22
fine_tune.py CHANGED
@@ -1,6 +1,5 @@
 # fine_tune.py
 import os
-os.environ["OMP_NUM_THREADS"] = "1"
 import torch
 from datasets import load_dataset
 from transformers import (
@@ -14,19 +13,25 @@ from trl import SFTTrainer
 from fastapi import FastAPI
 import uvicorn
 
+
 base_model_name = "unsloth/llama-3-8b-Instruct-bnb-4bit"
-output_dir = "/data/fine_tuning" # persist inside /code
 
+output_dir = "/data/fine_tuning"
 dataset_path = "dataset.jsonl"
 
-# --- Check if already fine-tuned ---
-if not os.path.exists(output_dir):
+# Initialize model and tokenizer variables
+model = None
+tokenizer = None
+
+# Training Logic
+# Check if a fine-tuned model adapter already exists
+if not os.path.exists(os.path.join(output_dir, 'adapter_config.json')):
     print("No fine-tuned model found. Starting training...")
 
     # Load dataset
     dataset = load_dataset("json", data_files=dataset_path, split="train")
 
-    # Load base model + tokenizer
+    # Load base model for training
     model = AutoModelForCausalLM.from_pretrained(
         base_model_name,
         device_map="auto",
@@ -41,9 +46,9 @@ if not os.path.exists(output_dir):
 
     # Configure LoRA
     peft_config = LoraConfig(
+        r=16,
         lora_alpha=16,
         lora_dropout=0.1,
-        r=16,
         bias="none",
         task_type="CAUSAL_LM",
         target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
@@ -58,7 +63,6 @@ if not os.path.exists(output_dir):
         optim="paged_adamw_32bit",
         logging_steps=10,
         learning_rate=2e-4,
-        weight_decay=0.01,
         fp16=True,
         max_grad_norm=0.3,
         max_steps=-1,
@@ -67,30 +71,46 @@ if not os.path.exists(output_dir):
         lr_scheduler_type="linear",
     )
 
-    # Train
+    # Initialize Trainer
     trainer = SFTTrainer(
         model=model,
         train_dataset=dataset,
         peft_config=peft_config,
+        dataset_text_field="text", # Ensure your dataset has a 'text' column
         args=training_arguments,
     )
+
+    # Train the model
     trainer.train()
+
+    # Save the trained adapter
     trainer.model.save_pretrained(output_dir)
-    print(f"Fine-tuned model saved to {output_dir}")
-
-# --- Load model for inference ---
-print("Loading fine-tuned model...")
-base_model = AutoModelForCausalLM.from_pretrained(
-    base_model_name,
-    device_map="auto",
-    trust_remote_code=True,
-)
-model = PeftModel.from_pretrained(base_model, output_dir)
-tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
+    print(f"Fine-tuned model adapter saved to {output_dir}")
+
+    model = trainer.model
+
+# Inference Logic
+# If training did not run, load the existing model
+else:
+    print("Found existing fine-tuned model. Loading for inference...")
+
+    # Load the base model
+    base_model = AutoModelForCausalLM.from_pretrained(
+        base_model_name,
+        device_map="auto",
+        trust_remote_code=True,
+    )
+    # Apply the PEFT adapter
+    model = PeftModel.from_pretrained(base_model, output_dir)
+    tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
+
 
+# Create Inference Pipeline
+print("Setting up inference pipeline...")
 pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device_map="auto")
+print("Inference pipeline ready.")
 
+# FastAPI App
 app = FastAPI(title="Fine-tuned LLaMA API")
 
 @app.get("/")
@@ -99,8 +119,10 @@ def home():
 
 @app.post("/generate")
 def generate(prompt: str):
-    outputs = pipe(prompt, max_new_tokens=200, do_sample=True, temperature=0.7)
+
+    formatted_prompt = f"<|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
+    outputs = pipe(formatted_prompt, max_new_tokens=200, do_sample=True, temperature=0.7)
     return {"response": outputs[0]["generated_text"]}
 
 if __name__ == "__main__":
-    uvicorn.run(app, host="0.0.0.0", port=7860)
+    uvicorn.run(app, host="0.0.0.0", port=7860)
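
The new dataset_text_field="text" argument tells SFTTrainer to read each training example from a "text" column in dataset.jsonl. A minimal sketch of what one record could look like, assuming examples are pre-formatted with the same Llama-3 instruct headers the /generate endpoint uses (the script name and the example contents are illustrative assumptions, not taken from this repository's dataset):

# make_dataset_record.py - illustrative sketch; the example contents are
# assumptions, not taken from the repository's dataset.jsonl
import json

# One training example whose "text" column mirrors the prompt layout in /generate.
record = {
    "text": (
        "<|start_header_id|>user<|end_header_id|>\n\n"
        "What does this Space do?<|eot_id|>"
        "<|start_header_id|>assistant<|end_header_id|>\n\n"
        "It serves a LoRA fine-tuned Llama 3 model behind a FastAPI endpoint.<|eot_id|>"
    )
}

# Append the record as one JSON line, the layout expected by
# load_dataset("json", data_files="dataset.jsonl", split="train").
with open("dataset.jsonl", "a", encoding="utf-8") as f:
    f.write(json.dumps(record) + "\n")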
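
Because the /generate route declares prompt: str as a plain scalar argument, FastAPI reads it from the query string even on a POST request. A minimal client sketch, assuming the app is reachable at http://localhost:7860 (the base URL is an assumption about the local setup):

# call_generate.py - minimal client sketch; the base URL is an assumption
import requests

# prompt is a scalar parameter, so FastAPI expects it as a query parameter.
resp = requests.post(
    "http://localhost:7860/generate",
    params={"prompt": "Summarize what this service does."},
    timeout=120,
)
resp.raise_for_status()

# The pipeline returns full text by default, so "response" contains the
# formatted prompt followed by the model's completion.
print(resp.json()["response"])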