Javad Taghia committed on
Commit
61c72b6
·
1 Parent(s): e86ddb9

updated with evaluation

Browse files
.env.example ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # License: CC BY-NC-SA 4.0. Rights belong to Javad Taghia (taghia.javad@gmail.com).
2
+
3
+ # Copy this file to `.env` and fill in secrets.
4
+ # Never commit your real API keys.
5
+
6
+ WANDB_API_KEY=your_wandb_api_key
7
+ WANDB_PROJECT=tulu-laptop-run
8
+ WANDB_ENTITY=your_wandb_username_or_team
9
+
10
+ # Optional: where to cache/download base models and datasets
11
+ # e.g., /Volumes/JTQ-s/______GITLAB____/downloaded_base_models
12
+ BASE_MODEL_CACHE=
.gitignore CHANGED
@@ -2,7 +2,7 @@
2
 
3
  # Secrets and env files
4
  .env
5
- .env.*
6
 
7
  # Python caches and artifacts
8
  __pycache__/
@@ -13,3 +13,6 @@ offload/
13
 
14
  # Local W&B runs/artifacts
15
  wandb/
 
 
 
 
2
 
3
  # Secrets and env files
4
  .env
5
+
6
 
7
  # Python caches and artifacts
8
  __pycache__/
 
13
 
14
  # Local W&B runs/artifacts
15
  wandb/
16
+
17
+ # Training outputs and adapters
18
+ outputs/
README.md CHANGED
@@ -97,7 +97,59 @@ Key flags:
97
  - Finetuned adapters + tokenizer are written to `outputs/tulu-lora` (configurable via `--output_dir`).
98
  - `outputs/` is tracked via Git LFS (`.gitattributes`), so weights can be committed and pushed to the Hub. Run `git lfs install` once, then `git add outputs/...` before committing.
99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  ## Troubleshooting
101
  - OOM? Reduce `max_seq_length`, increase `gradient_accumulation_steps`, or switch to a smaller dataset (e.g., use a tiny instruction set like `mlabonne/guanaco-llama2-1k`, or subset your dataset with `--dataset_name your/dataset --max_train_samples 500` in code/script).
102
  - bitsandbytes import errors on macOS/CPU: run with `--use_4bit false` or use a Linux+CUDA machine.
103
  - bitsandbytes install error? We pin to `0.42.0`, the latest widely distributed wheel. If you cannot install it (CPU-only/MPS), remove it from `requirements.txt` and set `--use_4bit false`.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  - Finetuned adapters + tokenizer are written to `outputs/tulu-lora` (configurable via `--output_dir`).
98
  - `outputs/` is tracked via Git LFS (`.gitattributes`), so weights can be committed and pushed to the Hub. Run `git lfs install` once, then `git add outputs/...` before committing.
99
 
100
+ ## Evaluation (inference/compare)
101
+ - Quick smoke test with the saved adapter (edit `lora_dir` inside if you used a different path):
102
+ ```bash
103
+ python evaluation/simple_inference.py
104
+ ```
105
+ - Compare base vs. LoRA outputs side-by-side:
106
+ ```bash
107
+ python evaluation/compare_lora.py \
108
+ --base_model TinyLlama/TinyLlama-1.1B-Chat-v1.0 \
109
+ --lora_dir outputs/tinyllama-lora \
110
+ --prompt "Explain LoRA in one sentence."
111
+ ```
112
+ Optional flags: `--max_new_tokens`, `--temperature`, `--top_p`, `--torch_dtype`.
113
+
114
  ## Troubleshooting
115
  - OOM? Reduce `max_seq_length`, increase `gradient_accumulation_steps`, or switch to a smaller dataset (e.g., use a tiny instruction set like `mlabonne/guanaco-llama2-1k`, or subset your dataset with `--dataset_name your/dataset --max_train_samples 500` in code/script).
116
  - bitsandbytes import errors on macOS/CPU: run with `--use_4bit false` or use a Linux+CUDA machine.
117
  - bitsandbytes install error? We pin to `0.42.0`, the latest widely distributed wheel. If you cannot install it (CPU-only/MPS), remove it from `requirements.txt` and set `--use_4bit false`.
118
+
119
+
120
+ ## Notes: CUDA environment setup and example training run
+
+ ```bash
+ pip install --upgrade "torch==2.2.*" "torchvision==0.17.*" "torchaudio==2.2.*" --index-url https://download.pytorch.org/whl/cu121
+ pip install --upgrade "bitsandbytes>=0.43.1"
+ pip install --upgrade "transformers>=4.40.0"
+
+ python train_tulu.py \
+   --model_name TinyLlama/TinyLlama-1.1B-Chat-v1.0 \
+   --output_dir outputs/tinyllama-lora \
+   --offload_folder offload \
+   --device cuda \
+   --torch_dtype auto \
+   --max_seq_length 512 \
+   --per_device_batch_size 2 \
+   --gradient_accumulation_steps 8 \
+   --num_train_epochs 1 \
+   --use_4bit \
+   --instruction_field instruction \
+   --input_field input \
+   --output_field output
+ ```
evaluation/compare_lora.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ from typing import Optional
3
+
4
+ import torch
5
+ from peft import PeftModel
6
+ from transformers import AutoModelForCausalLM, AutoTokenizer
7
+
8
+
9
def parse_args():
    """Parse command-line options for the base-vs-LoRA comparison script."""
    parser = argparse.ArgumentParser(description="Compare base vs. fine-tuned LoRA outputs.")

    # Required identifiers for the two models and the prompt to run.
    parser.add_argument("--base_model", required=True, help="Base model id or local path.")
    parser.add_argument("--lora_dir", required=True, help="Path to LoRA adapter folder (e.g., outputs/tinyllama-lora).")
    parser.add_argument("--prompt", required=True, help="Prompt to generate with.")

    # Sampling knobs share a common shape, so register them data-driven.
    for flag, value_type, default in (
        ("--max_new_tokens", int, 128),
        ("--temperature", float, 0.7),
        ("--top_p", float, 0.9),
    ):
        parser.add_argument(flag, type=value_type, default=default)

    parser.add_argument(
        "--torch_dtype",
        default="auto",
        choices=["auto", "float16", "bfloat16", "float32"],
        help="Force dtype for model load.",
    )
    return parser.parse_args()
24
+
25
+
26
def resolve_dtype(name: str) -> Optional[torch.dtype]:
    """Map a CLI dtype name to a torch dtype; "auto" means let HF decide (None).

    For the allowed names ("float16", "bfloat16", "float32") the torch module
    exposes an attribute of exactly that name, so attribute lookup replaces
    the literal mapping.
    """
    return None if name == "auto" else getattr(torch, name)
30
+
31
+
32
def generate(model, tokenizer, prompt: str, max_new_tokens: int, temperature: float, top_p: float) -> str:
    """Sample one completion for *prompt* and return the decoded text.

    The decoded string includes the prompt itself, since the full output
    sequence is decoded (not just the newly generated tokens).
    """
    encoded = tokenizer(prompt, return_tensors="pt").to(model.device)
    sampling_kwargs = {
        "max_new_tokens": max_new_tokens,
        "do_sample": True,
        "temperature": temperature,
        "top_p": top_p,
    }
    # inference_mode disables autograd bookkeeping for faster generation.
    with torch.inference_mode():
        sequences = model.generate(**encoded, **sampling_kwargs)
    return tokenizer.decode(sequences[0], skip_special_tokens=True)
43
+
44
+
45
def main():
    """Generate with the base model and its LoRA-adapted version on the same prompt, printing both.

    Reads all settings from the command line (see parse_args). Requires the
    adapter folder to contain both the LoRA weights and the tokenizer saved
    at training time.
    """
    args = parse_args()
    torch_dtype = resolve_dtype(args.torch_dtype)

    # The adapter folder also stores the tokenizer saved during training,
    # so load it from there to match the fine-tuned vocabulary/special tokens.
    tokenizer = AutoTokenizer.from_pretrained(args.lora_dir, use_fast=False)

    # Load the base weights ONCE. Generating the base output first and only
    # then attaching the LoRA adapter halves peak memory compared with the
    # previous approach of loading two full copies of the base model.
    model = AutoModelForCausalLM.from_pretrained(
        args.base_model,
        device_map="auto",
        torch_dtype=torch_dtype,
    )

    base_out = generate(
        model,
        tokenizer,
        args.prompt,
        max_new_tokens=args.max_new_tokens,
        temperature=args.temperature,
        top_p=args.top_p,
    )

    # Attach the adapter to the already-loaded base model (wraps in place;
    # no second base-model download or allocation).
    model = PeftModel.from_pretrained(model, args.lora_dir)

    lora_out = generate(
        model,
        tokenizer,
        args.prompt,
        max_new_tokens=args.max_new_tokens,
        temperature=args.temperature,
        top_p=args.top_p,
    )

    # NOTE(review): outputs are sampled (do_sample=True), so two runs differ;
    # seed torch manually if reproducible comparisons are needed.
    print("=== Base model ===")
    print(base_out)
    print("\n=== LoRA model ===")
    print(lora_out)


if __name__ == "__main__":
    main()
evaluation/simple_inference.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from peft import PeftConfig, PeftModel
3
+ from transformers import AutoModelForCausalLM, AutoTokenizer
4
+
5
+
6
def main():
    """Smoke-test a saved LoRA adapter: reload it on its base model and print one sampled completion."""
    lora_dir = "outputs/tinyllama-lora"  # change to your adapter path

    # The adapter config records which base checkpoint it was trained on.
    peft_cfg = PeftConfig.from_pretrained(lora_dir)

    tokenizer = AutoTokenizer.from_pretrained(lora_dir, use_fast=False)
    model = AutoModelForCausalLM.from_pretrained(
        peft_cfg.base_model_name_or_path,
        device_map="auto",
        torch_dtype=torch.float16,
    )
    model = PeftModel.from_pretrained(model, lora_dir)

    prompt = "### Instruction:\nExplain LoRA in one sentence.\n\n### Input:\nN/A\n\n### Response:\n"
    batch = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.inference_mode():
        sequences = model.generate(**batch, max_new_tokens=128, do_sample=True, temperature=0.7)
    print(tokenizer.decode(sequences[0], skip_special_tokens=True))


if __name__ == "__main__":
    main()