yushengsu.thu@gmail.com committed
Commit f92290e · 1 parent: a793de9

Upload LoRA adapter

README.md CHANGED
@@ -1,3 +1,35 @@
- ---
- license: apache-2.0
- ---
+ # Test LoRA Adapter
+
+ This is a test LoRA adapter (randomly initialized, not fine-tuned) with customizable target modules, generated by:
+ ```bash
+ python create_test_embedding_layer.py
+ ```
+
+ ## Configuration
+ - Base model: meta-llama/Llama-2-7b-hf
+ - LoRA rank (r): 8
+ - LoRA alpha: 16
+ - Target modules: embed_tokens, lm_head, q_proj, k_proj, v_proj, o_proj, gate_proj, up_proj, down_proj
+
+ ## Weight Shapes
+ - embed_tokens.lora_A: (8, 32000)
+ - embed_tokens.lora_B: (4096, 8)
+ - lm_head.lora_A: (8, 4096)
+ - lm_head.lora_B: (32000, 8)
+ - q_proj.lora_A: (8, 4096)
+ - q_proj.lora_B: (4096, 8)
+ - k_proj.lora_A: (8, 4096)
+ - k_proj.lora_B: (4096, 8)
+ - v_proj.lora_A: (8, 4096)
+ - v_proj.lora_B: (4096, 8)
+ - o_proj.lora_A: (8, 4096)
+ - o_proj.lora_B: (4096, 8)
+ - gate_proj.lora_A: (8, 4096)
+ - gate_proj.lora_B: (11008, 8)
+ - up_proj.lora_A: (8, 4096)
+ - up_proj.lora_B: (11008, 8)
+ - down_proj.lora_A: (8, 11008)
+ - down_proj.lora_B: (4096, 8)
+
+ ## Usage with SGLang
+ This adapter contains randomly initialized weights for testing purposes only.
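A quick way to exercise the adapter end to end is to attach it to the base model with PEFT. The sketch below is illustrative only: it assumes `transformers` and `peft` are installed, that the adapter sits in the script's default output directory `./test_embedding_lora`, and that the installed `peft` version accepts the hand-rolled `lora_embedding_A`/`lora_embedding_B` key names used for the embedding and head modules.

```python
# Minimal sketch: load the base model and attach the test adapter with PEFT.
# Assumes peft/transformers are installed; adjust adapter_dir to your local path.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

adapter_dir = "./test_embedding_lora"  # default output dir of create_test_embedding_layer.py
base = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf", torch_dtype="float16")
model = PeftModel.from_pretrained(base, adapter_dir)
tokenizer = AutoTokenizer.from_pretrained(adapter_dir)

# The LoRA weights are random noise, so the output is only useful for exercising the code path.
inputs = tokenizer("Hello", return_tensors="pt")
print(tokenizer.decode(model.generate(**inputs, max_new_tokens=8)[0]))
```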
adapter_config.json ADDED
@@ -0,0 +1,28 @@
+ {
+   "auto_mapping": null,
+   "base_model_name_or_path": "meta-llama/Llama-2-7b-hf",
+   "bias": "none",
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "lora_alpha": 16,
+   "lora_dropout": 0.0,
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "r": 8,
+   "revision": null,
+   "target_modules": [
+     "embed_tokens",
+     "lm_head",
+     "q_proj",
+     "k_proj",
+     "v_proj",
+     "o_proj",
+     "gate_proj",
+     "up_proj",
+     "down_proj"
+   ],
+   "task_type": "CAUSAL_LM"
+ }
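This is the standard PEFT LoRA config layout, so it can be inspected with `peft` directly; a small sketch, with the path assumed to be the generator's default output directory:

```python
# Sketch: read the adapter configuration back with PEFT and check the key settings.
from peft import PeftConfig

cfg = PeftConfig.from_pretrained("./test_embedding_lora")
print(cfg.peft_type, cfg.r, cfg.lora_alpha)  # LORA, 8, 16
print(sorted(cfg.target_modules))            # the nine modules listed above
```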
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1763a4289005798ebe6687a13afd5cfd12a64422e66e35c127e15a812ba2ec6a
+ size 4810984
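The actual tensors live behind this Git LFS pointer. Once the file is pulled, the shapes can be checked against the README's Weight Shapes table; a sketch assuming the `safetensors` package and a local checkout:

```python
# Sketch: list every tensor stored in adapter_model.safetensors with its shape.
from safetensors.torch import load_file

weights = load_file("adapter_model.safetensors")
for name, tensor in sorted(weights.items()):
    print(f"{name}: {tuple(tensor.shape)}")  # e.g. ...embed_tokens.lora_embedding_A: (8, 32000)
```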
added_tokens.json ADDED
@@ -0,0 +1 @@
+ {}
config.json ADDED
@@ -0,0 +1,17 @@
+ {
+   "architectures": [
+     "LlamaForCausalLM"
+   ],
+   "model_type": "llama",
+   "vocab_size": 32000,
+   "hidden_size": 4096,
+   "intermediate_size": 11008,
+   "num_attention_heads": 32,
+   "num_hidden_layers": 32,
+   "num_key_value_heads": 32,
+   "max_position_embeddings": 4096,
+   "rms_norm_eps": 1e-05,
+   "rope_theta": 10000.0,
+   "torch_dtype": "float16",
+   "transformers_version": "4.36.0"
+ }
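The Weight Shapes table in the README follows directly from these dimensions together with `r = 8` from adapter_config.json: `lora_A` is `(r, in_features)` and `lora_B` is `(out_features, r)`. A short sketch of that arithmetic, with the module-to-dimension mapping assumed from the files above:

```python
# Sketch: derive the expected LoRA shapes from the base config and the adapter's rank.
import json

with open("config.json") as f:
    cfg = json.load(f)
r = 8  # "r" in adapter_config.json
hidden, vocab, inter = cfg["hidden_size"], cfg["vocab_size"], cfg["intermediate_size"]

# (in_features, out_features) per target module
dims = {
    "embed_tokens": (vocab, hidden), "lm_head": (hidden, vocab),
    "q_proj": (hidden, hidden), "k_proj": (hidden, hidden),
    "v_proj": (hidden, hidden), "o_proj": (hidden, hidden),
    "gate_proj": (hidden, inter), "up_proj": (hidden, inter),
    "down_proj": (inter, hidden),
}
for name, (d_in, d_out) in dims.items():
    print(f"{name}: lora_A {(r, d_in)}, lora_B {(d_out, r)}")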
create_test_embedding_layer.py ADDED
@@ -0,0 +1,355 @@
+ #!/usr/bin/env python3
+ """
+ create_test_embedding_layer.py
+ Create a test LoRA adapter containing specified modules
+ Based on the dimension specifications from SGLang's layers.py
+ """
+ import json
+ import os
+ import torch
+ from pathlib import Path
+
+ def create_test_embedding_lora(
+     output_dir="./test_embedding_lora",
+     base_model="meta-llama/Llama-2-7b-hf",
+     lora_rank=8,
+     lora_alpha=16,
+     target_modules=None,
+     added_tokens=None,
+ ):
+     """
+     Create a test LoRA adapter containing specified modules
+
+     Args:
+         output_dir: Output directory
+         base_model: Base model name
+         lora_rank: LoRA rank
+         lora_alpha: LoRA alpha
+         target_modules: List of target modules to generate LoRA for; defaults to all supported modules
+         added_tokens: Content of added_tokens.json (dictionary), defaults to empty
+
+     Supported target_modules:
+         - embed_tokens: Word embedding layer
+         - lm_head: Language model head
+         - q_proj, k_proj, v_proj, o_proj: Attention layers
+         - gate_proj, up_proj, down_proj: FFN layers
+     """
+
+     # Default: generate LoRA weights for all supported modules
+     if target_modules is None:
+         # target_modules = ["embed_tokens", "lm_head"]
+         target_modules = ["embed_tokens", "lm_head", "q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
+
+     # Llama-2-7b configuration
+     vocab_size = 32000
+     embedding_dim = 4096
+     hidden_dim = 4096
+     intermediate_size = 11008  # FFN intermediate dimension
+
+     print(f"Creating test LoRA adapter in {output_dir}")
+     print(f"  vocab_size: {vocab_size}")
+     print(f"  embedding_dim: {embedding_dim}")
+     print(f"  hidden_dim: {hidden_dim}")
+     print(f"  intermediate_size: {intermediate_size}")
+     print(f"  lora_rank: {lora_rank}")
+     print(f"  lora_alpha: {lora_alpha}")
+     print(f"  target_modules: {target_modules}")
+     print()
+
+     os.makedirs(output_dir, exist_ok=True)
+
+     # Define weight shapes for each module
+     module_shapes = {
+         # Embedding layer: vocab_size -> embedding_dim
+         "embed_tokens": {
+             "lora_A": (lora_rank, vocab_size),
+             "lora_B": (embedding_dim, lora_rank),
+         },
+         # LM head: hidden_dim -> vocab_size
+         "lm_head": {
+             "lora_A": (lora_rank, hidden_dim),
+             "lora_B": (vocab_size, lora_rank),
+         },
+         # Attention layers: hidden_dim -> hidden_dim
+         "q_proj": {
+             "lora_A": (lora_rank, hidden_dim),
+             "lora_B": (hidden_dim, lora_rank),
+         },
+         "k_proj": {
+             "lora_A": (lora_rank, hidden_dim),
+             "lora_B": (hidden_dim, lora_rank),
+         },
+         "v_proj": {
+             "lora_A": (lora_rank, hidden_dim),
+             "lora_B": (hidden_dim, lora_rank),
+         },
+         "o_proj": {
+             "lora_A": (lora_rank, hidden_dim),
+             "lora_B": (hidden_dim, lora_rank),
+         },
+         # FFN layers
+         "gate_proj": {
+             "lora_A": (lora_rank, hidden_dim),
+             "lora_B": (intermediate_size, lora_rank),
+         },
+         "up_proj": {
+             "lora_A": (lora_rank, hidden_dim),
+             "lora_B": (intermediate_size, lora_rank),
+         },
+         "down_proj": {
+             "lora_A": (lora_rank, intermediate_size),
+             "lora_B": (hidden_dim, lora_rank),
+         },
+     }
+
+     # Create LoRA weights
+     print("Creating LoRA weights with shapes:")
+     lora_weights = {}
+
+     for module in target_modules:
+         if module not in module_shapes:
+             print(f"⚠️ Warning: Unknown module '{module}', skipping...")
+             continue
+
+         shapes = module_shapes[module]
+
+         # Decide weight name prefix based on module type
+         if module == "embed_tokens":
+             prefix = "base_model.model.model.embed_tokens"
+         elif module == "lm_head":
+             prefix = "base_model.model.lm_head"
+         else:
+             # Other layers (attention, FFN) need to be created for each layer
+             # Here we create the first layer as an example
+             prefix = f"base_model.model.model.layers.0.self_attn.{module}" if module in ["q_proj", "k_proj", "v_proj", "o_proj"] else f"base_model.model.model.layers.0.mlp.{module}"
+
+         lora_A_shape = shapes["lora_A"]
+         lora_B_shape = shapes["lora_B"]
+
+         print(f"  {module}.lora_A: {lora_A_shape}")
+         print(f"  {module}.lora_B: {lora_B_shape}")
+
+         if "embed_tokens" in module:
+             lora_weights[f"{prefix}.lora_embedding_A"] = torch.randn(*lora_A_shape) * 0.01
+             lora_weights[f"{prefix}.lora_embedding_B"] = torch.randn(*lora_B_shape) * 0.01
+             # lora_weights[f"{prefix}.lora_embedding_A"] = torch.randn(*lora_A_shape) * 1
+             # lora_weights[f"{prefix}.lora_embedding_B"] = torch.randn(*lora_B_shape) * 1
+         else:
+             lora_weights[f"{prefix}.lora_A.weight"] = torch.randn(*lora_A_shape) * 0.01
+             lora_weights[f"{prefix}.lora_B.weight"] = torch.randn(*lora_B_shape) * 0.01
+             # lora_weights[f"{prefix}.lora_A.weight"] = torch.randn(*lora_A_shape) * 1
+             # lora_weights[f"{prefix}.lora_B.weight"] = torch.randn(*lora_B_shape) * 1
+
+     print(lora_weights)
+
+
+     print()
+
+     # Verify created weight shapes
+     print("Verifying created weight shapes:")
+     for name, weight in lora_weights.items():
+         print(f"  {name}: {weight.shape}")
+     print()
+
+     # Save as safetensors format
+     try:
+         from safetensors.torch import save_file
+         save_file(lora_weights, os.path.join(output_dir, "adapter_model.safetensors"))
+         print(f"✅ Saved adapter_model.safetensors")
+     except ImportError:
+         # If safetensors is not available, use pytorch format
+         torch.save(lora_weights, os.path.join(output_dir, "adapter_model.bin"))
+         print(f"✅ Saved adapter_model.bin (safetensors not available)")
+
+     # Create adapter_config.json
+     adapter_config = {
+         "auto_mapping": None,
+         "base_model_name_or_path": base_model,
+         "bias": "none",
+         "fan_in_fan_out": False,
+         "inference_mode": True,
+         "init_lora_weights": True,
+         "layers_pattern": None,
+         "layers_to_transform": None,
+         "lora_alpha": lora_alpha,
+         "lora_dropout": 0.0,
+         "modules_to_save": None,
+         "peft_type": "LORA",
+         "r": lora_rank,
+         "revision": None,
+         "target_modules": target_modules,
+         "task_type": "CAUSAL_LM"
+     }
+
+     with open(os.path.join(output_dir, "adapter_config.json"), "w") as f:
+         json.dump(adapter_config, f, indent=2)
+     print(f"✅ Saved adapter_config.json")
+
+     # Create added_tokens.json
+     if added_tokens is None:
+         added_tokens = {}
+
+     with open(os.path.join(output_dir, "added_tokens.json"), "w") as f:
+         json.dump(added_tokens, f, indent=2)
+     print(f"✅ Saved added_tokens.json")
+
+
+     # Create config.json (base model config)
+     model_config = {
+         "architectures": ["LlamaForCausalLM"],
+         "model_type": "llama",
+         "vocab_size": vocab_size,
+         "hidden_size": hidden_dim,
+         "intermediate_size": intermediate_size,
+         "num_attention_heads": 32,
+         "num_hidden_layers": 32,
+         "num_key_value_heads": 32,
+         "max_position_embeddings": 4096,
+         "rms_norm_eps": 1e-05,
+         "rope_theta": 10000.0,
+         "torch_dtype": "float16",
+         "transformers_version": "4.36.0"
+     }
+
+     with open(os.path.join(output_dir, "config.json"), "w") as f:
+         json.dump(model_config, f, indent=2)
+     print(f"✅ Saved config.json")
+
+     #################################
+     try:
+         from transformers import AutoTokenizer
+         print(f"Copying tokenizer files from {base_model}...")
+
+         base_tokenizer = AutoTokenizer.from_pretrained(base_model)
+         base_tokenizer.save_pretrained(output_dir)
+         print(f"✅ Saved tokenizer files (tokenizer_config.json, tokenizer.json, etc.)")
+     except Exception as e:
+         print(f"⚠️ Warning: Could not copy tokenizer files: {e}")
+         print(f"  HuggingFace tests with embed_tokens may fail.")
+     # #################################
+
+     # Create README
+     readme = f"""# Test LoRA Adapter
+
+ This is a test LoRA adapter with customizable target modules.
+
+ ## Configuration
+ - Base model: {base_model}
+ - LoRA rank (r): {lora_rank}
+ - LoRA alpha: {lora_alpha}
+ - Target modules: {', '.join(target_modules)}
+
+ ## Weight Shapes
+ """
+
+     for module in target_modules:
+         if module in module_shapes:
+             shapes = module_shapes[module]
+             readme += f"- {module}.lora_A: {shapes['lora_A']}\n"
+             readme += f"- {module}.lora_B: {shapes['lora_B']}\n"
+
+     readme += f"""
+ ## Usage with SGLang
+
+ python hf_sgl_difference.py \\
+ --model-path {base_model} \\
+ --lora-paths {output_dir} \\
+ --attention-backend triton \\
+ --lora-backend triton \\
+ --port 30000 \\
+ --disable-cuda-graph \\
+ --output-dir ./logprob_results
+
+ ## Note
+ This adapter contains randomly initialized weights for testing purposes only.
+ """
+
+     with open(os.path.join(output_dir, "README.md"), "w") as f:
+         f.write(readme)
+     print(f"✅ Saved README.md")
+
+     print(f"\n🎉 Test LoRA adapter created successfully!")
+     print(f"\n📁 Output directory: {output_dir}")
+
+ if __name__ == "__main__":
+     import argparse
+
+     parser = argparse.ArgumentParser(
+         description="Create test LoRA adapter with customizable target modules",
+         formatter_class=argparse.RawDescriptionHelpFormatter,
+         epilog="""
+ Examples:
+ # Default: generate all supported modules
+ python create_test_embedding_layer.py
+
+ # Generate only attention layers
+ python create_test_embedding_layer.py --target-modules q_proj k_proj v_proj o_proj
+
+ # Generate all supported layers
+ python create_test_embedding_layer.py --target-modules embed_tokens lm_head q_proj k_proj v_proj o_proj gate_proj up_proj down_proj
+
+ # Specify custom parameters
+ python create_test_embedding_layer.py \\
+ --output-dir ./my_lora \\
+ --base-model meta-llama/Llama-2-7b-hf \\
+ --lora-rank 16 \\
+ --lora-alpha 32 \\
+ --target-modules q_proj k_proj v_proj
+
+ # Specify added_tokens
+ python create_test_embedding_layer.py --added-tokens '{"<special>": 32000}'
+ """
+     )
+
+     parser.add_argument("--output-dir", type=str, default="./test_embedding_lora",
+                         help="Output directory for the adapter")
+     parser.add_argument("--base-model", type=str, default="meta-llama/Llama-2-7b-hf",
+                         help="Base model name or path")
+     parser.add_argument("--lora-rank", type=int, default=8,
+                         help="LoRA rank (r)")
+     parser.add_argument("--lora-alpha", type=int, default=16,
+                         help="LoRA alpha (scaling factor)")
+     parser.add_argument("--target-modules", type=str, nargs="+",
+                         default=["embed_tokens", "lm_head", "q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
+                         help="Target modules for LoRA. Supported: embed_tokens, lm_head, "
+                              "q_proj, k_proj, v_proj, o_proj, gate_proj, up_proj, down_proj")
+     parser.add_argument("--added-tokens", type=str, default=None,
+                         help="JSON string for added_tokens.json (e.g., '{\"<special>\": 32000}'). "
+                              "Default is empty dict")
+
+     args = parser.parse_args()
+
+     # Parse added_tokens JSON
+     added_tokens_dict = None
+     if args.added_tokens:
+         try:
+             added_tokens_dict = json.loads(args.added_tokens)
+         except json.JSONDecodeError as e:
+             print(f"❌ Error parsing added_tokens JSON: {e}")
+             exit(1)
+
+     create_test_embedding_lora(
+         output_dir=args.output_dir,
+         base_model=args.base_model,
+         lora_rank=args.lora_rank,
+         lora_alpha=args.lora_alpha,
+         target_modules=args.target_modules,
+         added_tokens=added_tokens_dict,
+     )
+
+
+ # # Default: only generate embed_tokens and lm_head
+ # python create_test_embedding_layer.py
+
+ # # Generate only attention layers
+ # python create_test_embedding_layer.py --target-modules q_proj k_proj v_proj o_proj
+
+ # # Generate all layers
+ # python create_test_embedding_layer.py --target-modules embed_tokens lm_head q_proj k_proj v_proj o_proj gate_proj up_proj down_proj
+
+ # # Full customization
+ # python create_test_embedding_layer.py \
+ # --output-dir ./my_custom_lora \
+ # --base-model meta-llama/Llama-2-7b-hf \
+ # --lora-rank 16 \
+ # --lora-alpha 32 \
+ # --target-modules q_proj k_proj v_proj \
+ # --added-tokens '{"<|im_start|>": 32000, "<|im_end|>": 32001}'
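Note that for the attention and FFN projections the script only emits weights for `layers.0` (see the comment in the prefix logic above). If a test adapter covering every transformer layer were needed, the loop could be extended along these lines; a rough sketch that reuses the script's key naming and the Llama-2-7b dimensions, not something the uploaded script does:

```python
# Sketch: emit attention/MLP LoRA weights for every transformer layer, not just layers.0.
# Assumes the same PEFT-style key naming as create_test_embedding_layer.py and
# num_hidden_layers=32, hidden_size=4096, intermediate_size=11008 from config.json.
import torch

num_layers = 32
lora_rank, hidden_dim, intermediate_size = 8, 4096, 11008

attn_modules = ["q_proj", "k_proj", "v_proj", "o_proj"]
mlp_dims = {
    "gate_proj": (intermediate_size, hidden_dim),  # (out_features, in_features)
    "up_proj": (intermediate_size, hidden_dim),
    "down_proj": (hidden_dim, intermediate_size),
}

lora_weights = {}
for layer in range(num_layers):
    for module in attn_modules:
        prefix = f"base_model.model.model.layers.{layer}.self_attn.{module}"
        lora_weights[f"{prefix}.lora_A.weight"] = torch.randn(lora_rank, hidden_dim) * 0.01
        lora_weights[f"{prefix}.lora_B.weight"] = torch.randn(hidden_dim, lora_rank) * 0.01
    for module, (out_dim, in_dim) in mlp_dims.items():
        prefix = f"base_model.model.model.layers.{layer}.mlp.{module}"
        lora_weights[f"{prefix}.lora_A.weight"] = torch.randn(lora_rank, in_dim) * 0.01
        lora_weights[f"{prefix}.lora_B.weight"] = torch.randn(out_dim, lora_rank) * 0.01

print(f"Created {len(lora_weights)} tensors for {num_layers} layers")
```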
special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+ size 499723
tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
+ {
+   "add_bos_token": true,
+   "add_eos_token": false,
+   "add_prefix_space": null,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<s>",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "</s>",
+   "extra_special_tokens": {},
+   "legacy": false,
+   "model_max_length": 1000000000000000019884624838656,
+   "pad_token": null,
+   "padding_side": "right",
+   "sp_model_kwargs": {},
+   "tokenizer_class": "LlamaTokenizer",
+   "unk_token": "<unk>",
+   "use_default_system_prompt": false
+ }
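The tokenizer files above are copied from the base model by the script. A quick consistency check, assuming `transformers` and `sentencepiece` are installed and the repo is checked out locally:

```python
# Sketch: load the bundled tokenizer and confirm the special tokens declared above.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained(".")  # path to the local checkout of this repo
print(tok.bos_token, tok.eos_token, tok.unk_token)  # expected: <s> </s> <unk>
print(len(tok))                                     # expected: 32000 (added_tokens.json is empty)
```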