yushengsu.thu@gmail.com committed
Commit f92290e · 1 parent: a793de9

Upload LoRA adapter

README.md CHANGED
@@ -1,3 +1,35 @@
- ---
- license: apache-2.0
- ---
+ # Test LoRA Adapter
+
+ This is a test LoRA adapter (randomly initialized, not fine-tuned) with customizable target modules, generated by:
+ ```bash
+ python create_test_embedding_layer.py
+ ```
+
+ ## Configuration
+ - Base model: meta-llama/Llama-2-7b-hf
+ - LoRA rank (r): 8
+ - LoRA alpha: 16
+ - Target modules: embed_tokens, lm_head, q_proj, k_proj, v_proj, o_proj, gate_proj, up_proj, down_proj
+
+ ## Weight Shapes
+ - embed_tokens.lora_A: (8, 32000)
+ - embed_tokens.lora_B: (4096, 8)
+ - lm_head.lora_A: (8, 4096)
+ - lm_head.lora_B: (32000, 8)
+ - q_proj.lora_A: (8, 4096)
+ - q_proj.lora_B: (4096, 8)
+ - k_proj.lora_A: (8, 4096)
+ - k_proj.lora_B: (4096, 8)
+ - v_proj.lora_A: (8, 4096)
+ - v_proj.lora_B: (4096, 8)
+ - o_proj.lora_A: (8, 4096)
+ - o_proj.lora_B: (4096, 8)
+ - gate_proj.lora_A: (8, 4096)
+ - gate_proj.lora_B: (11008, 8)
+ - up_proj.lora_A: (8, 4096)
+ - up_proj.lora_B: (11008, 8)
+ - down_proj.lora_A: (8, 11008)
+ - down_proj.lora_B: (4096, 8)
+
+ ## Usage with SGLang
+ This adapter contains randomly initialized weights for testing purposes only.
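A quick way to exercise the adapter end to end is to attach it to the base model with PEFT. The sketch below is illustrative only: it assumes `transformers` and `peft` are installed, that the adapter sits in the script's default output directory `./test_embedding_lora`, and that the installed `peft` version accepts the hand-rolled `lora_embedding_A`/`lora_embedding_B` key names used for the embedding and head modules.

```python
# Minimal sketch: load the base model and attach the test adapter with PEFT.
# Assumes peft/transformers are installed; adjust adapter_dir to your local path.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

adapter_dir = "./test_embedding_lora"  # default output dir of create_test_embedding_layer.py
base = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf", torch_dtype="float16")
model = PeftModel.from_pretrained(base, adapter_dir)
tokenizer = AutoTokenizer.from_pretrained(adapter_dir)

# The LoRA weights are random noise, so the output is only useful for exercising the code path.
inputs = tokenizer("Hello", return_tensors="pt")
print(tokenizer.decode(model.generate(**inputs, max_new_tokens=8)[0]))
```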
adapter_config.json ADDED
@@ -0,0 +1,28 @@
+ {
+   "auto_mapping": null,
+   "base_model_name_or_path": "meta-llama/Llama-2-7b-hf",
+   "bias": "none",
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "lora_alpha": 16,
+   "lora_dropout": 0.0,
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "r": 8,
+   "revision": null,
+   "target_modules": [
+     "embed_tokens",
+     "lm_head",
+     "q_proj",
+     "k_proj",
+     "v_proj",
+     "o_proj",
+     "gate_proj",
+     "up_proj",
+     "down_proj"
+   ],
+   "task_type": "CAUSAL_LM"
+ }
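This is the standard PEFT LoRA config layout, so it can be inspected with `peft` directly; a small sketch, with the path assumed to be the generator's default output directory:

```python
# Sketch: read the adapter configuration back with PEFT and check the key settings.
from peft import PeftConfig

cfg = PeftConfig.from_pretrained("./test_embedding_lora")
print(cfg.peft_type, cfg.r, cfg.lora_alpha)  # LORA, 8, 16
print(sorted(cfg.target_modules))            # the nine modules listed above
```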
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1763a4289005798ebe6687a13afd5cfd12a64422e66e35c127e15a812ba2ec6a
+ size 4810984
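The actual tensors live behind this Git LFS pointer. Once the file is pulled, the shapes can be checked against the README's Weight Shapes table; a sketch assuming the `safetensors` package and a local checkout:

```python
# Sketch: list every tensor stored in adapter_model.safetensors with its shape.
from safetensors.torch import load_file

weights = load_file("adapter_model.safetensors")
for name, tensor in sorted(weights.items()):
    print(f"{name}: {tuple(tensor.shape)}")  # e.g. ...embed_tokens.lora_embedding_A: (8, 32000)
```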
added_tokens.json ADDED
@@ -0,0 +1 @@
+ {}
config.json ADDED
@@ -0,0 +1,17 @@
+ {
+   "architectures": [
+     "LlamaForCausalLM"
+   ],
+   "model_type": "llama",
+   "vocab_size": 32000,
+   "hidden_size": 4096,
+   "intermediate_size": 11008,
+   "num_attention_heads": 32,
+   "num_hidden_layers": 32,
+   "num_key_value_heads": 32,
+   "max_position_embeddings": 4096,
+   "rms_norm_eps": 1e-05,
+   "rope_theta": 10000.0,
+   "torch_dtype": "float16",
+   "transformers_version": "4.36.0"
+ }
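The Weight Shapes table in the README follows directly from these dimensions together with `r = 8` from adapter_config.json: `lora_A` is `(r, in_features)` and `lora_B` is `(out_features, r)`. A short sketch of that arithmetic, with the module-to-dimension mapping assumed from the files above:

```python
# Sketch: derive the expected LoRA shapes from the base config and the adapter's rank.
import json

with open("config.json") as f:
    cfg = json.load(f)
r = 8  # "r" in adapter_config.json
hidden, vocab, inter = cfg["hidden_size"], cfg["vocab_size"], cfg["intermediate_size"]

# (in_features, out_features) per target module
dims = {
    "embed_tokens": (vocab, hidden), "lm_head": (hidden, vocab),
    "q_proj": (hidden, hidden), "k_proj": (hidden, hidden),
    "v_proj": (hidden, hidden), "o_proj": (hidden, hidden),
    "gate_proj": (hidden, inter), "up_proj": (hidden, inter),
    "down_proj": (inter, hidden),
}
for name, (d_in, d_out) in dims.items():
    print(f"{name}: lora_A {(r, d_in)}, lora_B {(d_out, r)}")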
create_test_embedding_layer.py ADDED
@@ -0,0 +1,355 @@
+ #!/usr/bin/env python3
+ """
+ create_test_embedding_layer.py
+ Create a test LoRA adapter containing specified modules
+ Based on the dimension specifications from SGLang's layers.py
+ """
+ import json
+ import os
+ import torch
+ from pathlib import Path
+
+ def create_test_embedding_lora(
+     output_dir="./test_embedding_lora",
+     base_model="meta-llama/Llama-2-7b-hf",
+     lora_rank=8,
+     lora_alpha=16,
+     target_modules=None,
+     added_tokens=None,
+ ):
+     """
+     Create a test LoRA adapter containing specified modules
+
+     Args:
+         output_dir: Output directory
+         base_model: Base model name
+         lora_rank: LoRA rank
+         lora_alpha: LoRA alpha
+         target_modules: List of target modules to generate LoRA for; defaults to all supported modules
+         added_tokens: Content of added_tokens.json (dictionary), defaults to empty
+
+     Supported target_modules:
+         - embed_tokens: Word embedding layer
+         - lm_head: Language model head
+         - q_proj, k_proj, v_proj, o_proj: Attention layers
+         - gate_proj, up_proj, down_proj: FFN layers
+     """
+
+     # Default: generate LoRA weights for all supported modules
+     if target_modules is None:
+         # target_modules = ["embed_tokens", "lm_head"]
+         target_modules = ["embed_tokens", "lm_head", "q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
+
+     # Llama-2-7b configuration
+     vocab_size = 32000
+     embedding_dim = 4096
+     hidden_dim = 4096
+     intermediate_size = 11008  # FFN intermediate dimension
+
+     print(f"Creating test LoRA adapter in {output_dir}")
+     print(f"  vocab_size: {vocab_size}")
+     print(f"  embedding_dim: {embedding_dim}")
+     print(f"  hidden_dim: {hidden_dim}")
+     print(f"  intermediate_size: {intermediate_size}")
+     print(f"  lora_rank: {lora_rank}")
+     print(f"  lora_alpha: {lora_alpha}")
+     print(f"  target_modules: {target_modules}")
+     print()
+
+     os.makedirs(output_dir, exist_ok=True)
+
+     # Define weight shapes for each module
+     module_shapes = {
+         # Embedding layer: vocab_size -> embedding_dim
+         "embed_tokens": {
+             "lora_A": (lora_rank, vocab_size),
+             "lora_B": (embedding_dim, lora_rank),
+         },
+         # LM head: hidden_dim -> vocab_size
+         "lm_head": {
+             "lora_A": (lora_rank, hidden_dim),
+             "lora_B": (vocab_size, lora_rank),
+         },
+         # Attention layers: hidden_dim -> hidden_dim
+         "q_proj": {
+             "lora_A": (lora_rank, hidden_dim),
+             "lora_B": (hidden_dim, lora_rank),
+         },
+         "k_proj": {
+             "lora_A": (lora_rank, hidden_dim),
+             "lora_B": (hidden_dim, lora_rank),
+         },
+         "v_proj": {
+             "lora_A": (lora_rank, hidden_dim),
+             "lora_B": (hidden_dim, lora_rank),
+         },
+         "o_proj": {
+             "lora_A": (lora_rank, hidden_dim),
+             "lora_B": (hidden_dim, lora_rank),
+         },
+         # FFN layers
+         "gate_proj": {
+             "lora_A": (lora_rank, hidden_dim),
+             "lora_B": (intermediate_size, lora_rank),
+         },
+         "up_proj": {
+             "lora_A": (lora_rank, hidden_dim),
+             "lora_B": (intermediate_size, lora_rank),
+         },
+         "down_proj": {
+             "lora_A": (lora_rank, intermediate_size),
+             "lora_B": (hidden_dim, lora_rank),
+         },
+     }
+
+     # Create LoRA weights
+     print("Creating LoRA weights with shapes:")
+     lora_weights = {}
+
+     for module in target_modules:
+         if module not in module_shapes:
+             print(f"⚠️ Warning: Unknown module '{module}', skipping...")
+             continue
+
+         shapes = module_shapes[module]
+
+         # Decide weight name prefix based on module type
+         if module == "embed_tokens":
+             prefix = "base_model.model.model.embed_tokens"
+         elif module == "lm_head":
+             prefix = "base_model.model.lm_head"
+         else:
+             # Other layers (attention, FFN) need to be created for each layer
+             # Here we create the first layer as an example
+             prefix = f"base_model.model.model.layers.0.self_attn.{module}" if module in ["q_proj", "k_proj", "v_proj", "o_proj"] else f"base_model.model.model.layers.0.mlp.{module}"
+
+         lora_A_shape = shapes["lora_A"]
+         lora_B_shape = shapes["lora_B"]
+
+         print(f"  {module}.lora_A: {lora_A_shape}")
+         print(f"  {module}.lora_B: {lora_B_shape}")
+
+         if "embed_tokens" in module:
+             lora_weights[f"{prefix}.lora_embedding_A"] = torch.randn(*lora_A_shape) * 0.01
+             lora_weights[f"{prefix}.lora_embedding_B"] = torch.randn(*lora_B_shape) * 0.01
+             # lora_weights[f"{prefix}.lora_embedding_A"] = torch.randn(*lora_A_shape) * 1
+             # lora_weights[f"{prefix}.lora_embedding_B"] = torch.randn(*lora_B_shape) * 1
+         else:
+             lora_weights[f"{prefix}.lora_A.weight"] = torch.randn(*lora_A_shape) * 0.01
+             lora_weights[f"{prefix}.lora_B.weight"] = torch.randn(*lora_B_shape) * 0.01
+             # lora_weights[f"{prefix}.lora_A.weight"] = torch.randn(*lora_A_shape) * 1
+             # lora_weights[f"{prefix}.lora_B.weight"] = torch.randn(*lora_B_shape) * 1
+
+     print(lora_weights)
+
+
+     print()
+
+     # Verify created weight shapes
+     print("Verifying created weight shapes:")
+     for name, weight in lora_weights.items():
+         print(f"  {name}: {weight.shape}")
+     print()
+
+     # Save as safetensors format
+     try:
+         from safetensors.torch import save_file
+         save_file(lora_weights, os.path.join(output_dir, "adapter_model.safetensors"))
+         print(f"✅ Saved adapter_model.safetensors")
+     except ImportError:
+         # If safetensors is not available, use pytorch format
+         torch.save(lora_weights, os.path.join(output_dir, "adapter_model.bin"))
+         print(f"✅ Saved adapter_model.bin (safetensors not available)")
+
+     # Create adapter_config.json
+     adapter_config = {
+         "auto_mapping": None,
+         "base_model_name_or_path": base_model,
+         "bias": "none",
+         "fan_in_fan_out": False,
+         "inference_mode": True,
+         "init_lora_weights": True,
+         "layers_pattern": None,
+         "layers_to_transform": None,
+         "lora_alpha": lora_alpha,
+         "lora_dropout": 0.0,
+         "modules_to_save": None,
+         "peft_type": "LORA",
+         "r": lora_rank,
+         "revision": None,
+         "target_modules": target_modules,
+         "task_type": "CAUSAL_LM"
+     }
+
+     with open(os.path.join(output_dir, "adapter_config.json"), "w") as f:
+         json.dump(adapter_config, f, indent=2)
+     print(f"✅ Saved adapter_config.json")
+
+     # Create added_tokens.json
+     if added_tokens is None:
+         added_tokens = {}
+
+     with open(os.path.join(output_dir, "added_tokens.json"), "w") as f:
+         json.dump(added_tokens, f, indent=2)
+     print(f"✅ Saved added_tokens.json")
+
+
+     # Create config.json (base model config)
+     model_config = {
+         "architectures": ["LlamaForCausalLM"],
+         "model_type": "llama",
+         "vocab_size": vocab_size,
+         "hidden_size": hidden_dim,
+         "intermediate_size": intermediate_size,
+         "num_attention_heads": 32,
+         "num_hidden_layers": 32,
+         "num_key_value_heads": 32,
+         "max_position_embeddings": 4096,
+         "rms_norm_eps": 1e-05,
+         "rope_theta": 10000.0,
+         "torch_dtype": "float16",
+         "transformers_version": "4.36.0"
+     }
+
+     with open(os.path.join(output_dir, "config.json"), "w") as f:
+         json.dump(model_config, f, indent=2)
+     print(f"✅ Saved config.json")
+
+     #################################
+     try:
+         from transformers import AutoTokenizer
+         print(f"Copying tokenizer files from {base_model}...")
+
+         base_tokenizer = AutoTokenizer.from_pretrained(base_model)
+         base_tokenizer.save_pretrained(output_dir)
+         print(f"✅ Saved tokenizer files (tokenizer_config.json, tokenizer.json, etc.)")
+     except Exception as e:
+         print(f"⚠️ Warning: Could not copy tokenizer files: {e}")
+         print(f"  HuggingFace tests with embed_tokens may fail.")
+     # #################################
+
+     # Create README
+     readme = f"""# Test LoRA Adapter
+
+ This is a test LoRA adapter with customizable target modules.
+
+ ## Configuration
+ - Base model: {base_model}
+ - LoRA rank (r): {lora_rank}
+ - LoRA alpha: {lora_alpha}
+ - Target modules: {', '.join(target_modules)}
+
+ ## Weight Shapes
+ """
+
+     for module in target_modules:
+         if module in module_shapes:
+             shapes = module_shapes[module]
+             readme += f"- {module}.lora_A: {shapes['lora_A']}\n"
+             readme += f"- {module}.lora_B: {shapes['lora_B']}\n"
+
+     readme += f"""
+ ## Usage with SGLang
+
+ python hf_sgl_difference.py \\
+ --model-path {base_model} \\
+ --lora-paths {output_dir} \\
+ --attention-backend triton \\
+ --lora-backend triton \\
+ --port 30000 \\
+ --disable-cuda-graph \\
+ --output-dir ./logprob_results
+
+ ## Note
+ This adapter contains randomly initialized weights for testing purposes only.
+ """
+
+     with open(os.path.join(output_dir, "README.md"), "w") as f:
+         f.write(readme)
+     print(f"✅ Saved README.md")
+
+     print(f"\n🎉 Test LoRA adapter created successfully!")
+     print(f"\n📁 Output directory: {output_dir}")
+
+ if __name__ == "__main__":
+     import argparse
+
+     parser = argparse.ArgumentParser(
+         description="Create test LoRA adapter with customizable target modules",
+         formatter_class=argparse.RawDescriptionHelpFormatter,
+         epilog="""
+ Examples:
+ # Default: generate all supported modules
+ python create_test_embedding_layer.py
+
+ # Generate only attention layers
+ python create_test_embedding_layer.py --target-modules q_proj k_proj v_proj o_proj
+
+ # Generate all supported layers
+ python create_test_embedding_layer.py --target-modules embed_tokens lm_head q_proj k_proj v_proj o_proj gate_proj up_proj down_proj
+
+ # Specify custom parameters
+ python create_test_embedding_layer.py \\
+ --output-dir ./my_lora \\
+ --base-model meta-llama/Llama-2-7b-hf \\
+ --lora-rank 16 \\
+ --lora-alpha 32 \\
+ --target-modules q_proj k_proj v_proj
+
+ # Specify added_tokens
+ python create_test_embedding_layer.py --added-tokens '{"<special>": 32000}'
+ """
+     )
+
+     parser.add_argument("--output-dir", type=str, default="./test_embedding_lora",
+                         help="Output directory for the adapter")
+     parser.add_argument("--base-model", type=str, default="meta-llama/Llama-2-7b-hf",
+                         help="Base model name or path")
+     parser.add_argument("--lora-rank", type=int, default=8,
+                         help="LoRA rank (r)")
+     parser.add_argument("--lora-alpha", type=int, default=16,
+                         help="LoRA alpha (scaling factor)")
+     parser.add_argument("--target-modules", type=str, nargs="+",
+                         default=["embed_tokens", "lm_head", "q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
+                         help="Target modules for LoRA. Supported: embed_tokens, lm_head, "
+                              "q_proj, k_proj, v_proj, o_proj, gate_proj, up_proj, down_proj")
+     parser.add_argument("--added-tokens", type=str, default=None,
+                         help="JSON string for added_tokens.json (e.g., '{\"<special>\": 32000}'). "
+                              "Default is empty dict")
+
+     args = parser.parse_args()
+
+     # Parse added_tokens JSON
+     added_tokens_dict = None
+     if args.added_tokens:
+         try:
+             added_tokens_dict = json.loads(args.added_tokens)
+         except json.JSONDecodeError as e:
+             print(f"❌ Error parsing added_tokens JSON: {e}")
+             exit(1)
+
+     create_test_embedding_lora(
+         output_dir=args.output_dir,
+         base_model=args.base_model,
+         lora_rank=args.lora_rank,
+         lora_alpha=args.lora_alpha,
+         target_modules=args.target_modules,
+         added_tokens=added_tokens_dict,
+     )
+
+
+ # # Default: only generate embed_tokens and lm_head
+ # python create_test_embedding_layer.py
+
+ # # Generate only attention layers
+ # python create_test_embedding_layer.py --target-modules q_proj k_proj v_proj o_proj
+
+ # # Generate all layers
+ # python create_test_embedding_layer.py --target-modules embed_tokens lm_head q_proj k_proj v_proj o_proj gate_proj up_proj down_proj
+
+ # # Full customization
+ # python create_test_embedding_layer.py \
+ # --output-dir ./my_custom_lora \
+ # --base-model meta-llama/Llama-2-7b-hf \
+ # --lora-rank 16 \
+ # --lora-alpha 32 \
+ # --target-modules q_proj k_proj v_proj \
+ # --added-tokens '{"<|im_start|>": 32000, "<|im_end|>": 32001}'
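Note that for the attention and FFN projections the script only emits weights for `layers.0` (see the comment in the prefix logic above). If a test adapter covering every transformer layer were needed, the loop could be extended along these lines; a rough sketch that reuses the script's key naming and the Llama-2-7b dimensions, not something the uploaded script does:

```python
# Sketch: emit attention/MLP LoRA weights for every transformer layer, not just layers.0.
# Assumes the same PEFT-style key naming as create_test_embedding_layer.py and
# num_hidden_layers=32, hidden_size=4096, intermediate_size=11008 from config.json.
import torch

num_layers = 32
lora_rank, hidden_dim, intermediate_size = 8, 4096, 11008

attn_modules = ["q_proj", "k_proj", "v_proj", "o_proj"]
mlp_dims = {
    "gate_proj": (intermediate_size, hidden_dim),  # (out_features, in_features)
    "up_proj": (intermediate_size, hidden_dim),
    "down_proj": (hidden_dim, intermediate_size),
}

lora_weights = {}
for layer in range(num_layers):
    for module in attn_modules:
        prefix = f"base_model.model.model.layers.{layer}.self_attn.{module}"
        lora_weights[f"{prefix}.lora_A.weight"] = torch.randn(lora_rank, hidden_dim) * 0.01
        lora_weights[f"{prefix}.lora_B.weight"] = torch.randn(hidden_dim, lora_rank) * 0.01
    for module, (out_dim, in_dim) in mlp_dims.items():
        prefix = f"base_model.model.model.layers.{layer}.mlp.{module}"
        lora_weights[f"{prefix}.lora_A.weight"] = torch.randn(lora_rank, in_dim) * 0.01
        lora_weights[f"{prefix}.lora_B.weight"] = torch.randn(out_dim, lora_rank) * 0.01

print(f"Created {len(lora_weights)} tensors for {num_layers} layers")
```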
special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+ size 499723
tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
+ {
+   "add_bos_token": true,
+   "add_eos_token": false,
+   "add_prefix_space": null,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<s>",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "</s>",
+   "extra_special_tokens": {},
+   "legacy": false,
+   "model_max_length": 1000000000000000019884624838656,
+   "pad_token": null,
+   "padding_side": "right",
+   "sp_model_kwargs": {},
+   "tokenizer_class": "LlamaTokenizer",
+   "unk_token": "<unk>",
+   "use_default_system_prompt": false
+ }
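The tokenizer files above are copied from the base model by the script. A quick consistency check, assuming `transformers` and `sentencepiece` are installed and the repo is checked out locally:

```python
# Sketch: load the bundled tokenizer and confirm the special tokens declared above.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained(".")  # path to the local checkout of this repo
print(tok.bos_token, tok.eos_token, tok.unk_token)  # expected: <s> </s> <unk>
print(len(tok))                                     # expected: 32000 (added_tokens.json is empty)
```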