#!/bin/bash
#
# Recreates the modified Qwen model weights by patching model.safetensors
# in the current directory. Run it from the base model directory.

# Abort as soon as any command fails.
set -e

# The patch step reads model.safetensors from the working directory, so
# refuse to continue when it is missing. Diagnostics go to stderr.
if [ ! -f "model.safetensors" ]; then
    echo "Error: model.safetensors not found in current directory" >&2
    echo "Please run this script from the base model directory containing model.safetensors" >&2
    exit 1
fi

# Choose the interpreter: a virtual environment in the parent directory is
# preferred, then one in the current directory, then the system python3.
# The activate script itself is tested (not just the venv directory) so an
# incomplete environment falls through to the next option instead of
# aborting the script under `set -e` when `source` fails.
if [ -f "../venv/bin/activate" ]; then
    echo "Using parent directory virtual environment..."
    source ../venv/bin/activate
    PYTHON_CMD="python"
elif [ -f "venv/bin/activate" ]; then
    echo "Using local virtual environment..."
    source venv/bin/activate
    PYTHON_CMD="python"
else
    echo "Using system python..."
    PYTHON_CMD="python3"
fi

echo "Creating patch application script..."

# Write the helper Python script next to the model. The quoted 'EOF'
# delimiter stops the shell from expanding anything inside the body.
cat > apply_patches.py << 'EOF'
"""
Apply Qwen Model Patches
Recreates the modifications found in the modified Qwen model
"""

import torch
import os
import sys
from pathlib import Path
from safetensors import safe_open
from safetensors.torch import save_file
import shutil
from datetime import datetime


def load_all_tensors(model_path):
    """Load all tensors from a safetensors file into a dict keyed by name."""
    with safe_open(model_path, framework="pt") as f:
        return {key: f.get_tensor(key) for key in f.keys()}


def load_metadata(model_path):
    """Return the header metadata of a safetensors file as a dict.

    The source metadata is copied so it can be written back unchanged. A
    ``format`` entry of ``"pt"`` is filled in when the file has none,
    because the tensors are written through the torch backend and some
    loaders (e.g. Hugging Face transformers) inspect that entry.
    """
    with safe_open(model_path, framework="pt") as f:
        metadata = dict(f.metadata() or {})
    metadata.setdefault("format", "pt")
    return metadata


def apply_scale_patch(tensor, factor):
    """Return ``tensor`` multiplied element-wise by ``factor``."""
    return tensor * factor


def apply_clamp_patch(tensor, min_val, max_val):
    """Return ``tensor`` with its values limited to ``[min_val, max_val]``."""
    return torch.clamp(tensor, min=min_val, max=max_val)


def main():
    """Patch ``model.safetensors`` from the current directory.

    Loads every tensor, copies the original file to a timestamped backup,
    applies the scale and clamp patches listed below, and writes the result
    to ``model_patched.safetensors``. The original file is not overwritten.

    Returns:
        0 on success, 1 when ``model.safetensors`` does not exist.
    """
    model_path = "model.safetensors"

    if not os.path.exists(model_path):
        print(f"Error: {model_path} not found in current directory", file=sys.stderr)
        return 1

    print("Loading original model...")
    tensors = load_all_tensors(model_path)
    # Keep the source header so the patched file carries the same metadata.
    metadata = load_metadata(model_path)

    print(f"Loaded {len(tensors)} tensors")

    backup_path = f"model_original_backup_{datetime.now().strftime('%Y%m%d_%H%M%S')}.safetensors"
    print(f"Creating backup: {backup_path}")
    shutil.copy2(model_path, backup_path)

    print("Applying patches...")

    # (tensor name, multiplicative factor)
    scale_patches = [
        ("lm_head.weight", 1.03),
        ("model.embed_tokens.weight", 1.02),
        ("model.layers.0.input_layernorm.weight", 1.05),
        ("model.layers.0.mlp.gate_proj.weight", 1.05),
        ("model.layers.10.mlp.down_proj.weight", 1.02),
        ("model.layers.10.self_attn.q_proj.weight", 1.02),
        ("model.layers.11.mlp.down_proj.weight", 1.02),
        ("model.layers.11.self_attn.q_proj.weight", 1.02),
        ("model.layers.12.mlp.down_proj.weight", 1.02),
        ("model.layers.12.self_attn.q_proj.weight", 1.02),
        ("model.layers.13.mlp.down_proj.weight", 1.02),
        ("model.layers.13.self_attn.q_proj.weight", 1.02),
        ("model.layers.14.mlp.down_proj.weight", 1.02),
        ("model.layers.14.self_attn.q_proj.weight", 1.02),
        ("model.layers.15.mlp.down_proj.weight", 1.02),
        ("model.layers.15.mlp.up_proj.weight", 1.03),
        ("model.layers.15.self_attn.q_proj.weight", 1.02),
        ("model.layers.16.mlp.down_proj.weight", 1.02),
        ("model.layers.16.self_attn.q_proj.weight", 1.02),
        ("model.layers.17.mlp.down_proj.weight", 1.02),
        ("model.layers.17.self_attn.q_proj.weight", 1.02),
        ("model.layers.18.mlp.down_proj.weight", 1.02),
        ("model.layers.18.self_attn.q_proj.weight", 1.02),
        ("model.layers.19.mlp.down_proj.weight", 1.02),
        ("model.layers.19.self_attn.q_proj.weight", 1.02),
        ("model.layers.20.mlp.down_proj.weight", 1.02),
        ("model.layers.20.self_attn.q_proj.weight", 1.02),
        ("model.layers.21.mlp.down_proj.weight", 1.02),
        ("model.layers.21.self_attn.q_proj.weight", 1.02),
        ("model.layers.22.mlp.down_proj.weight", 1.02),
        ("model.layers.22.self_attn.q_proj.weight", 1.02),
        ("model.layers.23.mlp.down_proj.weight", 1.02),
        ("model.layers.23.self_attn.q_proj.weight", 1.02),
        ("model.layers.24.mlp.down_proj.weight", 1.02),
        ("model.layers.24.self_attn.q_proj.weight", 1.02),
        ("model.layers.25.mlp.down_proj.weight", 1.02),
        ("model.layers.25.self_attn.q_proj.weight", 1.02),
        ("model.layers.26.mlp.down_proj.weight", 1.02),
        ("model.layers.26.self_attn.q_proj.weight", 1.02),
        ("model.layers.27.mlp.down_proj.weight", 1.02),
        ("model.layers.27.self_attn.q_proj.weight", 1.02),
    ]

    # (tensor name, lower bound, upper bound)
    clamp_patches = [
        ("model.layers.15.self_attn.k_norm.weight", -0.0032958984375, 20.0),
        ("model.layers.27.input_layernorm.weight", 2.4375, 40.0),
        ("model.layers.27.post_attention_layernorm.weight", -0.0174560546875, 100.0),
    ]

    patches_applied = 0

    # Tensors missing from the checkpoint are reported and skipped rather
    # than treated as fatal.
    for tensor_name, factor in scale_patches:
        if tensor_name in tensors:
            print(f"Scaling {tensor_name} by factor {factor}")
            tensors[tensor_name] = apply_scale_patch(tensors[tensor_name], factor)
            patches_applied += 1
        else:
            print(f"Warning: Tensor {tensor_name} not found, skipping...")

    for tensor_name, min_val, max_val in clamp_patches:
        if tensor_name in tensors:
            print(f"Clamping {tensor_name} to range [{min_val}, {max_val}]")
            tensors[tensor_name] = apply_clamp_patch(tensors[tensor_name], min_val, max_val)
            patches_applied += 1
        else:
            print(f"Warning: Tensor {tensor_name} not found, skipping...")

    output_path = "model_patched.safetensors"
    print(f"Saving modified model to {output_path}...")
    save_file(tensors, output_path, metadata=metadata)

    print(f"Successfully applied {patches_applied} patches!")
    print(f"Original model backed up to: {backup_path}")
    print(f"Modified model saved as: {output_path}")
    print("")
    print("To use the modified model:")
    print(f" mv {output_path} model.safetensors")
    print("")
    print("To restore original:")
    print(f" mv {backup_path} model.safetensors")

    return 0


if __name__ == "__main__":
    sys.exit(main())
EOF

# Remove the generated helper on every exit path: success, failure, or an
# abort triggered by `set -e`.
trap 'rm -f apply_patches.py' EXIT

echo "Running patch application..."

# The command is tested directly by `if`. With `set -e` in effect, running
# it as a bare statement and inspecting `$?` afterwards would end the script
# before the failure branch could ever execute.
# PYTHON_CMD is chosen by the interpreter-selection step earlier in the script.
if $PYTHON_CMD apply_patches.py; then
    echo ""
    echo "✅ Patch application completed successfully!"
    echo ""
    echo "Files in directory:"
    ls -la *.safetensors
    echo ""
    echo "To compare with the reference modified model, run:"
    echo " cd .. && source venv/bin/activate && python safetensors_diff_analyzer.py compare Qwen_0.6B/model_patched.safetensors Qwen_0.6B_modified/model.safetensors"
else
    # In the else branch `$?` still holds the exit status of the tested
    # command; pass it on so callers can detect the failure.
    status=$?
    echo "❌ Patch application failed!" >&2
    exit "$status"
fi