| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| """GGUF Conversion for WordPress Coder Model""" |
|
|
| import os |
| import torch |
| from transformers import AutoModelForCausalLM, AutoTokenizer |
| from peft import PeftModel |
| from huggingface_hub import HfApi |
| import subprocess |
|
|
# Banner. NOTE(review): emoji below were mojibake in the original file
# ("π", "π¦"); restored from the UTF-8 byte pattern — confirm glyph choice.
print("🚀 GGUF Conversion Script")
print("=" * 60)

# Configuration — every value can be overridden via environment variables.
ADAPTER_MODEL = os.environ.get("ADAPTER_MODEL", "mattPearce/qwen-wordpress-coder")   # LoRA adapter repo on the Hub
BASE_MODEL = os.environ.get("BASE_MODEL", "Qwen/Qwen2.5-Coder-14B-Instruct")         # base model the adapter was trained on
OUTPUT_REPO = os.environ.get("OUTPUT_REPO", "mattPearce/qwen-wordpress-coder-gguf")  # destination repo for GGUF files
username = os.environ.get("HF_USERNAME", "mattPearce")  # NOTE(review): currently unused — kept for backward compatibility

print(f"\n📦 Configuration:")
print(f"  Base model: {BASE_MODEL}")
print(f"  Adapter model: {ADAPTER_MODEL}")
print(f"  Output repo: {OUTPUT_REPO}")
|
|
| |
# --- Step 1: load base model + LoRA adapter and merge them ------------------
# BUGFIX(review): the "✅" emoji in the status prints was mojibake-corrupted
# into "β" plus a control character that split the string literals across
# lines (a syntax error); literals repaired here.
print("\n🔧 Step 1: Loading base model and LoRA adapter...")
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    dtype=torch.float16,  # NOTE(review): `dtype=` requires transformers >= 4.56; older releases use `torch_dtype=` — confirm pinned version
    device_map="auto",  # shard across available devices
    trust_remote_code=True,
)
print(" ✅ Base model loaded")

# Attach the fine-tuned LoRA adapter on top of the base weights.
model = PeftModel.from_pretrained(base_model, ADAPTER_MODEL)
print(" ✅ Adapter loaded")

print("  Merging adapter with base model...")
merged_model = model.merge_and_unload()  # bake LoRA deltas into the base weights
print(" ✅ Models merged!")

# Tokenizer comes from the adapter repo so any tokens added during
# fine-tuning are preserved.
tokenizer = AutoTokenizer.from_pretrained(ADAPTER_MODEL, trust_remote_code=True)
print(" ✅ Tokenizer loaded")
|
|
| |
# --- Step 2: persist the merged model so llama.cpp can convert it -----------
# BUGFIX(review): repaired the mojibake-split "✅" string literal.
print("\n💾 Step 2: Saving merged model...")
merged_dir = "/tmp/merged_model"
merged_model.save_pretrained(merged_dir, safe_serialization=True)  # safetensors format, required by the converter
tokenizer.save_pretrained(merged_dir)
print(f" ✅ Merged model saved to {merged_dir}")
|
|
| |
# --- Step 3: fetch and prepare llama.cpp ------------------------------------
# BUGFIX(review): repaired mojibake-split "✅" string literals.
print("\n📥 Step 3: Setting up llama.cpp...")
print("  Installing build tools...")
# Assumes a Debian/Ubuntu container with root privileges (apt-get, /tmp writable).
subprocess.run(["apt-get", "update", "-qq"], check=True, capture_output=True)
subprocess.run(["apt-get", "install", "-y", "-qq", "build-essential", "cmake"], check=True, capture_output=True)
print(" ✅ Build tools installed")

subprocess.run(["git", "clone", "https://github.com/ggerganov/llama.cpp.git", "/tmp/llama.cpp"], check=True, capture_output=True)
# Python deps for the HF->GGUF conversion script.
subprocess.run(["pip", "install", "-r", "/tmp/llama.cpp/requirements.txt"], check=True, capture_output=True)
subprocess.run(["pip", "install", "sentencepiece", "protobuf"], check=True, capture_output=True)
print(" ✅ llama.cpp setup complete")
|
|
| |
# --- Step 4: convert the merged HF checkpoint to an FP16 GGUF ---------------
# BUGFIX(review): repaired the mojibake-split "✅" string literal; dropped
# the unused `result =` binding.
print("\n🔄 Step 4: Converting to GGUF format (FP16)...")
gguf_output_dir = "/tmp/gguf_output"
os.makedirs(gguf_output_dir, exist_ok=True)

model_name = "qwen-wordpress-coder"
gguf_file = f"{gguf_output_dir}/{model_name}-f16.gguf"

# convert_hf_to_gguf.py reads the safetensors checkpoint and emits one GGUF file.
subprocess.run(
    ["python", "/tmp/llama.cpp/convert_hf_to_gguf.py", merged_dir, "--outfile", gguf_file, "--outtype", "f16"],
    check=True, capture_output=True, text=True
)
print(f" ✅ FP16 GGUF created: {gguf_file}")
|
|
| |
# --- Step 5: quantize the FP16 GGUF into smaller formats --------------------
# BUGFIX(review): repaired mojibake-split "✅" literals. The step emoji was
# fully garbled ("βοΈ") — ⚙️ is a best guess; confirm original glyph.
print("\n⚙️ Step 5: Creating quantized versions...")
os.makedirs("/tmp/llama.cpp/build", exist_ok=True)

# Build only the llama-quantize tool; CUDA is not needed for quantization.
subprocess.run(["cmake", "-B", "/tmp/llama.cpp/build", "-S", "/tmp/llama.cpp", "-DGGML_CUDA=OFF"], check=True, capture_output=True, text=True)
subprocess.run(["cmake", "--build", "/tmp/llama.cpp/build", "--target", "llama-quantize", "-j", "4"], check=True, capture_output=True, text=True)
print(" ✅ Quantize tool built")

quantize_bin = "/tmp/llama.cpp/build/bin/llama-quantize"

# (llama.cpp quant type, human-readable description) pairs to produce.
quant_formats = [
    ("Q4_K_M", "4-bit recommended"),
    ("Q5_K_M", "5-bit higher quality"),
    ("Q8_0", "8-bit very high quality"),
]

quantized_files = []  # (path, quant type) pairs consumed by the upload step
for quant_type, description in quant_formats:
    print(f"  Creating {quant_type} ({description})...")
    quant_file = f"{gguf_output_dir}/{model_name}-{quant_type.lower()}.gguf"
    subprocess.run([quantize_bin, gguf_file, quant_file, quant_type], check=True, capture_output=True)
    quantized_files.append((quant_file, quant_type))
    size_mb = os.path.getsize(quant_file) / (1024 * 1024)
    print(f" ✅ {quant_type}: {size_mb:.1f} MB")
|
|
| |
# --- Step 6: upload all GGUF files to the Hugging Face Hub ------------------
# BUGFIX(review): repaired mojibake-split "✅" literals. The step emoji was
# fully garbled ("βοΈ") — ☁️ is a best guess; confirm original glyph.
print("\n☁️ Step 6: Uploading to Hugging Face Hub...")
api = HfApi()  # authenticates via HF_TOKEN / cached credentials

api.create_repo(repo_id=OUTPUT_REPO, repo_type="model", exist_ok=True)
print(" ✅ Repository created")

print("  Uploading FP16 GGUF...")
api.upload_file(path_or_fileobj=gguf_file, path_in_repo=f"{model_name}-f16.gguf", repo_id=OUTPUT_REPO)
print(" ✅ FP16 uploaded")

# Upload each quantized variant produced in Step 5.
for quant_file, quant_type in quantized_files:
    print(f"  Uploading {quant_type}...")
    api.upload_file(path_or_fileobj=quant_file, path_in_repo=f"{model_name}-{quant_type.lower()}.gguf", repo_id=OUTPUT_REPO)
    print(f" ✅ {quant_type} uploaded")
|
|
| |
# --- Model card: generated README with YAML front matter for the Hub --------
# BUGFIX(review): repaired the mojibake-split "✅" literal in the final print.
# The README body itself is unchanged.
readme_content = f"""---
base_model: {BASE_MODEL}
tags:
- gguf
- llama.cpp
- quantized
- wordpress
- qwen
---

# Qwen WordPress Coder - GGUF

GGUF conversion of [{ADAPTER_MODEL}](https://huggingface.co/{ADAPTER_MODEL}), a fine-tuned {BASE_MODEL} for generating WordPress plugins and Gutenberg blocks.

## Model Details

- **Base Model:** Qwen2.5-Coder-14B-Instruct
- **Fine-tuned On:** 419 WordPress plugin/block examples from Automattic repos
- **Training:** Supervised Fine-Tuning with LoRA
- **Format:** GGUF (for llama.cpp, Ollama, LM Studio)

## Available Quantizations

| File | Quant | Size | Description |
|------|-------|------|-------------|
| qwen-wordpress-coder-f16.gguf | F16 | ~28GB | Full precision |
| qwen-wordpress-coder-q8_0.gguf | Q8_0 | ~15GB | 8-bit, very high quality |
| qwen-wordpress-coder-q5_k_m.gguf | Q5_K_M | ~10GB | 5-bit, good quality |
| qwen-wordpress-coder-q4_k_m.gguf | Q4_K_M | ~8GB | 4-bit, recommended |

## Usage

### With LM Studio

1. Download `qwen-wordpress-coder-q4_k_m.gguf`
2. Import into LM Studio
3. Prompt: "Create a Gutenberg block for..."

### With Ollama

```bash
# Create Modelfile
cat > Modelfile << 'EOF'
FROM ./qwen-wordpress-coder-q4_k_m.gguf

SYSTEM You are an expert WordPress developer specializing in creating high-quality plugins and Gutenberg blocks. You write clean, well-documented code following WordPress coding standards.
EOF

# Create and run
ollama create wordpress-coder -f Modelfile
ollama run wordpress-coder "Create a block for displaying testimonials"
```

### With llama.cpp

```bash
./llama-cli -m qwen-wordpress-coder-q4_k_m.gguf -ngl 32 -p "Create a WordPress plugin for..."
```

## Example Prompts

- "Create a Gutenberg block for displaying product reviews with star ratings"
- "Build a WordPress plugin for custom post type management"
- "Generate a block that displays recent posts in a grid layout"
"""

# upload_file accepts raw bytes directly, so no temp file is needed.
api.upload_file(path_or_fileobj=readme_content.encode(), path_in_repo="README.md", repo_id=OUTPUT_REPO)
print(" ✅ README uploaded")
|
|
# --- Final summary ----------------------------------------------------------
# BUGFIX(review): repaired the mojibake-split "✅" literal and restored the
# garbled 📦/📥 emoji (best guess from the byte pattern).
print("\n" + "=" * 60)
print("✅ GGUF Conversion Complete!")
print(f"📦 Repository: https://huggingface.co/{OUTPUT_REPO}")
print(f"\n📥 Recommended download:")
print(f"  huggingface-cli download {OUTPUT_REPO} qwen-wordpress-coder-q4_k_m.gguf")
print("=" * 60)
|
|