Instructions to use salvepilo/llama-cpp-gemma3-divzero-poc with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- llama-cpp-python
How to use salvepilo/llama-cpp-gemma3-divzero-poc with llama-cpp-python:
# !pip install llama-cpp-python
from llama_cpp import Llama

llm = Llama.from_pretrained(
    repo_id="salvepilo/llama-cpp-gemma3-divzero-poc",
    filename="poc_gemma3_divzero.gguf",
)
output = llm(
    "Once upon a time,",
    max_tokens=512,
    echo=True
)
print(output)
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- llama.cpp
How to use salvepilo/llama-cpp-gemma3-divzero-poc with llama.cpp:
Install (macOS, Linux)
curl -LsSf https://llama.app/install.sh | sh
# Start a local OpenAI-compatible server with a web UI:
llama serve -hf salvepilo/llama-cpp-gemma3-divzero-poc
# Run inference directly in the terminal:
llama cli -hf salvepilo/llama-cpp-gemma3-divzero-poc
Install from WinGet (Windows)
winget install llama.cpp
# Start a local OpenAI-compatible server with a web UI:
llama serve -hf salvepilo/llama-cpp-gemma3-divzero-poc
# Run inference directly in the terminal:
llama cli -hf salvepilo/llama-cpp-gemma3-divzero-poc
Use pre-built binary
# Download pre-built binary from:
# https://github.com/ggerganov/llama.cpp/releases
# Start a local OpenAI-compatible server with a web UI:
./llama-server -hf salvepilo/llama-cpp-gemma3-divzero-poc
# Run inference directly in the terminal:
./llama-cli -hf salvepilo/llama-cpp-gemma3-divzero-poc
Build from source code
git clone https://github.com/ggerganov/llama.cpp.git
cd llama.cpp
cmake -B build
cmake --build build -j --target llama-server llama-cli
# Start a local OpenAI-compatible server with a web UI:
./build/bin/llama-server -hf salvepilo/llama-cpp-gemma3-divzero-poc
# Run inference directly in the terminal:
./build/bin/llama-cli -hf salvepilo/llama-cpp-gemma3-divzero-poc
Use Docker
docker model run hf.co/salvepilo/llama-cpp-gemma3-divzero-poc
- LM Studio
- Jan
- Ollama
How to use salvepilo/llama-cpp-gemma3-divzero-poc with Ollama:
ollama run hf.co/salvepilo/llama-cpp-gemma3-divzero-poc
- Unsloth Studio
How to use salvepilo/llama-cpp-gemma3-divzero-poc with Unsloth Studio:
Install Unsloth Studio (macOS, Linux, WSL)
curl -fsSL https://unsloth.ai/install.sh | sh
# Run unsloth studio
unsloth studio -H 0.0.0.0 -p 8888
# Then open http://localhost:8888 in your browser
# Search for salvepilo/llama-cpp-gemma3-divzero-poc to start chatting
Install Unsloth Studio (Windows)
irm https://unsloth.ai/install.ps1 | iex
# Run unsloth studio
unsloth studio -H 0.0.0.0 -p 8888
# Then open http://localhost:8888 in your browser
# Search for salvepilo/llama-cpp-gemma3-divzero-poc to start chatting
Using HuggingFace Spaces for Unsloth
# No setup required
# Open https://huggingface.co/spaces/unsloth/studio in your browser
# Search for salvepilo/llama-cpp-gemma3-divzero-poc to start chatting
- Atomic Chat new
- Docker Model Runner
How to use salvepilo/llama-cpp-gemma3-divzero-poc with Docker Model Runner:
docker model run hf.co/salvepilo/llama-cpp-gemma3-divzero-poc
- Lemonade
How to use salvepilo/llama-cpp-gemma3-divzero-poc with Lemonade:
Pull the model
# Download Lemonade from https://lemonade-server.ai/
lemonade pull salvepilo/llama-cpp-gemma3-divzero-poc
Run and chat with the model
lemonade run user.llama-cpp-gemma3-divzero-poc-{{QUANT_TAG}}
List all available models
lemonade list
| // Minimal reproducer for Gemma3 integer division-by-zero | |
| // Mirrors the vulnerable code in src/models/gemma3.cpp:32 | |
| // and src/llama-model.cpp:1147-1171 | |
| // | |
| // Compile: g++ -o reproducer reproducer.cpp -fsanitize=undefined -fno-sanitize-recover=all | |
| // Run: ./reproducer | |
| // Mirrors llama_hparams (simplified) | |
| struct llama_hparams { | |
| uint32_t n_embd = 3072; // from gemma3.embedding_length in GGUF | |
| uint32_t n_layer_all = 62; // from gemma3.block_count = 62 β LLM_TYPE_27B | |
| uint32_t n_embd_head_k_full = 0; | |
| std::array<uint32_t, LLAMA_MAX_LAYERS> n_head_arr = {}; // all zeros β key missing from GGUF | |
| uint32_t n_head(uint32_t il = 0) const { | |
| return n_head_arr[il]; // returns 0 when key is absent | |
| } | |
| }; | |
// Model-size tags that main() assigns from the layer count.
// Enumerators keep their implicit values (0..5) in declaration order.
enum llm_type {
    LLM_TYPE_UNKNOWN,
    LLM_TYPE_1B,
    LLM_TYPE_4B,
    LLM_TYPE_8B,
    LLM_TYPE_12B,
    LLM_TYPE_27B
};
| int main() { | |
| llama_hparams hparams; | |
| // --- Mirrors llama-model.cpp:1147 (general hparams loader) --- | |
| // When n_head() == 0, n_embd_head_k_full is set to 0 | |
| if (hparams.n_head() > 0) { | |
| hparams.n_embd_head_k_full = hparams.n_embd / hparams.n_head(); | |
| } else { | |
| hparams.n_embd_head_k_full = 0; | |
| } | |
| // --- Mirrors gemma3.cpp:20-32 (load_arch_hparams) --- | |
| llm_type type = LLM_TYPE_UNKNOWN; | |
| switch (hparams.n_layer_all) { | |
| case 18: type = LLM_TYPE_UNKNOWN; break; // 270M | |
| case 26: type = LLM_TYPE_1B; break; | |
| case 32: type = LLM_TYPE_8B; break; | |
| case 34: type = LLM_TYPE_4B; break; | |
| case 48: type = LLM_TYPE_12B; break; | |
| case 62: type = LLM_TYPE_27B; break; // <-- block_count=62 triggers this | |
| default: type = LLM_TYPE_UNKNOWN; break; | |
| } | |
| printf("block_count = %u β type = %s\n", hparams.n_layer_all, | |
| type == LLM_TYPE_27B ? "LLM_TYPE_27B" : "other"); | |
| printf("n_head(0) = %u (key absent from GGUF β stays 0)\n", hparams.n_head(0)); | |
| printf("n_embd = %u\n", hparams.n_embd); | |
| printf("\nExecuting vulnerable line (gemma3.cpp:32):\n"); | |
| printf(" hparams.n_embd / hparams.n_head(0) = %u / %u\n", | |
| hparams.n_embd, hparams.n_head(0)); | |
| // THE VULNERABLE COMPUTATION β mirrors gemma3.cpp:32 exactly | |
| // On x86_64: SIGFPE (exit 136) | |
| // On ARM64: silent UB (SDIV returns 0), UBSan aborts with "division by zero" | |
| float f_attention_scale = (type == LLM_TYPE_27B) | |
| ? 1.0f / std::sqrt(float(hparams.n_embd / hparams.n_head(0))) // INTEGER DIV BY ZERO | |
| : 1.0f / std::sqrt(float(hparams.n_embd_head_k_full)); | |
| // Should never reach here on x86_64 | |
| printf("f_attention_scale = %f (should not reach here on x86_64)\n", f_attention_scale); | |
| return 0; | |
| } | |