MODEL_NAME= open-llama-7b
PYTHON?= python
LLAMA_BUILD= 1132
LLAMA_TAR= b$(LLAMA_BUILD).tar.gz
LLAMA_DIR= llama.cpp-b$(LLAMA_BUILD)
LLAMA_FLAGS=
HF_REPO= openlm-research/open_llama_7b
HF_REF= main
HF_FILES= pytorch_model-00001-of-00002.bin \
	pytorch_model-00002-of-00002.bin \
	tokenizer.model \
	config.json \
	tokenizer_config.json
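
# Target-specific SITE variables: each downloadable file knows where it comes from.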
$(HF_FILES): SITE= https://huggingface.co/$(HF_REPO)/resolve/$(HF_REF)
$(LLAMA_TAR): SITE= https://github.com/ggerganov/llama.cpp/archive/refs/tags
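
# Output types to build: f16 plus the llama.cpp quantization formats (incl. k-quants).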
QUANTS= f16 q4_0 q4_1 q5_0 q5_1 q8_0 \
	q2_K \
	q3_K_S q3_K_M q3_K_L \
	q4_K_S q4_K_M \
	q5_K_S q5_K_M \
	q6_K
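
# Everything that must be downloaded, and every GGUF file this Makefile produces.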
FILES= $(HF_FILES) $(LLAMA_TAR)
MODEL_FILES= $(foreach q,$(QUANTS),$(MODEL_NAME)-$(q).gguf)
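
# Default target: all quantized models plus a checksum manifest.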
all: $(MODEL_FILES) SHA256SUMS
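
# Generic download rule; $(SITE) is supplied by the target-specific assignments above.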
$(FILES):
	curl -L -o $@ --url $(SITE)/$@
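
# Unpack the llama.cpp source tree. The order-only prerequisite means a newer
# tarball does not force re-extraction.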
$(LLAMA_DIR): | $(LLAMA_TAR)
	tar -xf $(LLAMA_TAR)
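
# Build the quantize tool inside the extracted llama.cpp tree.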
$(LLAMA_DIR)/quantize: | $(LLAMA_DIR)
	$(MAKE) -C $(LLAMA_DIR) $(LLAMA_FLAGS) quantize
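
# Python virtualenv with the gguf package and the conversion script's dependencies.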
venv: | $(LLAMA_DIR)
	$(PYTHON) -m venv venv
	venv/bin/pip install -e $(LLAMA_DIR)/gguf-py
	venv/bin/pip install -r $(LLAMA_DIR)/requirements.txt
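
# Convert the Hugging Face checkpoint (in the current directory) to a single
# f16 GGUF, which is the input for every quantized variant.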
$(MODEL_NAME)-f16.gguf: $(HF_FILES) | $(LLAMA_DIR) venv
	venv/bin/python $(LLAMA_DIR)/convert.py --outtype f16 --outfile $@ .
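
# Pattern rule: each quantized GGUF is derived from the f16 model; the stem
# (e.g. 4_K_M) supplies the quantization type passed to quantize.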
$(MODEL_NAME)-q%.gguf: $(MODEL_NAME)-f16.gguf | $(LLAMA_DIR)/quantize
	$(LLAMA_DIR)/quantize $< $@ q$*
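
# One SHA-256 checksum file per model file.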
%.sha: %
	sha256sum $< > $@
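
# Concatenate the per-file checksums into a single manifest.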
SHA256SUMS: $(addsuffix .sha,$(MODEL_FILES))
	cat $^ > $@