File size: 3,881 Bytes
789c7db
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#!/bin/bash
# Fail fast: abort on any command error (-e), on use of unset variables (-u),
# and on a failure anywhere in a pipeline (-o pipefail). Without this, a
# failed download or conversion step would be silently ignored and later
# steps would operate on missing files.
set -euo pipefail

################################################################################
# Shell script that starts a copy of Ollama with a base model plus all the
# available LoRA adapters in this repository.
#
# Converts `.safetensors` to `.gguf` for running in Ollama.
# Target application is GitHub Actions and CPU only systems.
# To keep the intermediate files, remove the `rm` command at the end.
#
# To run this script:
# 1. Install an appropriate build of Ollama for your machine and use port 55555
#    See https://docs.ollama.com/linux#manual-install and set port with
#    ```
#    curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz \
#      | sudo tar zx -C /usr
#    OLLAMA_HOST="localhost:55555" ollama serve &
#    ```
#    See https://docs.ollama.com/faq#how-do-i-configure-ollama-server for other
#    operating systems.
# 2. Download the intrinsics library by running:
#    hf download ibm-granite/rag-intrinsics-lib --local-dir ./rag-intrinsics-lib
# 3. Edit the constants BASE_MODEL_NAME, BASE_MODEL_ORG as needed
# 4. Run this script from the root of your local copy of rag-intrinsics-lib.
################################################################################

echo "Setup"
# Port 55555 matches the `ollama serve` invocation shown in the header comment.
OLLAMA_HOST="localhost:55555"
# Scratch directory for all intermediate artifacts; removed at the end.
OLLAMA_DIR="_ollama"
mkdir -p "$OLLAMA_DIR"
echo ""

echo "Download base model"
MODEL_DIR="$OLLAMA_DIR/models"
mkdir -p "$MODEL_DIR"

OLLAMA_MODEL_NAME=granite3.3:8b # Quantized model hosted on the Ollama registry
BASE_MODEL_NAME=granite-3.3-8b-instruct
BASE_MODEL_NAME_UPPER=granite-3.3-8B-instruct # Capitalization llama.cpp uses in output filenames
BASE_MODEL_ORG=ibm-granite

# huggingface_hub provides the `hf` CLI used to fetch the safetensors checkpoint.
pip install huggingface_hub
hf download "$BASE_MODEL_ORG/$BASE_MODEL_NAME" --local-dir "$MODEL_DIR/$BASE_MODEL_NAME"
echo ""

echo "Clone llama.cpp and install dependencies"
LLAMA_CPP_FOLDER_NAME="$OLLAMA_DIR/llama.cpp"
# Shallow clone: only the conversion scripts are needed, not the full history.
git clone --depth 1 --single-branch --branch master https://github.com/ggml-org/llama.cpp.git "$LLAMA_CPP_FOLDER_NAME"
pip install -r "$LLAMA_CPP_FOLDER_NAME/requirements/requirements-convert_hf_to_gguf.txt"
pip install -r "$LLAMA_CPP_FOLDER_NAME/requirements/requirements-convert_hf_to_gguf_update.txt"
pip install -r "$LLAMA_CPP_FOLDER_NAME/requirements/requirements-convert_lora_to_gguf.txt"
echo ""

OUTTYPE="q8_0"
OUTTYPE_UPPER="Q8_0" # Capitalization llama.cpp uses in output filenames
echo "Convert base model to GGUF: $OUTTYPE"

# Writes <BASE_MODEL_NAME_UPPER>-<OUTTYPE_UPPER>.gguf next to the checkpoint.
python "$LLAMA_CPP_FOLDER_NAME/convert_hf_to_gguf.py" "$MODEL_DIR/$BASE_MODEL_NAME" --outtype "$OUTTYPE"
MODEL_GGUF=$(realpath "$MODEL_DIR/$BASE_MODEL_NAME/$BASE_MODEL_NAME_UPPER-$OUTTYPE_UPPER.gguf")
echo ""

echo "Convert LoRA adapters to GGUF"
# Newline-separated list of adapter directories; `cut -c 3-` strips the
# leading "./" that find prepends. Kept as a plain string so later loops can
# consume it the same way.
LORA_DIRS=$( find . -name "$BASE_MODEL_NAME" -path "*/lora/$BASE_MODEL_NAME*" | sort | cut -c 3- )
# Read line-by-line instead of word-splitting so paths with spaces survive.
while IFS= read -r LORA_DIR; do
    [ -n "$LORA_DIR" ] || continue
    LORA_GGUF="$LORA_DIR/$BASE_MODEL_NAME_UPPER-$OUTTYPE_UPPER-LoRA.gguf"
    # Skip adapters already converted on a previous run.
    if [ ! -f "$LORA_GGUF" ]; then
        python "$LLAMA_CPP_FOLDER_NAME/convert_lora_to_gguf.py" "$LORA_DIR" --base "$MODEL_DIR/$BASE_MODEL_NAME" --outtype "$OUTTYPE"
    fi
done <<< "$LORA_DIRS"
echo ""

echo "Create Modelfiles and Ollama models"
MODELFILE_DIR="$OLLAMA_DIR/Modelfiles"
mkdir -p "$MODELFILE_DIR"
# Read line-by-line instead of word-splitting so paths with spaces survive.
while IFS= read -r LORA_DIR; do
    [ -n "$LORA_DIR" ] || continue
    LORA_GGUF=$(realpath "$LORA_DIR/$BASE_MODEL_NAME_UPPER-$OUTTYPE_UPPER-LoRA.gguf")

    # Derive a flat Modelfile name from the adapter path:
    # "name/lora/model" -> "name_lora_model". The ".." substitution mirrors
    # the original script; it only fires if a path contains "..".
    MODELFILE=${LORA_DIR//\//_}
    MODELFILE=${MODELFILE//../"Modelfile"}

    MODELFILEPATH=$MODELFILE_DIR/$MODELFILE

    # First path component is the intrinsic's name, used as the Ollama model name.
    LORA_NAME=${LORA_DIR%%/*}

    echo ""
    echo "Creating $LORA_NAME | $MODELFILEPATH"

    # Use GGUF converted model instead of the Ollama-hosted one:
    # printf 'FROM %s\nADAPTER %s\n' "$MODEL_GGUF" "$LORA_GGUF" > "$MODELFILEPATH"
    # printf 'FROM %s\nADAPTER %s\n' "$MODEL_GGUF" "$LORA_GGUF"

    # Use quantized model from Ollama. Keep the printf format string constant
    # and pass data as arguments (SC2059).
    printf 'FROM %s\nADAPTER %s\n' "$OLLAMA_MODEL_NAME" "$LORA_GGUF" > "$MODELFILEPATH"
    printf 'FROM %s\nADAPTER %s\n' "$OLLAMA_MODEL_NAME" "$LORA_GGUF"

    echo ""

    OLLAMA_HOST="$OLLAMA_HOST" ollama create "$LORA_NAME" -f "$MODELFILEPATH"
done <<< "$LORA_DIRS"
echo ""

echo "Clean up"
# Remove all intermediate files (downloaded checkpoint, llama.cpp clone,
# GGUF conversions, Modelfiles). Comment these lines out to keep them.
echo "rm -rf $OLLAMA_DIR"
# ${OLLAMA_DIR:?} aborts if the variable is unset/empty, and `--` stops
# option parsing — guards against an accidental `rm -rf` of the wrong path.
rm -rf -- "${OLLAMA_DIR:?}"