#!/bin/bash

hf_model_url=${HF_MODEL_URL}
hf_token=${HF_TOKEN:-"None"}
model_org=$(echo "$hf_model_url" | sed -n 's/\(.*\)\/\(.*\)/\1/p')
model_name=$(echo "$hf_model_url" | sed -n 's/\(.*\)\/\(.*\)/\2/p')
keep_orig=${KEEP_ORIGINAL_MODEL}

# Skip all work if the quantized GGUF for this model already exists.
if [ -e "/opt/app-root/src/converter/converted_models/gguf/$model_org-$model_name-${QUANTIZATION}.gguf" ]; then
    echo "$model_org-$model_name-${QUANTIZATION}.gguf already exists... skipping"
    exit 0
fi

# If the model is already in the local cache, the download step below reuses
# the cached blobs instead of fetching them again.
if [ -e "/opt/app-root/src/converter/converted_models/cache/models--$model_org--$model_name" ]; then
    echo "$hf_model_url present in cache... skipping download"
fi

echo "Downloading $hf_model_url"
python download_huggingface.py --model "$hf_model_url" --token "$hf_token"

# Convert the downloaded Hugging Face weights to an f16 GGUF in the model directory.
python llama.cpp/examples/convert_legacy_llama.py "/opt/app-root/src/converter/converted_models/$hf_model_url"
python llama.cpp/convert_hf_to_gguf.py "/opt/app-root/src/converter/converted_models/$hf_model_url"

# Quantize the f16 GGUF to the requested quantization type.
mkdir -p /opt/app-root/src/converter/converted_models/gguf/
llama.cpp/llama-quantize "/opt/app-root/src/converter/converted_models/$hf_model_url/ggml-model-f16.gguf" "/opt/app-root/src/converter/converted_models/gguf/$model_org-$model_name-${QUANTIZATION}.gguf" "${QUANTIZATION}"

# Remove the intermediate, unquantized model files.
rm -rf "/opt/app-root/src/converter/converted_models/$model_org"

# Optionally drop the download cache as well.
if [ "$keep_orig" = "False" ]; then
    rm -rf /opt/app-root/src/converter/converted_models/cache
fi

echo "Converted and quantized model written to /opt/app-root/src/converter/converted_models/gguf/$model_org-$model_name-${QUANTIZATION}.gguf"
echo "$ ls /opt/app-root/src/converter/converted_models/gguf/"
ls /opt/app-root/src/converter/converted_models/gguf/
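
# A minimal usage sketch (added for illustration): the environment variable
# names are the ones this script reads; the model id, quantization type, and
# script file name below are placeholders, not values from the original.
#
#   HF_MODEL_URL=some-org/some-model \
#   QUANTIZATION=Q4_K_M \
#   HF_TOKEN=<optional token for gated models> \
#   KEEP_ORIGINAL_MODEL=False \
#   bash run.sh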