#!/bin/bash

# Launch the llama-server binary for the model at /models/model.gguf,
# bound to every interface (0.0.0.0) on port 7860.
#
# --threads is taken from nproc. The substitution is quoted so it always
# reaches the server as exactly one argument; unquoted, an empty nproc result
# would vanish and --threads would take the next flag (--mlock) as its value.
#
# NOTE(review): --fit off, --mlock and --no-mmap are passed through unchanged;
# presumably they disable automatic memory fitting and keep the fully loaded
# model locked in RAM -- confirm against `llama-server --help` for the build
# in use.
#
# NOTE(review): the last line ends with a line-continuation backslash, so any
# arguments on the lines that follow belong to this same command.
/app/llama.cpp/build/bin/llama-server \
  --model /models/model.gguf \
  --host 0.0.0.0 \
  --port 7860 \
  --fit off \
  --threads "$(nproc)" \
  --mlock \
  --no-mmap \