import sys

# Example llama.cpp server launch once the model has been downloaded:
#
#   CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES=3 ~/llama.cpp/build/bin/llama-server \
#       -m /home/mshahidul/readctrl_model/translate_gemma/translategemma-27b-it-Q8_0.gguf \
#       --n-gpu-layers 999 \
#       --flash-attn on


def main() -> int:
    # Import inside main() so a missing dependency is reported cleanly;
    # a module-level import would raise an unhandled ImportError before
    # the except clause below could ever run.
    try:
        from huggingface_hub import hf_hub_download
    except ImportError:
        print("huggingface_hub not found. Install it and try again.", file=sys.stderr)
        return 1

    try:
        hf_hub_download(
            repo_id="bullerwins/translategemma-27b-it-GGUF",
            filename="translategemma-27b-it-Q8_0.gguf",
            local_dir="/home/mshahidul/readctrl_model/translate_gemma",
            local_dir_use_symlinks=False,  # deprecated and ignored by recent huggingface_hub
        )
        return 0
    except Exception as exc:
        print(str(exc), file=sys.stderr)
        return 1


if __name__ == "__main__":
    raise SystemExit(main())
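
# Once the download finishes and llama-server is running (see the launch command
# in the comment block at the top of this file), a quick smoke test against the
# server's OpenAI-compatible chat endpoint might look like the sketch below.
# This assumes llama-server's default listen address of 127.0.0.1:8080 and that
# the `requests` package is installed; the prompt is purely illustrative.
#
#   import requests
#
#   resp = requests.post(
#       "http://127.0.0.1:8080/v1/chat/completions",
#       json={
#           "messages": [
#               {"role": "user", "content": "Translate to French: Hello, world!"}
#           ],
#           "max_tokens": 64,
#       },
#       timeout=120,
#   )
#   resp.raise_for_status()
#   print(resp.json()["choices"][0]["message"]["content"])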