#!/usr/bin/env bash
# readctrl/script/t3.sh
# Uploaded by shahidul034 via upload-large-folder tool (commit 1db7196, verified).
# Builds llama.cpp with CUDA, launches llama-server instances, and runs
# GPT-5 translation-correction jobs over Gemma-translated clinical data.
# --- Build llama.cpp with CUDA support ---------------------------------------
# Clone the repository (abort if the clone fails so we never build stale/empty
# sources; every cd is checked so later commands cannot run in the wrong dir).
git clone https://github.com/ggml-org/llama.cpp || exit 1
cd llama.cpp || exit 1
# Create build directory (-p makes the script safe to re-run).
mkdir -p build
cd build || exit 1
# Configure for CUDA 13.0 and A100 (sm_80).
# -DGGML_CUDA=ON enables GPU acceleration.
cmake .. -DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=80 || exit 1
# Build the binaries (using all CPU cores).
cmake --build . --config Release -j "$(nproc)" || exit 1
# Clear previous build artifacts before a rebuild.
# The cd MUST be checked: an unguarded `cd dir; rm -rf *` wipes the current
# working directory if the cd fails. `--` and `./*` keep rm from treating
# odd filenames as options; ${HOME:?} aborts if HOME is somehow unset.
# NOTE(review): this also deletes the binaries produced by the build above —
# run it only before a rebuild, not before launching the servers below.
cd "${HOME:?}/llama.cpp/build" || exit 1
rm -rf -- ./*
# Serve the TranslateGemma 27B (Q8_0) model, pinned to GPU index 2 in
# PCI-bus order, with all layers offloaded and flash attention enabled.
gemma_model=/home/mshahidul/readctrl/models/translategemma-27b-it-Q8_0.gguf
CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES=2 \
  ~/llama.cpp/build/bin/llama-server \
  --model "$gemma_model" \
  --n-gpu-layers 999 \
  --flash-attn
# Serve Meta-Llama-3.1-70B (Q4_K_M) on port 8080, reachable from any
# interface, with all layers on GPU and flash attention enabled.
llama_model=~/models/Meta-Llama-3.1-70B-Instruct-Q4_K_M.gguf
~/llama.cpp/build/bin/llama-server \
  --model "$llama_model" \
  --n-gpu-layers 99 \
  --flash-attn \
  --host 0.0.0.0 \
  --port 8080
# Run GPT-5 translation correction over items 0-80 of each Gemma-translated
# split (Chinese, Vietnamese, Hindi) — same three invocations, same order.
correction_script=/home/mshahidul/readctrl/script/translate_correction_gpt5.py
data_dir=/home/mshahidul/readctrl/data/translated_data/translation_wo_judge
for lang in zh vi hi; do
  python "$correction_script" \
    --input "${data_dir}/multiclinsum_gs_train_en2${lang}_gemma(0_200).json" \
    --start 0 --end 80
done