# Clone the repository
git clone https://github.com/ggml-org/llama.cpp
cd llama.cpp

# Create a build directory
mkdir build
cd build

# Configure for CUDA 13.0 and the A100 (sm_80);
# -DGGML_CUDA=ON enables GPU acceleration
cmake .. -DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=80

# Build the binaries using all CPU cores
cmake --build . --config Release -j $(nproc)
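
# Optional sanity check, assuming the build above succeeded: print the
# build version of the freshly compiled binary to confirm it runs.
./bin/llama-cli --version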

# To reconfigure from scratch, wipe the build directory first
cd ~/llama.cpp/build
rm -rf *
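
# A lighter alternative (standard CMake behavior, keeps the configure
# cache so cmake .. does not have to be re-run): remove compiled objects only.
cmake --build . --target clean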

# Serve the TranslateGemma 27B model, pinned to GPU 2 in PCI bus order,
# with all layers offloaded and flash attention enabled
CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES=2 ~/llama.cpp/build/bin/llama-server \
  -m /home/mshahidul/readctrl/models/translategemma-27b-it-Q8_0.gguf \
  --n-gpu-layers 999 \
  --flash-attn
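
# From another terminal: llama-server exposes a /health endpoint; with no
# --host/--port given it listens on 127.0.0.1:8080 by default.
curl http://127.0.0.1:8080/health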

# Serve Llama 3.1 70B on all visible GPUs, reachable from other machines
# (-ngl and -fa are the short forms of --n-gpu-layers and --flash-attn)
~/llama.cpp/build/bin/llama-server \
  -m ~/models/Meta-Llama-3.1-70B-Instruct-Q4_K_M.gguf \
  -ngl 99 \
  -fa \
  --port 8080 \
  --host 0.0.0.0
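
# Once up, the server speaks the OpenAI-compatible chat API. A minimal
# request sketch (the prompt and temperature here are placeholders):
curl http://127.0.0.1:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
        "messages": [
          {"role": "user", "content": "Say hello in Vietnamese."}
        ],
        "temperature": 0.2
      }'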

# Run the GPT-5-based translation-correction script over entries 0 through 80
# (--start 0 --end 80) of each Gemma translation file: Chinese, Vietnamese,
# and Hindi
python /home/mshahidul/readctrl/script/translate_correction_gpt5.py \
  --input "/home/mshahidul/readctrl/data/translated_data/translation_wo_judge/multiclinsum_gs_train_en2zh_gemma(0_200).json" \
  --start 0 --end 80
python /home/mshahidul/readctrl/script/translate_correction_gpt5.py \
  --input "/home/mshahidul/readctrl/data/translated_data/translation_wo_judge/multiclinsum_gs_train_en2vi_gemma(0_200).json" \
  --start 0 --end 80
python /home/mshahidul/readctrl/script/translate_correction_gpt5.py \
  --input "/home/mshahidul/readctrl/data/translated_data/translation_wo_judge/multiclinsum_gs_train_en2hi_gemma(0_200).json" \
  --start 0 --end 80
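
# The three runs differ only in the target-language suffix; assuming the
# (0_200) naming holds for every pair, a loop keeps this in one place:
for lang in zh vi hi; do
  python /home/mshahidul/readctrl/script/translate_correction_gpt5.py \
    --input "/home/mshahidul/readctrl/data/translated_data/translation_wo_judge/multiclinsum_gs_train_en2${lang}_gemma(0_200).json" \
    --start 0 --end 80
done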