#!/usr/bin/env bash
#
# Prepare data, pretrained weights and dependencies for PaddleOCR
# text-recognition training with rec_vi_paddle_v4.yml, then start training.
#
# Run this from the directory that should hold everything (dataset/,
# pretrained/, checkpoints/, inference/, the config file and the PaddleOCR
# clone). All paths below are resolved relative to that starting directory.
#
# Environment:
#   GPUS  Optional comma-separated GPU ids (e.g. "0,1,2,3,4,5,6,7").
#         When set, training is started through paddle.distributed.launch;
#         when unset or empty, a single-process training run is started.

set -euo pipefail

readonly HF_PADDLE_OCR='https://huggingface.co/anhbn/paddle_ocr/resolve/main'
readonly HF_OCR_GEN='https://huggingface.co/anhbn/ocr_generation/resolve/main'
readonly CONFIG_NAME='rec_vi_paddle_v4.yml'

# Install the command-line tools used by the rest of the script.
install_system_packages() {
  apt-get update -y
  apt-get install -y zip unzip nano tmux
}

# Download and unpack the training data into ./dataset, then build the dataset.
fetch_dataset() {
  mkdir -p dataset
  # Subshell: the directory change cannot leak into later steps.
  (
    cd dataset
    wget "${HF_PADDLE_OCR}/handing_write.zip"
    wget "${HF_OCR_GEN}/ocr_gen_data.zip"
    wget "${HF_PADDLE_OCR}/train_list.txt"
    wget "${HF_PADDLE_OCR}/val_list.txt"

    # Drop any previous extraction so unzip does not stop on existing files.
    rm -rf ocr_gen_data
    unzip ocr_gen_data.zip
    unzip handing_write.zip
    rm -f ocr_gen_data.zip handing_write.zip

    # NOTE(review): create_dataset.py is not downloaded by this script;
    # presumably it ships inside one of the archives — confirm.
    python create_dataset.py
  )
}

# Download the pretrained checkpoint files into ./pretrained.
fetch_pretrained() {
  mkdir -p pretrained
  (
    cd pretrained
    wget "${HF_PADDLE_OCR}/best_accuracy.pdopt"
    wget "${HF_PADDLE_OCR}/best_accuracy.pdparams"
    wget "${HF_PADDLE_OCR}/best_accuracy.states"
  )
}

# Create output directories and download the Google Drive files into the
# current directory.
fetch_drive_files() {
  pip install gdown
  mkdir -p checkpoints inference
  # File ids are passed directly; the --id flag is deprecated in gdown.
  # NOTE(review): one of these is presumably rec_vi_paddle_v4.yml — confirm.
  gdown 1FMaL_GsS0et49aDcIQlLoJ74Zg-cWq1x
  gdown 1JSiLFj2A2JquzlOyC88_6UGjF9mE_7RT
  gdown 1sujVpti29Ee9bxJlj-L5jw9KfL4MzAoR
}

# Let the user adjust the training config (dataset path, batch_size, ...).
# Arguments: $1 - path to the config file
edit_config() {
  local config=$1
  if [[ -t 0 && -t 1 ]]; then
    nano "$config"
  else
    # No terminal: nano cannot run, so warn instead of aborting.
    printf 'warning: no terminal; edit %s (dataset path, batch_size, ...) before training\n' \
      "$config" >&2
  fi
}

# Clone PaddleOCR (if not already present) and install the Python packages.
install_paddleocr() {
  if [[ ! -d PaddleOCR/.git ]]; then
    git clone https://github.com/PaddlePaddle/PaddleOCR.git
  fi
  pip install paddlepaddle-gpu==3.0.0b1 \
    -i https://www.paddlepaddle.org.cn/packages/stable/cu118/
  pip install pyclipper rapidfuzz visualdl wikipedia lmdb
  pip install numpy==1.25.2 scikit-image albumentations shapely
}

# Start training from inside the PaddleOCR checkout.
# Arguments: $1 - absolute path to the config file
# Globals:   GPUS (read) - selects distributed vs. single-process training
train() {
  local config=$1
  cd PaddleOCR
  if [[ -n "${GPUS:-}" ]]; then
    python3 -m paddle.distributed.launch --gpus "$GPUS" \
      tools/train.py -c "$config"
  else
    python3 tools/train.py -c "$config"
  fi
}

main() {
  # Absolute config path so it stays valid after changing into PaddleOCR.
  local config="${PWD}/${CONFIG_NAME}"

  install_system_packages
  fetch_dataset
  fetch_pretrained
  fetch_drive_files
  edit_config "$config"
  install_paddleocr
  train "$config"
}

main "$@"